ref: c11957a7915d355d80ff7e6a21721ba05441ca29
parent: d8b894afc0ff3c81fc750826ac0016250e4f81e4
author: 9ferno <gophone2015@gmail.com>
date: Fri Aug 20 18:39:52 EDT 2021
replacing ip with 9front ip
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -47,7 +47,8 @@
#define haship(s) ((s)[IPaddrlen-1]%NHASH)
-extern int ReTransTimer = RETRANS_TIMER;
+int ReTransTimer = RETRANS_TIMER;
+
static void rxmitproc(void *v);
void
@@ -57,145 +58,121 @@
f->arp->f = f;
f->arp->rxmt = nil;
f->arp->dropf = f->arp->dropl = nil;
- kproc("rxmitproc", rxmitproc, f->arp, 0);
+ kproc("rxmitproc", rxmitproc, f->arp);
}
-/*
- * create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
{
- uint t;
- Block *next, *xp;
- Arpent *a, *e, *f, **l;
- Medium *m = ifc->m;
- int empty;
+ Block *next;
- /* find oldest entry */
- e = &arp->cache[NCACHE];
- a = arp->cache;
- t = a->utime;
- for(f = a; f < e; f++){
- if(f->utime < t){
- t = f->utime;
- a = f;
- }
+ while(bp != nil){
+ next = bp->list;
+ freeblist(bp);
+ bp = next;
}
+}
- /* dump waiting packets */
- xp = a->hold;
- a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+ Arpent **l;
- if(isv4(a->ip)){
- while(xp){
- next = xp->list;
- freeblist(xp);
- xp = next;
+ for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+ if(*l == a){
+ *l = a->nextrxt;
+ break;
}
}
- else { // queue icmp unreachable for rxmitproc later on, w/o arp lock
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
+ a->nextrxt = nil;
+ return l;
+}
- for(next = xp->list; next; next = next->list)
- xp = next;
- arp->dropl = xp;
- wakeup(&arp->rxmtq);
- }
- }
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent **l;
+ Block *bp;
/* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
+ for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+ if(*l == a){
*l = a->hash;
break;
}
- l = &f->hash;
}
+ a->hash = nil;
- /* insert into new chain */
- l = &arp->hash[haship(ip)];
- a->hash = *l;
- *l = a;
+ /* dump waiting packets */
+ bp = a->hold;
+ a->hold = nil;
+ if(isv4(a->ip))
+ freeblistchain(bp);
+ else {
+ rxmtunchain(arp, a);
- memmove(a->ip, ip, sizeof(a->ip));
- a->utime = NOW;
- a->ctime = 0;
- a->type = m;
+ /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(bp != nil){
+ if(arp->dropf == nil)
+ arp->dropf = bp;
+ else
+ arp->dropl->list = bp;
+ arp->dropl = a->last;
- a->rtime = NOW + ReTransTimer;
- a->rxtsrem = MAX_MULTICAST_SOLICIT;
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
-
- /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
- if(!ipismulticast(a->ip) && addrxt){
- l = &arp->rxmt;
- empty = (*l==nil);
-
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
+ if(bp == arp->dropf)
+ wakeup(&arp->rxmtq);
}
- for(f = *l; f; f = f->nextrxt){
- l = &f->nextrxt;
- }
- *l = a;
- if(empty)
- wakeup(&arp->rxmtq);
}
+ a->last = nil;
- a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
- return a;
-}
+ a->state = 0;
+ a->rxtsrem = 0;
-/* called with arp qlocked */
+ a->utime = 0;
+ a->ctime = 0;
-void
-cleanarpent(Arp *arp, Arpent *a)
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ * create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
{
- Arpent *f, **l;
+ Arpent *a, *e, *f, **l;
+ ulong t;
- a->utime = 0;
- a->ctime = 0;
- a->type = 0;
- a->state = 0;
-
- /* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
- *l = a->hash;
- break;
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
}
- l = &f->hash;
}
+ cleanarpent(arp, a);
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
+ ipmove(a->ip, ip);
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ return a;
}
+
/*
* fill in the media address if we have it. Otherwise return an
* Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
{
int hash;
Arpent *a;
- Medium *type = ifc->m;
uchar v6ip[IPaddrlen];
if(version == V4){
@@ -217,30 +193,28 @@
qlock(arp);
hash = haship(ip);
- for(a = arp->hash[hash]; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
- if(type == a->type)
+ for(a = arp->hash[hash]; a != nil; a = a->hash){
+ if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
break;
}
-
if(a == nil){
- a = newarp6(arp, ip, ifc, (version != V4));
+ a = newarpent(arp, ip, ifc);
a->state = AWAIT;
}
a->utime = NOW;
if(a->state == AWAIT){
if(bp != nil){
- if(a->hold)
- a->last->list = bp;
- else
+ bp->list = nil;
+ if(a->hold == nil)
a->hold = bp;
+ else
+ a->last->list = bp;
a->last = bp;
- bp->list = nil;
}
return a; /* return with arp qlocked */
}
- memmove(mac, a->mac, a->type->maclen);
+ memmove(mac, a->mac, ifc->m->maclen);
/* remove old entries */
if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
{
Block *bp;
- Arpent *f, **l;
- if(!isv4(a->ip)){
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
memmove(a->mac, mac, type->maclen);
- a->type = type;
+ if(a->state == AWAIT && !isv4(a->ip)){
+ rxmtunchain(arp, a);
+ a->rxtsrem = 0;
+ }
a->state = AOK;
- a->utime = NOW;
+ a->ctime = a->utime = NOW;
bp = a->hold;
- a->hold = nil;
+ a->hold = a->last = nil;
qunlock(arp);
return bp;
}
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
{
- Arp *arp;
- Route *r;
- Arpent *a, *f, **l;
- Ipifc *ifc;
- Medium *type;
- Block *bp, *next;
uchar v6ip[IPaddrlen];
+ Block *bp, *next;
+ Arpent *a;
+ Route *r;
+ Arp *arp;
- arp = fs->arp;
+ if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+ return -1;
- if(n != 6){
-// print("arp: len = %d\n", n);
- return;
- }
-
switch(version){
case V4:
- r = v4lookup(fs, ip, nil);
+ r = v4lookup(fs, ip, ia, nil);
v4tov6(v6ip, ip);
ip = v6ip;
break;
case V6:
- r = v6lookup(fs, ip, nil);
+ r = v6lookup(fs, ip, ia, nil);
break;
default:
panic("arpenter: version %d", version);
- return; /* to supress warnings */
+ return -1; /* to suppress warnings */
}
- if(r == nil){
-// print("arp: no route for entry\n");
- return;
- }
+ if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+ return -1;
- ifc = r->ifc;
- type = ifc->m;
-
+ arp = fs->arp;
qlock(arp);
- for(a = arp->hash[haship(ip)]; a; a = a->hash){
- if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+ if(a->ifc != ifc || a->ifcid != ifc->ifcid)
continue;
-
if(ipcmp(a->ip, ip) == 0){
- a->state = AOK;
- memmove(a->mac, mac, type->maclen);
-
- if(version == V6){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
- bp = a->hold;
- a->hold = nil;
if(version == V4)
ip += IPv4off;
- a->utime = NOW;
- a->ctime = a->utime;
- qunlock(arp);
-
- while(bp){
+ bp = arpresolve(arp, a, ifc->m, mac); /* unlocks arp */
+ for(; bp != nil; bp = next){
next = bp->list;
- if(ifc != nil){
- if(waserror()){
- runlock(ifc);
- nexterror();
- }
- rlock(ifc);
- if(ifc->m != nil)
- ifc->m->bwrite(ifc, bp, version, ip);
- else
- freeb(bp);
- runlock(ifc);
- poperror();
- } else
- freeb(bp);
- bp = next;
+ bp->list = nil;
+ if(waserror()){
+ freeblistchain(next);
+ break;
+ }
+ ipifcoput(ifc, bp, version, ip);
+ poperror();
}
- return;
+ return 1;
}
}
if(refresh == 0){
- a = newarp6(arp, ip, ifc, 0);
+ a = newarpent(arp, ip, ifc);
a->state = AOK;
- a->type = type;
- a->ctime = NOW;
- memmove(a->mac, mac, type->maclen);
+ a->ctime = a->utime = NOW;
+ memmove(a->mac, mac, n);
}
-
qunlock(arp);
+
+ return refresh == 0;
}
int
@@ -401,13 +325,12 @@
arpwrite(Fs *fs, char *s, int len)
{
int n;
- Route *r;
Arp *arp;
- Block *bp;
- Arpent *a, *fl, **l;
+ Arpent *a, *x;
Medium *m;
- char *f[4], buf[256];
- uchar ip[IPaddrlen], mac[MAClen];
+ Ipifc *ifc;
+ char *f[5], buf[256];
+ uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
arp = fs->arp;
@@ -420,7 +343,7 @@
if(len > 0 && buf[len-1] == '\n')
buf[len-1] = 0;
- n = getfields(buf, f, 4, 1, " ");
+ n = getfields(buf, f, nelem(f), 1, " ");
if(strcmp(f[0], "flush") == 0){
qlock(arp);
for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
memset(a->ip, 0, sizeof(a->ip));
memset(a->mac, 0, sizeof(a->mac));
a->hash = nil;
+ a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
a->state = 0;
+ a->rxtsrem = 0;
+ a->ctime = 0;
a->utime = 0;
- while(a->hold != nil){
- bp = a->hold->list;
- freeblist(a->hold);
- a->hold = bp;
- }
+ freeblistchain(a->hold);
+ a->hold = a->last = nil;
}
memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+ freeblistchain(arp->dropf);
+ arp->dropf = arp->dropl = nil;
arp->rxmt = nil;
- arp->dropf = nil;
- arp->dropl = nil;
qunlock(arp);
} else if(strcmp(f[0], "add") == 0){
switch(n){
@@ -446,64 +370,53 @@
default:
error(Ebadarg);
case 3:
- parseip(ip, f[1]);
- if(isv4(ip))
- r = v4lookup(fs, ip+IPv4off, nil);
- else
- r = v6lookup(fs, ip, nil);
- if(r == nil)
- error("Destination unreachable");
- m = r->ifc->m;
- n = parsemac(mac, f[2], m->maclen);
+ if(parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
case 4:
m = ipfindmedium(f[1]);
- if(m == nil)
+ if(m == nil || m->maclen == 0)
error(Ebadarp);
- parseip(ip, f[2]);
- n = parsemac(mac, f[3], m->maclen);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
+ case 5:
+ m = ipfindmedium(f[1]);
+ if(m == nil || m->maclen == 0)
+ error(Ebadarp);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ if(parseip(ia, f[4]) == -1)
+ error(Ebadip);
+ break;
}
-
- if(m->ares == nil)
- error(Ebadarp);
-
- m->ares(fs, V6, ip, mac, n, 0);
+ if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+ error("no interface");
+ rlock(ifc);
+ if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+ runlock(ifc);
+ error("destination unreachable");
+ }
+ runlock(ifc);
} else if(strcmp(f[0], "del") == 0){
- if(n != 2)
+ if (n != 2)
error(Ebadarg);
-
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
qlock(arp);
-
- l = &arp->hash[haship(ip)];
- for(a = *l; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
- *l = a->hash;
- break;
- }
- l = &a->hash;
+ for(a = arp->hash[haship(ip)]; a != nil; a = x){
+ x = a->hash;
+ if(ipcmp(ip, a->ip) == 0)
+ cleanarpent(arp, a);
}
-
- if(a){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(fl = *l; fl; fl = fl->nextrxt){
- if(fl == a){
- *l = a->nextrxt;
- break;
- }
- l = &fl->nextrxt;
- }
-
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
- memset(a->ip, 0, sizeof(a->ip));
- memset(a->mac, 0, sizeof(a->mac));
- }
qunlock(arp);
} else
error(Ebadarp);
@@ -511,13 +424,6 @@
return len;
}
-enum
-{
- Alinelen= 90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
static void
convmac(char *p, uchar *mac, int n)
{
@@ -526,136 +432,136 @@
}
int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
{
+ char mac[2*MAClen+1], *state, *mname, *p;
+ uchar ip[IPaddrlen], ia[IPaddrlen];
+ Ipifc *ifc;
Arpent *a;
- int n;
- char mac[2*MAClen+1];
+ long n, o;
- if(offset % Alinelen)
- return 0;
-
- offset = offset/Alinelen;
- len = len/Alinelen;
-
- n = 0;
+ p = s;
+ o = -offset;
for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
- if(a->state == 0)
+ if(a->state == 0 || (ifc = a->ifc) == nil)
continue;
- if(offset > 0){
- offset--;
+
+ rlock(ifc);
+ qlock(arp);
+ state = arpstate[a->state];
+ ipmove(ip, a->ip);
+ if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+ qunlock(arp);
+ runlock(ifc);
continue;
}
- len--;
- qlock(arp);
- convmac(mac, a->mac, a->type->maclen);
- n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ mname = ifc->m->name;
+ convmac(mac, a->mac, ifc->m->maclen);
qunlock(arp);
+ runlock(ifc);
+
+ n = snprint(up->genbuf, sizeof up->genbuf,
+ "%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+ mname, state, ip, mac, ia);
+ o += n;
+ if(o <= 0)
+ continue;
+ if(n > len)
+ break;
+ memmove(p, up->genbuf, n);
+ len -= n;
+ p += n;
}
- return n;
+ return p - s;
}
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
{
- uint sflag;
- Block *next, *xp;
- Arpent *a, *b, **l;
- Fs *f;
- uchar ipsrc[IPaddrlen];
- Ipifc *ifc = nil;
- long nrxt;
+ uchar targ[IPaddrlen], src[IPaddrlen];
+ Arpent **l;
- qlock(arp);
- f = arp->f;
+ a->ctime = NOW;
+ if(a->rxtsrem == 0)
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ else
+ a->rxtsrem--;
- a = arp->rxmt;
- if(a==nil){
- nrxt = 0;
- goto dodrops; //return nrxt;
- }
- nrxt = a->rtime - NOW;
- if(nrxt > 3*ReTransTimer/4)
- goto dodrops; //return nrxt;
+ /* put on end of re-transmit chain */
+ for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+ ;
+ *l = a;
- for(; a; a = a->nextrxt){
- ifc = a->ifc;
- assert(ifc != nil);
- if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
- xp = a->hold;
- a->hold = nil;
+ if(l == &f->arp->rxmt)
+ wakeup(&f->arp->rxmtq);
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
- }
+ /* try to use source address of original packet */
+ ipmove(targ, a->ip);
+ if(a->last != nil){
+ ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+ arprelease(f->arp, a);
- cleanarpent(arp, a);
- }
- else
- break;
+ if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+ goto send;
+ } else {
+ arprelease(f->arp, a);
}
- if(a == nil)
- goto dodrops;
+ if(!ipv6local(ifc, src, 0, targ))
+ return;
+send:
+ if(!waserror()){
+ icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+ poperror();
+ }
+}
+static void
+rxmitsols(Arp *arp)
+{
+ Block *next, *bp;
+ Arpent *a;
+ Ipifc *ifc;
+ Route *r;
- qunlock(arp); /* for icmpns */
- if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
- icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
-
- runlock(ifc);
- qlock(arp);
-
- /* put to the end of re-transmit chain */
- l = &arp->rxmt;
- for(b = *l; b; b = b->nextrxt){
- if(b == a){
- *l = a->nextrxt;
- break;
+ qlock(arp);
+ while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+ if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+ if(a->ifcid == ifc->ifcid){
+ ndpsendsol(arp->f, ifc, a); /* unlocks arp */
+ runlock(ifc);
+ qlock(arp);
+ continue;
+ }
+ runlock(ifc);
}
- l = &b->nextrxt;
+ cleanarpent(arp, a);
}
- for(b = *l; b; b = b->nextrxt){
- l = &b->nextrxt;
- }
- *l = a;
- a->rxtsrem--;
- a->nextrxt = nil;
- a->rtime = NOW + ReTransTimer;
-
- a = arp->rxmt;
- if(a==nil)
- nrxt = 0;
- else
- nrxt = a->rtime - NOW;
-
-dodrops:
- xp = arp->dropf;
- arp->dropf = nil;
- arp->dropl = nil;
+ bp = arp->dropf;
+ arp->dropf = arp->dropl = nil;
qunlock(arp);
- for(; xp; xp = next){
- next = xp->list;
- icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+ for(; bp != nil; bp = next){
+ next = bp->list;
+ bp->list = nil;
+ r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+ if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+ if(!waserror()){
+ icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+ poperror();
+ }
+ runlock(ifc);
+ }
+ freeblist(bp);
}
-
- return nrxt;
-
}
static int
rxready(void *v)
{
- Arp *arp = (Arp *) v;
- int x;
+ Arp *arp = (Arp *)v;
- x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
- return x;
+ return arp->rxmt != nil || arp->dropf != nil;
}
static void
@@ -662,20 +568,15 @@
rxmitproc(void *v)
{
Arp *arp = v;
- long wakeupat;
arp->rxmitp = up;
- //print("arp rxmitproc started\n");
if(waserror()){
- arp->rxmitp = 0;
+ arp->rxmitp = nil;
pexit("hangup", 1);
}
for(;;){
- wakeupat = rxmitsols(arp);
- if(wakeupat == 0)
- sleep(&arp->rxmtq, rxready, v);
- else if(wakeupat > ReTransTimer/4)
- tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ sleep(&arp->rxmtq, rxready, v);
+ rxmitsols(arp);
+ tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
}
}
-
--- a/os/ip/bootp.c
+++ /dev/null
@@ -1,231 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static ulong fsip;
-static ulong auip;
-static ulong gwip;
-static ulong ipmask;
-static ulong ipaddr;
-
-enum
-{
- Bootrequest = 1,
- Bootreply = 2,
-};
-
-typedef struct Bootp
-{
- /* udp.c oldheader */
- uchar raddr[IPaddrlen];
- uchar laddr[IPaddrlen];
- uchar rport[2];
- uchar lport[2];
- /* bootp itself */
- uchar op; /* opcode */
- uchar htype; /* hardware type */
- uchar hlen; /* hardware address len */
- uchar hops; /* hops */
- uchar xid[4]; /* a random number */
- uchar secs[2]; /* elapsed snce client started booting */
- uchar pad[2];
- uchar ciaddr[4]; /* client IP address (client tells server) */
- uchar yiaddr[4]; /* client IP address (server tells client) */
- uchar siaddr[4]; /* server IP address */
- uchar giaddr[4]; /* gateway IP address */
- uchar chaddr[16]; /* client hardware address */
- uchar sname[64]; /* server host name (optional) */
- uchar file[128]; /* boot file name */
- uchar vend[128]; /* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static Bootp req;
-static Proc* rcvprocp;
-static int recv;
-static int done;
-static Rendez bootpr;
-static char rcvbuf[512+2*IPaddrlen+2*2];
-
-static void
-rcvbootp(void *a)
-{
- int n, fd;
- Bootp *rp;
- char *field[4];
- uchar ip[IPaddrlen];
-
- if(waserror())
- pexit("", 0);
- rcvprocp = up; /* store for postnote below */
- fd = (int)a; /* at compilation: warning: ../ip/bootp.c:78 conversion of pointer to shorter integer */
- while(done == 0) {
- n = kread(fd, rcvbuf, sizeof(rcvbuf));
- if(n <= 0)
- break;
- rp = (Bootp*)rcvbuf;
- /* currently ignore udp's header */
- if(memcmp(req.chaddr, rp->chaddr, 6) == 0
- && rp->htype == 1 && rp->hlen == 6
- && getfields((char*)rp->vend+4, field, 4, 1, " ") == 4
- && strncmp((char*)rp->vend, "p9 ", 4) == 0){
- if(ipaddr == 0)
- ipaddr = nhgetl(rp->yiaddr);
- if(ipmask == 0)
- ipmask = parseip(ip, field[0]);
- if(fsip == 0)
- fsip = parseip(ip, field[1]);
- if(auip == 0)
- auip = parseip(ip, field[2]);
- if(gwip == 0)
- gwip = parseip(ip, field[3]);
- break;
- }
- }
- poperror();
- rcvprocp = nil;
-
- recv = 1;
- wakeup(&bootpr);
- pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
- int cfd, dfd, tries, n;
- char ia[5+3*24], im[16], *av[3];
- uchar nipaddr[4], ngwip[4], nipmask[4];
- char dir[Maxpath];
-
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcadd(ifc, av, 3, 0, nil);
-
- cfd = kannounce("udp!*!68", dir);
- if(cfd < 0)
- return "bootp announce failed";
- strcat(dir, "/data");
- if(kwrite(cfd, "headers", 7) < 0){
- kclose(cfd);
- return "bootp ctl headers failed";
- }
- kwrite(cfd, "oldheaders", 10);
- dfd = kopen(dir, ORDWR);
- if(dfd < 0){
- kclose(cfd);
- return "bootp open data failed";
- }
- kclose(cfd);
-
-
- /* create request */
- memset(&req, 0, sizeof(req));
- ipmove(req.raddr, IPv4bcast);
- hnputs(req.rport, 67);
- req.op = Bootrequest;
- req.htype = 1; /* ethernet (all we know) */
- req.hlen = 6; /* ethernet (all we know) */
-
- /* Hardware MAC address */
- memmove(req.chaddr, ifc->mac, 6);
- /* Fill in the local IP address if we know it */
- ipv4local(ifc, req.ciaddr);
- memset(req.file, 0, sizeof(req.file));
- strcpy((char*)req.vend, "p9 ");
-
- done = 0;
- recv = 0;
-
- kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
- /*
- * broadcast bootp's till we get a reply,
- * or fixed number of tries
- */
- tries = 0;
- while(recv == 0) {
- if(kwrite(dfd, &req, sizeof(req)) < 0)
- print("bootp: write: %s\n", commonerror());
-
- tsleep(&bootpr, return0, 0, 1000);
- if(++tries > 10) {
- print("bootp: timed out\n");
- break;
- }
- }
- kclose(dfd);
- done = 1;
- if(rcvprocp != nil){
- postnote(rcvprocp, 1, "timeout", 0);
- rcvprocp = nil;
- }
-
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcrem(ifc, av, 3);
-
- hnputl(nipaddr, ipaddr);
- sprint(ia, "%V", nipaddr);
- hnputl(nipmask, ipmask);
- sprint(im, "%V", nipmask);
- av[1] = ia;
- av[2] = im;
- ipifcadd(ifc, av, 3, 0, nil);
-
- if(gwip != 0) {
- hnputl(ngwip, gwip);
- n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
- routewrite(ifc->conv->p->f, nil, ia, n);
- }
- return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
- int n;
- char *buf;
- uchar a[4];
-
- buf = smalloc(READSTR);
- if(waserror()){
- free(buf);
- nexterror();
- }
- hnputl(a, fsip);
- n = snprint(buf, READSTR, "fsip %15V\n", a);
- hnputl(a, auip);
- n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
- hnputl(a, gwip);
- n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
- hnputl(a, ipmask);
- n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
- hnputl(a, ipaddr);
- snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
-
- len = readstr(offset, bp, len, buf);
- poperror();
- free(buf);
- return len;
-}
-
-char* (*bootp)(Ipifc*) = rbootp;
-int (*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/compress.c
+++ /dev/null
@@ -1,520 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-#include "ip.h"
-#include "ppp.h"
-
-typedef struct Iphdr Iphdr;
-typedef struct Tcphdr Tcphdr;
-typedef struct Ilhdr Ilhdr;
-typedef struct Hdr Hdr;
-typedef struct Tcpc Tcpc;
-
-struct Iphdr
-{
- uchar vihl; /* Version and header length */
- uchar tos; /* Type of service */
- uchar length[2]; /* packet length */
- uchar id[2]; /* Identification */
- uchar frag[2]; /* Fragment information */
- uchar ttl; /* Time to live */
- uchar proto; /* Protocol */
- uchar cksum[2]; /* Header checksum */
- ulong src; /* Ip source (byte ordering unimportant) */
- ulong dst; /* Ip destination (byte ordering unimportant) */
-};
-
-struct Tcphdr
-{
- ulong ports; /* defined as a ulong to make comparisons easier */
- uchar seq[4];
- uchar ack[4];
- uchar flag[2];
- uchar win[2];
- uchar cksum[2];
- uchar urg[2];
-};
-
-struct Ilhdr
-{
- uchar sum[2]; /* Checksum including header */
- uchar len[2]; /* Packet length */
- uchar type; /* Packet type */
- uchar spec; /* Special */
- uchar src[2]; /* Src port */
- uchar dst[2]; /* Dst port */
- uchar id[4]; /* Sequence id */
- uchar ack[4]; /* Acked sequence */
-};
-
-enum
-{
- URG = 0x20, /* Data marked urgent */
- ACK = 0x10, /* Aknowledge is valid */
- PSH = 0x08, /* Whole data pipe is pushed */
- RST = 0x04, /* Reset connection */
- SYN = 0x02, /* Pkt. is synchronise */
- FIN = 0x01, /* Start close down */
-
- IP_DF = 0x4000, /* Don't fragment */
-
- IP_TCPPROTO = 6,
- IP_ILPROTO = 40,
- IL_IPHDR = 20,
-};
-
-struct Hdr
-{
- uchar buf[128];
- Iphdr *ip;
- Tcphdr *tcp;
- int len;
-};
-
-struct Tcpc
-{
- uchar lastrecv;
- uchar lastxmit;
- uchar basexmit;
- uchar err;
- uchar compressid;
- Hdr t[MAX_STATES];
- Hdr r[MAX_STATES];
-};
-
-enum
-{ /* flag bits for what changed in a packet */
- NEW_U=(1<<0), /* tcp only */
- NEW_W=(1<<1), /* tcp only */
- NEW_A=(1<<2), /* il tcp */
- NEW_S=(1<<3), /* tcp only */
- NEW_P=(1<<4), /* tcp only */
- NEW_I=(1<<5), /* il tcp */
- NEW_C=(1<<6), /* il tcp */
- NEW_T=(1<<7), /* il only */
- TCP_PUSH_BIT = 0x10,
-};
-
-/* reserved, special-case values of above for tcp */
-#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */
-#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */
-#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
-
-int
-encode(void *p, ulong n)
-{
- uchar *cp;
-
- cp = p;
- if(n >= 256 || n == 0) {
- *cp++ = 0;
- cp[0] = n >> 8;
- cp[1] = n;
- return 3;
- } else
- *cp = n;
- return 1;
-}
-
-#define DECODEL(f) { \
- if (*cp == 0) {\
- hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
- cp += 3; \
- } else { \
- hnputl(f, nhgetl(f) + (ulong)*cp++); \
- } \
-}
-#define DECODES(f) { \
- if (*cp == 0) {\
- hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
- cp += 3; \
- } else { \
- hnputs(f, nhgets(f) + (ulong)*cp++); \
- } \
-}
-
-ushort
-tcpcompress(Tcpc *comp, Block *b, Fs *)
-{
- Iphdr *ip; /* current packet */
- Tcphdr *tcp; /* current pkt */
- ulong iplen, tcplen, hlen; /* header length in bytes */
- ulong deltaS, deltaA; /* general purpose temporaries */
- ulong changes; /* change mask */
- uchar new_seq[16]; /* changes from last to current */
- uchar *cp;
- Hdr *h; /* last packet */
- int i, j;
-
- /*
- * Bail if this is not a compressible TCP/IP packet
- */
- ip = (Iphdr*)b->rp;
- iplen = (ip->vihl & 0xf) << 2;
- tcp = (Tcphdr*)(b->rp + iplen);
- tcplen = (tcp->flag[0] & 0xf0) >> 2;
- hlen = iplen + tcplen;
- if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
- return Pip; /* connection control */
-
- /*
- * Packet is compressible, look for a connection
- */
- changes = 0;
- cp = new_seq;
- j = comp->lastxmit;
- h = &comp->t[j];
- if(ip->src != h->ip->src || ip->dst != h->ip->dst
- || tcp->ports != h->tcp->ports) {
- for(i = 0; i < MAX_STATES; ++i) {
- j = (comp->basexmit + i) % MAX_STATES;
- h = &comp->t[j];
- if(ip->src == h->ip->src && ip->dst == h->ip->dst
- && tcp->ports == h->tcp->ports)
- goto found;
- }
-
- /* no connection, reuse the oldest */
- if(i == MAX_STATES) {
- j = comp->basexmit;
- j = (j + MAX_STATES - 1) % MAX_STATES;
- comp->basexmit = j;
- h = &comp->t[j];
- goto raise;
- }
- }
-found:
-
- /*
- * Make sure that only what we expect to change changed.
- */
- if(ip->vihl != h->ip->vihl || ip->tos != h->ip->tos ||
- ip->ttl != h->ip->ttl || ip->proto != h->ip->proto)
- goto raise; /* headers changed */
- if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
- goto raise; /* ip options changed */
- if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
- goto raise; /* tcp options changed */
-
- if(tcp->flag[1] & URG) {
- cp += encode(cp, nhgets(tcp->urg));
- changes |= NEW_U;
- } else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
- goto raise;
- if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
- cp += encode(cp, deltaS);
- changes |= NEW_W;
- }
- if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
- if(deltaA > 0xffff)
- goto raise;
- cp += encode(cp, deltaA);
- changes |= NEW_A;
- }
- if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
- if (deltaS > 0xffff)
- goto raise;
- cp += encode(cp, deltaS);
- changes |= NEW_S;
- }
-
- /*
- * Look for the special-case encodings.
- */
- switch(changes) {
- case 0:
- /*
- * Nothing changed. If this packet contains data and the last
- * one didn't, this is probably a data packet following an
- * ack (normal on an interactive connection) and we send it
- * compressed. Otherwise it's probably a retransmit,
- * retransmitted ack or window probe. Send it uncompressed
- * in case the other side missed the compressed version.
- */
- if(nhgets(ip->length) == nhgets(h->ip->length) ||
- nhgets(h->ip->length) != hlen)
- goto raise;
- break;
- case SPECIAL_I:
- case SPECIAL_D:
- /*
- * Actual changes match one of our special case encodings --
- * send packet uncompressed.
- */
- goto raise;
- case NEW_S | NEW_A:
- if (deltaS == deltaA &&
- deltaS == nhgets(h->ip->length) - hlen) {
- /* special case for echoed terminal traffic */
- changes = SPECIAL_I;
- cp = new_seq;
- }
- break;
- case NEW_S:
- if (deltaS == nhgets(h->ip->length) - hlen) {
- /* special case for data xfer */
- changes = SPECIAL_D;
- cp = new_seq;
- }
- break;
- }
- deltaS = nhgets(ip->id) - nhgets(h->ip->id);
- if(deltaS != 1) {
- cp += encode(cp, deltaS);
- changes |= NEW_I;
- }
- if (tcp->flag[1] & PSH)
- changes |= TCP_PUSH_BIT;
- /*
- * Grab the cksum before we overwrite it below. Then update our
- * state with this packet's header.
- */
- deltaA = nhgets(tcp->cksum);
- memmove(h->buf, b->rp, hlen);
- h->len = hlen;
- h->tcp = (Tcphdr*)(h->buf + iplen);
-
- /*
- * We want to use the original packet as our compressed packet. (cp -
- * new_seq) is the number of bytes we need for compressed sequence
- * numbers. In addition we need one byte for the change mask, one
- * for the connection id and two for the tcp checksum. So, (cp -
- * new_seq) + 4 bytes of header are needed. hlen is how many bytes
- * of the original packet to toss so subtract the two to get the new
- * packet size. The temporaries are gross -egs.
- */
- deltaS = cp - new_seq;
- cp = b->rp;
- if(comp->lastxmit != j || comp->compressid == 0) {
- comp->lastxmit = j;
- hlen -= deltaS + 4;
- cp += hlen;
- *cp++ = (changes | NEW_C);
- *cp++ = j;
- } else {
- hlen -= deltaS + 3;
- cp += hlen;
- *cp++ = changes;
- }
- b->rp += hlen;
- hnputs(cp, deltaA);
- cp += 2;
- memmove(cp, new_seq, deltaS);
- return Pvjctcp;
-
-raise:
- /*
- * Update connection state & send uncompressed packet
- */
- memmove(h->buf, b->rp, hlen);
- h->tcp = (Tcphdr*)(h->buf + iplen);
- h->len = hlen;
- h->ip->proto = j;
- comp->lastxmit = j;
- return Pvjutcp;
-}
-
-Block*
-tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)
-{
- uchar *cp, changes;
- int i;
- int iplen, len;
- Iphdr *ip;
- Tcphdr *tcp;
- Hdr *h;
-
- if(type == Pvjutcp) {
- /*
- * Locate the saved state for this connection. If the state
- * index is legal, clear the 'discard' flag.
- */
- ip = (Iphdr*)b->rp;
- if(ip->proto >= MAX_STATES)
- goto raise;
- iplen = (ip->vihl & 0xf) << 2;
- tcp = (Tcphdr*)(b->rp + iplen);
- comp->lastrecv = ip->proto;
- len = iplen + ((tcp->flag[0] & 0xf0) >> 2);
- comp->err = 0;
-netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
- /*
- * Restore the IP protocol field then save a copy of this
- * packet header. The checksum is zeroed in the copy so we
- * don't have to zero it each time we process a compressed
- * packet.
- */
- ip->proto = IP_TCPPROTO;
- h = &comp->r[comp->lastrecv];
- memmove(h->buf, b->rp, len);
- h->tcp = (Tcphdr*)(h->buf + iplen);
- h->len = len;
- h->ip->cksum[0] = h->ip->cksum[1] = 0;
- return b;
- }
-
- cp = b->rp;
- changes = *cp++;
- if(changes & NEW_C) {
- /*
- * Make sure the state index is in range, then grab the
- * state. If we have a good state index, clear the 'discard'
- * flag.
- */
- if(*cp >= MAX_STATES)
- goto raise;
- comp->err = 0;
- comp->lastrecv = *cp++;
-netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
- } else {
- /*
- * This packet has no state index. If we've had a
- * line error since the last time we got an explicit state
- * index, we have to toss the packet.
- */
- if(comp->err != 0){
- freeblist(b);
- return nil;
- }
-netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
- }
-
- /*
- * Find the state then fill in the TCP checksum and PUSH bit.
- */
- h = &comp->r[comp->lastrecv];
- ip = h->ip;
- tcp = h->tcp;
- len = h->len;
- memmove(tcp->cksum, cp, sizeof tcp->cksum);
- cp += 2;
- if(changes & TCP_PUSH_BIT)
- tcp->flag[1] |= PSH;
- else
- tcp->flag[1] &= ~PSH;
- /*
- * Fix up the state's ack, seq, urg and win fields based on the
- * changemask.
- */
- switch (changes & SPECIALS_MASK) {
- case SPECIAL_I:
- i = nhgets(ip->length) - len;
- hnputl(tcp->ack, nhgetl(tcp->ack) + i);
- hnputl(tcp->seq, nhgetl(tcp->seq) + i);
- break;
-
- case SPECIAL_D:
- hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
- break;
-
- default:
- if(changes & NEW_U) {
- tcp->flag[1] |= URG;
- if(*cp == 0){
- hnputs(tcp->urg, nhgets(cp+1));
- cp += 3;
- }else
- hnputs(tcp->urg, *cp++);
- } else
- tcp->flag[1] &= ~URG;
- if(changes & NEW_W)
- DECODES(tcp->win)
- if(changes & NEW_A)
- DECODEL(tcp->ack)
- if(changes & NEW_S)
- DECODEL(tcp->seq)
- break;
- }
-
- /* Update the IP ID */
- if(changes & NEW_I)
- DECODES(ip->id)
- else
- hnputs(ip->id, nhgets(ip->id) + 1);
-
- /*
- * At this point, cp points to the first byte of data in the packet.
- * Back up cp by the TCP/IP header length to make room for the
- * reconstructed header.
- * We assume the packet we were handed has enough space to prepend
- * up to 128 bytes of header.
- */
- b->rp = cp;
- if(b->rp - b->base < len){
- b = padblock(b, len);
- b = pullupblock(b, blocklen(b));
- } else
- b->rp -= len;
- hnputs(ip->length, BLEN(b));
- memmove(b->rp, ip, len);
-
- /* recompute the ip header checksum */
- ip = (Iphdr*)b->rp;
- hnputs(ip->cksum, ipcsum(b->rp));
- return b;
-
-raise:
- netlog(f, Logcompress, "Bad Packet!\n");
- comp->err = 1;
- freeblist(b);
- return nil;
-}
-
-Tcpc*
-compress_init(Tcpc *c)
-{
- int i;
- Hdr *h;
-
- if(c == nil){
- c = malloc(sizeof(Tcpc));
- if(c == nil)
- return nil;
- }
- memset(c, 0, sizeof(*c));
- for(i = 0; i < MAX_STATES; i++){
- h = &c->t[i];
- h->ip = (Iphdr*)h->buf;
- h->tcp = (Tcphdr*)(h->buf + 10);
- h->len = 20;
- h = &c->r[i];
- h->ip = (Iphdr*)h->buf;
- h->tcp = (Tcphdr*)(h->buf + 10);
- h->len = 20;
- }
-
- return c;
-}
-
-ushort
-compress(Tcpc *tcp, Block *b, Fs *f)
-{
- Iphdr *ip;
-
- /*
- * Bail if this is not a compressible IP packet
- */
- ip = (Iphdr*)b->rp;
- if((nhgets(ip->frag) & 0x3fff) != 0)
- return Pip;
-
- switch(ip->proto) {
- case IP_TCPPROTO:
- return tcpcompress(tcp, b, f);
- default:
- return Pip;
- }
-}
-
-int
-compress_negotiate(Tcpc *tcp, uchar *data)
-{
- if(data[0] != MAX_STATES - 1)
- return -1;
- tcp->compressid = data[1];
- return 0;
-}
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -14,7 +14,6 @@
Qbootp,
Qndb,
Qiproute,
- Qiprouter,
Qipselftab,
Qlog,
@@ -43,11 +42,11 @@
Maskproto= (1<<Logproto)-1,
Shiftproto= Logtype + Logconv,
- Nfs= 32,
+ Nfs= 128,
};
-#define TYPE(x) ( ((u32)(x).path) & Masktype )
-#define CONV(x) ( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) ( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) ( ((ulong)(x).path) & Masktype )
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
static char network[] = "network";
@@ -58,8 +57,7 @@
extern void nullmediumlink(void);
extern void pktmediumlink(void);
-static long ndbwrite(Fs*, char*, ulong, int);
-extern void closeconv(Conv*);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
static int
ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
devdir(c, q, "stats", 0, network, 0444, dp);
return 1;
- }
+ }
return -1;
}
@@ -144,11 +142,10 @@
return -1;
case Qarp:
p = "arp";
+ prot = 0664;
break;
case Qbootp:
p = "bootp";
- if(bootp == nil)
- return 0;
break;
case Qndb:
p = "ndb";
@@ -157,14 +154,12 @@
break;
case Qiproute:
p = "iproute";
+ prot = 0664;
break;
case Qipselftab:
p = "ipselftab";
prot = 0444;
break;
- case Qiprouter:
- p = "iprouter";
- break;
case Qlog:
p = "log";
break;
@@ -188,7 +183,7 @@
case Qtopdir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -206,19 +201,18 @@
case Qndb:
case Qlog:
case Qiproute:
- case Qiprouter:
case Qipselftab:
return ip1gen(c, TYPE(c->qid), dp);
case Qprotodir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
if(s < f->p[PROTO(c->qid)]->ac) {
cv = f->p[PROTO(c->qid)]->conv[s];
- sprint(up->genbuf, "%d", s);
+ snprint(up->genbuf, sizeof up->genbuf, "%d", s);
mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
return 1;
@@ -262,45 +256,14 @@
fmtinstall('M', eipfmt);
}
-static Fs*
-ipgetfs(int dev)
-{
- extern void (*ipprotoinit[])(Fs*);
- Fs *f;
- int i;
-
- if(dev >= Nfs)
- return nil;
-
- qlock(&fslock);
- if(ipfs[dev] == nil){
- f = smalloc(sizeof(Fs));
- ip_init(f);
- arpinit(f);
- netloginit(f);
- for(i = 0; ipprotoinit[i]; i++)
- ipprotoinit[i](f);
- f->dev = dev;
- ipfs[dev] = f;
- }
- qunlock(&fslock);
-
- return ipfs[dev];
-}
-
IPaux*
newipaux(char *owner, char *tag)
{
IPaux *a;
- int n;
a = smalloc(sizeof(*a));
kstrdup(&a->owner, owner);
- memset(a->tag, ' ', sizeof(a->tag));
- n = strlen(tag);
- if(n > sizeof(a->tag))
- n = sizeof(a->tag);
- memmove(a->tag, tag, n);
+ strncpy(a->tag, tag, sizeof(a->tag));
return a;
}
@@ -310,13 +273,29 @@
ipattach(char* spec)
{
Chan *c;
- int dev;
+ ulong dev;
- dev = atoi(spec);
+ dev = strtoul(spec, nil, 10);
if(dev >= Nfs)
- error("bad specification");
+ error(Enodev);
- ipgetfs(dev);
+ qlock(&fslock);
+ if(ipfs[dev] == nil){
+ extern void (*ipprotoinit[])(Fs*);
+ Fs *f;
+ int i;
+
+ f = smalloc(sizeof(Fs));
+ ip_init(f);
+ arpinit(f);
+ netloginit(f);
+ for(i = 0; ipprotoinit[i]; i++)
+ ipprotoinit[i](f);
+ f->dev = dev;
+ ipfs[dev] = f;
+ }
+ qunlock(&fslock);
+
c = devattach('I', spec);
mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
c->dev = dev;
@@ -327,7 +306,7 @@
}
static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
{
IPaux *a = c->aux;
Walkqid* w;
@@ -338,8 +317,9 @@
return w;
}
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
{
return devstat(c, db, n, nil, 0, ipgen);
}
@@ -360,7 +340,7 @@
};
static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
{
Conv *cv, *nc;
Proto *p;
@@ -375,7 +355,7 @@
default:
break;
case Qndb:
- if(omode & (OWRITE|OTRUNC) && !iseve())
+ if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
error(Eperm);
if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
f->ndb[0] = 0;
@@ -383,10 +363,10 @@
case Qlog:
netlogopen(f);
break;
- case Qiprouter:
- iprouteropen(f);
- break;
case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve())
+ error(Eperm);
break;
case Qtopdir:
case Qprotodir:
@@ -412,13 +392,8 @@
case Qclone:
p = f->p[PROTO(c->qid)];
qlock(p);
- if(waserror()){
- qunlock(p);
- nexterror();
- }
cv = Fsprotoclone(p, ATTACHER(c));
qunlock(p);
- poperror();
if(cv == nil) {
error(Enodev);
break;
@@ -437,15 +412,12 @@
qunlock(p);
nexterror();
}
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
- }
- cv->inuse++;
- if(cv->inuse == 1){
+ if(++cv->inuse == 1){
kstrdup(&cv->owner, ATTACHER(c));
cv->perm = 0660;
}
@@ -455,24 +427,26 @@
break;
case Qlisten:
cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
-
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
}
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
if(cv->state != Announced)
error("not announced");
+ cv->inuse++;
+ qunlock(cv);
+ poperror();
if(waserror()){
closeconv(cv);
nexterror();
}
- qlock(cv);
- cv->inuse++;
- qunlock(cv);
nc = nil;
while(nc == nil) {
@@ -494,7 +468,6 @@
if(nc != nil){
cv->incall = nc->next;
mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
- kstrdup(&cv->owner, ATTACHER(c));
}
qunlock(cv);
@@ -511,13 +484,25 @@
return c;
}
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
{
- Dir *d;
+ error(Eperm);
+ return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+ error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+ Dir *dir;
Conv *cv;
Fs *f;
- Proto *p;
f = ipfs[c->dev];
switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
break;
}
- d = smalloc(sizeof(*d)+n);
+ dir = smalloc(sizeof(Dir)+n);
if(waserror()){
- free(d);
+ free(dir);
nexterror();
}
- n = convM2D(dp, n, d, (char*)&d[1]);
+ n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
if(n == 0)
error(Eshortstat);
- p = f->p[PROTO(c->qid)];
- cv = p->conv[CONV(c->qid)];
- if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
+ }
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
error(Eperm);
- if(!emptystr(d->uid))
- kstrdup(&cv->owner, d->uid);
- if(d->mode != ~0UL)
- cv->perm = d->mode & 0777;
+ if(!emptystr(dir->uid)){
+ if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+ error(Eperm);
+ kstrdup(&cv->owner, dir->uid);
+ }
+ if(dir->mode != ~0UL)
+ cv->perm = dir->mode & 0666;
+ qunlock(cv);
poperror();
- free(d);
+
+ free(dir);
+ poperror();
+
return n;
}
-extern void
+void
closeconv(Conv *cv)
{
Conv *nc;
@@ -564,7 +561,7 @@
}
/* close all incoming calls since no listen will ever happen */
- for(nc = cv->incall; nc; nc = cv->incall){
+ for(nc = cv->incall; nc != nil; nc = cv->incall){
cv->incall = nc->next;
closeconv(nc);
}
@@ -576,9 +573,9 @@
while((mp = cv->multi) != nil)
ipifcremmulti(cv, mp->ma, mp->ia);
- cv->r = nil;
- cv->rgen = 0;
- cv->p->close(cv);
+ if(cv->p->close != nil)
+ (*cv->p->close)(cv);
+
cv->state = Idle;
qunlock(cv);
}
@@ -596,10 +593,6 @@
if(c->flag & COPEN)
netlogclose(f);
break;
- case Qiprouter:
- if(c->flag & COPEN)
- iprouterclose(f);
- break;
case Qdata:
case Qctl:
case Qerr:
@@ -620,13 +613,13 @@
Statelen= 32*1024,
};
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
{
Conv *c;
Proto *x;
char *buf, *p;
- s32 rv;
+ long rv;
Fs *f;
ulong offset = off;
@@ -648,21 +641,22 @@
return readstr(offset, a, n, f->ndb);
case Qiproute:
return routeread(f, a, offset, n);
- case Qiprouter:
- return iprouterread(f, a, n);
case Qipselftab:
return ipselftabread(f, a, offset, n);
case Qlog:
return netlogread(f, a, offset, n);
case Qctl:
- sprint(up->genbuf, "%ud", CONV(ch->qid));
- return readstr(offset, p, n, up->genbuf);
+ buf = smalloc(16);
+ snprint(buf, 16, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
case Qremote:
buf = smalloc(Statelen);
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->remote == nil) {
- sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
} else {
(*x->remote)(c, buf, Statelen-2);
}
@@ -674,7 +668,7 @@
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->local == nil) {
- sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
} else {
(*x->local)(c, buf, Statelen-2);
}
@@ -711,7 +705,7 @@
}
static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
{
Conv *c;
Proto *x;
@@ -740,7 +734,7 @@
/*
* set a local port making sure the quad of raddr,rport,laddr,lport is unique
*/
-static char*
+char*
setluniqueport(Conv* c, int lport)
{
Proto *p;
@@ -771,51 +765,63 @@
}
/*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+ int x;
+
+ for(x = 0; x < p->nc && p->conv[x]; x++)
+ if(p->conv[x]->lport == lport)
+ return 1;
+ return 0;
+}
+
+/*
* pick a local port and set it
*/
-extern void
+char *
setlport(Conv* c)
{
Proto *p;
- ushort *pp;
- int x, found;
+ int i, port;
p = c->p;
- if(c->restricted)
- pp = &p->nextrport;
- else
- pp = &p->nextport;
qlock(p);
- for(;;(*pp)++){
+ if(c->restricted){
+ /* Restricted ports cycle between 600 and 1024. */
+ for(i=0; i<1024-600; i++){
+ if(p->nextrport >= 1024 || p->nextrport < 600)
+ p->nextrport = 600;
+ port = p->nextrport++;
+ if(!lportinuse(p, port))
+ goto chosen;
+ }
+ }else{
/*
- * Fsproto initialises p->nextport to 0 and the restricted
- * ports (p->nextrport) to 600.
- * Restricted ports must lie between 600 and 1024.
- * For the initial condition or if the unrestricted port number
- * has wrapped round, select a random port between 5000 and 1<<15
- * to start at.
+ * Unrestricted ports are chosen randomly
+ * between 2^15 and 2^16. There are at most
+ * 4*Nchan = 4096 ports in use at any given time,
+ * so even in the worst case, a random probe has a
+ * 1 - 4096/2^15 = 87% chance of success.
+ * If 64 successive probes fail, there is a bug somewhere
+ * (or a once in 10^58 event has happened, but that's
+ * less likely than a venti collision).
*/
- if(c->restricted){
- if(*pp >= 1024)
- *pp = 600;
+ for(i=0; i<64; i++){
+ port = (1<<15) + nrand(1<<15);
+ if(!lportinuse(p, port))
+ goto chosen;
}
- else while(*pp < 5000)
- *pp = nrand(1<<15);
-
- found = 0;
- for(x = 0; x < p->nc; x++){
- if(p->conv[x] == nil)
- break;
- if(p->conv[x]->lport == *pp){
- found = 1;
- break;
- }
- }
- if(found == 0)
- break;
}
- c->lport = (*pp)++;
qunlock(p);
+ return "no ports available";
+
+chosen:
+ c->lport = port;
+ qunlock(p);
+ return nil;
}
/*
@@ -822,7 +828,7 @@
* set a local address and port from a string of the form
* [address!]port[!r]
*/
-static char*
+char*
setladdrport(Conv* c, char* str, int announcing)
{
char *p;
@@ -830,8 +836,6 @@
ushort lport;
uchar addr[IPaddrlen];
- rv = nil;
-
/*
* ignore restricted part if it exists. it's
* meaningless on local ports.
@@ -854,8 +858,9 @@
if(strcmp(str, "*") == 0)
ipmove(c->laddr, IPnoaddr);
else {
- parseip(addr, str);
- if(ipforme(c->p->f, addr))
+ if(parseip(addr, str) == -1)
+ return Ebadip;
+ if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
ipmove(c->laddr, addr);
else
return "not a local IP address";
@@ -869,9 +874,13 @@
return setluniqueport(c, 0);
}
- lport = atoi(p);
+ str = p;
+ lport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
if(lport <= 0)
- setlport(c);
+ rv = setlport(c);
else
rv = setluniqueport(c, lport);
return rv;
@@ -886,13 +895,17 @@
if(p == nil)
return "malformed address";
*p++ = 0;
- parseip(c->raddr, str);
- c->rport = atoi(p);
- p = strchr(p, '!');
- if(p){
- if(strstr(p, "!r") != nil)
- c->restricted = 1;
- }
+ if(parseip(c->raddr, str) == -1)
+ return Ebadip;
+
+ str = p;
+ c->rport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
+ if(strstr(p, "!r") != nil)
+ c->restricted = 1;
+
return nil;
}
@@ -912,7 +925,9 @@
if(p != nil)
return p;
setladdr(c);
- setlport(c);
+ p = setlport(c);
+ if (p != nil)
+ return p;
break;
case 3:
p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
return p;
}
- if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- c->ipversion = V4;
- else
- c->ipversion = V6;
+ c->ipversion = convipvers(c);
return nil;
}
@@ -978,10 +988,11 @@
c->rport = 0;
switch(argc){
default:
- return "bad args to announce";
+ break;
case 2:
return setladdrport(c, argv[1], 1);
}
+ return "bad args to announce";
}
/*
@@ -1028,10 +1039,11 @@
{
switch(argc){
default:
- return "bad args to bind";
+ break;
case 2:
return setladdrport(c, argv[1], 0);
}
+ return "bad args to bind";
}
static void
@@ -1042,7 +1054,7 @@
if(x->bind == nil)
p = Fsstdbind(c, cb->f, cb->nf);
else
- p = x->bind(c, cb->f, cb->nf);
+ p = (*x->bind)(c, cb->f, cb->nf);
if(p != nil)
error(p);
}
@@ -1065,8 +1077,8 @@
c->ttl = atoi(cb->f[1]);
}
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
{
Conv *c;
Proto *x;
@@ -1075,6 +1087,7 @@
uchar ia[IPaddrlen], ma[IPaddrlen];
Fs *f;
char *a;
+ ulong offset = off;
a = v;
f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
netlogctl(f, a, n);
return n;
case Qndb:
- return ndbwrite(f, a, off, n);
+ return ndbwrite(f, a, offset, n);
+ break;
case Qctl:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
if(cb->nf == 2){
if(!ipismulticast(c->raddr))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcaddmulti(c, c->raddr, ia);
} else {
- parseip(ma, cb->f[2]);
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
if(!ipismulticast(ma))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
ipifcaddmulti(c, ma, ia);
}
} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
error("remmulti needs interface address");
if(!ipismulticast(c->raddr))
error("remmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcremmulti(c, c->raddr, ia);
} else if(x->ctl != nil) {
- p = x->ctl(c, cb->f, cb->nf);
+ p = (*x->ctl)(c, cb->f, cb->nf);
if(p != nil)
error(p);
} else
@@ -1160,13 +1177,12 @@
return n;
}
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
{
Conv *c;
Proto *x;
Fs *f;
- int n;
switch(TYPE(ch->qid)){
case Qdata:
@@ -1177,11 +1193,7 @@
if(c->wq == nil)
error(Eperm);
- if(bp->next)
- bp = concatblock(bp);
- n = BLEN(bp);
- qbwrite(c->wq, bp);
- return n;
+ return qbwrite(c->wq, bp);
default:
return devbwrite(ch, bp, offset);
}
@@ -1198,13 +1210,13 @@
ipwalk,
ipstat,
ipopen,
- devcreate,
+ ipcreate,
ipclose,
ipread,
ipbread,
ipwrite,
ipbwrite,
- devremove,
+ ipremove,
ipwstat,
};
@@ -1224,12 +1236,15 @@
p->qid.type = QTDIR;
p->qid.path = QID(f->np, 0, Qprotodir);
+ if(p->nc > Maskconv+1){
+ print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+ p->nc = Maskconv+1;
+ }
p->conv = malloc(sizeof(Conv*)*(p->nc+1));
if(p->conv == nil)
panic("Fsproto");
p->x = f->np;
- p->nextport = 0;
p->nextrport = 600;
f->p[f->np++] = p;
@@ -1262,21 +1277,33 @@
if(c == nil){
c = malloc(sizeof(Conv));
if(c == nil)
- error(Enomem);
- qlock(c);
+ return nil;
+ if(waserror()){
+ qfree(c->rq);
+ qfree(c->wq);
+ qfree(c->eq);
+ qfree(c->sq);
+ free(c->ptcl);
+ free(c);
+ return nil;
+ }
c->p = p;
c->x = pp - p->conv;
if(p->ptclsize != 0){
c->ptcl = malloc(p->ptclsize);
- if(c->ptcl == nil) {
- free(c);
+ if(c->ptcl == nil)
error(Enomem);
- }
}
- *pp = c;
- p->ac++;
c->eq = qopen(1024, Qmsg, 0, 0);
+ if(c->eq == nil)
+ error(Enomem);
(*p->create)(c);
+ if(c->rq == nil || c->wq == nil)
+ error(Enomem);
+ poperror();
+ qlock(c);
+ *pp = c;
+ p->ac++;
break;
}
if(canqlock(c)){
@@ -1291,8 +1318,11 @@
}
}
if(pp >= ep) {
- if(p->gc != nil && (*p->gc)(p))
- goto retry;
+ if(p->gc != nil){
+ print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+ if((*p->gc)(p))
+ goto retry;
+ }
return nil;
}
@@ -1307,8 +1337,9 @@
c->lport = 0;
c->rport = 0;
c->restricted = 0;
+ c->ignoreadvice = 0;
c->ttl = MAXTTL;
- c->tos = DFLTTOS;
+ c->tos = 0;
qreopen(c->rq);
qreopen(c->wq);
qreopen(c->eq);
@@ -1321,7 +1352,7 @@
Fsconnected(Conv* c, char* msg)
{
if(msg != nil && *msg != '\0')
- kstrcpy(c->cerr, msg, sizeof(c->cerr));
+ strncpy(c->cerr, msg, ERRMAX-1);
switch(c->state){
@@ -1368,12 +1399,19 @@
for(l = &c->incall; *l; l = &(*l)->next)
i++;
if(i >= Maxincall) {
+ static int beenhere;
+
qunlock(c);
+ if (!beenhere) {
+ beenhere = 1;
+ print("Fsnewcall: incall queue full (%d) on port %d\n",
+ i, c->lport);
+ }
return nil;
}
/* find a free conversation */
- nc = Fsprotoclone(c->p, network);
+ nc = Fsprotoclone(c->p, c->owner);
if(nc == nil) {
qunlock(c);
return nil;
@@ -1394,12 +1432,12 @@
return nc;
}
-static long
+long
ndbwrite(Fs *f, char *a, ulong off, int n)
{
if(off > strlen(f->ndb))
error(Eio);
- if(off+n >= sizeof(f->ndb)-1)
+ if(off+n >= sizeof(f->ndb))
error(Eio);
memmove(f->ndb+off, a, n);
f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
ulong
scalednconv(void)
{
- if(conf.npage*BY2PG >= 128*MB)
+ if(cpuserver && conf.npage*BY2PG >= 128*MB)
return Nchans*4;
return Nchans;
}
--- a/os/ip/dhcp.c
+++ /dev/null
@@ -1,447 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-#include "ppp.h"
-
-Ipaddr pppdns[2];
-
-static ulong fsip;
-static ulong auip;
-static ulong gwip;
-static ulong ipmask;
-static ulong ipaddr;
-static ulong dns1ip;
-static ulong dns2ip;
-
-int dhcpmsgtype;
-int debug=0;
-enum
-{
- Bootrequest = 1,
- Bootreply = 2,
-};
-
-typedef struct Bootp
-{
- /* udp.c oldheader */
- uchar raddr[IPaddrlen];
- uchar laddr[IPaddrlen];
- uchar rport[2];
- uchar lport[2];
- /* bootp itself */
- uchar op; /* opcode */
- uchar htype; /* hardware type */
- uchar hlen; /* hardware address len */
- uchar hops; /* hops */
- uchar xid[4]; /* a random number */
- uchar secs[2]; /* elapsed snce client started booting */
- uchar flags[2]; /* flags */
- uchar ciaddr[4]; /* client IP address (client tells server) */
- uchar yiaddr[4]; /* client IP address (server tells client) */
- uchar siaddr[4]; /* server IP address */
- uchar giaddr[4]; /* gateway IP address */
- uchar chaddr[16]; /* client hardware address */
- uchar sname[64]; /* server host name (optional) */
- uchar file[128]; /* boot file name */
- uchar vend[128]; /* vendor-specific goo 340 */
-} Bootp;
-
-static Bootp req;
-static Proc* rcvprocp;
-static int recv;
-static int done;
-static Rendez bootpr;
-static char rcvbuf[512+2*IPaddrlen+2*2]; /* 576 */
-static uchar sid[4];
-static ulong iplease;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dns1ip d.d.d.d
- * dns2ip d.d.d.d
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- Last change: SUN 13 Sep 2001 4:36 pm
- */
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will being with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static int
-parsevend(uchar* pvend)
-{
- uchar *vend=pvend;
- int dhcpmsg=0;
- /* The field must start with 99.130.83.99 to be compliant */
- if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
- print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
- return -1;
- }
-
- /* Skip over the magic cookie */
- vend += 4;
-
- while ((vend[0] != 0) && (vend[0] != 0xFF)) {
- int i;
-//
- if(debug){
- print(">>>Opt[%d] [%d]", vend[0], vend[1]);
- for(i=0; i<vend[1]; i++)
- print(" %2.2x", vend[i+2]);
- print("\n");
- }
-//
- switch (vend[0]) {
- case 1: /* Subnet mask field */
- /* There must be only one subnet mask */
- if (vend[1] == 4)
- ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5];
- else{
- return -1;
- }
- break;
-
- case 3: /* Gateway/router field */
- /* We are only concerned with first address */
- if (vend[1] >0 && vend[1]%4==0)
- gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
- else
- return -1;
- break;
- case 6: /* domain name server */
- if(vend[1]>0 && vend[1] %4==0){
- dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
- if(vend[1]>4)
- dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9];
- }else
- return -1;
- break;
-
- case 8: /* "Cookie server" (auth server) field */
- /* We are only concerned with first address */
- if (vend[1] > 0 && vend[1]%4==0)
- auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
- else
- return -1;
- break;
-
- case 11: /* "Resource loc server" (file server) field */
- /* We are only concerned with first address */
- if (vend[1] > 0 && vend[1]%4==0)
- fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5];
- else
- return -1;
- break;
- case 51: /* ip lease time */
- if(vend[1]==4){
- iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
- }else
- return -1;
- break;
- case 53: /* DHCP message type */
- if(vend[1]==1)
- dhcpmsg=vend[2];
- else
- return -1;
- break;
- case 54: /* server identifier */
- if(vend[1]==4){
- memmove(sid, vend+2, 4);
- }else
- return -1;
- break;
-
- default: /* Everything else stops us */
- break;
- }
-
- /* Skip over the field */
- vend += vend[1] + 2;
- }
- if(debug)
- print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
- return dhcpmsg;
-}
-
-static void
-dispvend(uchar* pvend)
-{
- uchar *vend=pvend;
-
- //print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]);
-
- vend += 4; /* Skip over the magic cookie */
- while ((vend[0] != 0) && (vend[0] != 0xFF)) {
- // int i;
- // print("<<<Opt[%d] [%d]", vend[0], vend[1]);
- //for(i=0; i<vend[1]; i++)
- // print(" %2.2x", vend[i+2]);
- //print("\n");
-
- vend += vend[1] + 2;
- }
- //print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]);
-}
-
-static void
-rcvbootp(void *a)
-{
- int n, fd, dhcp;
- Bootp *rp;
-
- if(waserror())
- pexit("", 0);
- rcvprocp = up; /* store for postnote below */
- fd = (int)a;
- while(done == 0) {
- if(debug)
- print("rcvbootp:looping\n");
-
- n = kread(fd, rcvbuf, sizeof(rcvbuf));
- if(n <= 0)
- break;
- rp = (Bootp*)rcvbuf;
- if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
- ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3];
- if(debug)
- print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
- //memmove(req.siaddr, rp->siaddr, 4); /* siaddr */
- dhcp = parsevend(rp->vend);
-
- if(dhcpmsgtype < dhcp){
- dhcpmsgtype=dhcp;
- recv = 1;
- wakeup(&bootpr);
- if(dhcp==0 || dhcp ==5 || dhcp == 6 )
- break;
- }
- }
- }
- poperror();
- rcvprocp = nil;
-
- if(debug)
- print("rcvbootp exit\n");
- pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
- int cfd, dfd, tries, n;
- char ia[5+3*16], im[16], *av[3];
- uchar nipaddr[4], ngwip[4], nipmask[4];
- char dir[Maxpath];
- static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
- uchar *vend;
-
- /*
- * broadcast bootp's till we get a reply,
- * or fixed number of tries
- */
- if(debug)
- print("dhcp: bootp() called\n");
- tries = 0;
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcadd(ifc, av, 3, 0, nil);
-
- cfd = kannounce("udp!*!68", dir);
- if(cfd < 0)
- return "dhcp announce failed";
- strcat(dir, "/data");
- if(kwrite(cfd, "headers", 7) < 0){
- kclose(cfd);
- return "dhcp ctl headers failed";
- }
- kwrite(cfd, "oldheaders", 10);
- dfd = kopen(dir, ORDWR);
- if(dfd < 0){
- kclose(cfd);
- return "dhcp open data failed";
- }
- kclose(cfd);
-
- while(tries<1){
- tries++;
- memset(sid, 0, 4);
- iplease=0;
- dhcpmsgtype=-2;
-/* DHCPDISCOVER*/
- done = 0;
- recv = 0;
- kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
- /* Prepare DHCPDISCOVER */
- memset(&req, 0, sizeof(req));
- ipmove(req.raddr, IPv4bcast);
- hnputs(req.rport, 67);
- req.op = Bootrequest;
- req.htype = 1; /* ethernet (all we know) */
- req.hlen = 6; /* ethernet (all we know) */
-
- memmove(req.chaddr, ifc->mac, 6); /* Hardware MAC address */
- //ipv4local(ifc, req.ciaddr); /* Fill in the local IP address if we know it */
- memset(req.file, 0, sizeof(req.file));
- vend=req.vend;
- memmove(vend, vend_rfc1048, 4); vend+=4;
- *vend++=53; *vend++=1;*vend++=1; /* dhcp msg type==3, dhcprequest */
-
- *vend++=61;*vend++=7;*vend++=1;
- memmove(vend, ifc->mac, 6);vend+=6;
- *vend=0xff;
-
- if(debug)
- dispvend(req.vend);
- for(n=0;n<4;n++){
- if(kwrite(dfd, &req, sizeof(req))<0) /* SEND DHCPDISCOVER */
- print("DHCPDISCOVER: %r");
-
- tsleep(&bootpr, return0, 0, 1000); /* wait DHCPOFFER */
- if(debug)
- print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
-
- if(dhcpmsgtype==2) /* DHCPOFFER */
- break;
- else if(dhcpmsgtype==0) /* bootp */
- return nil;
- else if(dhcpmsgtype== -2) /* time out */
- continue;
- else
- break;
-
- }
- if(dhcpmsgtype!=2)
- continue;
-
-/* DHCPREQUEST */
- memset(req.vend, 0, sizeof(req.vend));
- vend=req.vend;
- memmove(vend, vend_rfc1048, 4);vend+=4;
-
- *vend++=53; *vend++=1;*vend++=3; /* dhcp msg type==3, dhcprequest */
-
- *vend++=50; *vend++=4; /* requested ip address */
- *vend++=(ipaddr >> 24)&0xff;
- *vend++=(ipaddr >> 16)&0xff;
- *vend++=(ipaddr >> 8) & 0xff;
- *vend++=ipaddr & 0xff;
-
- *vend++=51;*vend++=4; /* lease time */
- *vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
-
- *vend++=54; *vend++=4; /* server identifier */
- memmove(vend, sid, 4); vend+=4;
-
- *vend++=61;*vend++=07;*vend++=01; /* client identifier */
- memmove(vend, ifc->mac, 6);vend+=6;
- *vend=0xff;
- if(debug)
- dispvend(req.vend);
- if(kwrite(dfd, &req, sizeof(req))<0){
- print("DHCPREQUEST: %r");
- continue;
- }
- tsleep(&bootpr, return0, 0, 2000);
- if(dhcpmsgtype==5) /* wait for DHCPACK */
- break;
- else
- continue;
- /* CHECK ARP */
- /* DHCPDECLINE */
- }
- kclose(dfd);
- done = 1;
- if(rcvprocp != nil){
- postnote(rcvprocp, 1, "timeout", 0);
- rcvprocp = nil;
- }
-
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcrem(ifc, av, 3);
-
- hnputl(nipaddr, ipaddr);
- sprint(ia, "%V", nipaddr);
- hnputl(nipmask, ipmask);
- sprint(im, "%V", nipmask);
- av[1] = ia;
- av[2] = im;
- ipifcadd(ifc, av, 3, 0, nil);
-
- if(gwip != 0) {
- hnputl(ngwip, gwip);
- n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
- routewrite(ifc->conv->p->f, nil, ia, n);
- }
- return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
- int n, i;
- char *buf;
- uchar a[4];
-
- if(debug)
- print("dhcp: bootpread() \n");
- buf = smalloc(READSTR);
- if(waserror()){
- free(buf);
- nexterror();
- }
-
- hnputl(a, fsip);
- n = snprint(buf, READSTR, "fsip %15V\n", a);
- hnputl(a, auip);
- n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
- hnputl(a, gwip);
- n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
- hnputl(a, ipmask);
- n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
- hnputl(a, ipaddr);
- n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
- n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease);
-
- n += snprint(buf + n, READSTR-n, "dns");
- if(dns2ip){
- hnputl(a, dns2ip);
- n+=snprint(buf + n, READSTR-n, " %15V", a);
- }
- if(dns1ip){
- hnputl(a, dns1ip);
- n += snprint(buf + n, READSTR-n, " %15V", a);
- }
-
- for(i=0; i<2; i++)
- if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0)
- n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]);
-
- snprint(buf + n, READSTR-n, "\n");
- len = readstr(offset, bp, len, buf);
- poperror();
- free(buf);
- return len;
-}
-
-char* (*bootp)(Ipifc*) = rbootp;
-int (*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/eipconvtest.c
+++ /dev/null
@@ -1,152 +1,0 @@
-#include <u.h>
-#include <libc.h>
-
-enum
-{
- Isprefix= 16,
-};
-
-uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-uchar v4prefix[16] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0, 0, 0, 0
-};
-
-void
-hnputl(void *p, ulong v)
-{
- uchar *a;
-
- a = p;
- a[0] = v>>24;
- a[1] = v>>16;
- a[2] = v>>8;
- a[3] = v;
-}
-
-int
-eipconv(va_list *arg, Fconv *f)
-{
- char buf[8*5];
- static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
- static char *ifmt = "%d.%d.%d.%d";
- uchar *p, ip[16];
- ulong *lp;
- ushort s;
- int i, j, n, eln, eli;
-
- switch(f->chr) {
- case 'E': /* Ethernet address */
- p = va_arg(*arg, uchar*);
- sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
- break;
- case 'I': /* Ip address */
- p = va_arg(*arg, uchar*);
-common:
- if(memcmp(p, v4prefix, 12) == 0)
- sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
- else {
- /* find longest elision */
- eln = eli = -1;
- for(i = 0; i < 16; i += 2){
- for(j = i; j < 16; j += 2)
- if(p[j] != 0 || p[j+1] != 0)
- break;
- if(j > i && j - i > eln){
- eli = i;
- eln = j - i;
- }
- }
-
- /* print with possible elision */
- n = 0;
- for(i = 0; i < 16; i += 2){
- if(i == eli){
- n += sprint(buf+n, "::");
- i += eln;
- if(i >= 16)
- break;
- } else if(i != 0)
- n += sprint(buf+n, ":");
- s = (p[i]<<8) + p[i+1];
- n += sprint(buf+n, "%ux", s);
- }
- }
- break;
- case 'i': /* v6 address as 4 longs */
- lp = va_arg(*arg, ulong*);
- for(i = 0; i < 4; i++)
- hnputl(ip+4*i, *lp++);
- p = ip;
- goto common;
- case 'V': /* v4 ip address */
- p = va_arg(*arg, uchar*);
- sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
- break;
- case 'M': /* ip mask */
- p = va_arg(*arg, uchar*);
-
- /* look for a prefix mask */
- for(i = 0; i < 16; i++)
- if(p[i] != 0xff)
- break;
- if(i < 16){
- if((prefixvals[p[i]] & Isprefix) == 0)
- goto common;
- for(j = i+1; j < 16; j++)
- if(p[j] != 0)
- goto common;
- n = 8*i + (prefixvals[p[i]] & ~Isprefix);
- } else
- n = 8*16;
-
- /* got one, use /xx format */
- sprint(buf, "/%d", n);
- break;
- default:
- strcpy(buf, "(eipconv)");
- }
- strconv(buf, f);
- return sizeof(uchar*);
-}
-
-uchar testvec[11][16] =
-{
- { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
- { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
- { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
- { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
-};
-
-void
-main(void)
-{
- int i;
-
- fmtinstall('I', eipconv);
- fmtinstall('M', eipconv);
- for(i = 0; i < 11; i++)
- print("%I\n%M\n", testvec[i], testvec[i]);
- exits(0);
-}
--- a/os/ip/esp.c
+++ b/os/ip/esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ * currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ * transport mode (host-to-host)
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,47 +14,79 @@
#include "../port/error.h"
#include "ip.h"
+#include "ipv6.h"
+#include <libsec.h>
-#include "libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by) ((by) * BI2BY)
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
typedef struct Esptail Esptail;
typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
-#define DPRINT if(0)print
+enum {
+ Encrypt,
+ Decrypt,
-enum
-{
- IP_ESPPROTO = 50,
- EsphdrSize = 28, // includes IP header
- IphdrSize = 20, // options have been striped
- EsptailSize = 2, // does not include pad or auth data
- UserhdrSize = 4, // user visable header size - if enabled
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8,
+ Esp6hdrlen = IP6HDR + 8,
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+
+ Desblk = BITS2BYTES(64),
+ Des3keysz = BITS2BYTES(192),
+
+ Aesblk = BITS2BYTES(128),
+ Aeskeysz = BITS2BYTES(128),
};
struct Esphdr
{
- /* ip header */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+ uchar payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ * enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ * orig IP hdrs | ESP hdr |
+ * enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar espproto; /* Protocol */
uchar espplen[2]; /* Header plus data length */
uchar espsrc[4]; /* Ip source */
uchar espdst[4]; /* Ip destination */
- /* esp header */
- uchar espspi[4]; /* Security parameter index */
- uchar espseq[4]; /* Sequence number */
+ Esphdr;
};
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+ IPV6HDR;
+ Esphdr;
+};
+
struct Esptail
{
uchar pad;
@@ -53,16 +93,28 @@
uchar nexthdr;
};
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+ ulong version;
+ ulong iphdrlen;
+ ulong hdrlen; /* iphdrlen + esp hdr len */
+ ulong spi;
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+};
+
/* header as seen by the user */
struct Userhdr
{
- uchar nexthdr; // next protocol
+ uchar nexthdr; /* next protocol */
uchar unused[3];
};
struct Esppriv
{
- ulong in;
+ uvlong in;
ulong inerrors;
};
@@ -72,77 +124,68 @@
struct Espcb
{
int incoming;
- int header; // user user level header
+ int header; /* user-level header */
ulong spi;
- ulong seq; // last seq sent
- ulong window; // for replay attacks
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+
char *espalg;
- void *espstate; // other state for esp
- int espivlen; // in bytes
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
int espblklen;
int (*cipher)(Espcb*, uchar *buf, int len);
+
char *ahalg;
- void *ahstate; // other state for esp
- int ahlen; // auth data length in bytes
+ void *ahstate; /* other state for esp */
+ int ahlen; /* auth data length in bytes */
int ahblklen;
int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+ DigestState *ds;
};
struct Algorithm
{
char *name;
- int keylen; // in bits
- void (*init)(Espcb*, char* name, uchar *key, int keylen);
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, unsigned keylen);
};
-
-enum {
- RC4forward = 10*1024*1024, // maximum skip forward
- RC4back = 100*1024, // maximum look back
-};
-
-struct Esprc4
-{
- ulong cseq; // current byte sequence number
- RC4state current;
-
- int ovalid; // old is valid
- ulong lgseq; // last good sequence
- ulong oseq; // old byte sequence number
- RC4state old;
-};
-
static Conv* convlookup(Proto *esp, ulong spi);
static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
static void espkick(void *x);
+static void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
static Algorithm espalg[] =
{
- "null", 0, nullespinit,
- "des_56_cbc", 64, desespinit,
- "rc4_128", 128, rc4espinit,
- nil, 0, nil,
+ "null", 0, nullespinit,
+ "des3_cbc", 192, des3espinit, /* new rfc2451, des-ede3 */
+ "aes_128_cbc", 128, aescbcespinit, /* new rfc3602 */
+ "aes_ctr", 128, aesctrespinit, /* new rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+ nil, 0, nil,
};
static Algorithm ahalg[] =
{
- "null", 0, nullahinit,
- "hmac_sha1_96", 128, shaahinit,
- "hmac_md5_96", 128, md5ahinit,
- nil, 0, nil,
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
};
static char*
espconnect(Conv *c, char **argv, int argc)
{
- char *p, *pp;
- char *e = nil;
+ char *p, *pp, *e = nil;
ulong spi;
Espcb *ecb = (Espcb*)c->ptcl;
@@ -157,7 +200,10 @@
break;
}
*p++ = 0;
- parseip(c->raddr, argv[1]);
+ if (parseip(c->raddr, argv[1]) == -1) {
+ e = Ebadip;
+ break;
+ }
findlocalip(c->p->f, c->laddr, c->raddr);
ecb->incoming = 0;
ecb->seq = 0;
@@ -215,26 +261,86 @@
ipmove(c->raddr, IPnoaddr);
ecb = (Espcb*)c->ptcl;
- free(ecb->espstate);
- free(ecb->ahstate);
+ secfree(ecb->espstate);
+ secfree(ecb->ahstate);
memset(ecb, 0, sizeof(Espcb));
}
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+ if (*bpp == nil || BLEN(*bpp) == 0) {
+ /* get enough to identify the IP version */
+ *bpp = pullupblock(*bpp, IP4HDR);
+ if(*bpp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return 0;
+ }
+ }
+ return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
static void
+getverslens(int version, Versdep *vp)
+{
+ vp->version = version;
+ switch(vp->version) {
+ case V4:
+ vp->iphdrlen = IP4HDR;
+ vp->hdrlen = Esp4hdrlen;
+ break;
+ case V6:
+ vp->iphdrlen = IP6HDR;
+ vp->hdrlen = Esp6hdrlen;
+ break;
+ default:
+ panic("esp: getverslens version %d wrong", version);
+ }
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+
+ switch(vp->version) {
+ case V4:
+ eh4 = (Esp4hdr*)pkt;
+ v4tov6(vp->raddr, eh4->espsrc);
+ v4tov6(vp->laddr, eh4->espdst);
+ vp->spi = nhgetl(eh4->espspi);
+ break;
+ case V6:
+ eh6 = (Esp6hdr*)pkt;
+ ipmove(vp->raddr, eh6->src);
+ ipmove(vp->laddr, eh6->dst);
+ vp->spi = nhgetl(eh6->espspi);
+ break;
+ default:
+ panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+ }
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
espkick(void *x)
{
+ int nexthdr, payload, pad, align;
+ uchar *auth;
+ Block *bp;
Conv *c = x;
- Esphdr *eh;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Espcb *ecb;
Esptail *et;
Userhdr *uh;
- Espcb *ecb;
- Block *bp;
- int nexthdr;
- int payload;
- int pad;
- int align;
- uchar *auth;
+ Versdep vers;
+ getverslens(convipvers(c), &vers);
bp = qget(c->wq);
if(bp == nil)
return;
@@ -244,7 +350,7 @@
if(ecb->header) {
/* make sure the message has a User header */
- bp = pullupblock(bp, UserhdrSize);
+ bp = pullupblock(bp, Userhdrlen);
if(bp == nil) {
qunlock(c);
return;
@@ -251,15 +357,16 @@
}
uh = (Userhdr*)bp->rp;
nexthdr = uh->nexthdr;
- bp->rp += UserhdrSize;
+ bp->rp += Userhdrlen;
} else {
- nexthdr = 0; // what should this be?
+ nexthdr = 0; /* what should this be? */
}
payload = BLEN(bp) + ecb->espivlen;
/* Make space to fit ip header */
- bp = padblock(bp, EsphdrSize + ecb->espivlen);
+ bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+ getpktspiaddrs(bp->rp, &vers);
align = 4;
if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
align = ecb->espblklen;
if(align % ecb->ahblklen != 0)
panic("espkick: ahblklen is important after all");
- pad = (align-1) - (payload + EsptailSize-1)%align;
+ pad = (align-1) - (payload + Esptaillen-1)%align;
/*
* Make space for tail
@@ -273,70 +380,88 @@
* this is done by calling padblock with a negative size
* Padblock does not change bp->wp!
*/
- bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
- bp->wp += pad+EsptailSize+ecb->ahlen;
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
- eh = (Esphdr *)(bp->rp);
- et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
- // fill in tail
+ /* fill in tail */
et->pad = pad;
et->nexthdr = nexthdr;
- ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
- auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+ /* encrypt the payload */
+ ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
- // fill in head
- eh->vihl = IP_VER4;
- hnputl(eh->espspi, ecb->spi);
- hnputl(eh->espseq, ++ecb->seq);
- v6tov4(eh->espsrc, c->laddr);
- v6tov4(eh->espdst, c->raddr);
- eh->espproto = IP_ESPPROTO;
- eh->frag[0] = 0;
- eh->frag[1] = 0;
+ /* fill in head; construct a new IP header and an ESP header */
+ if (vers.version == V4) {
+ eh4 = (Esp4hdr *)bp->rp;
+ eh4->vihl = IP_VER4;
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
- ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ } else {
+ eh6 = (Esp6hdr *)bp->rp;
+ eh6->vcf[0] = IP_VER6;
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ }
+
+ /* compute secure hash */
+ ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
qunlock(c);
- //print("esp: pass down: %uld\n", BLEN(bp));
- ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (vers.version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
}
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
void
espiput(Proto *esp, Ipifc*, Block *bp)
{
- Esphdr *eh;
- Esptail *et;
- Userhdr *uh;
+ int payload, nexthdr;
+ uchar *auth, *espspi;
Conv *c;
Espcb *ecb;
- uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Esptail *et;
Fs *f;
- uchar *auth;
- ulong spi;
- int payload, nexthdr;
+ Userhdr *uh;
+ Versdep vers;
f = esp->f;
- bp = pullupblock(bp, EsphdrSize+EsptailSize);
+ getverslens(pktipvers(f, &bp), &vers);
+
+ bp = pullupblock(bp, vers.hdrlen + Esptaillen);
if(bp == nil) {
netlog(f, Logesp, "esp: short packet\n");
return;
}
+ getpktspiaddrs(bp->rp, &vers);
- eh = (Esphdr*)(bp->rp);
- spi = nhgetl(eh->espspi);
- v4tov6(raddr, eh->espsrc);
- v4tov6(laddr, eh->espdst);
-
qlock(esp);
/* Look for a conversation structure for this port */
- c = convlookup(esp, spi);
+ c = convlookup(esp, vers.spi);
if(c == nil) {
qunlock(esp);
- netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
icmpnoconv(f, bp);
freeblist(bp);
return;
@@ -346,76 +471,83 @@
qunlock(esp);
ecb = c->ptcl;
- // too hard to do decryption/authentication on block lists
- if(bp->next)
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next != nil)
bp = concatblock(bp);
- if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+ if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
qunlock(c);
- netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- eh = (Esphdr*)(bp->rp);
auth = bp->wp - ecb->ahlen;
- if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+ espspi = vers.version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+
+ /* compute secure hash and authenticate */
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
- laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
- if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+ payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
qunlock(c);
- netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
- laddr, spi, payload, BLEN(bp));
+ netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+ vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
freeb(bp);
return;
}
- if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+ /* decrypt payload */
+ if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
- laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+ vers.raddr, vers.laddr, vers.spi, up->errstr);
freeb(bp);
return;
}
- payload -= EsptailSize;
- et = (Esptail*)(bp->rp + EsphdrSize + payload);
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload);
payload -= et->pad + ecb->espivlen;
nexthdr = et->nexthdr;
if(payload <= 0) {
qunlock(c);
- netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+ vers.raddr, vers.laddr, vers.spi);
freeb(bp);
return;
}
- // trim packet
- bp->rp += EsphdrSize + ecb->espivlen;
+ /* trim packet */
+ bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
bp->wp = bp->rp + payload;
if(ecb->header) {
- // assume UserhdrSize < EsphdrSize
- bp->rp -= UserhdrSize;
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ bp->rp -= Userhdrlen;
uh = (Userhdr*)bp->rp;
- memset(uh, 0, UserhdrSize);
+ memset(uh, 0, Userhdrlen);
uh->nexthdr = nexthdr;
}
+ /* ingress filtering here? */
+
if(qfull(c->rq)){
- netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+ vers.laddr, vers.spi);
freeblist(bp);
}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
- qpass(c->rq, bp);
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp); /* pass packet up the read queue */
}
qunlock(c);
@@ -440,19 +572,19 @@
return e;
}
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
void
espadvise(Proto *esp, Block *bp, char *msg)
{
- Esphdr *h;
Conv *c;
- ulong spi;
+ Versdep vers;
- h = (Esphdr*)(bp->rp);
+ getverslens(pktipvers(esp->f, &bp), &vers);
+ getpktspiaddrs(bp->rp, &vers);
- spi = nhgets(h->espspi);
qlock(esp);
- c = convlookup(esp, spi);
- if(c != nil) {
+ c = convlookup(esp, vers.spi);
+ if(c != nil && !c->ignoreadvice) {
qhangup(c->rq, msg);
qhangup(c->wq, msg);
}
@@ -466,7 +598,7 @@
Esppriv *upriv;
upriv = esp->priv;
- return snprint(buf, len, "%lud %lud\n",
+ return snprint(buf, len, "%llud %lud\n",
upriv->in,
upriv->inerrors);
}
@@ -520,10 +652,10 @@
setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
{
uchar *key;
- int i, nbyte, nchar;
- int c;
+ int c, nbyte, nchar;
+ uint i;
- if(n < 2)
+ if(n < 2 || n > 3)
return "bad format";
for(; alg->name; alg++)
if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
if(alg->name == nil)
return "unknown algorithm";
- if(n != 3)
- return "bad format";
nbyte = (alg->keylen + 7) >> 3;
- nchar = strlen(f[2]);
+ if (n == 2)
+ nchar = 0;
+ else
+ nchar = strlen(f[2]);
+ if(nchar != 2 * nbyte) /* TODO: maybe < is ok */
+ return "key not required length";
+ /* convert hex digits from ascii, in place */
for(i=0; i<nchar; i++) {
c = f[2][i];
if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
else if(c >= 'A' && c <= 'F')
f[2][i] -= 'A'-10;
else
- return "bad character in key";
+ return "non-hex character in key";
}
- key = smalloc(nbyte);
- for(i=0; i<nchar && i*2<nbyte; i++) {
+ /* collapse hex digits into complete bytes in reverse order in key */
+ key = secalloc(nbyte);
+ for(i = 0; i < nchar && i/2 < nbyte; i++) {
c = f[2][nchar-i-1];
if(i&1)
c <<= 4;
- key[i>>1] |= c;
+ key[i/2] |= c;
}
-
+ memset(f[2], 0, nchar);
alg->init(ecb, alg->name, key, alg->keylen);
- free(key);
+ secfree(key);
return nil;
}
+
+/*
+ * null encryption
+ */
+
static int
nullcipher(Espcb*, uchar*, int)
{
@@ -566,7 +708,7 @@
}
static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->espalg = name;
ecb->espblklen = 1;
@@ -581,7 +723,7 @@
}
static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->ahalg = name;
ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
ecb->auth = nullauth;
}
-void
+
+/*
+ * sha1
+ */
+
+static void
seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
DigestState *digest;
- uchar innerhash[SHA1dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = sha1(ipad, 64, nil, nil);
+ digest = sha1(ipad, Hmacblksz, nil, nil);
sha1(t, tlen, innerhash, digest);
- digest = sha1(opad, 64, nil, nil);
+ digest = sha1(opad, Hmacblksz, nil, nil);
sha1(innerhash, SHA1dlen, hash, digest);
}
@@ -615,11 +759,11 @@
static int
shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
{
- uchar hash[SHA1dlen];
int r;
+ uchar hash[SHA1dlen];
memset(hash, 0, SHA1dlen);
- seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -626,40 +770,162 @@
}
static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("shaahinit: bad keylen");
- klen >>= 8; // convert to bytes
+ klen /= BI2BY;
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = shaauth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aeskeysz], ivec[Aeskeysz];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aeskeysz);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aescbccipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aesblk], ivec[Aesblk];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aesblk);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aesctrcipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
DigestState *digest;
- uchar innerhash[MD5dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = md5(ipad, 64, nil, nil);
+ digest = md5(ipad, Hmacblksz, nil, nil);
md5(t, tlen, innerhash, digest);
- digest = md5(opad, 64, nil, nil);
+ digest = md5(opad, Hmacblksz, nil, nil);
md5(innerhash, MD5dlen, hash, digest);
}
@@ -670,7 +936,7 @@
int r;
memset(hash, 0, MD5dlen);
- seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -677,168 +943,102 @@
}
static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("md5ahinit: bad keylen");
- klen >>= 3; // convert to bytes
-
-
+ klen = BITS2BYTES(klen);
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = md5auth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
+
+/*
+ * des, single and triple
+ */
+
static int
descipher(Espcb *ecb, uchar *p, int n)
{
- uchar tmp[8];
- uchar *pp, *tp, *ip, *eip, *ep;
DESstate *ds = ecb->espstate;
- ep = p + n;
if(ecb->incoming) {
- memmove(ds->ivec, p, 8);
- p += 8;
- while(p < ep){
- memmove(tmp, p, 8);
- block_cipher(ds->expanded, p, 1);
- tp = tmp;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; ){
- *p++ ^= *ip;
- *ip++ = *tp++;
- }
- }
+ memmove(ds->ivec, p, Desblk);
+ desCBCdecrypt(p + Desblk, n - Desblk, ds);
} else {
- memmove(p, ds->ivec, 8);
- for(p += 8; p < ep; p += 8){
- pp = p;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; )
- *pp++ ^= *ip++;
- block_cipher(ds->expanded, p, 0);
- memmove(ds->ivec, p, 8);
- }
+ memmove(p, ds->ivec, Desblk);
+ desCBCencrypt(p + Desblk, n - Desblk, ds);
}
return 1;
}
-
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+ DES3state *ds = ecb->espstate;
+
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, Desblk);
+ des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+ } else {
+ memmove(p, ds->ivec, Desblk);
+ des3CBCencrypt(p + Desblk, n - Desblk, ds);
+ }
+ return 1;
+}
+
static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- uchar key[8];
- uchar ivec[8];
- int i;
-
- // bits to bytes
- n = (n+7)>>3;
- if(n > 8)
- n = 8;
+ uchar key[Desblk], ivec[Desblk];
+
+ n = BITS2BYTES(n);
+ if(n > Desblk)
+ n = Desblk;
memset(key, 0, sizeof(key));
memmove(key, k, n);
- for(i=0; i<8; i++)
- ivec[i] = nrand(256);
+ prng(ivec, Desblk);
ecb->espalg = name;
- ecb->espblklen = 8;
- ecb->espivlen = 8;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
+
ecb->cipher = descipher;
- ecb->espstate = smalloc(sizeof(DESstate));
+ ecb->espstate = secalloc(sizeof(DESstate));
setupDESstate(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- Esprc4 *esprc4;
- RC4state tmpstate;
- ulong seq;
- long d, dd;
+ uchar key[3][Desblk], ivec[Desblk];
- if(n < 4)
- return 0;
+ n = BITS2BYTES(n);
+ if(n > Des3keysz)
+ n = Des3keysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Desblk);
+ ecb->espalg = name;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
- esprc4 = ecb->espstate;
- if(ecb->incoming) {
- seq = nhgetl(p);
- p += 4;
- n -= 4;
- d = seq-esprc4->cseq;
- if(d == 0) {
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- if(esprc4->ovalid) {
- dd = esprc4->cseq - esprc4->lgseq;
- if(dd > RC4back)
- esprc4->ovalid = 0;
- }
- } else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
- // this link is hosed
- if(d > RC4forward) {
- strcpy(up->errstr, "rc4cipher: skipped too much");
- return 0;
- }
- esprc4->lgseq = seq;
- if(!esprc4->ovalid) {
- esprc4->ovalid = 1;
- esprc4->oseq = esprc4->cseq;
- memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
- }
- rc4skip(&esprc4->current, d);
- rc4(&esprc4->current, p, n);
- esprc4->cseq = seq+n;
- } else {
-print("reordered packet: %uld %ld\n", seq, d);
- dd = seq - esprc4->oseq;
- if(!esprc4->ovalid || -d > RC4back || dd < 0) {
- strcpy(up->errstr, "rc4cipher: too far back");
- return 0;
- }
- memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
- rc4skip(&tmpstate, dd);
- rc4(&tmpstate, p, n);
- return 1;
- }
-
- // move old state up
- if(esprc4->ovalid) {
- dd = esprc4->cseq - RC4back - esprc4->oseq;
- if(dd > 0) {
- rc4skip(&esprc4->old, dd);
- esprc4->oseq += dd;
- }
- }
- } else {
- hnputl(p, esprc4->cseq);
- p += 4;
- n -= 4;
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- }
- return 1;
+ ecb->cipher = des3cipher;
+ ecb->espstate = secalloc(sizeof(DES3state));
+ setupDES3state(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{
- Esprc4 *esprc4;
- // bits to bytes
- n = (n+7)>>3;
- esprc4 = smalloc(sizeof(Esprc4));
- memset(esprc4, 0, sizeof(Esprc4));
- setupRC4state(&esprc4->current, k, n);
- ecb->espalg = name;
- ecb->espblklen = 4;
- ecb->espivlen = 4;
- ecb->cipher = rc4cipher;
- ecb->espstate = esprc4;
-}
-
+/*
+ * interfacing to devip
+ */
void
espinit(Fs *fs)
{
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -5,9 +5,9 @@
#include "fns.h"
#include "../port/error.h"
+#include "../port/netif.h"
#include "ip.h"
#include "ipv6.h"
-#include "kernel.h"
typedef struct Etherhdr Etherhdr;
struct Etherhdr
@@ -18,10 +18,10 @@
};
static uchar ipbroadcast[IPaddrlen] = {
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
};
static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
static void sendarp(Ipifc *ifc, Arpent *a);
-static void sendgarp(Ipifc *ifc, uchar*);
+static void sendndp(Ipifc *ifc, Arpent *a);
static int multicastea(uchar *ea, uchar *ip);
static void recvarpproc(void*);
-static void resolveaddr6(Ipifc *ifc, Arpent *a);
static void etherpref2addr(uchar *pref, uchar *ea);
Medium ethermedium =
@@ -53,8 +53,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -70,8 +69,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -94,9 +92,6 @@
*/
enum
{
- ETARP = 0x0806,
- ETIP4 = 0x0800,
- ETIP6 = 0x86DD,
ARPREQUEST = 1,
ARPREPLY = 2,
};
@@ -127,128 +122,92 @@
static void
etherbind(Ipifc *ifc, int argc, char **argv)
{
- Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
- char addr[Maxpath]; //char addr[2*KNAMELEN];
- char dir[Maxpath]; //char dir[2*KNAMELEN];
- char *buf;
- int fd, cfd, n;
- char *ptr;
+ char addr[Maxpath], dir[Maxpath];
Etherrock *er;
+ Chan *c;
+ int n;
if(argc < 2)
error(Ebadarg);
- mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
- buf = nil;
+ /*
+ * get mac address
+ */
+ snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+ c = namec(addr, Aopen, OREAD, 0);
if(waserror()){
- if(mchan4 != nil)
- cclose(mchan4);
- if(cchan4 != nil)
- cclose(cchan4);
- if(achan != nil)
- cclose(achan);
- if(mchan6 != nil)
- cclose(mchan6);
- if(cchan6 != nil)
- cclose(cchan6);
- if(buf != nil)
- free(buf);
- nexterror();
+ cclose(c);
+ nexterror();
}
+ n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+ if(n < 0)
+ error(Eio);
+ addr[n] = 0;
+ if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+ error("could not find mac address");
+ cclose(c);
+ poperror();
+ er = smalloc(sizeof(*er));
+ er->read4p = er->read6p = er->arpp = (void*)-1;
+ er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+ er->f = ifc->conv->p->f;
+
+ if(waserror()){
+ if(er->mchan4 != nil)
+ cclose(er->mchan4);
+ if(er->cchan4 != nil)
+ cclose(er->cchan4);
+ if(er->mchan6 != nil)
+ cclose(er->mchan6);
+ if(er->cchan6 != nil)
+ cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
+ free(er);
+ nexterror();
+ }
+
/*
- * open ip converstation
+ * open ipv4 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x800 failed: %s", up->env->errstr);
- mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x800", argv[2]); /* ETIP4 */
+ er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
/*
* make it non-blocking
*/
- devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
/*
- * get mac address and speed
- */
- snprint(addr, sizeof(addr), "%s/stats", dir);
- fd = kopen(addr, OREAD);
- if(fd < 0)
- errorf("can't open ether stats: %s", up->env->errstr);
-
- buf = smalloc(512);
- n = kread(fd, buf, 511);
- kclose(fd);
- if(n <= 0)
- error(Eio);
- buf[n] = 0;
-
- ptr = strstr(buf, "addr: ");
- if(!ptr)
- error(Eio);
- ptr += 6;
- parsemac(ifc->mac, ptr, 6);
-
- ptr = strstr(buf, "mbps: ");
- if(ptr){
- ptr += 6;
- ifc->mbps = atoi(ptr);
- } else
- ifc->mbps = 100;
-
- /*
- * open arp conversation
- */
- snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
- fd = kdial(addr, nil, nil, nil);
- if(fd < 0)
- errorf("dial 0x806 failed: %s", up->env->errstr);
- achan = commonfdtochan(fd, ORDWR, 0, 1);
- kclose(fd);
-
- /*
- * open ip conversation
+ * open ipv6 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x86DD failed: %s", up->env->errstr);
- mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); /* ETIP6 */
+ er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
/*
* make it non-blocking
*/
- devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
- er = smalloc(sizeof(*er));
- er->mchan4 = mchan4;
- er->cchan4 = cchan4;
- er->achan = achan;
- er->mchan6 = mchan6;
- er->cchan6 = cchan6;
- er->f = ifc->conv->p->f;
- ifc->arg = er;
-
- free(buf);
+ /*
+ * open arp conversation
+ */
+ snprint(addr, sizeof(addr), "%s!0x806", argv[2]); /* ETARP */
+ er->achan = chandial(addr, nil, nil, nil);
poperror();
- kproc("etherread4", etherread4, ifc, 0);
- kproc("recvarpproc", recvarpproc, ifc, 0);
- kproc("etherread6", etherread6, ifc, 0);
+ ifc->arg = er;
+
+ kproc("etherread4", etherread4, ifc);
+ kproc("etherread6", etherread6, ifc);
+ kproc("recvarpproc", recvarpproc, ifc);
}
/*
@@ -259,21 +218,35 @@
{
Etherrock *er = ifc->arg;
- if(er->read4p)
+ while(waserror())
+ ;
+
+ /* wait for readers to start */
+ while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->read4p != nil)
postnote(er->read4p, 1, "unbind", 0);
- if(er->read6p)
+ if(er->read6p != nil)
postnote(er->read6p, 1, "unbind", 0);
- if(er->arpp)
+ if(er->arpp != nil)
postnote(er->arpp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for readers to die */
- while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+ while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan4 != nil)
cclose(er->mchan4);
- if(er->achan != nil)
- cclose(er->achan);
if(er->cchan4 != nil)
cclose(er->cchan4);
if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
cclose(er->mchan6);
if(er->cchan6 != nil)
cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
free(er);
}
@@ -297,16 +272,16 @@
/* get mac address of destination */
a = arpget(er->f->arp, bp, version, ifc, ip, mac);
- if(a){
+ if(a != nil){
/* check for broadcast or multicast */
bp = multicastarp(er->f, a, ifc->m, mac);
- if(bp==nil){
+ if(bp == nil){
switch(version){
case V4:
sendarp(ifc, a);
break;
- case V6:
- resolveaddr6(ifc, a);
+ case V6:
+ sendndp(ifc, a);
break;
default:
panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
/* make it a single block with space for the ether header */
bp = padblock(bp, ifc->m->hsize);
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
ifc = a;
er = ifc->arg;
er->read4p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read4p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput4(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read4p = nil;
+ pexit("hangup", 1);
}
@@ -397,29 +369,28 @@
ifc = a;
er = ifc->arg;
er->read6p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read6p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput6(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read6p = nil;
+ pexit("hangup", 1);
}
static void
@@ -477,6 +448,7 @@
Block *bp;
Etherarp *e;
Etherrock *er = ifc->arg;
+ uchar targ[IPv4addrlen], src[IPv4addrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
return;
}
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+
/* remove all but the last message */
while((bp = a->hold) != nil){
if(bp == a->last)
@@ -492,18 +467,20 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
+ memmove(targ, a->ip+IPv4off, IPv4addrlen);
arprelease(er->f->arp, a);
+ if(!ipv4local(ifc, src, 0, targ))
+ return;
+
n = sizeof(Etherarp);
- if(n < a->type->mintu)
- n = a->type->mintu;
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
bp = allocb(n);
memset(bp->rp, 0, n);
e = (Etherarp*)bp->rp;
- memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
- ipv4local(ifc, e->spa);
+ memmove(e->tpa, targ, sizeof(e->tpa));
+ memmove(e->spa, src, sizeof(e->spa));
memmove(e->sha, ifc->mac, sizeof(e->sha));
memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("arp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
{
- int sflag;
Block *bp;
Etherrock *er = ifc->arg;
- uchar ipsrc[IPaddrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
- a->rtime = NOW + ReTransTimer;
- if(a->rxtsrem <= 0) {
- arprelease(er->f->arp, a);
- return;
- }
-
- a->rxtsrem--;
- arprelease(er->f->arp, a);
-
- if(sflag = ipv6anylocal(ifc, ipsrc))
- icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+ ndpsendsol(er->f, ifc, a); /* unlocks arp */
}
/*
@@ -569,10 +530,6 @@
Etherarp *e;
Etherrock *er = ifc->arg;
- /* don't arp for our initial non address */
- if(ipcmp(ip, IPnoaddr) == 0)
- return;
-
n = sizeof(Etherarp);
if(n < ifc->m->mintu)
n = ifc->m->mintu;
@@ -593,15 +550,13 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("garp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
recvarp(Ipifc *ifc)
{
- int n;
+ int n, forme;
Block *ebp, *rbp;
Etherarp *e, *r;
uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
Etherrock *er = ifc->arg;
ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
- if(ebp == nil) {
- print("arp: rcv: %r\n");
+ if(ebp == nil)
return;
- }
+ rlock(ifc);
+
e = (Etherarp*)ebp->rp;
switch(nhgets(e->op)) {
default:
@@ -620,9 +575,13 @@
break;
case ARPREPLY:
+ /* make sure not to enter multi/broadcast address */
+ if(e->sha[0] & 1)
+ break;
+
/* check for machine using my ip address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
print("arprep: 0x%E/0x%E also has ip addr %V\n",
e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
}
}
- /* make sure we're not entering broadcast addresses */
- if(ipcmp(ip, ipbroadcast) == 0 ||
- !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
- print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
- e->s, e->sha, e->spa);
- break;
- }
-
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+ /* refresh what we know about sender */
+ arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
break;
case ARPREQUEST:
+ /* don't reply to multi/broadcast addresses */
+ if(e->sha[0] & 1)
+ break;
+
/* don't answer arps till we know who we are */
- if(ifc->lifc == 0)
+ if(ifc->lifc == nil)
break;
/* check for machine using my ip or ether address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
- if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+ if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
/* print only once */
- print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+ print("arpreq: 0x%E also has ip addr %V\n",
+ e->sha, e->spa);
memmove(eprinted, e->spa, sizeof(e->spa));
}
+ break;
}
} else {
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
- print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+ print("arpreq: %V also has ether addr %E\n",
+ e->spa, e->sha);
break;
}
}
- /* refresh what we know about sender */
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
- /* answer only requests for our address or systems we're proxying for */
+ /*
+ * when request is for our address or systems we're proxying for,
+ * enter sender's address into arp table and reply, otherwise just
+ * refresh the sender's address.
+ */
v4tov6(ip, e->tpa);
- if(!iplocalonifc(ifc, ip))
- if(!ipproxyifc(er->f, ifc, ip))
+ forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+ if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
break;
n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
memmove(r->s, ifc->mac, sizeof(r->s));
rbp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
- if(n < 0)
- print("arp: write: %r\n");
+ runlock(ifc);
+ freeb(ebp);
+
+ devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+ return;
}
+
+ runlock(ifc);
freeb(ebp);
}
@@ -707,7 +671,7 @@
er->arpp = up;
if(waserror()){
- er->arpp = 0;
+ er->arpp = nil;
pexit("hangup", 1);
}
for(;;)
@@ -749,14 +713,9 @@
multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
{
/* is it broadcast? */
- switch(ipforme(f, a->ip)){
- case Runi:
- return nil;
- case Rbcast:
- memset(mac, 0xff, 6);
+ if(ipforme(f, a->ip) == Rbcast){
+ memset(mac, 0xff, medium->maclen);
return arpresolve(f->arp, a, medium, mac);
- default:
- break;
}
/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
}
-static void
+static void
etherpref2addr(uchar *pref, uchar *ea)
{
- pref[8] = ea[0] | 0x2;
- pref[9] = ea[1];
+ pref[8] = ea[0] ^ 0x2;
+ pref[9] = ea[1];
pref[10] = ea[2];
pref[11] = 0xFF;
pref[12] = 0xFE;
@@ -789,4 +748,41 @@
pref[13] = ea[3];
pref[14] = ea[4];
pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+ static char tdad[] = "dad6";
+ uchar a[IPaddrlen];
+
+ if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+ return;
+
+ if(isv4(ip)){
+ sendgarp(ifc, ip);
+ return;
+ }
+
+ if((lifc->type&Rv4) != 0)
+ return;
+
+ if(!lifc->tentative){
+ icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+ return;
+ }
+
+ if(ipcmp(lifc->local, ip) != 0)
+ return;
+
+ /* temporarily add route for duplicate address detection */
+ ipv62smcast(a, ip);
+ addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ if(waserror()){
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ nexterror();
+ }
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
}
--- a/os/ip/gre.c
+++ b/os/ip/gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,10 +10,7 @@
#include "ip.h"
-#define DPRINT if(0)print
-
-enum
-{
+enum {
GRE_IPONLY = 12, /* size of ip header */
GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
IP_GREPROTO = 47,
@@ -18,10 +18,33 @@
GRErxms = 200,
GREtickms = 100,
GREmaxxmit = 10,
+
+ K = 1024,
+ GREqlen = 256 * K,
+
+ GRE_cksum = 0x8000,
+ GRE_routing = 0x4000,
+ GRE_key = 0x2000,
+ GRE_seq = 0x1000,
+
+ Nring = 1 << 10, /* power of two, please */
+ Ringmask = Nring - 1,
+
+ GREctlraw = 0,
+ GREctlcooked,
+ GREctlretunnel,
+ GREctlreport,
+ GREctldlsuspend,
+ GREctlulsuspend,
+ GREctldlresume,
+ GREctlulresume,
+ GREctlforward,
+ GREctlulkey,
+ Ncmds,
};
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
/* ip header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
@@ -28,7 +51,7 @@
uchar len[2]; /* packet length (including headers) */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl;
uchar proto; /* Protocol */
uchar cksum[2]; /* checksum */
uchar src[4]; /* Ip source */
@@ -37,21 +60,115 @@
/* gre header */
uchar flags[2];
uchar eproto[2]; /* encapsulation protocol */
-} GREhdr;
+};
typedef struct GREpriv GREpriv;
-struct GREpriv
-{
- int raw; /* Raw GRE mode */
-
+struct GREpriv{
/* non-MIB stats */
- ulong csumerr; /* checksum errors */
- ulong lenerr; /* short packet */
+ uvlong lenerr; /* short packet */
};
+typedef struct Bring Bring;
+struct Bring{
+ Block *ring[Nring];
+ long produced;
+ long consumed;
+};
+
+typedef struct GREconv GREconv;
+struct GREconv{
+ int raw;
+
+ /* Retunnelling information. v4 only */
+ uchar north[4]; /* HA */
+ uchar south[4]; /* Base station */
+ uchar hoa[4]; /* Home address */
+ uchar coa[4]; /* Careof address */
+ ulong seq; /* Current sequence # */
+ int dlsusp; /* Downlink suspended? */
+ int ulsusp; /* Uplink suspended? */
+ ulong ulkey; /* GRE key */
+
+ QLock lock; /* Lock for rings */
+ Bring dlpending; /* Ring of pending packets */
+ Bring dlbuffered; /* Received while suspended */
+ Bring ulbuffered; /* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+ uchar *rp;
+ ulong seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+ char *cmd;
+ int argc;
+ char *(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw] = { "raw", 1, grectlraw, },
+[GREctlcooked] = { "cooked", 1, grectlcooked, },
+[GREctlretunnel]= { "retunnel", 5, grectlretunnel, },
+[GREctlreport] = { "report", 2, grectlreport, },
+[GREctldlsuspend]= { "dlsuspend", 1, grectldlsuspend,},
+[GREctlulsuspend]= { "ulsuspend", 1, grectlulsuspend,},
+[GREctldlresume]= { "dlresume", 1, grectldlresume, },
+[GREctlulresume]= { "ulresume", 1, grectlulresume, },
+[GREctlforward] = { "forward", 2, grectlforward, },
+[GREctlulkey] = { "ulkey", 2, grectlulkey, },
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)
+{
+ Block *bp;
+
+ if(r->consumed == r->produced)
+ return nil;
+
+ bp = r->ring[r->consumed & Ringmask];
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+ return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)
+{
+ Block *tbp;
+
+ if(r->produced - r->consumed > Ringmask){
+ /* Full! */
+ tbp = r->ring[r->produced & Ringmask];
+ assert(tbp);
+ freeb(tbp);
+ r->consumed++;
+ }
+ r->ring[r->produced & Ringmask] = bp;
+ r->produced++;
+}
+
+static char *
greconnect(Conv *c, char **argv, int argc)
{
Proto *p;
@@ -91,7 +208,7 @@
static void
grecreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->rq = qopen(GREqlen, Qmsg, 0, c);
c->wq = qbypass(grekick, c);
}
@@ -98,44 +215,88 @@
static int
grestate(Conv *c, char *state, int n)
{
- USED(c);
- return snprint(state, n, "%s", "Datagram");
+ GREconv *grec;
+ char *ep, *p;
+
+ grec = c->ptcl;
+ p = state;
+ ep = p + n;
+ p = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+ "pending %uld %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+ c->inuse? "Open ": "Closed ",
+ grec->raw? "raw ": "",
+ grec->dlsusp? "DL suspended ": "",
+ grec->ulsusp? "UL suspended ": "",
+ grec->hoa, grec->north, grec->south, grec->seq,
+ grec->dlpending.consumed, grec->dlpending.produced,
+ grec->dlbuffered.consumed, grec->dlbuffered.produced,
+ grec->ulbuffered.consumed, grec->ulbuffered.produced,
+ grec->ulkey);
+ return p - state;
}
static char*
greannounce(Conv*, char**, int)
{
- return "pktifc does not support announce";
+ return "gre does not support announce";
}
static void
greclose(Conv *c)
{
- qclose(c->rq);
- qclose(c->wq);
- qclose(c->eq);
+ GREconv *grec;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ /* Make sure we don't forward any more packets */
+ memset(grec->hoa, 0, sizeof grec->hoa);
+ memset(grec->north, 0, sizeof grec->north);
+ memset(grec->south, 0, sizeof grec->south);
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->dlpending)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->dlbuffered)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->ulbuffered)) != nil)
+ freeb(bp);
+
+ grec->dlpending.produced = grec->dlpending.consumed = 0;
+ grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+ grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+ qunlock(&grec->lock);
+
+ grec->raw = 0;
+ grec->seq = 0;
+ grec->dlsusp = grec->ulsusp = 1;
+
+ qhangup(c->rq, sessend);
+ qhangup(c->wq, sessend);
+ qhangup(c->eq, sessend);
ipmove(c->laddr, IPnoaddr);
ipmove(c->raddr, IPnoaddr);
- c->lport = 0;
- c->rport = 0;
+ c->lport = c->rport = 0;
}
-int drop;
-
static void
grekick(void *x, Block *bp)
{
- Conv *c = x;
- GREhdr *ghp;
+ Conv *c;
+ GREconv *grec;
+ GREhdr *gre;
uchar laddr[IPaddrlen], raddr[IPaddrlen];
if(bp == nil)
return;
+ c = x;
+ grec = c->ptcl;
+
/* Make space to fit ip header (gre header already there) */
bp = padblock(bp, GRE_IPONLY);
- if(bp == nil)
- return;
/* make sure the message has a GRE header */
bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
if(bp == nil)
return;
- ghp = (GREhdr *)(bp->rp);
- ghp->vihl = IP_VER4;
+ gre = (GREhdr *)bp->rp;
+ gre->vihl = IP_VER4;
- if(!((GREpriv*)c->p->priv)->raw){
- v4tov6(raddr, ghp->dst);
+ if(grec->raw == 0){
+ v4tov6(raddr, gre->dst);
if(ipcmp(raddr, v4prefix) == 0)
- memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
- v4tov6(laddr, ghp->src);
+ memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, gre->src);
if(ipcmp(laddr, v4prefix) == 0){
if(ipcmp(c->laddr, IPnoaddr) == 0)
- findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
- memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ /* pick interface closest to dest */
+ findlocalip(c->p->f, c->laddr, raddr);
+ memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
}
- hnputs(ghp->eproto, c->rport);
+ hnputs(gre->eproto, c->rport);
}
- ghp->proto = IP_GREPROTO;
- ghp->frag[0] = 0;
- ghp->frag[1] = 0;
+ gre->proto = IP_GREPROTO;
+ gre->frag[0] = gre->frag[1] = 0;
+ grepdout++;
+ grebdout += BLEN(bp);
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
{
- int len;
- GREhdr *ghp;
- Conv *c, **p;
- ushort eproto;
+ Metablock *m;
+ GREconv *grec;
+ GREhdr *gre;
+ int hdrlen, suspended, extra;
+ ushort flags;
+ ulong seq;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1){
+ freeb(bp);
+ return;
+ }
+
+ /*
+ * We've received a packet with a GRE header and we need to
+ * re-adjust the packet header to strip all unwanted parts
+ * but leave room for only a sequence number.
+ */
+ grec = c->ptcl;
+ flags = nhgets(gre->flags);
+ hdrlen = 0;
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%V routing info present. Discarding packet", gre->src);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ /*
+ * The outgoing packet only has the sequence number set. Make room
+ * for the sequence number.
+ */
+ if(hdrlen != sizeof(ulong)){
+ extra = hdrlen - sizeof(ulong);
+ if(extra < 0 && bp->rp - bp->base < -extra){
+ print("gredownlink: cannot add sequence number\n");
+ freeb(bp);
+ return;
+ }
+ memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+ bp->rp += extra;
+ assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+ gre = (GREhdr *)bp->rp;
+ }
+ seq = grec->seq++;
+ hnputs(gre->flags, GRE_seq);
+ hnputl(bp->rp + sizeof(GREhdr), seq);
+
+ /*
+ * Keep rp and seq at the base. ipoput4 consumes rp for
+ * refragmentation.
+ */
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ m->rp = bp->rp;
+ m->seq = seq;
+
+ /*
+ * Here we make a decision what we're doing with the packet. We're
+ * doing this w/o holding a lock which means that later on in the
+ * process we may discover we've done the wrong thing. I don't want
+ * to call ipoput with the lock held.
+ */
+restart:
+ suspended = grec->dlsusp;
+ if(suspended){
+ if(!canqlock(&grec->lock)){
+ /*
+ * just give up. too bad, we lose a packet. this
+ * is just too hard and my brain already hurts.
+ */
+ freeb(bp);
+ return;
+ }
+
+ if(!grec->dlsusp){
+ /*
+ * suspend race. We thought we were suspended, but
+ * we really weren't.
+ */
+ qunlock(&grec->lock);
+ goto restart;
+ }
+
+ /* Undo the incorrect ref count addition */
+ addring(&grec->dlbuffered, bp);
+ qunlock(&grec->lock);
+ return;
+ }
+
+ /*
+ * When we get here, we're not suspended. Proceed to send the
+ * packet.
+ */
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+ grepdout++;
+ grebdout += BLEN(bp);
+
+ /*
+ * Now make sure we didn't do the wrong thing.
+ */
+ if(!canqlock(&grec->lock)){
+ freeb(bp); /* The packet just goes away */
+ return;
+ }
+
+ /* We did the right thing */
+ addring(&grec->dlpending, bp);
+ qunlock(&grec->lock);
+}
+
+static void
+greuplink(Conv *c, Block *bp)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ ushort flags;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1)
+ return;
+
+ grec = c->ptcl;
+ memmove(gre->src, grec->coa, sizeof gre->src);
+ memmove(gre->dst, grec->north, sizeof gre->dst);
+
+ /*
+ * Add a key, if needed.
+ */
+ if(grec->ulkey){
+ flags = nhgets(gre->flags);
+ if(flags & (GRE_cksum|GRE_routing)){
+ print("%V routing info present. Discarding packet\n",
+ gre->src);
+ freeb(bp);
+ return;
+ }
+
+ if((flags & GRE_key) == 0){
+ /* Make room for the key */
+ if(bp->rp - bp->base < sizeof(ulong)){
+ print("%V can't add key\n", gre->src);
+ freeb(bp);
+ return;
+ }
+
+ bp->rp -= 4;
+ memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+ gre = (GREhdr *)bp->rp;
+ hnputs(gre->flags, flags | GRE_key);
+ }
+
+ /* Add the key */
+ hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+ }
+
+ if(!canqlock(&grec->lock)){
+ freeb(bp);
+ return;
+ }
+
+ if(grec->ulsusp)
+ addring(&grec->ulbuffered, bp);
+ else{
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ grepuout++;
+ grebuout += BLEN(bp);
+ }
+ qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+ int len, hdrlen;
+ ushort eproto, flags;
uchar raddr[IPaddrlen];
+ Conv *c, **p;
+ GREconv *grec;
+ GREhdr *gre;
GREpriv *gpriv;
+ Ip4hdr *ip;
- gpriv = gre->priv;
- ghp = (GREhdr*)(bp->rp);
+ /*
+ * We don't want to deal with block lists. Ever. The problem is
+ * that when the block is forwarded, devether.c puts the block into
+ * a queue that also uses ->next. Just do not use ->next here!
+ */
+ if(bp->next != nil)
+ bp = pullupblock(bp, blocklen(bp));
- v4tov6(raddr, ghp->src);
- eproto = nhgets(ghp->eproto);
- qlock(gre);
+ gre = (GREhdr *)bp->rp;
+ if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+ freeb(bp);
+ return;
+ }
- /* Look for a conversation structure for this port and address */
- c = nil;
- for(p = gre->conv; *p; p++) {
+ v4tov6(raddr, gre->src);
+ eproto = nhgets(gre->eproto);
+ flags = nhgets(gre->flags);
+ hdrlen = sizeof(GREhdr);
+
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%I routing info present. Discarding packet\n", raddr);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ qlock(proto);
+
+ if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+ ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+ /*
+ * Look for a conversation structure for this port and address, or
+ * match the retunnel part, or match on the raw flag.
+ */
+ for(p = proto->conv; *p; p++) {
+ c = *p;
+
+ if(c->inuse == 0)
+ continue;
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+ grepdin++;
+ grebdin += BLEN(bp);
+ gredownlink(c, bp);
+ qunlock(proto);
+ return;
+ }
+
+ if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+ grepuin++;
+ grebuin += BLEN(bp);
+ greuplink(c, bp);
+ qunlock(proto);
+ return;
+ }
+ }
+ }
+
+
+ /*
+ * when we get here, none of the forwarding tunnels matched. now
+ * try to match on raw and conversational sessions.
+ */
+ for(c = nil, p = proto->conv; *p; p++) {
c = *p;
+
if(c->inuse == 0)
continue;
- if(c->rport == eproto &&
- (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(c->rport == eproto &&
+ (grec->raw || ipcmp(c->raddr, raddr) == 0))
break;
}
- if(*p == nil) {
- qunlock(gre);
- freeblist(bp);
+ qunlock(proto);
+
+ if(*p == nil){
+ freeb(bp);
return;
}
- qunlock(gre);
-
/*
* Trim the packet down to data size
*/
- len = nhgets(ghp->len) - GRE_IPONLY;
+ len = nhgets(gre->len) - GRE_IPONLY;
if(len < GRE_IPPLUSGRE){
- freeblist(bp);
+ freeb(bp);
return;
}
+
bp = trimblock(bp, GRE_IPONLY, len);
if(bp == nil){
+ gpriv = proto->priv;
gpriv->lenerr++;
return;
}
- /*
- * Can't delimit packet so pull it all into one block.
- */
- if(qlen(c->rq) > 64*1024)
- freeblist(bp);
- else{
- bp = concatblock(bp);
- if(bp == 0)
- panic("greiput");
- qpass(c->rq, bp);
- }
+ qpass(c->rq, bp);
}
int
@@ -234,29 +649,258 @@
GREpriv *gpriv;
gpriv = gre->priv;
+ return snprint(buf, len,
+ "gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+ grepdin, grepdout, grepuin, grepuout,
+ grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
- return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *	/* ctl handler: mark this conversation as raw; always returns nil (success) */
+grectlraw(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;	/* GRE state is kept per conversation in Conv.ptcl */
+ grec->raw = 1;	/* replaces the old per-protocol gpriv->raw = 1; greiput tests grec->raw when matching */
+ return nil;
}
-char*
-grectl(Conv *c, char **f, int n)
+static char *	/* ctl handler: clear this conversation's raw flag; always returns nil (success) */
+grectlcooked(Conv *c, int, char **)
{
- GREpriv *gpriv;
+ GREconv *grec;
- gpriv = c->p->priv;
- if(n == 1){
- if(strcmp(f[0], "raw") == 0){
- gpriv->raw = 1;
- return nil;
- }
- else if(strcmp(f[0], "cooked") == 0){
- gpriv->raw = 0;
- return nil;
- }
+ grec = c->ptcl;	/* GRE state is kept per conversation in Conv.ptcl */
+ grec->raw = 0;	/* replaces the old per-protocol gpriv->raw = 0 */
+ return nil;
+}
+
+static char *	/* ctl handler: set up a retunnel from argv[1..4] = hoa, north, south, coa; returns nil or an error string */
+grectlretunnel(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+ uchar ipaddr[4];
+
+ grec = c->ptcl;
+ if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))	/* a non-null hoa means a tunnel is already configured */
+ return "tunnel already set up";
+
+ v4parseip(ipaddr, argv[1]);
+ if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)	/* only hoa is validated; a null address is rejected */
+ return "bad hoa";
+ memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+ v4parseip(ipaddr, argv[2]);	/* north, south and coa are stored without any check on the parse result */
+ memmove(grec->north, ipaddr, sizeof grec->north);
+ v4parseip(ipaddr, argv[3]);
+ memmove(grec->south, ipaddr, sizeof grec->south);
+ v4parseip(ipaddr, argv[4]);
+ memmove(grec->coa, ipaddr, sizeof grec->coa);
+ grec->ulsusp = 1;	/* a new tunnel starts with uplink suspended ... */
+ grec->dlsusp = 0;	/* ... and downlink not suspended */
+
+ return nil;
+}
+
+static char *	/* ctl handler: free dlpending blocks whose Metablock seq precedes argv[1]; always returns nil */
+grectlreport(Conv *c, int, char **argv)
+{
+ ulong seq;
+ Block *bp;
+ Bring *r;
+ GREconv *grec;
+ Metablock *m;
+
+ grec = c->ptcl;
+ seq = strtoul(argv[1], nil, 0);	/* base 0: accepts decimal, octal or hex; parse errors are not detected */
+
+ qlock(&grec->lock);
+ r = &grec->dlpending;
+ while(r->produced - r->consumed > 0){	/* ring is non-empty */
+ bp = r->ring[r->consumed & Ringmask];	/* oldest entry; counters are masked into the ring */
+
+ assert(bp && bp->rp - bp->base >= sizeof(Metablock));	/* there must be room for a Metablock before rp */
+ m = (Metablock *)bp->base;	/* the Metablock is read from the very start of the block's buffer */
+ if((long)(seq - m->seq) <= 0)	/* signed difference: stop at the first block with m->seq not before seq */
+ break;
+
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+
+ freeb(bp);
}
- return "unknown control request";
+ qunlock(&grec->lock);
+ return nil;
}
+static char *	/* ctl handler: set the downlink-suspended flag; returns an error string if it is already set */
+grectldlsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->dlsusp)	/* NOTE(review): tested and set without grec->lock, unlike grectldlresume */
+ return "already suspended";
+
+ grec->dlsusp = 1;
+ return nil;
+}
+
+static char *	/* ctl handler: set the uplink-suspended flag; returns an error string if it is already set */
+grectlulsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->ulsusp)	/* NOTE(review): tested and set without grec->lock, unlike grectlulresume */
+ return "already suspended";
+
+ grec->ulsusp = 1;
+ return nil;
+}
+
+static char *	/* ctl handler: send everything in dlbuffered, keep the originals on dlpending, then clear dlsusp */
+grectldlresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);	/* every return path releases the lock */
+ return "not suspended";
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;	/* header at rp supplies ttl and tos for the send */
+ qunlock(&grec->lock);	/* the lock is not held across ipoput4 */
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);	/* a copy is sent with ttl decremented by one */
+
+ qlock(&grec->lock);
+ addring(&grec->dlpending, bp);	/* the original stays pending; grectlreport frees it later */
+ }
+ grec->dlsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *	/* ctl handler: send everything in ulbuffered and clear ulsusp; always returns nil */
+grectlulresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);	/* unlike grectldlresume there is no "not suspended" check */
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;	/* header at rp supplies ttl and tos for the send */
+
+ qunlock(&grec->lock);	/* the lock is not held across ipoput4 */
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);	/* bp itself is passed on; no copy is kept here */
+ qlock(&grec->lock);
+ }
+ grec->ulsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+ Block *bp;
+ GREconv *grec;
+ GREhdr *gre;
+ Metablock *m;
+
+ grec = c->ptcl;
+
+ v4parseip(grec->south, argv[1]);
+ memmove(grec->north, grec->south, sizeof grec->north);
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+ grec->dlsusp = 0;
+ grec->ulsusp = 0;
+
+ while((bp = getring(&grec->dlpending)) != nil){
+
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ assert(m->rp >= bp->base && m->rp < bp->lim);
+
+ bp->rp = m->rp;
+
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *	/* ctl handler: store argv[1] as this conversation's ulkey; always returns nil */
+grectlulkey(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->ulkey = strtoul(argv[1], nil, 0);	/* base 0: accepts decimal, octal or hex; parse errors are not detected */
+ return nil;
+}
+
+char *	/* ctl entry point: look up f[0] in grectls and run its handler; returns nil or an error string */
+grectl(Conv *c, char **f, int n)
+{
+ int i;
+
+ if(n < 1)
+ return "too few arguments";
+
+ for(i = 0; i < Ncmds; i++)	/* linear search of the command table by name */
+ if(strcmp(f[0], grectls[i].cmd) == 0)
+ break;
+
+ if(i == Ncmds)
+ return "no such command";
+ if(grectls[i].argc != 0 && grectls[i].argc != n)	/* table argc of 0 skips the count check; n includes the command word */
+ return "incorrect number of arguments";
+
+ return grectls[i].f(c, n, f);	/* handlers receive the whole vector, so their argv[0] is the command name */
+}
+
void
greinit(Fs *fs)
{
@@ -276,7 +920,7 @@
gre->stats = grestats;
gre->ipproto = IP_GREPROTO;
gre->nc = 64;
- gre->ptclsize = 0;
+ gre->ptclsize = sizeof(GREconv);
Fsproto(fs, gre);
}
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -44,11 +44,6 @@
Maxtype = 18,
};
-enum
-{
- MinAdvise = 24, /* minimum needed for us to advise another protocol */
-};
-
char *icmpnames[Maxtype+1] =
{
[EchoReply] "EchoReply",
@@ -70,6 +65,8 @@
IP_ICMPPROTO = 1,
ICMP_IPSIZE = 20,
ICMP_HDRSIZE = 8,
+
+ MinAdvise = ICMP_IPSIZE+4, /* minimum needed for us to advise another protocol */
};
enum
@@ -113,7 +110,7 @@
c->wq = qbypass(icmpkick, c);
}
-extern char*
+char*
icmpconnect(Conv *c, char **argv, int argc)
{
char *e;
@@ -126,11 +123,11 @@
return nil;
}
-extern int
+int
icmpstate(Conv *c, char *state, int n)
{
USED(c);
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
"Datagram",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
);
}
-extern char*
+char*
icmpannounce(Conv *c, char **argv, int argc)
{
char *e;
@@ -150,7 +147,7 @@
return nil;
}
-extern void
+void
icmpclose(Conv *c)
{
qclose(c->rq);
@@ -169,8 +166,7 @@
if(bp == nil)
return;
-
- if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+ if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
freeblist(bp);
return;
}
@@ -190,21 +186,50 @@
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int	/* returns 1 if ip4 is non-multicast and ipforme() gives 0 or exactly Runi, else 0 */
+ip4reply(Fs *f, uchar ip4[4])
{
+ uchar addr[IPaddrlen];
+ int i;
+
+ v4tov6(addr, ip4);	/* ipismulticast and ipforme are called with the IPaddrlen form */
+ if(ipismulticast(addr))
+ return 0;
+ i = ipforme(f, addr);
+ return i == 0 || i == Runi;	/* presumably 0 means "not one of our addresses" - confirm against ipforme */
+}
+
+static int	/* returns 1 only if ip4 is non-multicast and ipforme() gives exactly Runi, else 0 */
+ip4me(Fs *f, uchar ip4[4])
+{
+ uchar addr[IPaddrlen];
+
+ v4tov6(addr, ip4);	/* ipismulticast and ipforme are called with the IPaddrlen form */
+ if(ipismulticast(addr))
+ return 0;
+ return ipforme(f, addr) == Runi;	/* stricter than ip4reply: a result of 0 is not accepted */
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
Block *nbp;
Icmp *p, *np;
+ uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
+ if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+ return;
- netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+ ia, p->src, p->dst);
+
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
np = (Icmp *)nbp->rp;
np->vihl = IP_VER4;
+ memmove(np->src, ia, sizeof(np->src));
memmove(np->dst, p->src, sizeof(np->dst));
- v6tov4(np->src, ia);
memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
np->type = TimeExceed;
np->code = 0;
@@ -214,7 +239,6 @@
memset(np->cksum, 0, sizeof(np->cksum));
hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
}
static void
@@ -222,20 +246,10 @@
{
Block *nbp;
Icmp *p, *np;
- int i;
- uchar addr[IPaddrlen];
p = (Icmp *)bp->rp;
-
- /* only do this for unicast sources and destinations */
- v4tov6(addr, p->dst);
- i = ipforme(f, addr);
- if((i&Runi) == 0)
+ if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
return;
- v4tov6(addr, p->src);
- i = ipforme(f, addr);
- if(i != 0 && (i&Runi) == 0)
- return;
netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmpnoconv(Fs *f, Block *bp)
{
icmpunreachable(f, bp, 3, 0);
}
-extern void
+void
icmpcantfrag(Fs *f, Block *bp, int mtu)
{
icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
static void
goticmpkt(Proto *icmp, Block *bp)
{
+ ushort recid;	/* ICMP id from the packet, compared with each conversation's lport */
+ uchar dst[IPaddrlen], src[IPaddrlen];	/* both packet addresses widened with v4tov6 */
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
- v4tov6(dst, p->src);
+ v4tov6(dst, p->dst);	/* dst now really holds the destination; the old code stored p->src in it */
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){	/* no early exit: every conversation is examined */
if(s->lport == recid)
- if(ipcmp(s->raddr, dst) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)	/* match on local == packet dst or remote == packet src */
+ qpass(s->rq, copyblock(bp, blocklen(bp)));	/* each match gets its own copy; bp is freed after the loop */
}
freeblist(bp);
}
static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
{
Icmp *q;
uchar ip[4];
q = (Icmp *)bp->rp;
+ if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+ return nil;
+
q->vihl = IP_VER4;
memmove(ip, q->src, sizeof(q->dst));
memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
[3] "port unreachable",
[4] "fragmentation needed and DF set",
[5] "source route failed",
+[6] "destination network unknown",
+[7] "destination host unknown",
+[8] "source host isolated",
+[9] "network administratively prohibited",
+[10] "host administratively prohibited",
+[11] "network unreachable for tos",
+[12] "host unreachable for tos",
+[13] "communication administratively prohibited",
+[14] "host precedence violation",
+[15] "precedence cutoff in effect",
};
static void
icmpiput(Proto *icmp, Ipifc*, Block *bp)
{
- int n, iplen;
+ int n;
Icmp *p;
Block *r;
Proto *pr;
@@ -332,12 +355,10 @@
Icmppriv *ipriv;
ipriv = icmp->priv;
-
ipriv->stats[InMsgs]++;
- p = (Icmp *)bp->rp;
- netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
- n = blocklen(bp);
+ bp = concatblock(bp);
+ n = BLEN(bp);
if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
ipriv->stats[InErrors]++;
ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
goto raise;
}
- iplen = nhgets(p->length);
- if(iplen > n || (iplen % 1)){
- ipriv->stats[LenErrs]++;
+ if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
ipriv->stats[InErrors]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto raise;
- }
- if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
- ipriv->stats[InErrors]++;
ipriv->stats[CsumErrs]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto raise;
}
+ p = (Icmp *)bp->rp;
+ netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+ (p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+ p->type, p->code);
if(p->type <= Maxtype)
ipriv->in[p->type]++;
switch(p->type) {
case EchoRequest:
- if (iplen < n)
- bp = trimblock(bp, 0, iplen);
- r = mkechoreply(bp);
+ r = mkechoreply(bp, icmp->f);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case Unreachable:
- if(p->code > 5)
- msg = unreachcode[1];
- else
+ if(p->code >= nelem(unreachcode)) {
+ snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+ p->src, p->dst, p->code);
+ msg = m2;
+ } else
msg = unreachcode[p->code];
+ Advise:
bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs]++;
goto raise;
}
p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
+ if((nhgets(p->frag) & IP_FO) == 0){
+ pr = Fsrcvpcolx(icmp->f, p->proto);
+ if(pr != nil && pr->advise != nil) {
+ (*pr->advise)(pr, bp, msg);
+ return;
+ }
}
-
bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
goticmpkt(icmp, bp);
break;
case TimeExceed:
if(p->code == 0){
- sprint(m2, "ttl exceeded at %V", p->src);
-
- bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
- ipriv->stats[LenErrs]++;
- goto raise;
- }
- p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
- return;
- }
- bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+ goto Advise;
}
-
goticmpkt(icmp, bp);
break;
default:
@@ -419,22 +428,25 @@
freeblist(bp);
}
-void
+static void
icmpadvise(Proto *icmp, Block *bp, char *msg)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, src) == 0)
if(ipcmp(s->raddr, dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -443,7 +455,7 @@
freeblist(bp);
}
-int
+static int
icmpstats(Proto *icmp, char *buf, int len)
{
Icmppriv *priv;
@@ -456,7 +468,7 @@
for(i = 0; i < Nstats; i++)
p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
for(i = 0; i <= Maxtype; i++){
- if(icmpnames[i])
+ if(icmpnames[i] != nil)
p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
else
p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
}
return p - buf;
}
-
-int
-icmpgc(Proto *icmp)
-{
- return natgc(icmp->ipproto);
-}
-
+
void
icmpinit(Fs *fs)
{
@@ -487,7 +493,7 @@
icmp->stats = icmpstats;
icmp->ctl = nil;
icmp->advise = icmpadvise;
- icmp->gc = icmpgc;
+ icmp->gc = nil;
icmp->ipproto = IP_ICMPPROTO;
icmp->nc = 128;
icmp->ptclsize = 0;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,41 +10,36 @@
#include "ip.h"
#include "ipv6.h"
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+ InMsgs6,
+ InErrors6,
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6,
+ IcmpCodeErrs6,
+ TargetErrs6,
+ OptlenErrs6,
+ AddrmxpErrs6,
+ RouterAddrErrs6,
-struct ICMPpkt {
- uchar type;
- uchar code;
- uchar cksum[2];
- uchar icmpid[2];
- uchar seq[2];
+ Nstats6,
};
-struct IPICMP {
- Ip6hdr;
- ICMPpkt;
+enum {
+ ICMP_USEAD6 = 40,
};
-struct NdiscC
-{
- IPICMP;
- uchar target[IPaddrlen];
+enum {
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
};
-struct Ndpkt
-{
- NdiscC;
- uchar otype;
- uchar olen; // length in units of 8 octets(incl type, code),
- // 1 for IEEE 802 addresses
- uchar lnaddr[6]; // link-layer address
-};
-
-enum {
- // ICMPv6 types
+enum {
+ /* ICMPv6 types */
EchoReply = 0,
UnreachableV6 = 1,
PacketTooBigV6 = 2,
@@ -69,6 +67,65 @@
Maxtype6 = 137,
};
+enum {
+ MinAdvise = IP6HDR+4, /* minimum needed for us to advise another protocol */
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding */
+#define ICMPHDR \
+ IPV6HDR; \
+ uchar type; \
+ uchar code; \
+ uchar cksum[2]; \
+ uchar icmpid[2]; \
+ uchar seq[2]
+
+struct IPICMP {	/* ICMPHDR fields (IPV6HDR plus type, code, cksum, icmpid, seq) followed by the payload */
+ ICMPHDR;
+ uchar payload[];	/* flexible array member; only marks where the payload starts */
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])	/* header size as the offset of payload, used instead of sizeof(IPICMP) */
+
+struct NdiscC {	/* ICMPHDR fields plus a target address, with no option fields */
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar payload[];	/* flexible array member; only marks where the fixed part ends */
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])	/* size of the fixed part; icmpns trims NDPKTSZ - NDISCSZ when it sends no option */
+
+struct Ndpkt {	/* NdiscC layout followed by one link-layer address option */
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar otype;
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+ uchar payload[];	/* flexible array member; only marks where the fixed part ends */
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])	/* size including the option; used by icmpns and icmpna for newIPICMP */
+
+typedef struct Icmppriv6
+{
+ ulong stats[Nstats6];
+
+ /* message counts */
+ ulong in[Maxtype6+1];
+ ulong out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+ uchar headers;
+} Icmpcb6;
+
char *icmpnames6[Maxtype6+1] =
{
[EchoReply] "EchoReply",
@@ -95,24 +152,6 @@
[RedirectV6] "RedirectV6",
};
-enum
-{
- InMsgs6,
- InErrors6,
- OutMsgs6,
- CsumErrs6,
- LenErrs6,
- HlenErrs6,
- HoplimErrs6,
- IcmpCodeErrs6,
- TargetErrs6,
- OptlenErrs6,
- AddrmxpErrs6,
- RouterAddrErrs6,
-
- Nstats6,
-};
-
static char *statnames6[Nstats6] =
{
[InMsgs6] "InMsgs",
@@ -129,49 +168,18 @@
[RouterAddrErrs6] "RouterAddrErrs",
};
-typedef struct Icmppriv6
-{
- ulong stats[Nstats6];
-
- /* message counts */
- ulong in[Maxtype6+1];
- ulong out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6
-{
- QLock;
- uchar headers;
-} Icmpcb6;
-
static char *unreachcode[] =
{
-[icmp6_no_route] "no route to destination",
-[icmp6_ad_prohib] "comm with destination administratively prohibited",
-[icmp6_unassigned] "icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach] "address unreachable",
-[icmp6_port_unreach] "port unreachable",
-[icmp6_unkn_code] "icmp unreachable: unknown code",
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code",
};
-enum {
- ICMP_USEAD6 = 40,
-};
-
-enum {
- Oflag = 1<<5,
- Sflag = 1<<6,
- Rflag = 1<<7,
-};
-
-enum {
- slladd = 1,
- tlladd = 2,
- prfinfo = 3,
- redhdr = 4,
- mtuopt = 5,
-};
-
static void icmpkick6(void *x, Block *bp);
static void
@@ -185,13 +193,14 @@
set_cksum(Block *bp)
{
IPICMP *p = (IPICMP *)(bp->rp);
+ int n = blocklen(bp);
- hnputl(p->vcf, 0); // borrow IP header as pseudoheader
- hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+ hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */
+ hnputs(p->ploadlen, n - IP6HDR);
p->proto = 0;
- p->ttl = ICMPv6; // ttl gets set later
+ p->ttl = ICMPv6; /* ttl gets set later */
hnputs(p->cksum, 0);
- hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ hnputs(p->cksum, ptclcsum(bp, 0, n));
p->proto = ICMPv6;
}
@@ -198,7 +207,8 @@
static Block *
newIPICMP(int packetlen)
{
- Block *nbp;
+ Block *nbp;
+
nbp = allocb(packetlen);
nbp->wp += packetlen;
memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
return nbp;
}
-void
+static void
icmpadvise6(Proto *icmp, Block *bp, char *msg)
{
- Conv **c, *s;
- IPICMP *p;
- ushort recid;
+ ushort recid;
+ Conv **c, *s;
+ IPICMP *p;
- p = (IPICMP *) bp->rp;
+ p = (IPICMP *)bp->rp;
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, p->src) == 0)
if(ipcmp(s->raddr, p->dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -230,9 +242,9 @@
static void
icmpkick6(void *x, Block *bp)
{
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
Conv *c = x;
IPICMP *p;
- uchar laddr[IPaddrlen], raddr[IPaddrlen];
Icmppriv6 *ipriv = c->p->priv;
Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
@@ -249,10 +261,10 @@
bp->rp += IPaddrlen;
ipmove(raddr, bp->rp);
bp->rp += IPaddrlen;
- bp = padblock(bp, sizeof(Ip6hdr));
+ bp = padblock(bp, IP6HDR);
}
- if(blocklen(bp) < sizeof(IPICMP)){
+ if(BLEN(bp) < IPICMPSZ){
freeblist(bp);
return;
}
@@ -268,23 +280,20 @@
set_cksum(bp);
p->vcf[0] = 0x06 << 4;
- if(p->type <= Maxtype6)
+ if(p->type <= Maxtype6)
ipriv->out[p->type]++;
ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-char*
+static char*	/* ctl handler, now file-local: accepts only the single word "headers" */
icmpctl6(Conv *c, char **argv, int argc)
{
Icmpcb6 *icb;
icb = (Icmpcb6*) c->ptcl;
-
- if(argc==1) {
- if(strcmp(argv[0], "headers")==0) {
- icb->headers = 6;
- return nil;
- }
+ if(argc==1 && strcmp(argv[0], "headers")==0) {	/* same test as the old nested ifs, flattened */
+ icb->headers = 6;	/* the only value this handler ever stores in headers */
+ return nil;
}
return "unknown control request";
}
@@ -292,41 +301,39 @@
static void
goticmpkt6(Proto *icmp, Block *bp, int muxkey)
{
- Conv **c, *s;
- IPICMP *p = (IPICMP *)bp->rp;
- ushort recid;
- uchar *addr;
+ ushort recid;	/* id compared with each conversation's lport */
+ uchar *addr;	/* packet address compared with each conversation's raddr */
+ Conv **c, *s;
+ IPICMP *p = (IPICMP *)bp->rp;
if(muxkey == 0) {
recid = nhgets(p->icmpid);
addr = p->src;
- }
- else {
+ } else {	/* non-zero muxkey: use it as the id and compare against the packet's dst */
recid = muxkey;
addr = p->dst;
}
-
- for(c = icmp->conv; *c; c++){
- s = *c;
- if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ for(c = icmp->conv; (s = *c) != nil; c++){	/* no early exit: every conversation is examined */
+ if(s->lport == recid)
+ if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)	/* match on local == packet dst or remote == addr */
+ qpass(s->rq, copyblock(bp, blocklen(bp)));	/* each match gets its own copy; bp is freed after the loop */
}
-
freeblist(bp);
}
static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
{
+ uchar addr[IPaddrlen];
IPICMP *p = (IPICMP *)(bp->rp);
- uchar addr[IPaddrlen];
+ if(isv6mcast(p->src))
+ return nil;
ipmove(addr, p->src);
- ipmove(p->src, p->dst);
+ if(!isv6mcast(p->dst))
+ ipmove(p->src, p->dst);
+ else if (!ipv6local(ifc, p->src, 0, addr))
+ return nil;
ipmove(p->dst, addr);
p->type = EchoReplyV6;
set_cksum(bp);
@@ -335,49 +342,43 @@
/*
* sends out an ICMPv6 neighbor solicitation
- * suni == SRC_UNSPEC or SRC_UNI,
+ * suni == SRC_UNSPEC or SRC_UNI,
* tuni == TARG_MULTI => multicast for address resolution,
* and tuni == TARG_UNI => neighbor reachability.
*/
-
-extern void
+void
icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
-
- nbp = newIPICMP(sizeof(Ndpkt));
+ nbp = newIPICMP(NDPKTSZ);
np = (Ndpkt*) nbp->rp;
+ if(suni == SRC_UNSPEC)
+ ipmove(np->src, v6Unspecified);
+ else
+ ipmove(np->src, src);
- if(suni == SRC_UNSPEC)
- memmove(np->src, v6Unspecified, IPaddrlen);
- else
- memmove(np->src, src, IPaddrlen);
-
if(tuni == TARG_UNI)
- memmove(np->dst, targ, IPaddrlen);
+ ipmove(np->dst, targ);
else
ipv62smcast(np->dst, targ);
np->type = NbrSolicit;
np->code = 0;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
if(suni != SRC_UNSPEC) {
- np->otype = SRC_LLADDRESS;
- np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ np->otype = SRC_LLADDR;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
- }
- else {
- int r = sizeof(Ndpkt)-sizeof(NdiscC);
- nbp->wp -= r;
- }
+ } else
+ nbp->wp -= NDPKTSZ - NDISCSZ;
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
/*
* sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
*/
-extern void
+void	/* build and send a neighbor advertisement for targ from src to dst, carrying mac as the link-layer option */
icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
- nbp = newIPICMP(sizeof(Ndpkt));
- np = (Ndpkt*) nbp->rp;
+ nbp = newIPICMP(NDPKTSZ);	/* sized by NDPKTSZ instead of sizeof(Ndpkt) */
+ np = (Ndpkt*)nbp->rp;
- memmove(np->src, src, IPaddrlen);
- memmove(np->dst, dst, IPaddrlen);
+ ipmove(np->src, src);
+ ipmove(np->dst, dst);
np->type = NbrAdvert;
np->code = 0;
np->icmpid[0] = flags;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
- np->otype = TARGET_LLADDRESS;
- np->olen = 1;
+ np->otype = TARGET_LLADDR;	/* renamed constant, was TARGET_LLADDRESS */
+ np->olen = 1;	/* one 8-octet unit, per the olen comment on Ndpkt */
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrAdvert]++;
- netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);	/* now logs the advertised target rather than src */
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void	/* send an ICMPv6 unreachable with the given code back to the source of bp; bp is no longer freed here */
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);	/* reply is header plus original packet, capped at v6MINTU */
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];	/* local address used as the reply's source, filled in by ipv6local */
- p = (Ip6hdr *) bp->rp;
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))	/* no reply for multicast src or dst, or if ipv6local fails */
+ return;
- if(isv6mcast(p->src))
- goto clean;
+ netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
-
- rlock(ifc);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
- freeblist(nbp);
- if(free)
- goto clean;
- else
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);	/* reply goes to the original sender */
np->type = UnreachableV6;
np->code = code;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);	/* copy as much of the original packet as fits */
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[UnreachableV6]++;
- if(free)
+ if(tome)	/* tome: deliver the reply to ourselves via ipiput6 instead of sending it out */
ipiput6(f, ifc, nbp);
- else {
+ else
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
- return;
- }
-
-clean:
- runlock(ifc);
- freeblist(bp);
}
-extern void
+void
icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
+ netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
+
nbp = newIPICMP(sz);
np = (IPICMP *) nbp->rp;
-
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = TimeExceedV6;
np->code = 0;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
- nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
+ netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ nbp = newIPICMP(sz);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = PacketTooBigV6;
np->code = 0;
hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
* RFC 2461, pages 39-40, pages 57-58.
*/
static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
- int sz, osz, unsp, n, ttl, iplen;
- int pktsz = BLEN(bp);
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *) packet;
- Ndpkt *np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+ int sz, osz, unsp, ttl;
+ int pktsz = BLEN(bp);
+ uchar *packet = bp->rp;
+ IPICMP *p = (IPICMP *) packet;
+ Ndpkt *np;
- USED(ifc);
- n = blocklen(bp);
- if(n < sizeof(IPICMP)) {
+ if(pktsz < IPICMPSZ) {
ipriv->stats[HlenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+ netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
goto err;
}
- iplen = nhgets(p->ploadlen);
- if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
- ipriv->stats[LenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto err;
- }
-
- // Rather than construct explicit pseudoheader, overwrite IPv6 header
+ /* Rather than construct explicit pseudoheader, overwrite IPv6 header */
if(p->proto != ICMPv6) {
- // This code assumes no extension headers!!!
+ /* This code assumes no extension headers!!! */
netlog(icmp->f, Logicmp, "icmp error: extension header\n");
goto err;
}
@@ -586,7 +553,7 @@
ttl = p->ttl;
p->ttl = p->proto;
p->proto = 0;
- if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+ if(ptclcsum(bp, 0, pktsz)) {
ipriv->stats[CsumErrs6]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto err;
@@ -595,19 +562,16 @@
p->ttl = ttl;
/* additional tests for some pkt types */
- if( (p->type == NbrSolicit) ||
- (p->type == NbrAdvert) ||
- (p->type == RouterAdvert) ||
- (p->type == RouterSolicit) ||
- (p->type == RedirectV6) ) {
-
+ if (p->type == NbrSolicit || p->type == NbrAdvert ||
+ p->type == RouterAdvert || p->type == RouterSolicit ||
+ p->type == RedirectV6) {
if(p->ttl != HOP_LIMIT) {
- ipriv->stats[HoplimErrs6]++;
- goto err;
+ ipriv->stats[HoplimErrs6]++;
+ goto err;
}
if(p->code != 0) {
- ipriv->stats[IcmpCodeErrs6]++;
- goto err;
+ ipriv->stats[IcmpCodeErrs6]++;
+ goto err;
}
switch (p->type) {
@@ -615,82 +579,78 @@
case NbrAdvert:
np = (Ndpkt*) p;
if(isv6mcast(np->target)) {
- ipriv->stats[TargetErrs6]++;
- goto err;
+ ipriv->stats[TargetErrs6]++;
+ goto err;
}
- if(optexsts(np) && (np->olen == 0)) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ if(optexsts(np) && np->olen == 0) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
-
- if(p->type == NbrSolicit) {
- if(ipcmp(np->src, v6Unspecified) == 0) {
- if(!issmcast(np->dst) || optexsts(np)) {
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
- }
+
+ if (p->type == NbrSolicit &&
+ ipcmp(np->src, v6Unspecified) == 0)
+ if(!issmcast(np->dst) || optexsts(np)) {
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
-
- if(p->type == NbrAdvert) {
- if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
+
+ if(p->type == NbrAdvert)
+ if(isv6mcast(np->dst) &&
+ (nhgets(np->icmpid) & Sflag)){
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
break;
-
+
case RouterAdvert:
- if(pktsz - sizeof(Ip6hdr) < 16) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 16) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
if(!islinklocal(p->src)) {
- ipriv->stats[RouterAddrErrs6]++;
- goto err;
+ ipriv->stats[RouterAddrErrs6]++;
+ goto err;
}
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
if(osz <= 0) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
- }
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
+ }
sz += 8*osz;
}
break;
-
+
case RouterSolicit:
- if(pktsz - sizeof(Ip6hdr) < 8) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 8) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
unsp = (ipcmp(p->src, v6Unspecified) == 0);
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
- if((osz <= 0) ||
- (unsp && (*(packet+sz) == slladd)) ) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
+ if(osz <= 0 ||
+ (unsp && packet[sz] == SRC_LLADDR)) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
sz += 8*osz;
}
break;
-
+
case RedirectV6:
- //to be filled in
+ /* to be filled in */
break;
-
+
default:
goto err;
}
}
-
return 1;
-
err:
- ipriv->stats[InErrors6]++;
+ ipriv->stats[InErrors6]++;
return 0;
}
@@ -700,169 +660,162 @@
Iplifc *lifc;
int t;
- rlock(ifc);
- if(ipproxyifc(f, ifc, target)) {
- runlock(ifc);
- return t_uniproxy;
- }
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, target) == 0) {
- t = (lifc->tentative) ? t_unitent : t_unirany;
- runlock(ifc);
- return t;
- }
- }
-
- runlock(ifc);
- return 0;
+ if((lifc = iplocalonifc(ifc, target)) != nil)
+ t = lifc->tentative? Tunitent: Tunirany;
+ else if(ipproxyifc(f, ifc, target))
+ t = Tuniproxy;
+ else
+ t = 0;
+ return t;
}
static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
{
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *)packet;
- Icmppriv6 *ipriv = icmp->priv;
- Block *r;
- Proto *pr;
- char *msg, m2[128];
- Ndpkt* np;
+ char *msg, m2[128];
uchar pktflags;
- uchar lsrc[IPaddrlen];
- int refresh = 1;
+ uchar ia[IPaddrlen];
+ Block *r;
+ IPICMP *p;
+ Icmppriv6 *ipriv = icmp->priv;
Iplifc *lifc;
+ Ndpkt* np;
+ Proto *pr;
- if(!valid(icmp, ipifc, bp, ipriv))
- goto raise;
+ bp = concatblock(bp);
+ p = (IPICMP*)bp->rp;
- if(p->type <= Maxtype6)
- ipriv->in[p->type]++;
- else
+ if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
goto raise;
+ ipriv->in[p->type]++;
+
switch(p->type) {
case EchoRequestV6:
- r = mkechoreply6(bp);
+ r = mkechoreply6(bp, ifc);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case UnreachableV6:
- if(p->code > 4)
- msg = unreachcode[icmp6_unkn_code];
+ if(p->code >= nelem(unreachcode))
+ msg = unreachcode[Icmp6_unknown];
else
msg = unreachcode[p->code];
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
+ Advise:
+ bp->rp += IPICMPSZ;
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs6]++;
goto raise;
}
p = (IPICMP *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
- }
- bp->rp -= sizeof(IPICMP);
- goticmpkt6(icmp, bp, 0);
- break;
-
- case TimeExceedV6:
- if(p->code == 0){
- sprint(m2, "ttl exceeded at %I", p->src);
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
- ipriv->stats[LenErrs6]++;
- goto raise;
+ /* get rid of fragment header if this is the first fragment */
+ if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+ Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+ if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */
+ p->proto = fh->nexthdr;
+ /* copy down payload over fragment header */
+ bp->rp += IP6HDR;
+ bp->wp -= IP6FHDR;
+ memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+ hnputs(p->ploadlen, BLEN(bp));
+ bp->rp -= IP6HDR;
}
- p = (IPICMP *)bp->rp;
+ }
+ if(p->proto != FH){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
+ (*pr->advise)(pr, bp, msg);
return;
}
- bp->rp -= sizeof(IPICMP);
}
+ bp->rp -= IPICMPSZ;
+ goticmpkt6(icmp, bp, 0);
+ break;
+ case TimeExceedV6:
+ if(p->code == 0){
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+ goto Advise;
+ }
+ if(p->code == 1){
+ snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+ goto Advise;
+ }
goticmpkt6(icmp, bp, 0);
break;
+ case PacketTooBigV6:
+ snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+ (ulong)nhgetl(p->icmpid), p->src);
+ goto Advise;
+
case RouterAdvert:
case RouterSolicit:
- /* using lsrc as a temp, munge hdr for goticmp6
- memmove(lsrc, p->src, IPaddrlen);
- memmove(p->src, p->dst, IPaddrlen);
- memmove(p->dst, lsrc, IPaddrlen); */
-
goticmpkt6(icmp, bp, p->type);
break;
case NbrSolicit:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
pktflags = 0;
- switch (targettype(icmp->f, ipifc, np->target)) {
- case t_unirany:
+ if(ifc->sendra6)
+ pktflags |= Rflag;
+ switch (targettype(icmp->f, ifc, np->target)) {
+ case Tunirany:
pktflags |= Oflag;
/* fall through */
- case t_uniproxy:
- if(ipcmp(np->src, v6Unspecified) != 0) {
- arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+ case Tuniproxy:
+ if(ipv6local(ifc, ia, 0, np->src)) {
+ if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+ break;
pktflags |= Sflag;
- }
- if(ipv6local(ipifc, lsrc)) {
- icmpna(icmp->f, lsrc,
- (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
- np->target, ipifc->mac, pktflags);
- }
- else
- freeblist(bp);
+ } else
+ ipmove(ia, np->target);
+ icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+ np->target, ifc->mac, pktflags);
break;
-
- case t_unitent:
- /* not clear what needs to be done. send up
- * an icmp mesg saying don't use this address? */
-
- default:
- freeblist(bp);
+ case Tunitent:
+ /*
+ * not clear what needs to be done. send up
+ * an icmp mesg saying don't use this address?
+ */
+ break;
}
-
+ freeblist(bp);
break;
case NbrAdvert:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
- /* if the target address matches one of the local interface
- * address and the local interface address has tentative bit set,
- * then insert into ARP table. this is so the duplication address
- * detection part of ipconfig can discover duplication through
- * the arp table
+ /*
+ * if the target address matches one of the local interface
+ * addresses and the local interface address has tentative bit
+ * set, insert into ARP table. this is so the duplicate address
+ * detection part of ipconfig can discover duplication through
+ * the arp table.
*/
- lifc = iplocalonifc(ipifc, np->target);
- if(lifc && lifc->tentative)
- refresh = 0;
- arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+ lifc = iplocalonifc(ifc, np->target);
+ if(lifc != nil && lifc->tentative)
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+ else if(ipv6local(ifc, ia, 0, np->target))
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
freeblist(bp);
break;
- case PacketTooBigV6:
-
default:
goticmpkt6(icmp, bp, 0);
break;
}
return;
-
raise:
freeblist(bp);
-
}
-int
+static int
icmpstats6(Proto *icmp6, char *buf, int len)
{
Icmppriv6 *priv;
@@ -874,23 +827,28 @@
e = p+len;
for(i = 0; i < Nstats6; i++)
p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
- for(i = 0; i <= Maxtype6; i++){
+ for(i = 0; i <= Maxtype6; i++)
if(icmpnames6[i])
- p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/* else
- p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
- }
+ p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+ priv->in[i], priv->out[i]);
return p - buf;
}
-// need to import from icmp.c
+/* import from icmp.c */
extern int icmpstate(Conv *c, char *state, int n);
extern char* icmpannounce(Conv *c, char **argv, int argc);
extern char* icmpconnect(Conv *c, char **argv, int argc);
extern void icmpclose(Conv *c);
+static void
+icmpclose6(Conv *c)
+{
+	/* clear the headers flag in the icmp6 control block, then run the icmp close imported from icmp.c */
+	((Icmpcb6*)c->ptcl)->headers = 0;
+	icmpclose(c);
+}
+
void
icmp6init(Fs *fs)
{
@@ -902,7 +860,7 @@
icmp6->announce = icmpannounce;
icmp6->state = icmpstate;
icmp6->create = icmpcreate6;
- icmp6->close = icmpclose;
+ icmp6->close = icmpclose6;
icmp6->rcv = icmpiput6;
icmp6->stats = icmpstats6;
icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
Fsproto(fs, icmp6);
}
-
--- a/os/ip/igmp.c
+++ b/os/ip/igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -40,8 +44,12 @@
uchar unused;
uchar igmpcksum[2]; /* checksum of igmp portion */
uchar group[IPaddrlen]; /* multicast group */
+
+ uchar payload[];
};
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
/*
* lists for group reports
*/
@@ -49,7 +57,7 @@
struct IGMPrep
{
IGMPrep *next;
- Media *m;
+ Medium *m;
int ticks;
Multicast *multi;
};
@@ -76,19 +84,17 @@
} stats;
void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
{
IGMPpkt *p;
Block *bp;
bp = allocb(sizeof(IGMPpkt));
- if(bp == nil)
- return;
p = (IGMPpkt*)bp->wp;
p->vihl = IP_VER4;
- bp->wp += sizeof(IGMPpkt);
- memset(bp->rp, 0, sizeof(IGMPpkt));
- hnputl(p->src, Mediagetaddr(m));
+ bp->wp += IGMPPKTSZ;
+ memset(bp->rp, 0, IGMPPKTSZ);
+ hnputl(p->src, Mediumgetaddr(m));
hnputl(p->dst, Ipallsys);
p->vertype = (1<<4) | IGMPreport;
p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
}
void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
{
int n;
IGMPpkt *ghp;
@@ -206,7 +212,7 @@
if(rp != nil)
break; /* already reporting */
- mp = Mediacopymulti(m);
+ mp = Mediumcopymulti(m);
if(mp == nil)
break;
@@ -285,7 +291,7 @@
igmp.ptclsize = 0;
igmpreportfn = igmpsendreport;
- kproc("igmpproc", igmpproc, 0, 0);
+ kproc("igmpproc", igmpproc, 0);
Fsproto(fs, &igmp);
}
--- a/os/ip/ihbootp.c
+++ /dev/null
@@ -1,323 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "kernel.h"
-#include "ip.h"
-
-static ulong fsip;
-static ulong auip;
-static ulong gwip;
-static ulong ipmask;
-static ulong ipaddr;
-static ulong dnsip;
-
-enum
-{
- Bootrequest = 1,
- Bootreply = 2,
-};
-
-typedef struct Bootp
-{
- /* udp.c oldheader */
- uchar raddr[IPaddrlen];
- uchar laddr[IPaddrlen];
- uchar rport[2];
- uchar lport[2];
- /* bootp itself */
- uchar op; /* opcode */
- uchar htype; /* hardware type */
- uchar hlen; /* hardware address len */
- uchar hops; /* hops */
- uchar xid[4]; /* a random number */
- uchar secs[2]; /* elapsed snce client started booting */
- uchar pad[2];
- uchar ciaddr[4]; /* client IP address (client tells server) */
- uchar yiaddr[4]; /* client IP address (server tells client) */
- uchar siaddr[4]; /* server IP address */
- uchar giaddr[4]; /* gateway IP address */
- uchar chaddr[16]; /* client hardware address */
- uchar sname[64]; /* server host name (optional) */
- uchar file[128]; /* boot file name */
- uchar vend[128]; /* vendor-specific goo */
-} Bootp;
-
-/*
- * bootp returns:
- *
- * "fsip d.d.d.d
- * auip d.d.d.d
- * gwip d.d.d.d
- * ipmask d.d.d.d
- * ipaddr d.d.d.d
- * dnsip d.d.d.d"
- *
- * where d.d.d.d is the IP address in dotted decimal notation, and each
- * address is followed by a newline.
- */
-
-static Bootp req;
-static Proc* rcvprocp;
-static int recv;
-static int done;
-static Rendez bootpr;
-static char rcvbuf[512];
-static int bootpdebug;
-
-/*
- * Parse the vendor specific fields according to RFC 1084.
- * We are overloading the "cookie server" to be the Inferno
- * authentication server and the "resource location server"
- * to be the Inferno file server.
- *
- * If the vendor specific field is formatted properly, it
- * will begin with the four bytes 99.130.83.99 and end with
- * an 0xFF byte.
- */
-static void
-parsevend(uchar* vend)
-{
- /* The field must start with 99.130.83.99 to be compliant */
- if ((vend[0] != 99) || (vend[1] != 130) ||
- (vend[2] != 83) || (vend[3] != 99)){
- if(bootpdebug)
- print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
- return;
- }
-
- /* Skip over the magic cookie */
- vend += 4;
-
- while ((vend[0] != 0) && (vend[0] != 0xFF)) {
- if(bootpdebug){
- int i;
- print("vend %d [%d]", vend[0], vend[1]);
- for(i=0; i<vend[1]; i++)
- print(" %2.2x", vend[i]);
- print("\n");
- }
- switch (vend[0]) {
- case 1: /* Subnet mask field */
- /* There must be only one subnet mask */
- if (vend[1] != 4)
- return;
-
- ipmask = (vend[2]<<24)|
- (vend[3]<<16)|
- (vend[4]<<8)|
- vend[5];
- break;
-
- case 3: /* Gateway/router field */
- /* We are only concerned with first address */
- if (vend[1] < 4)
- break;
-
- gwip = (vend[2]<<24)|
- (vend[3]<<16)|
- (vend[4]<<8)|
- vend[5];
- break;
-
- case 6: /* DNS server */
- /* We are only concerned with first address */
- if (vend[1] < 4)
- break;
-
- dnsip = (vend[2]<<24)|
- (vend[3]<<16)|
- (vend[4]<<8)|
- vend[5];
- break;
-
- case 8: /* "Cookie server" (auth server) field */
- /* We are only concerned with first address */
- if (vend[1] < 4)
- break;
-
- auip = (vend[2]<<24)|
- (vend[3]<<16)|
- (vend[4]<<8)|
- vend[5];
- break;
-
- case 11: /* "Resource loc server" (file server) field */
- /* We are only concerned with first address */
- if (vend[1] < 4)
- break;
-
- fsip = (vend[2]<<24)|
- (vend[3]<<16)|
- (vend[4]<<8)|
- vend[5];
- break;
-
- default: /* Ignore everything else */
- break;
- }
-
- /* Skip over the field */
- vend += vend[1] + 2;
- }
-}
-
-static void
-rcvbootp(void *a)
-{
- int n, fd;
- Bootp *rp;
-
- if(waserror())
- pexit("", 0);
- rcvprocp = up; /* store for postnote below */
- fd = (int)a;
- while(done == 0) {
- n = kread(fd, rcvbuf, sizeof(rcvbuf));
- if(n <= 0)
- break;
- rp = (Bootp*)rcvbuf;
- if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
- rp->htype == 1 && rp->hlen == 6) {
- ipaddr = (rp->yiaddr[0]<<24)|
- (rp->yiaddr[1]<<16)|
- (rp->yiaddr[2]<<8)|
- rp->yiaddr[3];
- parsevend(rp->vend);
- break;
- }
- }
- poperror();
- rcvprocp = nil;
-
- recv = 1;
- wakeup(&bootpr);
- pexit("", 0);
-}
-
-static char*
-rbootp(Ipifc *ifc)
-{
- int cfd, dfd, tries, n;
- char ia[5+3*16], im[16], *av[3];
- uchar nipaddr[4], ngwip[4], nipmask[4];
- char dir[Maxpath];
- static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
-
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcadd(ifc, av, 3, 0, nil);
-
- cfd = kannounce("udp!*!68", dir);
- if(cfd < 0)
- return "bootp announce failed";
- strcat(dir, "/data");
- if(kwrite(cfd, "headers", 7) < 0){
- kclose(cfd);
- return "bootp ctl headers failed";
- }
- kwrite(cfd, "oldheaders", 10);
- dfd = kopen(dir, ORDWR);
- if(dfd < 0){
- kclose(cfd);
- return "bootp open data failed";
- }
- kclose(cfd);
-
- /* create request */
- memset(&req, 0, sizeof(req));
- ipmove(req.raddr, IPv4bcast);
- hnputs(req.rport, 67);
- req.op = Bootrequest;
- req.htype = 1; /* ethernet (all we know) */
- req.hlen = 6; /* ethernet (all we know) */
-
- /* Hardware MAC address */
- memmove(req.chaddr, ifc->mac, 6);
- /* Fill in the local IP address if we know it */
- ipv4local(ifc, req.ciaddr);
- memset(req.file, 0, sizeof(req.file));
- memmove(req.vend, vend_rfc1048, 4);
-
- done = 0;
- recv = 0;
-
- kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
-
- /*
- * broadcast bootp's till we get a reply,
- * or fixed number of tries
- */
- tries = 0;
- while(recv == 0) {
- if(kwrite(dfd, &req, sizeof(req)) < 0)
- print("bootp: write: %r");
-
- tsleep(&bootpr, return0, 0, 1000);
- if(++tries > 10) {
- print("bootp: timed out\n");
- break;
- }
- }
- kclose(dfd);
- done = 1;
- if(rcvprocp != nil){
- postnote(rcvprocp, 1, "timeout", 0);
- rcvprocp = nil;
- }
-
- av[1] = "0.0.0.0";
- av[2] = "0.0.0.0";
- ipifcrem(ifc, av, 3);
-
- hnputl(nipaddr, ipaddr);
- sprint(ia, "%V", nipaddr);
- hnputl(nipmask, ipmask);
- sprint(im, "%V", nipmask);
- av[1] = ia;
- av[2] = im;
- ipifcadd(ifc, av, 3, 0, nil);
-
- if(gwip != 0) {
- hnputl(ngwip, gwip);
- n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
- routewrite(ifc->conv->p->f, nil, ia, n);
- }
- return nil;
-}
-
-static int
-rbootpread(char *bp, ulong offset, int len)
-{
- int n;
- char *buf;
- uchar a[4];
-
- buf = smalloc(READSTR);
- if(waserror()){
- free(buf);
- nexterror();
- }
- hnputl(a, fsip);
- n = snprint(buf, READSTR, "fsip %15V\n", a);
- hnputl(a, auip);
- n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
- hnputl(a, gwip);
- n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
- hnputl(a, ipmask);
- n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
- hnputl(a, ipaddr);
- n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
- hnputl(a, dnsip);
- snprint(buf + n, READSTR-n, "dnsip %15V\n", a);
-
- len = readstr(offset, bp, len, buf);
- poperror();
- free(buf);
- return len;
-}
-
-char* (*bootp)(Ipifc*) = rbootp;
-int (*bootpread)(char*, ulong, int) = rbootpread;
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -189,7 +189,7 @@
{
Ipht ht;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
ulong csumerr; /* checksum errors */
ulong hlenerr; /* header length error */
@@ -208,7 +208,7 @@
void ilrcvmsg(Conv*, Block*);
-void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
void ilackq(Ilcb*, Block*);
void ilprocess(Conv*, Ilhdr*, Block*);
void ilpullup(Conv*);
@@ -251,6 +251,9 @@
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
+ if(c->ipversion != V4)
+ return "only IP version 4 supported";
+
return ilstart(c, IL_CONNECT, fast);
}
@@ -260,7 +263,7 @@
Ilcb *ic;
ic = (Ilcb*)(c->ptcl);
- return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
ilstates[ic->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -548,6 +551,9 @@
ih = (Ilhdr *)bp->rp;
plen = blocklen(bp);
+ if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+ goto raise; /* ignore non V4 packets */
+
if(plen < IL_IPSIZE+IL_HDRSIZE){
netlog(il->f, Logil, "il: hlenerr\n");
ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
else
st = iltype[ih->iltype];
ipriv->stats[CsumErrs]++;
- netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+	netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n",
csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -595,7 +601,7 @@
else
st = iltype[ih->iltype];
ilreject(il->f, ih); /* no channel and not sync */
- netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+ netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -829,7 +835,7 @@
c = ic->conv;
id = nhgetl(h->ilid);
- netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+ netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
ic->rexmit, ic->timeout,
c->raddr, c->lport, c->rport);
@@ -852,7 +858,7 @@
ic = (Ilcb*)s->ptcl;
USED(ic);
- netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+	netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ",
ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
ic->next, iltype[h->iltype], nhgetl(h->ilid),
nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
_ilprocess(s, h, bp);
- netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
}
void
@@ -917,17 +923,12 @@
bp->list = nil;
dlen = nhgets(oh->illen)-IL_HDRSIZE;
bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+
/*
* Upper levels don't know about multiple-block
* messages so copy all into one (yick).
*/
- bp = concatblock(bp);
- if(bp == 0)
- panic("ilpullup");
- bp = packblock(bp);
- if(bp == 0)
- panic("ilpullup2");
- qpass(s->rq, bp);
+ qpass(s->rq, packblock(concatblock(bp)));
}
qunlock(&ic->outo);
}
@@ -948,7 +949,7 @@
id = nhgetl(h->ilid);
/* Window checks */
if(id <= ic->recvd || id > ic->recvd+ic->window) {
- netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
freeblist(bp);
return;
@@ -983,7 +984,7 @@
qunlock(&ic->outo);
}
-void
+int
ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
{
Ilhdr *ih;
@@ -1034,7 +1035,7 @@
hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
if(ipc==nil)
- panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+ panic("ipc is nil caller is %#p", getcallerpc(&ipc));
if(ipc->p==nil)
panic("ipc->p is nil");
@@ -1042,7 +1043,7 @@
iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
nhgets(ih->ilsrc), nhgets(ih->ildst));
- ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+ return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
}
void
@@ -1145,6 +1146,8 @@
il = x;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Iltickms);
for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
qlock(&ipriv->apl);
if(ipriv->ackprocstarted == 0){
sprint(kpname, "#I%dilack", c->p->f->dev);
- kproc(kpname, ilackproc, c->p, 0);
+ kproc(kpname, ilackproc, c->p);
ipriv->ackprocstarted = 1;
}
qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
case IL_CONNECT:
ic->state = Ilsyncer;
iphtadd(&ipriv->ht, c);
- ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+ if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+ ilhangup(c, "no route");
break;
}
@@ -1332,6 +1336,8 @@
if(s->lport == psource)
if(ipcmp(s->laddr, source) == 0)
if(ipcmp(s->raddr, dest) == 0){
+ if(s->ignoreadvice)
+ break;
qunlock(il);
ic = (Ilcb*)s->ptcl;
switch(ic->state){
@@ -1380,12 +1386,6 @@
}
}
-int
-ilgc(Proto *il)
-{
- return natgc(il->ipproto);
-}
-
void
ilinit(Fs *f)
{
@@ -1406,7 +1406,7 @@
il->advise = iladvise;
il->stats = ilxstats;
il->inuse = ilinuse;
- il->gc = ilgc;
+ il->gc = nil;
il->ipproto = IP_ILPROTO;
il->nc = scalednconv();
il->ptclsize = sizeof(Ilcb);
--- a/os/ip/ip.c
+++ b/os/ip/ip.c
@@ -7,94 +7,6 @@
#include "ip.h"
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = 64*1024, /* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
-
- Nstats,
-};
-
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
-
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
-
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
static char *statnames[] =
{
[Forwarding] "Forwarding",
@@ -118,45 +30,11 @@
[FragCreates] "FragCreates",
};
-#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
+static Block* ip4reassemble(IP*, int, Block*);
+static void ipfragfree4(IP*, Fragment4*);
+static Fragment4* ipfragallo4(IP*);
-ushort ipcsum(uchar*);
-Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void ipfragfree4(IP*, Fragment4*);
-Fragment4* ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
- V6params *v6p;
-
- v6p = smalloc(sizeof(V6params));
-
- v6p->rp.mflag = 0; // default not managed
- v6p->rp.oflag = 0;
- v6p->rp.maxraint = 600000; // millisecs
- v6p->rp.minraint = 200000;
- v6p->rp.linkmtu = 0; // no mtu sent
- v6p->rp.reachtime = 0;
- v6p->rp.rxmitra = 0;
- v6p->rp.ttl = MAXTTL;
- v6p->rp.routerlt = 3*(v6p->rp.maxraint);
-
- v6p->hp.rxmithost = 1000; // v6 RETRANS_TIMER
-
- v6p->cdrouter = -1;
-
- f->v6p = v6p;
-
-}
-
-void
+static void
initfrag(IP *ip, int size)
{
Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
IP *ip;
ip = smalloc(sizeof(IP));
+ ip->stats[DefaultTTL] = MAXTTL;
initfrag(ip, 100);
f->ip = ip;
@@ -202,11 +81,11 @@
if(f->ip->iprouting==0)
f->ip->stats[Forwarding] = 2;
else
- f->ip->stats[Forwarding] = 1;
+ f->ip->stats[Forwarding] = 1;
}
int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
Ipifc *ifc;
uchar *gate;
@@ -213,66 +92,41 @@
ulong fragoff;
Block *xp, *nb;
Ip4hdr *eh, *feh;
- int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
- Route *r, *sr;
+ int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+ Route *r;
IP *ip;
int rv = 0;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip4hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)bp->rp;
+ assert(BLEN(bp) >= IP4HDR);
len = blocklen(bp);
-
- if(gating){
- chunk = nhgets(eh->length);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk < len)
- len = chunk;
- }
if(len >= IP_MAX){
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v4lookup(f, eh->dst, c);
- if(r == nil){
+ r = v4lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v4lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v4.gate;
- if(!gating)
- eh->vihl = IP_VER4|IP_HLEN4;
- eh->ttl = ttl;
- if(!gating)
- eh->tos = tos;
-
- if(!canrlock(ifc))
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
+ }
if(waserror()){
runlock(ifc);
nexterror();
@@ -280,17 +134,18 @@
if(ifc->m == nil)
goto raise;
- /* Output NAT */
- if(nato(bp, ifc, f) != 0)
- goto raise;
+ if(!gating){
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->tos = tos;
+ }
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- if(!gating)
- hnputs(eh->id, incref(&ip->id4));
hnputs(eh->length, len);
if(!gating){
+ hnputs(eh->id, incref(&ip->id4));
eh->frag[0] = 0;
eh->frag[1] = 0;
}
@@ -297,31 +152,31 @@
eh->cksum[0] = 0;
eh->cksum[1] = 0;
hnputs(eh->cksum, ipcsum(&eh->vihl));
- ifc->m->bwrite(ifc, bp, V4, gate);
+
+ ipifcoput(ifc, bp, V4, gate);
runlock(ifc);
poperror();
return 0;
}
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
if(eh->frag[0] & (IP_DF>>8)){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
icmpcantfrag(f, bp, medialen);
- netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
goto raise;
}
- seglen = (medialen - IP4HDR) & ~7;
+ hlen = (eh->vihl & 0xF)<<2;
+ seglen = (medialen - hlen) & ~7;
if(seglen < 8){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
goto raise;
}
- dlen = len - IP4HDR;
+ dlen = len - hlen;
xp = bp;
if(gating)
lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
else
lid = incref(&ip->id4);
- offset = IP4HDR;
- while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset = hlen;
+ while(offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
@@ -341,30 +196,30 @@
fragoff = 0;
dlen += fragoff;
for(; fragoff < dlen; fragoff += seglen) {
- nb = allocb(IP4HDR+seglen);
- feh = (Ip4hdr*)(nb->rp);
+ nb = allocb(hlen+seglen);
+ feh = (Ip4hdr*)nb->rp;
- memmove(nb->wp, eh, IP4HDR);
- nb->wp += IP4HDR;
+ memmove(nb->wp, eh, hlen);
+ nb->wp += hlen;
if((fragoff + seglen) >= dlen) {
seglen = dlen - fragoff;
hnputs(feh->frag, fragoff>>3);
}
- else
+ else
hnputs(feh->frag, (fragoff>>3)|IP_MF);
- hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->length, seglen + hlen);
hnputs(feh->id, lid);
/* Copy up the data area */
chunk = seglen;
while(chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -376,12 +231,13 @@
chunk -= blklen;
if(xp->rp == xp->wp)
xp = xp->next;
- }
+ }
feh->cksum[0] = 0;
feh->cksum[1] = 0;
hnputs(feh->cksum, ipcsum(&feh->vihl));
- ifc->m->bwrite(ifc, nb, V4, gate);
+
+ ipifcoput(ifc, nb, V4, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
void
ipiput4(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos, proto, olen;
+ int hl, len, hop, tos;
+ uchar v6dst[IPaddrlen];
+ ushort frag;
Ip4hdr *h;
Proto *p;
- ushort frag;
- int notforme;
- uchar *dp, v6dst[IPaddrlen];
IP *ip;
- Route *r;
- if(BLKIPVER(bp) != IP_VER4) {
+ if((bp->rp[0]&0xF0) != IP_VER4) {
ipiput6(f, ifc, bp);
return;
}
@@ -430,58 +283,45 @@
return;
}
- h = (Ip4hdr*)(bp->rp);
-
- /* Input NAT */
- nati(bp, ifc);
-
- /* dump anything that whose header doesn't checksum */
+ h = (Ip4hdr*)bp->rp;
+ hl = (h->vihl & 0xF)<<2;
+ if(hl < IP4HDR || hl > BLEN(bp)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+ goto drop;
+ }
if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: checksum error %V\n", h->src);
- freeblist(bp);
+ netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+ goto drop;
+ }
+ len = nhgets(h->length);
+ if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+ if(bp != nil)
+ goto drop;
return;
}
- v4tov6(v6dst, h->dst);
- notforme = ipforme(f, v6dst) == 0;
+ h = (Ip4hdr*)bp->rp;
- /* Check header length and version */
- if((h->vihl&0x0F) != IP_HLEN4) {
- hl = (h->vihl&0xF)<<2;
- if(hl < (IP_HLEN4<<2)) {
- ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
- freeblist(bp);
- return;
- }
- /* If this is not routed strip off the options */
- if(notforme == 0) {
- olen = nhgets(h->length);
- dp = bp->rp + (hl - (IP_HLEN4<<2));
- memmove(dp, h, IP_HLEN4<<2);
- bp->rp = dp;
- h = (Ip4hdr*)(bp->rp);
- h->vihl = (IP_VER4|IP_HLEN4);
- hnputs(h->length, olen-hl+(IP_HLEN4<<2));
- }
- }
-
/* route */
- if(notforme) {
- Conv conv;
+ v4tov6(v6dst, h->dst);
+ if(!ipforme(f, v6dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
- if(!ip->iprouting){
- freeb(bp);
- return;
- }
+ if(!ip->iprouting)
+ goto drop;
/* don't forward to source's network */
- conv.r = nil;
- r = v4lookup(f, h->dst, &conv);
- if(r == nil || r->ifc == ifc){
+ rh.r = nil;
+ r = v4lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
hop = h->ttl;
if(hop < 1) {
ip->stats[InHdrErrors]++;
- icmpttlexceeded(f, ifc->lifc->local, bp);
- freeblist(bp);
- return;
+ icmpttlexceeded(f, ifc, bp);
+ goto drop;
}
/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
- if(r->ifc->reassemble){
+ if(nifc->reassemble){
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
}
@@ -511,27 +346,30 @@
ip->stats[ForwDatagrams]++;
tos = h->tos;
hop = h->ttl;
- ipoput4(f, bp, 1, hop - 1, tos, &conv);
+ ipoput4(f, bp, 1, hop - 1, tos, &rh);
return;
}
+ /* If this is not routed strip off the options */
+ if(hl > IP4HDR) {
+ hl -= IP4HDR;
+ len -= hl;
+ bp->rp += hl;
+ memmove(bp->rp, h, IP4HDR);
+ h = (Ip4hdr*)bp->rp;
+ h->vihl = IP_VER4|IP_HLEN4;
+ hnputs(h->length, len);
+ }
+
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
- /* don't let any frag info go up the stack */
- h->frag[0] = 0;
- h->frag[1] = 0;
-
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
}
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -550,45 +389,43 @@
int i;
ip = f->ip;
- ip->stats[DefaultTTL] = MAXTTL;
-
p = buf;
e = p+len;
- for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+ for(i = 0; i < Nipstats; i++)
+ p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
return p - buf;
}
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
{
- int fend;
+ int ovlap, fragsize, len;
+ ulong src, dst;
ushort id;
+ Block *bl, **l, *prev;
Fragment4 *f, *fnext;
- ulong src, dst;
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Ipfrag *fp, *fq;
+ Ip4hdr *ih;
+ /*
+ * block lists are too hard, concatblock into a single block
+ */
+ bp = concatblock(bp);
+
+ ih = (Ip4hdr*)bp->rp;
src = nhgetl(ih->src);
dst = nhgetl(ih->dst);
id = nhgets(ih->id);
+ fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
- /*
- * block lists are too hard, pullupblock into a single block
- */
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip4hdr*)(bp->rp);
- }
-
qlock(&ip->fraglock4);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead4; f; f = fnext){
+ for(f = ip->flisthead4; f != nil; f = fnext){
fnext = f->next; /* because ipfragfree4 changes the list */
- if(f->src == src && f->dst == dst && f->id == id)
+ if(f->id == id && f->src == src && f->dst == dst)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
* and get rid of any fragments that might go
* with it.
*/
- if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+ if((offset & (IP_MF|IP_FO)) == 0) {
if(f != nil) {
- ipfragfree4(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree4(ip, f);
}
qunlock(&ip->fraglock4);
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset<<3;
- BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = (offset & IP_FO)<<3;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -627,8 +465,9 @@
f->blist = bp;
- qunlock(&ip->fraglock4);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock4);
+
return nil;
}
@@ -638,7 +477,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -645,15 +484,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock4);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -662,26 +502,26 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
if(ovlap <= 0)
break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
- (*l)->rp += ovlap;
+ if(ovlap < fq->flen) {
+ /* move up ip header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -689,35 +529,50 @@
* look for a complete packet. if we get to a fragment
* without IP_MF set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
- if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
- bl = f->blist;
- len = nhgets(BLKIP(bl)->length);
- bl->wp = bl->rp + len;
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += IP4HDR;
- bl->wp = bl->rp + fragsize;
- }
+ ih = (Ip4hdr*)bl->rp;
+ if(ih->frag[0]&(IP_MF>>8))
+ continue;
- bl = f->blist;
- f->blist = nil;
+ bl = f->blist;
+ len = BLEN(bl);
+
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
+
+ if(len >= IP_MAX){
ipfragfree4(ip, f);
- ih = BLKIP(bl);
- hnputs(ih->length, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock4);
- ip->stats[ReasmOKs]++;
- return bl;
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree4(ip, f);
+
+ ih = (Ip4hdr*)bl->rp;
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputs(ih->length, len);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock4);
+
+ return bl;
}
qunlock(&ip->fraglock4);
return nil;
@@ -726,20 +581,20 @@
/*
* ipfragfree4 - Free a list of fragments - assume hold fraglock4
*/
-void
+static void
ipfragfree4(IP *ip, Fragment4 *frag)
{
Fragment4 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- frag->src = 0;
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ frag->src = 0;
+ frag->dst = 0;
l = &ip->flisthead4;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -755,7 +610,7 @@
/*
* ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
*/
-Fragment4 *
+static Fragment4*
ipfragallo4(IP *ip)
{
Fragment4 *f;
@@ -762,7 +617,7 @@
while(ip->fragfree4 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead4; f->next; f = f->next)
+ for(f = ip->flisthead4; f->next != nil; f = f->next)
;
ipfragfree4(ip, f);
}
--- a/os/ip/ip.h
+++ b/os/ip/ip.h
@@ -1,35 +1,33 @@
typedef struct Conv Conv;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
typedef struct Fs Fs;
typedef union Hwaddr Hwaddr;
typedef struct IP IP;
typedef struct IPaux IPaux;
+typedef struct Ip4hdr Ip4hdr;
+typedef struct Ipfrag Ipfrag;
typedef struct Ipself Ipself;
typedef struct Ipselftab Ipselftab;
typedef struct Iplink Iplink;
typedef struct Iplifc Iplifc;
typedef struct Ipmulti Ipmulti;
-typedef struct IProuter IProuter;
typedef struct Ipifc Ipifc;
typedef struct Iphash Iphash;
typedef struct Ipht Ipht;
typedef struct Netlog Netlog;
-typedef struct Ifclog Ifclog;
typedef struct Medium Medium;
typedef struct Proto Proto;
typedef struct Arpent Arpent;
typedef struct Arp Arp;
typedef struct Route Route;
+typedef struct Routehint Routehint;
typedef struct Routerparams Routerparams;
typedef struct Hostparams Hostparams;
-typedef struct V6router V6router;
-typedef struct V6params V6params;
+typedef struct v6params v6params;
-typedef struct Ip4hdr Ip4hdr;
-typedef struct Nat Nat;
-
#pragma incomplete Arp
-#pragma incomplete Ifclog
#pragma incomplete Ipself
#pragma incomplete Ipselftab
#pragma incomplete IP
@@ -39,10 +37,9 @@
{
Addrlen= 64,
Maxproto= 20,
- Nhash= 64,
- Maxincall= 5,
- Nchans= 16383,
- MAClen= 16, /* longest mac address */
+ Maxincall= 10,
+ Nchans= 1024,
+ MAClen= 8, /* longest mac address */
MAXTTL= 255,
DFLTTOS= 0,
@@ -57,6 +54,12 @@
V6= 6,
IP_VER4= 0x40,
IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP_FO= 0x1fff, /* v4: Fragment offset */
+ IP4HDR= IP_HLEN4<<2, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
/* 2^Lroot trees in the root table */
Lroot= 10,
@@ -73,6 +76,79 @@
Connected= 4,
};
+/* MIB II counters: indices into IP.stats[]; ipstats prints statnames[i] next to stats[i], so the order here must match statnames[] */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nipstats, /* number of counters: sizes IP.stats[] and bounds the loop in ipstats; keep last */
+};
+
+struct Fragment4 /* one IPv4 reassembly queue; ip4reassemble selects it by matching (id, src, dst) */
+{
+ Block* blist; /* fragments queued so far, linked through Block.next in ascending Ipfrag.foff order */
+ Fragment4* next; /* next queue on the list headed by ip->flisthead4 */
+ ulong src; /* IPv4 source address as returned by nhgetl(ih->src) */
+ ulong dst; /* IPv4 destination address as returned by nhgetl(ih->dst) */
+ ushort id; /* IP identification field as returned by nhgets(ih->id) */
+ ulong age; /* compared against NOW: ip4reassemble counts ReasmTimeout once age < NOW */
+};
+
+struct Fragment6 /* NOTE(review): presumably the IPv6 counterpart of Fragment4; the v6 reassembly code is not visible here */
+{
+ Block* blist; /* presumably the queued fragments, as in Fragment4 -- confirm in the v6 code */
+ Fragment6* next; /* presumably the next queue on ip->flisthead6 -- confirm */
+ uchar src[IPaddrlen]; /* source address, stored as IPaddrlen bytes rather than a ulong */
+ uchar dst[IPaddrlen]; /* destination address, stored as IPaddrlen bytes */
+ uint id; /* wider than Fragment4.id (uint vs ushort) */
+ ulong age; /* presumably an expiry time compared against NOW, as in Fragment4 -- confirm */
+};
+
+struct Ipfrag /* per-fragment bookkeeping that ip4reassemble stores at bp->base of each queued Block */
+{
+ ushort foff; /* byte offset of this fragment's data: (offset & IP_FO)<<3; advanced when a leading overlap is trimmed */
+ ushort flen; /* bytes of fragment data this block still contributes; reduced when overlaps are trimmed */
+ uchar payload[]; /* flexible array member; only marks where the bookkeeping ends (see IPFRAGSZ) */
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0]) /* bytes of Ipfrag bookkeeping; ip4reassemble pads the block so base+IPFRAGSZ <= rp */
+
+/* an instance of IP */
+struct IP
+{
+ uvlong stats[Nipstats]; /* MIB II counters indexed by the counter enum; printed by ipstats with %llud */
+
+ QLock fraglock4; /* held by ip4reassemble around queue work; ipfragfree4 and ipfragallo4 assume it is held */
+ Fragment4* flisthead4; /* list of active IPv4 reassembly queues */
+ Fragment4* fragfree4; /* unused Fragment4 entries; while this is nil, ipfragallo4 frees the last queue on flisthead4 */
+ Ref id4; /* NOTE(review): presumably the IPv4 packet id counter; usage not visible here */
+
+ QLock fraglock6; /* presumably the IPv6 counterpart of fraglock4 -- confirm in the v6 code */
+ Fragment6* flisthead6; /* presumably the list of active IPv6 reassembly queues -- confirm */
+ Fragment6* fragfree6; /* presumably unused Fragment6 entries -- confirm */
+ Ref id6; /* NOTE(review): presumably the IPv6 fragment id counter; usage not visible here */
+
+ int iprouting; /* true if we route like a gateway */
+};
+
/* on the wire packet header */
struct Ip4hdr
{
@@ -86,9 +162,14 @@
uchar cksum[2]; /* Header checksum */
uchar src[4]; /* IP source */
uchar dst[4]; /* IP destination */
- uchar data[1]; /* start of data */
};
+struct Routehint /* route hint passed to v4lookup/v6lookup and ipoput4/ipoput6; ipiput4 sets r = nil before its lookup */
+{
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
/*
* one per conversation directory
*/
@@ -100,9 +181,9 @@
Proto* p;
int restricted; /* remote port is restricted */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
uint ttl; /* max time to live */
uint tos; /* type of service */
- int ignoreadvice; /* don't terminate connection on icmp errors */
uchar ipversion;
uchar laddr[IPaddrlen]; /* local IP address */
@@ -139,8 +220,7 @@
void* ptcl; /* protocol specific stuff */
- Route *r; /* last route used */
- ulong rgen; /* routetable generation for *r */
+ Routehint;
};
struct Medium
@@ -161,18 +241,8 @@
/* process packets written to 'data' */
void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
- /* routes for router boards */
- void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
- void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
- void (*flushroutes)(Ipifc *ifc);
-
- /* for routing multicast groups */
- void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
- void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
/* address resolution */
- void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
- void (*areg)(Ipifc*, uchar*); /* register */
+ void (*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
/* v6 address generation */
void (*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
uchar mask[IPaddrlen];
uchar remote[IPaddrlen];
uchar net[IPaddrlen];
+ uchar type; /* route type */
uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
uchar onlink; /* =1 => onlink, =0 offlink. */
uchar autoflag; /* v6 autonomous flag */
- long validlt; /* v6 valid lifetime */
- long preflt; /* v6 preferred lifetime */
- long origint; /* time when addr was added */
+ ulong validlt; /* v6 valid lifetime */
+ ulong preflt; /* v6 preferred lifetime */
+ ulong origint; /* time when addr was added */
Iplink *link; /* addresses linked to this lifc */
Iplifc *next;
};
@@ -203,25 +274,25 @@
Ipself *self;
Iplifc *lifc;
Iplink *selflink; /* next link for this local address */
- Iplink *lifclink; /* next link for this ifc */
- ulong expire;
+ Iplink *lifclink; /* next link for this lifc */
Iplink *next; /* free list */
+ ulong expire;
int ref;
};
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
/* default values, one per stack */
struct Routerparams {
- int mflag;
- int oflag;
- int maxraint;
- int minraint;
- int linkmtu;
- int reachtime;
- int rxmitra;
- int ttl;
- int routerlt;
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
};
struct Hostparams {
@@ -231,22 +302,18 @@
struct Ipifc
{
RWlock;
-
+
Conv *conv; /* link to its conversation structure */
char dev[64]; /* device we're attached to */
Medium *m; /* Media pointer */
int maxtu; /* Maximum transfer unit */
int mintu; /* Minumum tranfer unit */
- int mbps; /* megabits per second */
void *arg; /* medium specific */
- int reassemble; /* reassemble IP packets before forwarding */
- /* these are used so that we can unbind on the fly */
- Lock idlock;
+ uchar reflect; /* allow forwarded packets to go out the same interface */
+ uchar reassemble; /* reassemble IP packets before forwarding to this interface */
+
uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
- int ref; /* number of proc's using this ipifc */
- Rendez wait; /* where unbinder waits for ref == 0 */
- int unbinding;
uchar mac[MAClen]; /* MAC address */
@@ -255,10 +322,16 @@
ulong in, out; /* message statistics */
ulong inerr, outerr; /* ... */
- uchar sendra6; /* == 1 => send router advs on this ifc */
- uchar recvra6; /* == 1 => recv router advs on this ifc */
- Routerparams rp; /* router parameters as in RFC 2461, pp.40--43.
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
used only if node is router */
+
+ int speed; /* link speed in bits per second */
+ int delay; /* burst delay in ms */
+ int burst; /* burst delay in bytes */
+ int load; /* bytes in flight */
+ ulong ticks;
};
/*
@@ -330,20 +403,11 @@
int nc; /* number of conversations */
int ac;
Qid qid; /* qid for protocol directory */
- ushort nextport;
ushort nextrport;
void *priv;
};
-/*
- * Stream for sending packets to user level
- */
-struct IProuter {
- QLock;
- int opens;
- Queue *q;
-};
/*
* one per IP protocol stack
@@ -362,8 +426,7 @@
IP *ip;
Ipselftab *self;
Arp *arp;
- V6params *v6p;
- IProuter iprouter;
+ v6params *v6p;
Route *v4root[1<<Lroot]; /* v4 routing forest */
Route *v6root[1<<Lroot]; /* v6 routing forest */
@@ -370,7 +433,6 @@
Route *queue; /* used as temp when reinjecting routes */
Netlog *alog;
- Ifclog *ilog;
char ndb[1024]; /* an ndb entry for this interface */
int ndbvers;
@@ -377,23 +439,10 @@
long ndbmtime;
};
-/* one per default router known to host */
-struct V6router {
- uchar inuse;
- Ipifc *ifc;
- int ifcid;
- uchar routeraddr[IPaddrlen];
- long ltorigin;
- Routerparams rp;
-};
-
-struct V6params
+struct v6params
{
Routerparams rp; /* v6 params, one copy per node now */
Hostparams hp;
- V6router v6rlist[3]; /* max 3 default routers, currently */
- int cdrouter; /* uses only v6rlist[cdrouter] if */
- /* cdrouter >= 0. */
};
@@ -410,8 +459,7 @@
char* Fsstdbind(Conv*, char**, int);
ulong scalednconv(void);
void closeconv(Conv*);
-
-/*
+/*
* logging
*/
enum
@@ -434,7 +482,6 @@
Logrudpmsg= 1<<16,
Logesp= 1<<17,
Logtcpwin= 1<<18,
- Lognat= 1<<19,
};
void netloginit(Fs*);
@@ -449,17 +496,17 @@
void ifclogopen(Fs*, Chan*);
void ifclogclose(Fs*, Chan*);
+#pragma varargck argpos netlog 3
+
/*
* iproute.c
*/
typedef struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
typedef struct V4route V4route;
typedef struct V6route V6route;
enum
{
-
/* type bits */
Rv4= (1<<0), /* this is a version 4 route */
Rifc= (1<<1), /* this route is a directly connected interface */
@@ -468,27 +515,18 @@
Rbcast= (1<<4), /* a broadcast self address */
Rmulti= (1<<5), /* a multicast self address */
Rproxy= (1<<6), /* this route should be proxied */
+ Rsrc= (1<<7), /* source specific route */
};
-struct Routewalk
-{
- int o;
- int h;
- char* p;
- char* e;
- void* state;
- void (*walk)(Route*, Routewalk*);
-};
-
struct RouteTree
{
- Route* right;
- Route* left;
- Route* mid;
+ Route *mid;
+ Route *left;
+ Route *right;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->id */
uchar depth;
uchar type;
- uchar ifcid; /* must match ifc->id */
- Ipifc *ifc;
char tag[4];
int ref;
};
@@ -497,6 +535,10 @@
{
ulong address;
ulong endaddress;
+
+ ulong source;
+ ulong endsource;
+
uchar gate[IPv4addrlen];
};
@@ -504,6 +546,10 @@
{
ulong address[IPllen];
ulong endaddress[IPllen];
+
+ ulong source[IPllen];
+ ulong endsource[IPllen];
+
uchar gate[IPaddrlen];
};
@@ -516,17 +562,16 @@
V4route v4;
};
};
-extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route* v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v4source(Fs *f, uchar *a, uchar *s);
+extern Route* v6source(Fs *f, uchar *a, uchar *s);
extern long routeread(Fs *f, char*, ulong, int);
extern long routewrite(Fs *f, Chan*, char*, int);
-extern void routetype(int, char*);
-extern void ipwalkroutes(Fs*, Routewalk*);
-extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void routetype(int type, char p[8]);
/*
* devip.c
@@ -543,7 +588,6 @@
};
extern IPaux* newipaux(char*, char*);
-extern void setlport(Conv*);
/*
* arp.c
@@ -552,18 +596,16 @@
{
uchar ip[IPaddrlen];
uchar mac[MAClen];
- Medium *type; /* media type */
- Arpent* hash;
- Block* hold;
- Block* last;
- uint ctime; /* time entry was created or refreshed */
- uint utime; /* time entry was last used */
- uchar state;
+ Arpent *hash;
Arpent *nextrxt; /* re-transmit chain */
- uint rtime; /* time for next retransmission */
- uchar rxtsrem;
+ Block *hold;
+ Block *last;
Ipifc *ifc;
uchar ifcid; /* must match ifc->id */
+ uchar state;
+ uchar rxtsrem; /* re-transmissions remaining */
+ ulong ctime; /* time entry was created or refreshed */
+ ulong utime; /* time entry was last used */
};
extern void arpinit(Fs*);
@@ -572,15 +614,17 @@
extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
extern void arprelease(Arp*, Arpent *a);
extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void ndpsendsol(Fs*, Ipifc*, Arpent*);
/*
* ipaux.c
*/
-extern int myetheraddr(uchar*, char*);
-extern ulong parseip(uchar*, char*);
-extern ulong parseipmask(uchar*, char*);
+extern int parseether(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*, int);
+extern vlong parseipandmask(uchar*, uchar*, char*, char*);
extern char* v4parseip(uchar*, char*);
extern void maskip(uchar *from, uchar *mask, uchar *to);
extern int parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
extern void v4tov6(uchar *v6, uchar *v4);
extern int v6tov4(uchar *v4, uchar *v6);
extern int eipfmt(Fmt*);
+extern int convipvers(Conv *c);
#define ipmove(x, y) memmove(x, y, IPaddrlen)
#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
-
-#define ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
extern uchar IPv4bcast[IPaddrlen];
extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
extern Medium ethermedium;
extern Medium nullmedium;
extern Medium pktmedium;
-extern Medium tripmedium;
/*
* ipifc.c
@@ -619,33 +660,24 @@
*/
extern Medium* ipfindmedium(char *name);
extern void addipmedium(Medium *med);
+extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
extern int ipforme(Fs*, uchar *addr);
-extern int iptentative(Fs*, uchar *addr);
-extern int ipisbm(uchar *);
-extern int ipismulticast(uchar *);
-extern Ipifc* findipifc(Fs*, uchar *remote, int type);
-extern void findprimaryip(Fs*, uchar*);
+extern int ipismulticast(uchar *ip);
+extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc* findipifcstr(Fs *f, char *s);
extern void findlocalip(Fs*, uchar *local, uchar *remote);
-extern int ipv4local(Ipifc *ifc, uchar *addr);
-extern int ipv6local(Ipifc *ifc, uchar *addr);
-extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc* ipremoteonifc(Ipifc *ifc, uchar *ip);
extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int ipismulticast(uchar *ip);
-extern int ipisbooting(void);
-extern int ipifccheckin(Ipifc *ifc, Medium *med);
-extern void ipifccheckout(Ipifc *ifc);
-extern int ipifcgrab(Ipifc *ifc);
-extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void ipifcremroute(Fs*, int, uchar*, uchar*);
extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
extern long ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void ipsendra6(Fs *f, int on);
-
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char* ipifcremove6(Ipifc *ifc, char**argv, int argc);
/*
* ip.c
*/
@@ -652,37 +684,26 @@
extern void iprouting(Fs*, int);
extern void icmpnoconv(Fs*, Block*);
extern void icmpcantfrag(Fs*, Block*, int);
-extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern void icmpttlexceeded(Fs*, Ipifc*, Block*);
extern ushort ipcsum(uchar*);
extern void ipiput4(Fs*, Ipifc*, Block*);
extern void ipiput6(Fs*, Ipifc*, Block*);
-extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int ipoput6(Fs*, Block*, int, int, int, Routehint*);
extern int ipstats(Fs*, char*, int);
extern ushort ptclbsum(uchar*, int);
extern ushort ptclcsum(Block*, int, int);
extern void ip_init(Fs*);
-extern void update_mtucache(uchar*, ulong);
-extern ulong restrict_mtu(uchar*, ulong);
+extern void ip_init_6(Fs*);
/*
* bootp.c
*/
-char* (*bootp)(Ipifc*);
-int (*bootpread)(char*, ulong, int);
+extern int bootpread(char*, ulong, int);
/*
- * iprouter.c
- */
-void useriprouter(Fs*, Ipifc*, Block*);
-void iprouteropen(Fs*);
-void iprouterclose(Fs*);
-long iprouterread(Fs*, void*, int);
-
-/*
* resolving inferno/plan9 differences
*/
-Chan* commonfdtochan(int, int, int, int);
char* commonuser(void);
char* commonerror(void);
@@ -695,15 +716,3 @@
* global to all of the stack
*/
extern void (*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int nato(Block*, Ipifc*, Fs*);
-extern void nati(Block*, Ipifc*);
-extern int natgc(uchar);
-
-extern int addnataddr(uchar*, uchar*, Iplifc*);
-extern int removenataddr(uchar*, uchar*, Iplifc*);
-extern void shownataddr(void);
-extern void flushnataddr(void);
--- a/os/ip/ipaux.c
+++ b/os/ip/ipaux.c
@@ -5,49 +5,8 @@
#include "fns.h"
#include "../port/error.h"
#include "ip.h"
-#include "ipv6.h"
+#include "ipv6.h"
-/*
- * well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- * prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0, 0, 0, 0
-};
-
-
char *v6hdrtypes[Maxhdrtype] =
{
[HBH] "HopbyHop",
@@ -54,7 +13,7 @@
[ICMP] "ICMP",
[IGMP] "IGMP",
[GGP] "GGP",
- [IPINIP] "IP",
+ [IPINIP] "IP",
[ST] "ST",
[TCP] "TCP",
[UDP] "UDP",
@@ -87,6 +46,7 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+
uchar v6linklocal[IPaddrlen] = {
0xfe, 0x80, 0, 0,
0, 0, 0, 0,
@@ -99,26 +59,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6llpreflen = 8; // link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
- 0xfe, 0xc0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6slpreflen = 6; // site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
- 0x08, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
uchar v6multicast[IPaddrlen] = {
0xff, 0, 0, 0,
0, 0, 0, 0,
@@ -131,7 +73,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6mcpreflen = 1; // multicast prefix length
+int v6mcpreflen = 1; /* multicast prefix length */
+
uchar v6allnodesN[IPaddrlen] = {
0xff, 0x01, 0, 0,
0, 0, 0, 0,
@@ -138,6 +81,12 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+uchar v6allroutersN[IPaddrlen] = {
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
uchar v6allnodesNmask[IPaddrlen] = {
0xff, 0xff, 0, 0,
0, 0, 0, 0,
@@ -144,7 +93,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6aNpreflen = 2; // all nodes (N) prefix
+int v6aNpreflen = 2; /* all nodes (N) prefix */
+
uchar v6allnodesL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -151,19 +101,6 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
-uchar v6allnodesLmask[IPaddrlen] = {
- 0xff, 0xff, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6aLpreflen = 2; // all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
- 0xff, 0x01, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0x02
-};
uchar v6allroutersL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -170,12 +107,14 @@
0, 0, 0, 0,
0, 0, 0, 0x02
};
-uchar v6allroutersS[IPaddrlen] = {
- 0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, 0x02
+ 0, 0, 0, 0
};
+int v6aLpreflen = 2; /* all nodes (L) prefix */
+
uchar v6solicitednode[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -190,9 +129,6 @@
};
int v6snpreflen = 13;
-
-
-
ushort
ptclcsum(Block *bp, int offset, int len)
{
@@ -215,7 +151,7 @@
if(bp->next == nil) {
if(blocklen < len)
len = blocklen;
- return ~ptclbsum(addr, len) & 0xffff;
+ return ptclbsum(addr, len) ^ 0xffff;
}
losum = 0;
@@ -247,7 +183,7 @@
while((csum = losum>>16) != 0)
losum = csum + (losum & 0xffff);
- return ~losum & 0xffff;
+ return losum ^ 0xffff;
}
enum
@@ -255,306 +191,9 @@
Isprefix= 16,
};
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
- char buf[5*8];
- static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
- static char *ifmt = "%d.%d.%d.%d";
- uchar *p, ip[16];
- ulong *lp;
- ushort s;
- int i, j, n, eln, eli;
-
- switch(f->r) {
- case 'E': /* Ethernet address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
- case 'I': /* Ip address */
- p = va_arg(f->args, uchar*);
-common:
- if(memcmp(p, v4prefix, 12) == 0)
- return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
- /* find longest elision */
- eln = eli = -1;
- for(i = 0; i < 16; i += 2){
- for(j = i; j < 16; j += 2)
- if(p[j] != 0 || p[j+1] != 0)
- break;
- if(j > i && j - i > eln){
- eli = i;
- eln = j - i;
- }
- }
-
- /* print with possible elision */
- n = 0;
- for(i = 0; i < 16; i += 2){
- if(i == eli){
- n += sprint(buf+n, "::");
- i += eln;
- if(i >= 16)
- break;
- } else if(i != 0)
- n += sprint(buf+n, ":");
- s = (p[i]<<8) + p[i+1];
- n += sprint(buf+n, "%ux", s);
- }
- return fmtstrcpy(f, buf);
-
- case 'i': /* v6 address as 4 longs */
- lp = va_arg(f->args, ulong*);
- for(i = 0; i < 4; i++)
- hnputl(ip+4*i, *lp++);
- p = ip;
- goto common;
-
- case 'V': /* v4 ip address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
- case 'M': /* ip mask */
- p = va_arg(f->args, uchar*);
-
- /* look for a prefix mask */
- for(i = 0; i < 16; i++)
- if(p[i] != 0xff)
- break;
- if(i < 16){
- if((prefixvals[p[i]] & Isprefix) == 0)
- goto common;
- for(j = i+1; j < 16; j++)
- if(p[j] != 0)
- goto common;
- n = 8*i + (prefixvals[p[i]] & ~Isprefix);
- } else
- n = 8*16;
-
- /* got one, use /xx format */
- return fmtprint(f, "/%d", n);
- }
- return fmtstrcpy(f, "(eipfmt)");
-}
-
#define CLASS(p) ((*(uchar*)(p))>>6)
-extern char*
-v4parseip(uchar *to, char *from)
-{
- int i;
- char *p;
-
- p = from;
- for(i = 0; i < 4 && *p; i++){
- to[i] = strtoul(p, &p, 0);
- if(*p == '.')
- p++;
- }
- switch(CLASS(to)){
- case 0: /* class A - 1 uchar net */
- case 1:
- if(i == 3){
- to[3] = to[2];
- to[2] = to[1];
- to[1] = 0;
- } else if(i == 2){
- to[3] = to[1];
- to[1] = 0;
- }
- break;
- case 2: /* class B - 2 uchar net */
- if(i == 3){
- to[3] = to[2];
- to[2] = 0;
- }
- break;
- }
- return p;
-}
-
-int
-isv4(uchar *ip)
-{
- return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- * the following routines are unrolled with no memset's to speed
- * up the usual case
- */
void
-v4tov6(uchar *v6, uchar *v4)
-{
- v6[0] = 0;
- v6[1] = 0;
- v6[2] = 0;
- v6[3] = 0;
- v6[4] = 0;
- v6[5] = 0;
- v6[6] = 0;
- v6[7] = 0;
- v6[8] = 0;
- v6[9] = 0;
- v6[10] = 0xff;
- v6[11] = 0xff;
- v6[12] = v4[0];
- v6[13] = v4[1];
- v6[14] = v4[2];
- v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
- if(v6[0] == 0
- && v6[1] == 0
- && v6[2] == 0
- && v6[3] == 0
- && v6[4] == 0
- && v6[5] == 0
- && v6[6] == 0
- && v6[7] == 0
- && v6[8] == 0
- && v6[9] == 0
- && v6[10] == 0xff
- && v6[11] == 0xff)
- {
- v4[0] = v6[12];
- v4[1] = v6[13];
- v4[2] = v6[14];
- v4[3] = v6[15];
- return 0;
- } else {
- memset(v4, 0, 4);
- return -1;
- }
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
- int i, elipsis = 0, v4 = 1;
- ulong x;
- char *p, *op;
-
- memset(to, 0, IPaddrlen);
- p = from;
- for(i = 0; i < 16 && *p; i+=2){
- op = p;
- x = strtoul(p, &p, 16);
- if(*p == '.' || (*p == 0 && i == 0)){
- p = v4parseip(to+i, op);
- i += 4;
- break;
- } else {
- to[i] = x>>8;
- to[i+1] = x;
- }
- if(*p == ':'){
- v4 = 0;
- if(*++p == ':'){
- elipsis = i+2;
- p++;
- }
- }
- }
- if(i < 16){
- memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
- memset(&to[elipsis], 0, 16-i);
- }
- if(v4){
- to[10] = to[11] = 0xff;
- return nhgetl(to+12);
- } else
- return 6;
-}
-
-/*
- * hack to allow ip v4 masks to be entered in the old
- * style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
- ulong x;
- int i;
- uchar *p;
-
- if(*from == '/'){
- /* as a number of prefix bits */
- i = atoi(from+1);
- if(i < 0)
- i = 0;
- if(i > 128)
- i = 128;
- memset(to, 0, IPaddrlen);
- for(p = to; i >= 8; i -= 8)
- *p++ = 0xff;
- if(i > 0)
- *p = ~((1<<(8-i))-1);
- x = nhgetl(to+IPv4off);
- } else {
- /* as a straight bit mask */
- x = parseip(to, from);
- if(memcmp(to, v4prefix, IPv4off) == 0)
- memset(to, 0xff, IPv4off);
- }
- return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
- int i;
-
- for(i = 0; i < IPaddrlen; i++)
- to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
- if(isv4(ip))
- return classmask[ip[IPv4off]>>6];
- else {
- if(ipcmp(ip, v6loopback) == 0)
- return IPallbits;
- else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
- return v6linklocalmask;
- else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
- return v6sitelocalmask;
- else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
- return v6solicitednodemask;
- else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
- return v6multicastmask;
- return IPallbits;
- }
-}
-
-void
ipv62smcast(uchar *smcast, uchar *a)
{
assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
ulong
iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
{
- return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+ return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
}
void
@@ -678,7 +317,7 @@
return c;
}
}
-
+
/* match local address and port */
hv = iphash(IPnoaddr, 0, da, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
return c;
}
}
-
+
/* match just port */
hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
return c;
}
}
-
+
/* match local address */
hv = iphash(IPnoaddr, 0, da, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
return c;
}
}
-
+
/* look for something that matches anything */
hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
}
unlock(ht);
return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+ if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+ return V4;
+ else
+ return V6;
}
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -11,17 +11,14 @@
#define DPRINT if(0)print
enum {
- Maxmedia = 32,
- Nself = Maxmedia*5,
- NHASH = (1<<6),
- NCACHE = 256,
- QMAX = 64*1024-1,
+ Maxmedia = 32,
+ Nself = Maxmedia*5,
+ NHASH = 1<<6,
+ NCACHE = 256,
+ QMAX = 192*1024-1,
};
-Medium *media[Maxmedia] =
-{
- 0
-};
+Medium *media[Maxmedia] = { 0 };
/*
* cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
struct Ipself
{
uchar a[IPaddrlen];
- Ipself *hnext; /* next address in the hash table */
+ Ipself *next; /* next address in the hash table */
Iplink *link; /* binding twixt Ipself and Ipifc */
ulong expire;
uchar type; /* type of address */
- int ref;
- Ipself *next; /* free list */
};
struct Ipselftab
@@ -64,11 +59,47 @@
static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char* ipifcremlifc(Ipifc*, Iplifc*);
+static void ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char* ipifcremlifc(Ipifc*, Iplifc**);
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+ unknownv6, /* UGH */
+ unspecifiedv6,
+ linklocalv6,
+ globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)
+{
+ if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+ return unknownv6;
+ else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+ isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+ return linklocalv6;
+ else
+ return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)
+{
+ int i, c;
+
+ for(i = 0; i < n; i++){
+ if((c = a[i] ^ b[i]) == 0)
+ continue;
+ for(i <<= 3; (c & 0x80) == 0; i++)
+ c <<= 1;
+ return i;
+ }
+ return i << 3;
+}
+
/*
* link in a new medium
*/
@@ -121,7 +152,7 @@
wlock(ifc);
if(ifc->m != nil){
wunlock(ifc);
- return "interface already bound";
+ return Ebound;
}
if(waserror()){
wunlock(ifc);
@@ -142,18 +173,14 @@
ifc->m = m;
ifc->mintu = ifc->m->mintu;
ifc->maxtu = ifc->m->maxtu;
+ ifc->delay = 40;
+ ifc->speed = 0;
if(ifc->m->unbindonclose == 0)
ifc->conv->inuse++;
- ifc->rp.mflag = 0; // default not managed
- ifc->rp.oflag = 0;
- ifc->rp.maxraint = 600000; // millisecs
- ifc->rp.minraint = 200000;
- ifc->rp.linkmtu = 0; // no mtu sent
- ifc->rp.reachtime = 0;
- ifc->rp.rxmitra = 0;
- ifc->rp.ttl = MAXTTL;
- ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+ /* default router parameters */
+ ifc->rp = c->p->f->v6p->rp;
+
/* any ancillary structures (like routes) no longer pertain */
ifc->ifcid++;
@@ -170,29 +197,44 @@
/*
* detach a device from an interface, close the interface
- * called with ifc->conv closed
*/
static char*
ipifcunbind(Ipifc *ifc)
{
- char *err;
+ Medium *m;
- if(waserror()){
+ wlock(ifc);
+ m = ifc->m;
+ if(m == nil){
wunlock(ifc);
- nexterror();
+ return Eunbound;
}
- wlock(ifc);
- /* dissociate routes */
- if(ifc->m != nil && ifc->m->unbindonclose == 0)
- ifc->conv->inuse--;
- ifc->ifcid++;
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
/* disassociate device */
- if(ifc->m != nil && ifc->m->unbind)
- (*ifc->m->unbind)(ifc);
+ if(m->unbind != nil){
+ extern Medium nullmedium;
+
+ /*
+ * unbind() might unlock the ifc, so change the medium
+ * to the nullmedium to prevent packets from getting
+ * sent while the medium is shutting down.
+ */
+ ifc->m = &nullmedium;
+
+ if(!waserror()){
+ (*m->unbind)(ifc);
+ poperror();
+ }
+ }
+
memset(ifc->dev, 0, sizeof(ifc->dev));
ifc->arg = nil;
+
+ ifc->reflect = 0;
ifc->reassemble = 0;
/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
qclose(ifc->conv->wq);
qclose(ifc->conv->sq);
- /* disassociate logical interfaces */
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
-
+ /* dissociate routes */
+ ifc->ifcid++;
+ if(m->unbindonclose == 0)
+ ifc->conv->inuse--;
ifc->m = nil;
wunlock(ifc);
- poperror();
+
return nil;
}
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
-
static int
ipifcstate(Conv *c, char *state, int n)
{
@@ -228,19 +266,18 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
m = snprint(state, n, sfixedformat,
ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
- ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+ ifc->speed, ifc->delay);
rlock(ifc);
- for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
- m += snprint(state+m, n - m, slineformat,
- lifc->local, lifc->mask, lifc->remote,
- lifc->validlt, lifc->preflt);
+ for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat, lifc->local,
+ lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
if(ifc->lifc == nil)
m += snprint(state+m, n - m, "\n");
runlock(ifc);
@@ -256,13 +293,11 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
- m = 0;
-
rlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m = 0;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
- for(link = lifc->link; link; link = link->lifclink)
+ for(link = lifc->link; link != nil; link = link->lifclink)
m += snprint(state+m, n - m, " %-40.40I", link->self->a);
m += snprint(state+m, n - m, "\n");
}
@@ -279,6 +314,59 @@
return ifc->m != nil;
}
+static void
+ipifcadjustburst(Ipifc *ifc)
+{
+ int burst;
+
+ burst = ((vlong)ifc->delay * ifc->speed) / 8000;
+ if(burst < ifc->maxtu)
+ burst = ifc->maxtu;
+ ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+ if(delay < 0)
+ delay = 0;
+ else if(delay > 1000)
+ delay = 1000;
+ ifc->delay = delay;
+ ipifcadjustburst(ifc);
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+ if(speed < 0)
+ speed = 0;
+ ifc->speed = speed;
+ ifc->load = 0;
+ ipifcadjustburst(ifc);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ if(ifc->speed){
+ ulong now = MACHP(0)->ticks;
+ int dt = TK2MS(now - ifc->ticks);
+ ifc->ticks = now;
+ ifc->load -= ((vlong)dt * ifc->speed) / 8000;
+ if(ifc->load < 0 || dt < 0 || dt > 1000)
+ ifc->load = 0;
+ else if(ifc->load > ifc->burst){
+ freeblist(bp);
+ return;
+ }
+ }
+ bp = concatblock(bp);
+ ifc->load += BLEN(bp);
+ ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
/*
* called when a process writes to an interface's 'data'
*/
@@ -294,18 +382,15 @@
return;
ifc = (Ipifc*)c->ptcl;
- if(!canrlock(ifc)){
- freeb(bp);
- return;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil || ifc->m->pktin == nil)
- freeb(bp);
- else
+ if(ifc->m != nil && ifc->m->pktin != nil)
(*ifc->m->pktin)(c->p->f, ifc, bp);
+ else
+ freeb(bp);
runlock(ifc);
poperror();
}
@@ -319,27 +404,26 @@
Ipifc *ifc;
c->rq = qopen(QMAX, 0, 0, 0);
- c->sq = qopen(2*QMAX, 0, 0, 0);
c->wq = qopen(QMAX, Qkick, ipifckick, c);
+ c->sq = qopen(QMAX, 0, 0, 0);
+ if(c->rq == nil || c->wq == nil || c->sq == nil)
+ error(Enomem);
ifc = (Ipifc*)c->ptcl;
ifc->conv = c;
- ifc->unbinding = 0;
ifc->m = nil;
+ ifc->reflect = 0;
ifc->reassemble = 0;
}
/*
* called after last close of ipifc data or ctl
- * called with c locked, we must unlock
*/
static void
ipifcclose(Conv *c)
{
- Ipifc *ifc;
- Medium *m;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
+ Medium *m = ifc->m;
- ifc = (Ipifc*)c->ptcl;
- m = ifc->m;
if(m != nil && m->unbindonclose)
ipifcunbind(ifc);
}
@@ -347,19 +431,17 @@
/*
* change an interface's mtu
*/
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
{
- int mtu;
+ Medium *m = ifc->m;
- if(argc < 2)
+ if(m == nil)
+ return Eunbound;
+ if(mtu < m->mintu || mtu > m->maxtu)
return Ebadarg;
- if(ifc->m == nil)
- return Ebadarg;
- mtu = strtoul(argv[1], 0, 0);
- if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
- return Ebadarg;
ifc->maxtu = mtu;
+ ipifcadjustburst(ifc);
return nil;
}
@@ -374,13 +456,8 @@
Iplifc *lifc, **l;
int i, type, mtu;
Fs *f;
- int sendnbrdisc = 0;
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- f = ifc->conv->p->f;
-
+ mtu = 0;
type = Rifc;
memset(ip, 0, IPaddrlen);
memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
/* fall through */
case 5:
mtu = strtoul(argv[4], 0, 0);
- if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
- ifc->maxtu = mtu;
/* fall through */
case 4:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
- parseip(rem, argv[3]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
maskip(rem, mask, net);
break;
case 3:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
maskip(ip, mask, rem);
maskip(rem, mask, net);
break;
case 2:
- parseip(ip, argv[1]);
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
memmove(mask, defmask(ip), IPaddrlen);
maskip(ip, mask, rem);
maskip(rem, mask, net);
@@ -415,26 +490,55 @@
break;
default:
return Ebadarg;
- break;
}
- if(isv4(ip))
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+ type |= Rv4;
tentative = 0;
+ }
+
wlock(ifc);
+ if(ifc->m == nil){
+ wunlock(ifc);
+ return Eunbound;
+ }
+ f = ifc->conv->p->f;
+ if(waserror()){
+ wunlock(ifc);
+ return up->errstr;
+ }
+ if(mtu > 0)
+ ipifcsetmtu(ifc, mtu);
+
/* ignore if this is already a local address for this ifc */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, ip) == 0) {
- if(lifc->tentative != tentative)
- lifc->tentative = tentative;
- if(lifcp != nil) {
- lifc->onlink = lifcp->onlink;
- lifc->autoflag = lifcp->autoflag;
- lifc->validlt = lifcp->validlt;
- lifc->preflt = lifcp->preflt;
- lifc->origint = lifcp->origint;
+ if((lifc = iplocalonifc(ifc, ip)) != nil){
+ if(lifcp != nil) {
+ if(!lifc->onlink && lifcp->onlink){
+ lifc->onlink = 1;
+ addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
}
- goto out;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
}
+ if(lifc->tentative != tentative){
+ lifc->tentative = tentative;
+ goto done;
+ }
+ wunlock(ifc);
+ poperror();
+ return nil;
}
/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
ipmove(lifc->mask, mask);
ipmove(lifc->remote, rem);
ipmove(lifc->net, net);
+ lifc->type = type;
lifc->tentative = tentative;
if(lifcp != nil) {
lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
lifc->validlt = lifcp->validlt;
lifc->preflt = lifcp->preflt;
lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0UL;
+ lifc->origint = NOW / 1000;
}
- else { // default values
- lifc->onlink = 1;
- lifc->autoflag = 1;
- lifc->validlt = 0xffffffff;
- lifc->preflt = 0xffffffff;
- lifc->origint = NOW / 10^3;
- }
lifc->next = nil;
- for(l = &ifc->lifc; *l; l = &(*l)->next)
+ for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
;
*l = lifc;
- /* check for point-to-point interface */
- if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
- if(ipcmp(mask, IPallbits) == 0)
- type |= Rptpt;
+ /* add route for this logical interface */
+ if(lifc->onlink){
+ addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+ }
- /* add local routes */
- if(isv4(ip))
- v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
- else
- v6addroute(f, tifc, rem, mask, rem, type);
-
addselfcache(f, ifc, lifc, ip, Runi);
- if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
- ipifcregisterproxy(f, ifc, rem);
- goto out;
+ /* register proxy */
+ if(type & Rptpt){
+ if(type & Rproxy)
+ ipifcregisterproxy(f, ifc, rem, 1);
+ goto done;
}
- if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ if(type & Rv4) {
/* add subnet directed broadcast address to the self cache */
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) & mask[i];
addselfcache(f, ifc, lifc, bcast, Rbcast);
-
+
addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
- }
- else {
+ } else {
if(ipcmp(ip, v6loopback) == 0) {
/* add node-local mcast address */
addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
/* add route for all node multicast */
- v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+ addroute(f, v6allnodesN, v6allnodesNmask,
+ ip, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
}
/* add all nodes multicast address */
addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-
+
/* add route for all nodes multicast */
- v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-
+ addroute(f, v6allnodesL, v6allnodesLmask,
+ ip, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
+
/* add solicited-node multicast address */
ipv62smcast(bcast, ip);
addselfcache(f, ifc, lifc, bcast, Rmulti);
-
- sendnbrdisc = 1;
}
- /* register the address on this network for address resolution */
- if(isv4(ip) && ifc->m->areg != nil)
- (*ifc->m->areg)(ifc, ip);
-
-out:
+done:
wunlock(ifc);
- if(tentative && sendnbrdisc)
- icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+
+ rlock(ifc);
+ ipifcregisteraddr(f, ifc, lifc, ip);
+ runlock(ifc);
+
return nil;
}
/*
* remove a logical interface from an ifc
- * always called with ifc wlock'd
+ * called with ifc wlock'd
*/
static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
{
- Iplifc **l;
- Fs *f;
+ Iplifc *lifc = *l;
+ Fs *f = ifc->conv->p->f;
- f = ifc->conv->p->f;
-
- /*
- * find address on this interface and remove from chain.
- * for pt to pt we actually specify the remote address as the
- * addresss to remove.
- */
- for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
- ;
- if(*l == nil)
+ if(lifc == nil)
return "address not on this interface";
*l = lifc->next;
/* disassociate any addresses */
- while(lifc->link)
+ while(lifc->link != nil)
remselfcache(f, ifc, lifc, lifc->link->self->a);
/* remove the route for this logical interface */
- if(isv4(lifc->local))
- v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
- else {
- v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(lifc->onlink){
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(lifc->local) != linklocalv6)
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
+ }
+
+ /* unregister proxy */
+ if(lifc->type & Rptpt){
+ if(lifc->type & Rproxy)
+ ipifcregisterproxy(f, ifc, lifc->remote, 0);
+ goto done;
+ }
+
+ /* remove route for all nodes multicast */
+ if((lifc->type & Rv4) == 0){
if(ipcmp(lifc->local, v6loopback) == 0)
- /* remove route for all node multicast */
- v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
- else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
- /* remove route for all link multicast */
- v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ remroute(f, v6allnodesN, v6allnodesNmask,
+ lifc->local, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
+
+ remroute(f, v6allnodesL, v6allnodesLmask,
+ lifc->local, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
}
+done:
free(lifc);
return nil;
-
}
/*
* remove an address from an interface.
- * called with c locked
*/
char*
ipifcrem(Ipifc *ifc, char **argv, int argc)
{
- uchar ip[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar rem[IPaddrlen];
- Iplifc *lifc;
- char *rv;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ Iplifc *lifc, **l;
+ char *err;
if(argc < 3)
return Ebadarg;
-
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
if(argc < 4)
maskip(ip, mask, rem);
- else
- parseip(rem, argv[3]);
+ else if(parseip(rem, argv[3]) == -1)
+ return Ebadip;
- wlock(ifc);
-
/*
* find address on this interface and remove from chain.
* for pt to pt we actually specify the remote address as the
* addresss to remove.
*/
+ wlock(ifc);
+ l = &ifc->lifc;
for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
- if (memcmp(ip, lifc->local, IPaddrlen) == 0
- && memcmp(mask, lifc->mask, IPaddrlen) == 0
- && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ if(ipcmp(ip, lifc->local) == 0
+ && ipcmp(mask, lifc->mask) == 0
+ && ipcmp(rem, lifc->remote) == 0)
break;
+ l = &lifc->next;
}
-
- rv = ipifcremlifc(ifc, lifc);
+ err = ipifcremlifc(ifc, l);
wunlock(ifc);
- return rv;
+ return err;
}
/*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->addroute != nil)
- m->addroute(ifc, vers, addr, mask, gate, type);
- }
- }
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->remroute != nil)
- m->remroute(ifc, vers, addr, mask);
- }
- }
-}
-
-/*
* associate an address with the interface. This wipes out any previous
* addresses. This is a macro that means, remove all the old interfaces
* and add a new one.
@@ -679,170 +740,89 @@
static char*
ipifcconnect(Conv* c, char **argv, int argc)
{
+ Ipifc *ifc = (Ipifc*)c->ptcl;
char *err;
- Ipifc *ifc;
- ifc = (Ipifc*)c->ptcl;
-
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
wlock(ifc);
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
wunlock(ifc);
- poperror();
err = ipifcadd(ifc, argv, argc, 0, nil);
- if(err)
+ if(err != nil)
return err;
Fsconnected(c, nil);
-
return nil;
}
char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
{
- int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+ int i, argsleft;
+ uchar sendra, recvra;
+ Routerparams rp;
- argsleft = argc - 1;
i = 1;
-
- if(argsleft % 2 != 0)
+ argsleft = argc - 1;
+ if((argsleft % 2) != 0)
return Ebadarg;
+ sendra = ifc->sendra6;
+ recvra = ifc->recvra6;
+ rp = ifc->rp;
+
while (argsleft > 1) {
- if(strcmp(argv[i],"recvra")==0)
- ifc->recvra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"sendra")==0)
- ifc->sendra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"mflag")==0)
- ifc->rp.mflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"oflag")==0)
- ifc->rp.oflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"maxraint")==0)
- ifc->rp.maxraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"minraint")==0)
- ifc->rp.minraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"linkmtu")==0)
- ifc->rp.linkmtu = atoi(argv[i+1]);
- else if(strcmp(argv[i],"reachtime")==0)
- ifc->rp.reachtime = atoi(argv[i+1]);
- else if(strcmp(argv[i],"rxmitra")==0)
- ifc->rp.rxmitra = atoi(argv[i+1]);
- else if(strcmp(argv[i],"ttl")==0)
- ifc->rp.ttl = atoi(argv[i+1]);
- else if(strcmp(argv[i],"routerlt")==0)
- ifc->rp.routerlt = atoi(argv[i+1]);
+ if(strcmp(argv[i], "recvra") == 0)
+ recvra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "sendra") == 0)
+ sendra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "mflag") == 0)
+ rp.mflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "oflag") == 0)
+ rp.oflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "maxraint") == 0)
+ rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "minraint") == 0)
+ rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "linkmtu") == 0)
+ rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "reachtime") == 0)
+ rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "rxmitra") == 0)
+ rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "ttl") == 0)
+ rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "routerlt") == 0)
+ rp.routerlt = atoi(argv[i+1]);
else
- return Ebadarg;
+ return Ebadarg;
argsleft -= 2;
i += 2;
}
- // consistency check
- if(ifc->rp.maxraint < ifc->rp.minraint) {
- ifc->rp.maxraint = vmax;
- ifc->rp.minraint = vmin;
+ /* consistency check */
+ if(rp.maxraint < rp.minraint)
return Ebadarg;
- }
- return nil;
-}
+ ifc->rp = rp;
+ ifc->sendra6 = sendra;
+ ifc->recvra6 = recvra;
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->sendra6 = (i!=0);
return nil;
}
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->recvra6 = (i!=0);
- return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
- uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
- Iplifc *lifc;
-
- if(argc == 2){
- if((strcmp(argv[1], "show") == 0)){
- shownataddr();
- return nil;
- }else if((strcmp(argv[1], "flush") == 0)){
- flushnataddr();
- return nil;
- }else
- return Ebadarg;
- }
-
- if(argc != 5)
- return Ebadarg;
-
- if (parseip(src, argv[2]) == -1)
- return Ebadip;
-
- if (parseipmask(mask, argv[3]) == -1)
- return Ebadip;
-
- if (parseip(dst, argv[4]) == -1)
- return Ebadip;
-
- if((lifc=iplocalonifc(ifc, dst)) == nil)
- return Ebadip;
-
- if(strcmp(argv[1], "add") == 0){
- if(addnataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else if(strcmp(argv[1], "remove") == 0){
- if(removenataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else
- return Ebadarg;
-
- return nil;
-}
-
/*
* non-standard control messages.
- * called with c locked.
*/
static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
{
- Ipifc *ifc;
- int i;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
- ifc = (Ipifc*)c->ptcl;
if(strcmp(argv[0], "add") == 0)
return ipifcadd(ifc, argv, argc, 0, nil);
- else if(strcmp(argv[0], "bootp") == 0)
- return bootp(ifc);
else if(strcmp(argv[0], "try") == 0)
return ipifcadd(ifc, argv, argc, 1, nil);
else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
return ipifcrem(ifc, argv, argc);
else if(strcmp(argv[0], "unbind") == 0)
return ipifcunbind(ifc);
- else if(strcmp(argv[0], "joinmulti") == 0)
- return ipifcjoinmulti(ifc, argv, argc);
- else if(strcmp(argv[0], "leavemulti") == 0)
- return ipifcleavemulti(ifc, argv, argc);
else if(strcmp(argv[0], "mtu") == 0)
- return ipifcsetmtu(ifc, argv, argc);
- else if(strcmp(argv[0], "reassemble") == 0){
- ifc->reassemble = 1;
+ return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+ else if(strcmp(argv[0], "speed") == 0){
+ ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
return nil;
}
+ else if(strcmp(argv[0], "delay") == 0){
+ ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
else if(strcmp(argv[0], "iprouting") == 0){
- i = 1;
- if(argc > 1)
- i = atoi(argv[1]);
- iprouting(c->p->f, i);
+ iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
return nil;
}
- else if(strcmp(argv[0], "addpref6") == 0)
- return ipifcaddpref6(ifc, argv, argc);
- else if(strcmp(argv[0], "setpar6") == 0)
- return ipifcsetpar6(ifc, argv, argc);
- else if(strcmp(argv[0], "sendra6") == 0)
- return ipifcsendra6(ifc, argv, argc);
- else if(strcmp(argv[0], "recvra6") == 0)
- return ipifcrecvra6(ifc, argv, argc);
- else if(strcmp(argv[0], "nat") == 0)
- return ipifcnat(ifc, argv, argc);
+ else if(strcmp(argv[0], "reflect") == 0){
+ ifc->reflect = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "reassemble") == 0){
+ ifc->reassemble = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "add6") == 0)
+ return ipifcadd6(ifc, argv, argc);
+ else if(strcmp(argv[0], "remove6") == 0)
+ return ipifcremove6(ifc, argv, argc);
+ else if(strcmp(argv[0], "ra6") == 0)
+ return ipifcra6(ifc, argv, argc);
return "unsupported ctl";
}
+int
ipifcstats(Proto *ipifc, char *buf, int len)
{
return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
ipifc->nc = Maxmedia;
ipifc->ptclsize = sizeof(Ipifc);
- f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->ipifc = ipifc; /* hack for findipifc, ... */
f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
/*
* add to self routing cache
- * called with c locked
*/
static void
addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
{
- Ipself *p;
Iplink *lp;
+ Ipself *p;
int h;
+ type |= (lifc->type & Rv4);
qlock(f->self);
+ if(waserror()){
+ qunlock(f->self);
+ nexterror();
+ }
/* see if the address already exists */
h = hashipa(a);
- for(p = f->self->hash[h]; p; p = p->next)
- if(memcmp(a, p->a, IPaddrlen) == 0)
+ for(p = f->self->hash[h]; p != nil; p = p->next)
+ if(ipcmp(a, p->a) == 0)
break;
/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
}
/* look for a link for this lifc */
- for(lp = p->link; lp; lp = lp->selflink)
+ for(lp = p->link; lp != nil; lp = lp->selflink)
if(lp->lifc == lifc)
break;
@@ -962,18 +948,19 @@
lifc->link = lp;
/* add to routing table */
- if(isv4(a))
- v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
- else
- v6addroute(f, tifc, a, IPallbits, a, type);
+ addroute(f, a, IPallbits,
+ lifc->local,
+ ((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, type, ifc, tifc);
if((type & Rmulti) && ifc->m->addmulti != nil)
(*ifc->m->addmulti)(ifc, a, lifc->local);
- } else {
+ } else
lp->ref++;
- }
qunlock(f->self);
+ poperror();
}
/*
@@ -992,8 +979,8 @@
ulong now = NOW;
l = &freeiplink;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1000,10 +987,11 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
+
static void
ipselffree(Ipself *p)
{
@@ -1011,8 +999,8 @@
ulong now = NOW;
l = &freeipself;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1019,7 +1007,7 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
@@ -1027,7 +1015,6 @@
/*
* Decrement reference for this address on this link.
* Unlink from selftab if this is the last ref.
- * called with c locked
*/
static void
remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
/* find the unique selftab entry */
l = &f->self->hash[hashipa(a)];
- for(p = *l; p; p = *l){
+ for(p = *l; p != nil; p = *l){
if(ipcmp(p->a, a) == 0)
break;
l = &p->next;
@@ -1053,7 +1040,7 @@
* that matches the selftab entry
*/
l_lifc = &lifc->link;
- for(link = *l_lifc; link; link = *l_lifc){
+ for(link = *l_lifc; link != nil; link = *l_lifc){
if(link->self == p)
break;
l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
* the one we just found
*/
l_self = &p->link;
- for(link = *l_self; link; link = *l_self){
- if(link == *(l_lifc))
+ for(link = *l_self; link != nil; link = *l_self){
+ if(link == *l_lifc)
break;
l_self = &link->selflink;
}
@@ -1079,9 +1066,20 @@
if(--(link->ref) != 0)
goto out;
- if((p->type & Rmulti) && ifc->m->remmulti != nil)
- (*ifc->m->remmulti)(ifc, a, lifc->local);
+ /* remove from routing table */
+ remroute(f, a, IPallbits,
+ lifc->local,
+ ((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, p->type, ifc, tifc);
+ if((p->type & Rmulti) && ifc->m->remmulti != nil){
+ if(!waserror()){
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+ poperror();
+ }
+ }
+
/* ref == 0, remove from both chains and free the link */
*l_lifc = link->lifclink;
*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
if(p->link != nil)
goto out;
- /* remove from routing table */
- if(isv4(a))
- v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
- else
- v6delroute(f, a, IPallbits, 1);
-
+ /* if null address, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
/* no more links, remove from hash and free */
*l = p->next;
ipselffree(p);
- /* if IPnoaddr, forget */
- if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
- f->self->acceptall = 0;
-
out:
qunlock(f->self);
}
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
- Nstformat= 41,
-};
-
long
ipselftabread(Fs *f, char *cp, ulong offset, int n)
{
@@ -1124,14 +1110,14 @@
m = 0;
off = offset;
- qlock(f->self);
for(i = 0; i < NHASH && m < n; i++){
for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
nifc = 0;
- for(link = p->link; link; link = link->selflink)
+ for(link = p->link; link != nil; link = link->selflink)
nifc++;
routetype(p->type, state);
- m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+ p->a, nifc, state);
if(off > 0){
off -= m;
m = 0;
@@ -1138,30 +1124,15 @@
}
}
}
- qunlock(f->self);
return m;
}
-int
-iptentative(Fs *f, uchar *addr)
-{
- Ipself *p;
-
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
- if(ipcmp(addr, p->a) == 0) {
- return p->link->lifc->tentative;
- }
- }
- return 0;
-}
-
/*
* returns
* 0 - no match
* Runi
* Rbcast
- * Rmcast
+ * Rmulti
*/
int
ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
{
Ipself *p;
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
+ for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
if(ipcmp(addr, p->a) == 0)
- return p->type;
- }
+ return p->type & (Runi|Rbcast|Rmulti);
/* hack to say accept anything */
if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
* return nil.
*/
Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
{
+ uchar gnet[IPaddrlen];
+ int spec, xspec;
Ipifc *ifc, *x;
Iplifc *lifc;
- Conv **cp, **e;
- uchar gnet[IPaddrlen];
- uchar xmask[IPaddrlen];
+ Conv **cp;
- x = nil; memset(xmask, 0, IPaddrlen);
-
- /* find most specific match */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
-
+ x = nil;
+ xspec = 0;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!canrlock(ifc))
+ continue;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(type & Runi){
+ if(ipcmp(remote, lifc->local) == 0){
+ Found:
+ runlock(ifc);
+ return ifc;
+ }
+ } else if(type & (Rbcast|Rmulti)) {
+ if(ipcmp(local, lifc->local) == 0)
+ goto Found;
+ }
maskip(remote, lifc->mask, gnet);
if(ipcmp(gnet, lifc->net) == 0){
- if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+ spec = comprefixlen(remote, lifc->local, IPaddrlen);
+ if(spec > xspec){
x = ifc;
- ipmove(xmask, lifc->mask);
+ xspec = spec;
}
}
}
+ runlock(ifc);
}
- if(x != nil)
- return x;
+ return x;
+}
- /* for now for broadcast and multicast, just use first interface */
- if(type & (Rbcast|Rmulti)){
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
- ifc = (Ipifc*)(*cp)->ptcl;
- if(ifc->lifc != nil)
- return ifc;
- }
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+ uchar ip[IPaddrlen];
+ Conv *c;
+ char *p;
+ long x;
+
+ x = strtol(s, &p, 10);
+ if(p > s && *p == '\0'){
+ if(x < 0)
+ return nil;
+ if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+ return (Ipifc*)c->ptcl;
}
-
+ if(parseip(ip, s) != -1)
+ return findipifc(f, ip, ip, Runi);
return nil;
}
-enum {
- unknownv6,
- multicastv6,
- unspecifiedv6,
- linklocalv6,
- sitelocalv6,
- globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
- if(isv6global(addr))
- return globalv6;
- if(islinklocal(addr))
- return linklocalv6;
- if(isv6mcast(addr))
- return multicastv6;
- if(issitelocal(addr))
- return sitelocalv6;
- return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ * find "best" (global > link local > unspecified)
+ * local address; address must be current.
+ */
static void
findprimaryipv6(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
- Iplifc *lifc;
+ ulong now = NOW/1000;
int atype, atypel;
+ Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
ipmove(local, v6Unspecified);
atype = unspecifiedv6;
- /* find "best" (global > sitelocal > link local > unspecified)
- * local address; address must be current */
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
atypel = v6addrtype(lifc->local);
if(atypel > atype)
- if(v6addrcurr(lifc)) {
+ if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
ipmove(local, lifc->local);
atype = atypel;
- if(atype == globalv6)
+ if(atype == globalv6){
+ runlock(ifc);
return;
+ }
}
}
+ runlock(ifc);
}
}
/*
- * returns first ip address configured
+ * returns first v4 address configured
*/
static void
findprimaryipv4(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
/* find first ifc local address */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- if((lifc = ifc->lifc) != nil){
- ipmove(local, lifc->local);
- return;
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) != 0){
+ ipmove(local, lifc->local);
+ runlock(ifc);
+ return;
+ }
}
+ runlock(ifc);
}
+ ipmove(local, IPnoaddr);
}
/*
- * find the local address 'closest' to the remote system, copy it to
- * local and return the ifc for that address
+ * ipv4local, ipv6local:
+ * return a local address associated with an interface close to remote.
+ * prefixlen is the number of leading bits in the local address that
+ * have to match an interface address to be considered. this is used
+ * by source specific routes to filter on the source address.
+ * return non-zero on success or zero when no address was found.
+ *
+ * for ipv4local, all addresses are 4 byte format.
*/
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
- Ipifc *ifc;
Iplifc *lifc;
- Route *r;
- uchar gate[IPaddrlen];
- uchar gnet[IPaddrlen];
- int version;
- int atype = unspecifiedv6, atypel = unknownv6;
+ int a, b;
- USED(atype);
- USED(atypel);
- qlock(f->ipifc);
- r = v6lookup(f, remote, nil);
- version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-
- if(r != nil){
- ifc = r->ifc;
- if(r->type & Rv4)
- v4tov6(gate, r->v4.gate);
- else {
- ipmove(gate, r->v6.gate);
- ipmove(local, v6Unspecified);
- }
+ b = -1;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+ continue;
- /* find ifc address closest to the gateway to use */
- switch(version) {
- case V4:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0){
- ipmove(local, lifc->local);
- goto out;
- }
- }
- break;
- case V6:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- atypel = v6addrtype(lifc->local);
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0)
- if(atypel > atype)
- if(v6addrcurr(lifc)) {
- ipmove(local, lifc->local);
- atype = atypel;
- if(atype == globalv6)
- break;
- }
- }
- if(atype > unspecifiedv6)
- goto out;
- break;
- default:
- panic("findlocalip: version %d", version);
+ if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+ continue;
+
+ a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+ if(a > b){
+ b = a;
+ memmove(local, lifc->local+IPv4off, IPv4addrlen);
}
}
-
- switch(version){
- case V4:
- findprimaryipv4(f, local);
- break;
- case V6:
- findprimaryipv6(f, local);
- break;
- default:
- panic("findlocalip2: version %d", version);
- }
-
-out:
- qunlock(f->ipifc);
+ return b >= 0;
}
-/*
- * return first v4 address associated with an interface
- */
int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
+ struct {
+ int atype;
+ int deprecated;
+ int comprefixlen;
+ } a, b;
+ int atype;
+ ulong now;
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(isv4(lifc->local)){
- memmove(addr, lifc->local+IPv4off, IPv4addrlen);
- return 1;
- }
+ if(isv4(remote)){
+ memmove(local, v4prefix, IPv4off);
+ if((prefixlen -= IPv4off*8) < 0)
+ prefixlen = 0;
+ return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
}
- return 0;
-}
-/*
- * return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
- Iplifc *lifc;
+ atype = v6addrtype(remote);
+ b.atype = unknownv6;
+ b.deprecated = 1;
+ b.comprefixlen = 0;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local) && !(lifc->tentative)){
- ipmove(addr, lifc->local);
- return 1;
+ now = NOW/1000;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(lifc->tentative)
+ continue;
+
+ if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+ continue;
+
+ a.atype = v6addrtype(lifc->local);
+ a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+ a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+ /* prefer appropriate scope */
+ if(a.atype != b.atype){
+ if(a.atype > b.atype && b.atype < atype ||
+ a.atype < b.atype && b.atype > atype)
+ goto Good;
+ continue;
}
+ /* prefer non-deprecated addresses */
+ if(a.deprecated != b.deprecated){
+ if(b.deprecated)
+ goto Good;
+ continue;
+ }
+ /* prefer longer common prefix */
+ if(a.comprefixlen != b.comprefixlen){
+ if(a.comprefixlen > b.comprefixlen)
+ goto Good;
+ continue;
+ }
+ continue;
+ Good:
+ b = a;
+ ipmove(local, lifc->local);
}
- return 0;
+
+ return b.atype >= atype;
}
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ * find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
{
- Iplifc *lifc;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local)){
- ipmove(addr, lifc->local);
- return SRC_UNI;
- }
+ if(isv4(remote)) {
+ memmove(local, v4prefix, IPv4off);
+ if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+ findprimaryipv4(f, local);
+ } else {
+ if(v6source(f, remote, local) == nil)
+ findprimaryipv6(f, local);
}
- return SRC_UNSPEC;
}
/*
@@ -1444,13 +1396,28 @@
{
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
if(ipcmp(ip, lifc->local) == 0)
return lifc;
+
return nil;
}
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+ uchar net[IPaddrlen];
+ Iplifc *lifc;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0)
+ return lifc;
+ }
+ return nil;
+}
+
+
/*
* See if we're proxying for this address on this interface
*/
@@ -1458,24 +1425,13 @@
ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
{
Route *r;
- uchar net[IPaddrlen];
- Iplifc *lifc;
/* see if this is a direct connected pt to pt address */
- r = v6lookup(f, ip, nil);
- if(r == nil)
+ r = v6lookup(f, ip, ip, nil);
+ if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
return 0;
- if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
- return 0;
- /* see if this is on the right interface */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0)
- return 1;
- }
-
- return 0;
+ return ipremoteonifc(ifc, ip) != nil;
}
/*
@@ -1487,73 +1443,53 @@
if(isv4(ip)){
if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
}
+ else if(ip[0] == 0xff)
+ return V6;
return 0;
}
-int
-ipisbm(uchar *ip)
-{
- if(isv4(ip)){
- if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
- return V4;
- if(ipcmp(ip, IPv4bcast) == 0)
- return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
- }
- return 0;
-}
-
-
/*
- * add a multicast address to an interface, called with c locked
+ * add a multicast address to an interface.
*/
void
ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
{
- Ipifc *ifc;
- Iplifc *lifc;
- Conv **p;
Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ if(isv4(ma) != isv4(ia))
+ error("incompatible multicast/interface ip address");
+
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
return; /* it's already there */
- multi = *l = smalloc(sizeof(*multi));
- ipmove(multi->ma, ma);
- ipmove(multi->ia, ia);
- multi->next = nil;
-
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
- ifc = (Ipifc*)(*p)->ptcl;
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
if(waserror()){
- wunlock(ifc);
+ runlock(ifc);
nexterror();
}
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
- addselfcache(f, ifc, lifc, ma, Rmulti);
- wunlock(ifc);
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ runlock(ifc);
poperror();
}
+
+ multi = smalloc(sizeof(*multi));
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+ *l = multi;
}
/*
- * remove a multicast address from an interface, called with c locked
+ * remove a multicast address from an interface.
*/
void
ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
{
Ipmulti *multi, **l;
Iplifc *lifc;
- Conv **p;
Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
break;
multi = *l;
@@ -1576,161 +1508,101 @@
return; /* we don't have it open */
*l = multi->next;
+ multi->next = nil;
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
-
- ifc = (Ipifc*)(*p)->ptcl;
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
+ if(!waserror()){
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
remselfcache(f, ifc, lifc, ma);
- wunlock(ifc);
- poperror();
+ poperror();
+ }
+ runlock(ifc);
}
-
free(multi);
}
-/*
- * make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
{
- USED(ifc, argv, argc);
- return nil;
+ if(waserror()){
+ print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+ return;
+ }
+ if(ifc->m != nil && ifc->m->areg != nil)
+ (*ifc->m->areg)(f, ifc, lifc, ip);
+ poperror();
}
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
- USED(ifc, argv, argc);
- return nil;
-}
-
static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
{
- Conv **cp, **e;
- Ipifc *nifc;
+ uchar a[IPaddrlen];
Iplifc *lifc;
- Medium *m;
- uchar net[IPaddrlen];
+ Ipifc *nifc;
+ Conv **cp;
- /* register the address on any network that will proxy for us */
- e = &f->ipifc->conv[f->ipifc->nc];
+ /* register the address on any interface that will proxy for the ip */
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc || !canrlock(nifc))
+ continue;
- if(!isv4(ip)) { // V6
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->addmulti == nil) {
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
- ipv62smcast(net, ip);
- addselfcache(f, nifc, lifc, net, Rmulti);
- arpenter(f, V6, ip, nifc->mac, 6, 0);
- //(*m->addmulti)(nifc, net, ip);
- break;
- }
- }
+ if(nifc->m == nil
+ || (lifc = ipremoteonifc(nifc, ip)) == nil
+ || (lifc->type & Rptpt) != 0
+ || waserror()){
runlock(nifc);
+ continue;
}
- return;
- }
- else { // V4
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->areg == nil){
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0){
- (*m->areg)(nifc, ip);
- break;
- }
- }
- runlock(nifc);
+ if((lifc->type & Rv4) == 0){
+ /* add solicited-node multicast addr */
+ ipv62smcast(a, ip);
+ if(add)
+ addselfcache(f, nifc, lifc, a, Rmulti);
+ else
+ remselfcache(f, nifc, lifc, a);
}
+ if(add)
+ ipifcregisteraddr(f, nifc, lifc, ip);
+ runlock(nifc);
+ poperror();
}
}
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
- Route *r;
-
- r = v6lookup(f, v6Unspecified, nil);
- if(r!=nil)
- if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated
- return; // by all other means take
- // precedence over router annc
-
- v6delroute(f, v6Unspecified, v6Unspecified, 1);
- v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
- Ngates = 3,
-};
-
char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
{
- uchar onlink = 1;
- uchar autoflag = 1;
- long validlt = 0xffffffff;
- long preflt = 0xffffffff;
- long origint = NOW / 10^3;
- uchar prefix[IPaddrlen];
- int plen = 64;
- Iplifc *lifc;
- char addr[40], preflen[6];
- char *params[3];
+ int plen = 64;
+ char addr[40], preflen[6];
+ char *params[3];
+ uchar prefix[IPaddrlen];
+ Iplifc lifc;
+ Medium *m;
+ lifc.onlink = 1;
+ lifc.autoflag = 1;
+ lifc.validlt = lifc.preflt = ~0UL;
+ lifc.origint = NOW / 1000;
+
switch(argc) {
case 7:
- preflt = atoi(argv[6]);
+ lifc.preflt = strtoul(argv[6], 0, 10);
/* fall through */
case 6:
- validlt = atoi(argv[5]);
+ lifc.validlt = strtoul(argv[5], 0, 10);
/* fall through */
case 5:
- autoflag = atoi(argv[4]);
+ lifc.autoflag = atoi(argv[4]) != 0;
/* fall through */
case 4:
- onlink = atoi(argv[3]);
+ lifc.onlink = atoi(argv[3]) != 0;
/* fall through */
case 3:
plen = atoi(argv[2]);
+ /* fall through */
case 2:
break;
default:
@@ -1737,25 +1609,16 @@
return Ebadarg;
}
- if((parseip(prefix, argv[1])!=6) ||
- (validlt < preflt) ||
- (plen < 0) || (plen > 64) ||
- (islinklocal(prefix))
- )
+ if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+ plen > 64 || islinklocal(prefix))
return Ebadarg;
- lifc = smalloc(sizeof(Iplifc));
- lifc->onlink = (onlink!=0);
- lifc->autoflag = (autoflag!=0);
- lifc->validlt = validlt;
- lifc->preflt = preflt;
- lifc->origint = origint;
+ /* issue "add" ctl msg for v6 link-local addr and prefix len */
+ m = ifc->m;
+ if(m == nil || m->pref2addr == nil)
+ return Eunbound;
+ (*m->pref2addr)(prefix, ifc->mac); /* mac → v6 link-local addr */
- if(ifc->m->pref2addr!=nil)
- ifc->m->pref2addr(prefix, ifc->mac);
- else
- return Ebadarg;
-
sprint(addr, "%I", prefix);
sprint(preflen, "/%d", plen);
params[0] = "add";
@@ -1762,6 +1625,28 @@
params[1] = addr;
params[2] = preflen;
- return ipifcadd(ifc, params, 3, 0, lifc);
+ return ipifcadd(ifc, params, 3, 0, &lifc);
}
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+ Iplifc *lifc, **l;
+ ulong now;
+
+ if(argc != 1)
+ return Ebadarg;
+
+ wlock(ifc);
+ now = NOW/1000;
+ for(l = &ifc->lifc; (lifc = *l) != nil;) {
+ if((lifc->type & Rv4) == 0)
+ if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+ if(ipifcremlifc(ifc, l) == nil)
+ continue;
+ l = &lifc->next;
+ }
+ wunlock(ifc);
+
+ return nil;
+}
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,30 +9,14 @@
#include "../port/error.h"
#include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
typedef struct Ipmuxrock Ipmuxrock;
typedef struct Ipmux Ipmux;
-typedef struct Ip6hdr Ip6hdr;
enum
{
- IPHDR = 20, /* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
- uchar vcf[4]; /* version, class label, and flow label */
- uchar ploadlen[2]; /* payload length */
- uchar proto; /* next header, i.e. proto */
- uchar ttl; /* hop limit, i.e. ttl */
- uchar src[16]; /* IP source */
- uchar dst[16]; /* IP destination */
-};
-
-
-enum
-{
+ Tver,
Tproto,
Tdata,
Tiph,
@@ -36,28 +23,8 @@
Tdst,
Tsrc,
Tifc,
-
- Cother = 0,
- Cbyte, /* single byte */
- Cmbyte, /* single byte with mask */
- Cshort, /* single short */
- Cmshort, /* single short with mask */
- Clong, /* single long */
- Cmlong, /* single long with mask */
- Cifc,
- Cmifc,
};
-char *ftname[] =
-{
-[Tproto] "proto",
-[Tdata] "data",
-[Tiph] "iph",
-[Tdst] "dst",
-[Tsrc] "src",
-[Tifc] "ifc",
-};
-
/*
* a node in the decision tree
*/
@@ -66,16 +33,12 @@
Ipmux *yes;
Ipmux *no;
uchar type; /* type of field(Txxxx) */
- uchar ctype; /* tupe of comparison(Cxxxx) */
uchar len; /* length in bytes of item to compare */
uchar n; /* number of items val points to */
- short off; /* offset of comparison */
- short eoff; /* end offset of comparison */
- uchar skiphdr; /* should offset start after ipheader */
+ int off; /* offset of comparison */
uchar *val;
uchar *mask;
uchar *e; /* val+n*len*/
-
int ref; /* so we can garbage collect */
Conv *conv;
};
@@ -90,6 +53,7 @@
static int ipmuxsprint(Ipmux*, int, char*, int);
static void ipmuxkick(void *x);
+static void ipmuxfree(Ipmux *f);
static char*
skipwhite(char *p)
@@ -122,27 +86,33 @@
Ipmux *f;
p = skipwhite(p);
- if(strncmp(p, "dst", 3) == 0){
+ if(strncmp(p, "ver", 3) == 0){
+ type = Tver;
+ off = 0;
+ len = 1;
+ p += 3;
+ }
+ else if(strncmp(p, "dst", 3) == 0){
type = Tdst;
- off = offsetof(Ip4hdr, dst[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, dst[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "src", 3) == 0){
type = Tsrc;
- off = offsetof(Ip4hdr, src[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, src[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "ifc", 3) == 0){
type = Tifc;
- off = -IPv4addrlen;
- len = IPv4addrlen;
+ off = -IPaddrlen;
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "proto", 5) == 0){
type = Tproto;
- off = offsetof(Ip4hdr, proto);
+ off = offsetof(Ip6hdr, proto);
len = 1;
p += 5;
}
@@ -160,7 +130,7 @@
return nil;
p++;
off = strtoul(p, &p, 0);
- if(off < 0 || off > (64-IPHDR))
+ if(off < 0)
return nil;
p = skipwhite(p);
if(*p != ':')
@@ -189,11 +159,6 @@
f->mask = nil;
f->n = 1;
f->ref = 1;
- if(type == Tdata)
- f->skiphdr = 1;
- else
- f->skiphdr = 0;
-
return f;
}
@@ -229,7 +194,7 @@
static Ipmux*
parsemux(char *p)
{
- int n, nomask;
+ int n;
Ipmux *f;
char *val;
char *mask;
@@ -247,7 +212,7 @@
goto parseerror;
/* parse mask */
- mask = follows(val, '&');
+ mask = follows(p, '&');
if(mask != nil){
switch(f->type){
case Tsrc:
@@ -254,7 +219,7 @@
case Tdst:
case Tifc:
f->mask = smalloc(f->len);
- v4parseip(f->mask, mask);
+ parseipmask(f->mask, mask, 0);
break;
case Tdata:
case Tiph:
@@ -264,15 +229,13 @@
default:
goto parseerror;
}
- nomask = 0;
- } else {
- nomask = 1;
+ } else if(f->type == Tver){
f->mask = smalloc(f->len);
- memset(f->mask, 0xff, f->len);
+ f->mask[0] = 0xF0;
}
/* parse vals */
- f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ f->n = getfields(val, vals, nelem(vals), 1, "|");
if(f->n == 0)
goto parseerror;
f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
v = f->val;
for(n = 0; n < f->n; n++){
switch(f->type){
+ case Tver:
+ if(f->n != 1)
+ goto parseerror;
+ if(strcmp(vals[n], "6") == 0)
+ *v = IP_VER6;
+ else if(strcmp(vals[n], "4") == 0)
+ *v = IP_VER4;
+ else
+ goto parseerror;
+ break;
case Tsrc:
case Tdst:
case Tifc:
- v4parseip(v, vals[n]);
+ if(parseip(v, vals[n]) == -1)
+ goto parseerror;
break;
case Tproto:
case Tdata:
@@ -292,34 +266,11 @@
}
v += f->len;
}
-
- f->eoff = f->off + f->len;
f->e = f->val + f->n*f->len;
- f->ctype = Cother;
- if(f->n == 1){
- switch(f->len){
- case 1:
- f->ctype = nomask ? Cbyte : Cmbyte;
- break;
- case 2:
- f->ctype = nomask ? Cshort : Cmshort;
- break;
- case 4:
- if(f->type == Tifc)
- f->ctype = nomask ? Cifc : Cmifc;
- else
- f->ctype = nomask ? Clong : Cmlong;
- break;
- }
- }
return f;
parseerror:
- if(f->mask)
- free(f->mask);
- if(f->val)
- free(f->val);
- free(f);
+ ipmuxfree(f);
return nil;
}
@@ -342,8 +293,7 @@
return n;
/* compare offsets, call earlier ones more specific */
- n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) -
- (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+ n = a->off - b->off;
if(n != 0)
return n;
@@ -413,6 +363,10 @@
*nf = *f;
nf->no = ipmuxcopy(f->no);
nf->yes = ipmuxcopy(f->yes);
+ if(f->mask != nil){
+ nf->mask = smalloc(f->len);
+ memmove(nf->mask, f->mask, f->len);
+ }
nf->val = smalloc(f->n*f->len);
nf->e = nf->val + f->len*f->n;
memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
static void
ipmuxfree(Ipmux *f)
{
- if(f->val != nil)
- free(f->val);
+ if(f == nil)
+ return;
+ free(f->val);
+ free(f->mask);
free(f);
}
@@ -432,10 +388,8 @@
{
if(f == nil)
return;
- if(f->no != nil)
- ipmuxfree(f->no);
- if(f->yes != nil)
- ipmuxfree(f->yes);
+ ipmuxfree(f->no);
+ ipmuxfree(f->yes);
ipmuxfree(f);
}
@@ -510,6 +464,8 @@
return ipmuxremove(&ft->no, f);
}
+ ipmuxremove(&ft->no, f->no);
+
/* we found a match */
if(--(ft->ref) == 0){
/*
@@ -531,8 +487,55 @@
}
/*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+ int i, n;
+
+ if(f == nil)
+ return nil;
+
+ switch(f->type){
+ case Tproto:
+ f->off = offsetof(Ip4hdr, proto);
+ break;
+ case Tdst:
+ f->off = offsetof(Ip4hdr, dst[0]);
+ if(0){
+ case Tsrc:
+ f->off = offsetof(Ip4hdr, src[0]);
+ }
+ if(f->len != IPaddrlen)
+ break;
+ n = 0;
+ for(i = 0; i < f->n; i++){
+ if(isv4(f->val + i*IPaddrlen)){
+ memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+ n++;
+ }
+ }
+ if(n == 0){
+ ipmuxtreefree(f);
+ return nil;
+ }
+ f->n = n;
+ f->len = IPv4addrlen;
+ if(f->mask != nil)
+ memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+ }
+ f->e = f->val + f->n*f->len;
+
+ f->yes = ipmuxconv4(f->yes);
+ f->no = ipmuxconv4(f->no);
+
+ return f;
+}
+
+/*
* connection request is a semi separated list of filters
- * e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ * e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
*
* there's no protection against overlapping specs.
*/
@@ -568,6 +571,18 @@
return Ebadarg;
mux->conv = c;
+ if(chain->type != Tver) {
+ char ver6[] = "ver=6";
+ mux = parsemux(ver6);
+ mux->yes = chain;
+ mux->no = ipmuxcopy(chain);
+ chain = mux;
+ }
+ if(*chain->val == IP_VER4)
+ chain->yes = ipmuxconv4(chain->yes);
+ else
+ chain->no = ipmuxconv4(chain->no);
+
/* save a copy of the chain so we can later remove it */
mux = ipmuxcopy(chain);
r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
Block *bp;
bp = qget(c->wq);
- if(bp == nil)
- return;
- else {
+ if(bp != nil) {
Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
- if((ih4->vihl)&0xF0 != 0x60)
+
+ if((ih4->vihl & 0xF0) != IP_VER6)
ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
- else {
- Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
- ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
- }
+ else
+ ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
}
}
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+ int i;
+
+ if(m == nil)
+ return memcmp(v, c, n) != 0;
+
+ for(i = 0; i < n; i++)
+ if((v[i] & m[i]) != c[i])
+ return 1;
+ return 0;
+}
+
static void
ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
{
- int len, hl;
Fs *f = p->f;
- uchar *m, *h, *v, *e, *ve, *hp;
Conv *c;
+ Iplifc *lifc;
Ipmux *mux;
- Ip4hdr *ip;
+ uchar *v;
+ Ip4hdr *ip4;
Ip6hdr *ip6;
+ int off, hl;
- ip = (Ip4hdr*)bp->rp;
- hl = (ip->vihl&0x0F)<<2;
+ ip4 = (Ip4hdr*)bp->rp;
+ if((ip4->vihl & 0xF0) == IP_VER4) {
+ hl = (ip4->vihl&0x0F)<<2;
+ ip6 = nil;
+ } else {
+ hl = IP6HDR;
+ ip6 = (Ip6hdr*)ip4;
+ }
if(p->priv == nil)
goto nomatch;
- h = bp->rp;
- len = BLEN(bp);
+ c = nil;
+ lifc = nil;
- /* run the v4 filter */
+ /* run the filter */
rlock(f);
- c = nil;
mux = f->ipmux->priv;
while(mux != nil){
- if(mux->eoff > len){
- mux = mux->no;
- continue;
- }
- hp = h + mux->off + ((int)mux->skiphdr)*hl;
- switch(mux->ctype){
- case Cbyte:
- if(*mux->val == *hp)
- goto yes;
+ switch(mux->type){
+ case Tifc:
+ if(mux->len != IPaddrlen)
+ goto no;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+ for(v = mux->val; v < mux->e; v += IPaddrlen)
+ if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+ goto yes;
+ goto no;
+ case Tdata:
+ off = hl;
break;
- case Cmbyte:
- if((*hp & *mux->mask) == *mux->val)
- goto yes;
- break;
- case Cshort:
- if(*((ushort*)mux->val) == *(ushort*)hp)
- goto yes;
- break;
- case Cmshort:
- if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
- goto yes;
- break;
- case Clong:
- if(*((ulong*)mux->val) == *(ulong*)hp)
- goto yes;
- break;
- case Cmlong:
- if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
- case Cifc:
- if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
- goto yes;
- break;
- case Cmifc:
- if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
default:
- v = mux->val;
- for(e = mux->e; v < e; v = ve){
- m = mux->mask;
- hp = h + mux->off;
- for(ve = v + mux->len; v < ve; v++){
- if((*hp++ & *m++) != *v)
- break;
- }
- if(v == ve)
- goto yes;
- }
+ off = 0;
+ break;
}
+ off += mux->off;
+ if(off < 0 || off + mux->len > BLEN(bp))
+ goto no;
+ for(v = mux->val; v < mux->e; v += mux->len)
+ if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+ goto yes;
+no:
mux = mux->no;
continue;
yes:
@@ -743,28 +747,24 @@
if(c != nil){
/* tack on interface address */
bp = padblock(bp, IPaddrlen);
- ipmove(bp->rp, ifc->lifc->local);
- bp = concatblock(bp);
- if(bp != nil)
- if(qpass(c->rq, bp) < 0)
- print("Q");
+ if(lifc == nil)
+ lifc = ifc->lifc;
+ ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+ qpass(c->rq, concatblock(bp));
return;
}
nomatch:
/* doesn't match any filter, hand it to the specific protocol handler */
- ip = (Ip4hdr*)bp->rp;
- if((ip->vihl&0xF0)==0x40) {
- p = f->t2p[ip->proto];
- } else {
- ip6 = (Ip6hdr*)bp->rp;
+ if(ip6 != nil)
p = f->t2p[ip6->proto];
- }
- if(p && p->rcv)
- (*p->rcv)(p, ifc, bp);
else
- freeblist(bp);
- return;
+ p = f->t2p[ip4->proto];
+ if(p != nil && p->rcv != nil){
+ (*p->rcv)(p, ifc, bp);
+ return;
+ }
+ freeblist(bp);
}
static int
@@ -780,11 +780,14 @@
n += snprint(buf+n, len-n, "\n");
return n;
}
- n += snprint(buf+n, len-n, "h[%d:%d]&",
- mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])),
- mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
- for(i = 0; i < mux->len; i++)
- n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "%s[%d:%d]",
+ mux->type == Tdata ? "data": "iph",
+ mux->off, mux->off+mux->len-1);
+ if(mux->mask != nil){
+ n += snprint(buf+n, len-n, "&");
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ }
n += snprint(buf+n, len-n, "=");
v = mux->val;
for(j = 0; j < mux->n; j++){
--- a/os/ip/iproute.c
+++ b/os/ip/iproute.c
@@ -12,10 +12,10 @@
static void calcd(Route*);
/* these are used for all instances of IP */
-Route* v4freelist;
-Route* v6freelist;
-RWlock routelock;
-ulong v4routegeneration, v6routegeneration;
+static Route* v4freelist;
+static Route* v6freelist;
+static RWlock routelock;
+static ulong v4routegeneration, v6routegeneration;
static void
freeroute(Route *r)
@@ -22,6 +22,7 @@
{
Route **l;
+ r->ref = 0;
r->left = nil;
r->right = nil;
if(r->type & Rv4)
@@ -35,9 +36,8 @@
static Route*
allocroute(int type)
{
- Route *r;
+ Route *r, **l;
int n;
- Route **l;
if(type & Rv4){
n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
return;
l = allocroute(r->type);
+ l->left = r;
l->mid = *q;
*q = l;
- l->left = r;
}
/*
@@ -99,11 +99,11 @@
*/
enum
{
- Rpreceeds,
- Rfollows,
- Requals,
- Rcontains,
- Rcontained,
+ Rpreceeds, /* a left of b */
+ Rfollows, /* a right of b */
+ Requals, /* a equals b */
+ Rcontains, /* a contains b */
+ Roverlaps, /* a overlaps b */
};
static int
@@ -112,44 +112,88 @@
if(a->type & Rv4){
if(a->v4.endaddress < b->v4.address)
return Rpreceeds;
-
if(a->v4.address > b->v4.endaddress)
return Rfollows;
-
if(a->v4.address <= b->v4.address
&& a->v4.endaddress >= b->v4.endaddress){
if(a->v4.address == b->v4.address
- && a->v4.endaddress == b->v4.endaddress)
- return Requals;
+ && a->v4.endaddress == b->v4.endaddress){
+ if(a->v4.source <= b->v4.source
+ && a->v4.endsource >= b->v4.endsource){
+ if(a->v4.source == b->v4.source
+ && a->v4.endsource == b->v4.endsource)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
- return Rcontained;
+ return Roverlaps;
}
if(lcmp(a->v6.endaddress, b->v6.address) < 0)
return Rpreceeds;
-
if(lcmp(a->v6.address, b->v6.endaddress) > 0)
return Rfollows;
-
if(lcmp(a->v6.address, b->v6.address) <= 0
&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
if(lcmp(a->v6.address, b->v6.address) == 0
- && lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
- return Requals;
+ && lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+ if(lcmp(a->v6.source, b->v6.source) <= 0
+ && lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+ if(lcmp(a->v6.source, b->v6.source) == 0
+ && lcmp(a->v6.endsource, b->v6.endsource) == 0)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
+ return Roverlaps;
+}
- return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+ if(a == b)
+ return 1;
+
+ if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+ return 0;
+
+ if(a->type & Rv4){
+ if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+ && memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+ return 0;
+ } else {
+ if(ipcmp(a->v6.gate, IPnoaddr) != 0
+ && ipcmp(a->v6.gate, b->v6.gate) != 0)
+ return 0;
+ }
+
+ if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+ return 0;
+
+ if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+ return 0;
+
+ return 1;
}
static void
copygate(Route *old, Route *new)
{
+ old->type = new->type;
+ old->ifc = new->ifc;
+ old->ifcid = new->ifcid;
if(new->type & Rv4)
memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
else
- memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+ ipmove(old->v6.gate, new->v6.gate);
+ strncpy(old->tag, new->tag, sizeof(new->tag));
}
/*
@@ -162,12 +206,12 @@
l = p->left;
r = p->right;
- p->left = 0;
- p->right = 0;
+ p->left = nil;
+ p->right = nil;
addnode(f, root, p);
- if(l)
+ if(l != nil)
walkadd(f, root, l);
- if(r)
+ if(r != nil)
walkadd(f, root, r);
}
@@ -180,16 +224,16 @@
Route *q;
int d;
- if(p) {
+ if(p != nil) {
d = 0;
q = p->left;
- if(q)
+ if(q != nil)
d = q->depth;
q = p->right;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
q = p->mid;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
p->depth = d+1;
}
@@ -210,8 +254,8 @@
* rotate tree node
*/
p = *cur;
- dl = 0; if(l = p->left) dl = l->depth;
- dr = 0; if(r = p->right) dr = r->depth;
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
if(dl > dr+1) {
p->left = l->right;
@@ -239,7 +283,7 @@
Route *p;
p = *cur;
- if(p == 0) {
+ if(p == nil) {
*cur = new;
new->depth = 1;
return;
@@ -269,15 +313,13 @@
* supercede the old entry if the old one isn't
* a local interface.
*/
- if((p->type & Rifc) == 0){
- p->type = new->type;
- p->ifcid = -1;
+ if((p->type & Rifc) == 0)
copygate(p, new);
- } else if(new->type & Rifc)
+ else if(new->type & Rifc)
p->ref++;
freeroute(new);
break;
- case Rcontained:
+ case Roverlaps:
addnode(f, &p->mid, new);
break;
}
@@ -285,241 +327,316 @@
balancetree(cur);
}
-#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ * find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
{
Route *p;
- ulong sa;
- ulong m;
- ulong ea;
- int h, eh;
- m = nhgetl(mask);
- sa = nhgetl(a) & m;
- ea = sa | ~m;
+ for(;;){
+ p = *cur;
+ if(p == nil)
+ return nil;
+ switch(rangecompare(r, p)){
+ case Rcontains:
+ return nil;
+ case Rpreceeds:
+ cur = &p->left;
+ break;
+ case Rfollows:
+ cur = &p->right;
+ break;
+ case Roverlaps:
+ cur = &p->mid;
+ break;
+ case Requals:
+ if((p->type & Rifc) == 0 && !matchroute(r, p))
+ return nil;
+ return cur;
+ }
+ }
+}
- eh = V4H(ea);
- for(h=V4H(sa); h<=eh; h++) {
- p = allocroute(Rv4 | type);
- p->v4.address = sa;
- p->v4.endaddress = ea;
- memmove(p->v4.gate, gate, sizeof(p->v4.gate));
- memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+ Route *x;
- wlock(&routelock);
- addnode(f, &f->v4root[h], p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- wunlock(&routelock);
+ if(r == nil)
+ return nil;
+
+ if((x = looknodetag(r->mid, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->left, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->right, tag)) != nil)
+ return x;
+
+ if((r->type & Rifc) == 0){
+ if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+ return r;
}
- v4routegeneration++;
- ipifcaddroute(f, Rv4, a, mask, gate, type);
+ return nil;
}
-#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
+#define V6H(a) (((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
{
- Route *p;
- ulong sa[IPllen], ea[IPllen];
- ulong x, y;
- int h, eh;
+ Route **h, **e, *p;
- /*
- if(ISDFLT(a, mask, tag))
- f->v6p->cdrouter = -1;
- */
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+ for(; h <= e; h++) {
+ p = allocroute(r->type);
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- sa[h] = x & y;
- ea[h] = x | ~y;
- }
+ p->ifc = r->ifc;
+ p->ifcid = r->ifcid;
- eh = V6H(ea);
- for(h = V6H(sa); h <= eh; h++) {
- p = allocroute(type);
- memmove(p->v6.address, sa, IPaddrlen);
- memmove(p->v6.endaddress, ea, IPaddrlen);
- memmove(p->v6.gate, gate, IPaddrlen);
- memmove(p->tag, tag, sizeof(p->tag));
+ if(r->type & Rv4)
+ memmove(&p->v4, &r->v4, sizeof(r->v4));
+ else
+ memmove(&p->v6, &r->v6, sizeof(r->v6));
- wlock(&routelock);
- addnode(f, &f->v6root[h], p);
- while(p = f->queue) {
+ memmove(p->tag, r->tag, sizeof(r->tag));
+
+ addnode(f, h, p);
+ while((p = f->queue) != nil) {
f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
+ walkadd(f, h, p->left);
freeroute(p);
}
- wunlock(&routelock);
}
- v6routegeneration++;
- ipifcaddroute(f, 0, a, mask, gate, type);
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
{
- Route *p;
+ Route **h, **e, **l, *p;
- for(;;){
- p = *cur;
- if(p == 0)
- return 0;
-
- switch(rangecompare(r, p)){
- case Rcontains:
- return 0;
- case Rpreceeds:
- cur = &p->left;
- break;
- case Rfollows:
- cur = &p->right;
- break;
- case Rcontained:
- cur = &p->mid;
- break;
- case Requals:
- return cur;
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+
+ for(; h <= e; h++) {
+ if((l = looknode(h, r)) == nil)
+ continue;
+ p = *l;
+ if(--(p->ref) != 0)
+ continue;
+ *l = nil;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, h, p->left);
+ freeroute(p);
}
}
+
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong m;
+ ulong x, y;
+ Route r;
+ int h;
- m = nhgetl(mask);
- rt.v4.address = nhgetl(a) & m;
- rt.v4.endaddress = rt.v4.address | ~m;
- rt.type = Rv4;
+ memset(&r, 0, sizeof(r));
- eh = V4H(rt.v4.endaddress);
- for(h=V4H(rt.v4.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v4root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- }
+ r.type = type;
+
+ if(type & Rv4){
+ x = nhgetl(a+IPv4off);
+ y = nhgetl(mask+IPv4off);
+ r.v4.address = x & y;
+ r.v4.endaddress = x | ~y;
+
+ x = nhgetl(s+IPv4off);
+ y = nhgetl(smask+IPv4off);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v4.source = x & y;
+ r.v4.endsource = x | ~y;
+
+ memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+ } else {
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ r.v6.address[h] = x & y;
+ r.v6.endaddress[h] = x | ~y;
+
+ x = nhgetl(s+4*h);
+ y = nhgetl(smask+4*h);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v6.source[h] = x & y;
+ r.v6.endsource[h] = x | ~y;
}
- if(dolock)
- wunlock(&routelock);
+
+ memmove(r.v6.gate, gate, IPaddrlen);
}
- v4routegeneration++;
- ipifcremroute(f, Rv4, a, mask);
+ if(ifc != nil){
+ r.ifc = ifc;
+ r.ifcid = ifc->ifcid;
+ }
+
+ if(tag != nil)
+ strncpy(r.tag, tag, sizeof(r.tag));
+
+ return r;
}
void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong x, y;
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routeadd(f, &r);
+ wunlock(&routelock);
+}
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- rt.v6.address[h] = x & y;
- rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routerem(f, &r);
+ wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+ uchar local[IPaddrlen], gate[IPaddrlen];
+ Ipifc *ifc;
+ int i;
+
+ ifc = r->ifc;
+ if(ifc != nil && ifc->ifcid == r->ifcid)
+ return ifc;
+
+ if(r->type & Rsrc) {
+ if(r->type & Rv4) {
+ hnputl(local+IPv4off, r->v4.source);
+ memmove(local, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(local+4*i, r->v6.source[i]);
+ }
+ } else {
+ ipmove(local, IPnoaddr);
}
- rt.type = 0;
- eh = V6H(rt.v6.endaddress);
- for(h=V6H(rt.v6.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v6root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
- freeroute(p);
- }
- }
+ if(r->type & Rifc) {
+ if(r->type & Rv4) {
+ hnputl(gate+IPv4off, r->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(gate+4*i, r->v6.address[i]);
}
- if(dolock)
- wunlock(&routelock);
+ } else {
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else
+ ipmove(gate, r->v6.gate);
}
- v6routegeneration++;
- ipifcremroute(f, 0, a, mask);
+ if((ifc = findipifc(f, local, gate, r->type)) == nil)
+ return nil;
+
+ r->ifc = ifc;
+ r->ifcid = ifc->ifcid;
+ return ifc;
}
+/*
+ * v4lookup, v6lookup:
+ * lookup a route to destination address a from source address s
+ * and return the route. returns nil if no route was found.
+ * an optional Routehint can be passed in rh to cache the lookup.
+ *
+ * for v4lookup, addresses are in 4 byte format.
+ */
Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
+ ulong la, ls;
Route *p, *q;
- ulong la;
- uchar gate[IPaddrlen];
Ipifc *ifc;
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v4routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
la = nhgetl(a);
+ ls = nhgetl(s);
q = nil;
- for(p=f->v4root[V4H(la)]; p;)
- if(la >= p->v4.address) {
- if(la <= p->v4.endaddress) {
- q = p;
- p = p->mid;
- } else
- p = p->right;
- } else
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
p = p->left;
-
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- hnputl(gate+IPv4off, q->v4.address);
- memmove(gate, v4prefix, IPv4off);
- } else
- v4tov6(gate, q->v4.gate);
- ifc = findipifc(f, gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ if(p->type & Rsrc){
+ if(ls < p->v4.source){
+ p = p->mid;
+ continue;
+ }
+ if(ls > p->v4.endsource){
+ p = p->mid;
+ continue;
+ }
+ }
+ q = p;
+ p = p->mid;
}
- if(c != nil){
- c->r = q;
- c->rgen = v4routegeneration;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v4routegeneration;
}
return q;
@@ -526,29 +643,35 @@
}
Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
- Route *p, *q;
- ulong la[IPllen];
- int h;
+ ulong la[IPllen], ls[IPllen];
ulong x, y;
- uchar gate[IPaddrlen];
+ Route *p, *q;
Ipifc *ifc;
+ int h;
- if(memcmp(a, v4prefix, IPv4off) == 0){
- q = v4lookup(f, a+IPv4off, c);
- if(q != nil)
- return q;
+ if(isv4(s)){
+ if(isv4(a))
+ return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+ return nil;
}
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v6routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
- for(h = 0; h < IPllen; h++)
+ for(h = 0; h < IPllen; h++){
la[h] = nhgetl(a+4*h);
+ ls[h] = nhgetl(s+4*h);
+ }
- q = 0;
- for(p=f->v6root[V6H(la)]; p;){
+ q = nil;
+ for(p = f->v6root[V6H(la)]; p != nil;){
for(h = 0; h < IPllen; h++){
x = la[h];
y = p->v6.address[h];
@@ -571,42 +694,202 @@
}
break;
}
+ if(p->type & Rsrc){
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.source[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.endsource[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ }
q = p;
p = p->mid;
next: ;
}
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- for(h = 0; h < IPllen; h++)
- hnputl(gate+4*h, q->v6.address[h]);
- ifc = findipifc(f, gate, q->type);
- } else
- ifc = findipifc(f, q->v6.gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v6routegeneration;
}
- if(c != nil){
- c->r = q;
- c->rgen = v6routegeneration;
- }
return q;
}
+/*
+ * v4source, v6source:
+ * lookup a route to destination address a and also find
+ * a suitable source address s on the outgoing interface.
+ * return the route on success or nil when no route
+ * was found.
+ *
+ * for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPv4addrlen];
+ int splen;
+ ulong x, la;
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ la = nhgetl(a);
+ rlock(&routelock);
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
+ p = p->left;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+ splen++;
+ hnputl(src, p->v4.source);
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+ p = p->mid;
+ continue;
+ }
+ memmove(s, src, IPv4addrlen);
+ q = p;
+ p = p->mid;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPaddrlen];
+ int splen, h;
+ ulong x, y, la[IPllen];
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+ rlock(&routelock);
+ for(p = f->v6root[V6H(la)]; p != nil;){
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(h = 0; h < IPllen; h++){
+ hnputl(src+4*h, p->v6.source[h]);
+ if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+ for(; x & 0x80000000UL; x <<= 1)
+ splen++;
+ break;
+ }
+ splen += 32;
+ }
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv6local(ifc, src, splen, a)){
+ p = p->mid;
+ continue;
+ }
+ ipmove(s, src);
+ q = p;
+ p = p->mid;
+next: ;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+ int type = 0;
+ switch(*p++){
+ default: return -1;
+ case '4': type |= Rv4;
+ case '6': break;
+ }
+ for(;;) switch(*p++){
+ default:
+ return -1;
+ case 'i':
+ if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+ break;
+ case 'u':
+ if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+ break;
+ case 'b':
+ if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+ break;
+ case 'm':
+ if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+ break;
+ case 'p':
+ if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+ break;
+ case '\0':
+ return type;
+ }
+}
+
void
-routetype(int type, char *p)
+routetype(int type, char p[8])
{
- memset(p, ' ', 4);
- p[4] = 0;
if(type & Rv4)
*p++ = '4';
else
*p++ = '6';
+
if(type & Rifc)
*p++ = 'i';
+
if(type & Runi)
*p++ = 'u';
else if(type & Rbcast)
@@ -613,14 +896,14 @@
*p++ = 'b';
else if(type & Rmulti)
*p++ = 'm';
+
if(type & Rptpt)
- *p = 'p';
+ *p++ = 'p';
+ *p = 0;
}
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
{
int i;
@@ -627,8 +910,16 @@
if(r->type & Rv4){
memmove(addr, v4prefix, IPv4off);
hnputl(addr+IPv4off, r->v4.address);
+
memset(mask, 0xff, IPv4off);
hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+ memmove(src, v4prefix, IPv4off);
+ hnputl(src+IPv4off, r->v4.source);
+
+ memset(smask, 0xff, IPv4off);
+ hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
memmove(gate, v4prefix, IPv4off);
memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
} else {
@@ -635,162 +926,186 @@
for(i = 0; i < IPllen; i++){
hnputl(addr + 4*i, r->v6.address[i]);
hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ hnputl(src + 4*i, r->v6.source[i]);
+ hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
}
memmove(gate, r->v6.gate, IPaddrlen);
}
+}
- routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+ uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+ char type[8], ifbuf[4], *iname;
- if(r->ifc)
- *nifc = r->ifc->conv->x;
+ convroute(r, addr, mask, src, smask, gate);
+ routetype(r->type, type);
+ if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+ snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
else
- *nifc = -1;
+ iname = "-";
+ return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+ addr, mask, gate, type, r->tag, iname, src, smask);
}
-/*
- * this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
{
- int nifc, n;
- char t[5], *iname, ifbuf[5];
- uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
- char *p;
+ int o;
+ int h;
+ char* p;
+ char* e;
+};
- convroute(r, addr, mask, gate, t, &nifc);
- iname = "-";
- if(nifc != -1) {
- iname = ifbuf;
- snprint(ifbuf, sizeof ifbuf, "%d", nifc);
- }
- p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+ int n = seprintroute(rw->p, rw->e, r) - rw->p;
if(rw->o < 0){
- n = p - rw->p;
if(n > -rw->o){
- memmove(rw->p, rw->p-rw->o, n+rw->o);
- rw->p = p + rw->o;
+ memmove(rw->p, rw->p - rw->o, n + rw->o);
+ rw->p += n + rw->o;
}
rw->o += n;
} else
- rw->p = p;
+ rw->p += n;
+ return rw->p < rw->e;
}
-/*
- * recurse descending tree, applying the function in Routewalk
- */
static int
rr(Route *r, Routewalk *rw)
{
int h;
- if(rw->e <= rw->p)
- return 0;
if(r == nil)
return 1;
-
if(rr(r->left, rw) == 0)
return 0;
-
if(r->type & Rv4)
h = V4H(r->v4.address);
else
h = V6H(r->v6.address);
-
- if(h == rw->h)
- rw->walk(r, rw);
-
+ if(h == rw->h){
+ if(rr1(rw, r) == 0)
+ return 0;
+ }
if(rr(r->mid, rw) == 0)
return 0;
-
return rr(r->right, rw);
}
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
{
+ Routewalk rw[1];
+
+ rw->p = p;
+ rw->e = p+n;
+ rw->o = -offset;
+ if(rw->o > 0)
+ return 0;
+
rlock(&routelock);
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
if(rr(f->v4root[rw->h], rw) == 0)
break;
}
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
if(rr(f->v6root[rw->h], rw) == 0)
break;
}
runlock(&routelock);
-}
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
- Routewalk rw;
-
- rw.p = p;
- rw.e = p+n;
- rw.o = -offset;
- rw.walk = sprintroute;
-
- ipwalkroutes(f, &rw);
-
- return rw.p - p;
+ return rw->p - p;
}
/*
- * this code is not in routeflush to reduce stack size
+ * 4 add addr mask gate
+ * 5 add addr mask gate ifc
+ * 6 add addr mask gate src smask
+ * 7 add addr mask gate ifc src smask
+ * 8 add addr mask gate tag ifc src smask
+ * 9 add addr mask gate type tag ifc src smask
+ * 3 remove addr mask
+ * 4 remove addr mask gate
+ * 5 remove addr mask src smask
+ * 6 remove addr mask gate src smask
+ * 7 remove addr mask gate ifc src smask
+ * 8 remove addr mask gate tag ifc src smask
+ * 9 remove addr mask gate type tag ifc src smask
*/
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
{
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
+ uchar addr[IPaddrlen], mask[IPaddrlen];
+ uchar src[IPaddrlen], smask[IPaddrlen];
uchar gate[IPaddrlen];
- char t[5];
- int nifc;
+ Ipifc *ifc;
+ char *tag;
+ int type;
- convroute(r, addr, mask, gate, t, &nifc);
- if(r->type & Rv4)
- v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
- else
- v6delroute(f, addr, mask, dolock);
-}
+ type = 0;
+ tag = nil;
+ ifc = nil;
+ ipmove(gate, IPnoaddr);
+ ipmove(src, IPnoaddr);
+ ipmove(smask, IPnoaddr);
-/*
- * recurse until one route is deleted
- * returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
- if(r == nil)
- return 0;
- if(routeflush(f, r->mid, tag))
- return 1;
- if(routeflush(f, r->left, tag))
- return 1;
- if(routeflush(f, r->right, tag))
- return 1;
- if((r->type & Rifc) == 0){
- if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
- delroute(f, r, 0);
- return 1;
- }
+ if(argc < 3)
+ error(Ebadctl);
+ if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+ error(Ebadip);
+
+ if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+ if(argc < 4)
+ error(Ebadctl);
+ if(parseip(gate, argv[3]) == -1)
+ error(Ebadip);
}
- return 0;
+ if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+ if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+ error(Ebadip);
+ }
+ if(argc == 5 && strcmp(argv[0], "add") == 0)
+ ifc = findipifcstr(f, argv[4]);
+ if(argc > 6)
+ ifc = findipifcstr(f, argv[argc-3]);
+ if(argc > 7)
+ tag = argv[argc-4];
+ if(argc > 8){
+ if((type = parseroutetype(argv[argc-5])) < 0)
+ error(Ebadctl);
+ } else {
+ if(isv4(addr))
+ type |= Rv4;
+ }
+ if(argc > 9)
+ error(Ebadctl);
+
+ if(type & Rv4){
+ if(!isv4(addr))
+ error(Ebadip);
+ if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+ error(Ebadip);
+ if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+ error(Ebadip);
+ } else {
+ if(isv4(addr))
+ error(Ebadip);
+ }
+
+ return mkroute(addr, mask, src, smask, gate, type, ifc, tag);
}
long
routewrite(Fs *f, Chan *c, char *p, int n)
{
- int h, changed;
- char *tag;
Cmdbuf *cb;
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar gate[IPaddrlen];
- IPaux *a, *na;
+ IPaux *a;
+ Route *x, r;
cb = parsecmd(p, n);
if(waserror()){
@@ -797,54 +1112,44 @@
free(cb);
nexterror();
}
-
+ if(cb->nf < 1)
+ error("short control request");
if(strcmp(cb->f[0], "flush") == 0){
- tag = cb->f[1];
+ char *tag = cb->nf < 2 ? nil : cb->f[1];
+ int h;
+
+ wlock(&routelock);
for(h = 0; h < nelem(f->v4root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v4root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v4root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+ routerem(f, &r);
}
for(h = 0; h < nelem(f->v6root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v6root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v6root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+ routerem(f, &r);
}
- } else if(strcmp(cb->f[0], "remove") == 0){
- if(cb->nf < 3)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
- else
- v6delroute(f, addr, mask, 1);
- } else if(strcmp(cb->f[0], "add") == 0){
- if(cb->nf < 4)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- parseip(gate, cb->f[3]);
- tag = "none";
- if(c != nil){
+ wunlock(&routelock);
+ } else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+ r = parseroute(f, cb->f, cb->nf);
+ if(*r.tag == 0){
a = c->aux;
- tag = a->tag;
+ strncpy(r.tag, a->tag, sizeof(r.tag));
}
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+ wlock(&routelock);
+ if(strcmp(cb->f[0], "add") == 0)
+ routeadd(f, &r);
else
- v6addroute(f, tag, addr, mask, gate, 0);
+ routerem(f, &r);
+ wunlock(&routelock);
} else if(strcmp(cb->f[0], "tag") == 0) {
if(cb->nf < 2)
error(Ebadarg);
-
a = c->aux;
- na = newipaux(a->owner, cb->f[1]);
- c->aux = na;
+ c->aux = newipaux(a->owner, cb->f[1]);
free(a);
- }
+ } else
+ error(Ebadctl);
poperror();
free(cb);
--- a/os/ip/iprouter.c
+++ /dev/null
@@ -1,56 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "../ip/ip.h"
-
-IProuter iprouter;
-
-/*
- * User level routing. Ip packets we don't know what to do with
- * come here.
- */
-void
-useriprouter(Fs *f, Ipifc *ifc, Block *bp)
-{
- qlock(&f->iprouter);
- if(f->iprouter.q != nil){
- bp = padblock(bp, IPaddrlen);
- if(bp == nil)
- return;
- ipmove(bp->rp, ifc->lifc->local);
- qpass(f->iprouter.q, bp);
- }else
- freeb(bp);
- qunlock(&f->iprouter);
-}
-
-void
-iprouteropen(Fs *f)
-{
- qlock(&f->iprouter);
- f->iprouter.opens++;
- if(f->iprouter.q == nil)
- f->iprouter.q = qopen(64*1024, 0, 0, 0);
- else if(f->iprouter.opens == 1)
- qreopen(f->iprouter.q);
- qunlock(&f->iprouter);
-}
-
-void
-iprouterclose(Fs *f)
-{
- qlock(&f->iprouter);
- f->iprouter.opens--;
- if(f->iprouter.opens == 0)
- qclose(f->iprouter.q);
- qunlock(&f->iprouter);
-}
-
-long
-iprouterread(Fs *f, void *a, int n)
-{
- return qread(f->iprouter.q, a, n);
-}
--- a/os/ip/ipv6.c
+++ b/os/ip/ipv6.c
@@ -8,250 +8,127 @@
#include "ip.h"
#include "ipv6.h"
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = (32*1024), /* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
-
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void ipfragfree6(IP*, Fragment6*);
-Fragment6* ipfragallo6(IP*);
+static Block* ip6reassemble(IP*, int, Block*);
+static Fragment6* ipfragallo6(IP*);
+static void ipfragfree6(IP*, Fragment6*);
+static Block* procopts(Block *bp);
static Block* procxtns(IP *ip, Block *bp, int doreasm);
-int unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block* procopts(Block *bp);
+static int unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
+ v6params *v6p;
- Nstats,
-};
+ v6p = smalloc(sizeof(v6params));
-static char *statnames[] =
-{
-[Forwarding] "Forwarding",
-[DefaultTTL] "DefaultTTL",
-[InReceives] "InReceives",
-[InHdrErrors] "InHdrErrors",
-[InAddrErrors] "InAddrErrors",
-[ForwDatagrams] "ForwDatagrams",
-[InUnknownProtos] "InUnknownProtos",
-[InDiscards] "InDiscards",
-[InDelivers] "InDelivers",
-[OutRequests] "OutRequests",
-[OutDiscards] "OutDiscards",
-[OutNoRoutes] "OutNoRoutes",
-[ReasmTimeout] "ReasmTimeout",
-[ReasmReqds] "ReasmReqds",
-[ReasmOKs] "ReasmOKs",
-[ReasmFails] "ReasmFails",
-[FragOKs] "FragOKs",
-[FragFails] "FragFails",
-[FragCreates] "FragCreates",
-};
+ v6p->rp.mflag = 0; /* default not managed */
+ v6p->rp.oflag = 0;
+ v6p->rp.maxraint = 600000; /* millisecs */
+ v6p->rp.minraint = 200000;
+ v6p->rp.linkmtu = 0; /* no mtu sent */
+ v6p->rp.reachtime = 0;
+ v6p->rp.rxmitra = 0;
+ v6p->rp.ttl = MAXTTL;
+ v6p->rp.routerlt = (3 * v6p->rp.maxraint) / 1000;
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
+ v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
+ f->v6p = v6p;
+}
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
- int tentative;
- Ipifc *ifc;
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+ int morefrags, blklen, rv = 0;
uchar *gate, nexthdr;
- Ip6hdr *eh;
- int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
- Route *r, *sr;
- Fraghdr6 fraghdr;
Block *xp, *nb;
+ Fraghdr6 fraghdr;
IP *ip;
- int rv = 0;
+ Ip6hdr *eh;
+ Ipifc *ifc;
+ Route *r;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip6hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)bp->rp;
+ assert(BLEN(bp) >= IP6HDR);
len = blocklen(bp);
-
- tentative = iptentative(f, eh->src);
- if(tentative){
- netlog(f, Logip, "reject tx of packet with tentative src address\n");
- goto free;
- }
-
- if(gating){
- chunk = nhgets(eh->ploadlen);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk + IPV6HDR_LEN < len)
- len = chunk + IPV6HDR_LEN;
- }
-
if(len >= IP_MAX){
-// print("len > IP_MAX, free\n");
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v6lookup(f, eh->dst, c);
- if(r == nil){
-// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ r = v6lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v6lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v6.gate;
- if(!gating)
- eh->vcf[0] = IP_VER6;
- eh->ttl = ttl;
- if(!gating) {
- eh->vcf[0] |= (tos >> 4);
- eh->vcf[1] = (tos << 4);
- }
-
- if(!canrlock(ifc)) {
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
}
-
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil) {
+ if(ifc->m == nil)
goto raise;
+
+ if(!gating){
+ eh->vcf[0] = IP_VER6;
+ eh->vcf[0] |= tos >> 4;
+ eh->vcf[1] = tos << 4;
}
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- hnputs(eh->ploadlen, len-IPV6HDR_LEN);
- ifc->m->bwrite(ifc, bp, V6, gate);
+ hnputs(eh->ploadlen, len - IP6HDR);
+ ipifcoput(ifc, bp, V6, gate);
runlock(ifc);
poperror();
return 0;
}
- if(gating)
- if(ifc->reassemble <= 0) {
-
- /* v6 intermediate nodes are not supposed to fragment pkts;
- we fragment if ifc->reassemble is turned on; an exception
- needed for nat.
+ if(gating && !ifc->reassemble) {
+ /*
+ * v6 intermediate nodes are not supposed to fragment pkts;
+ * we fragment if ifc->reassemble is turned on; an exception
+ * needed for nat.
*/
-
ip->stats[OutDiscards]++;
icmppkttoobig6(f, ifc, bp);
- netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
goto raise;
}
-
+
/* start v6 fragmentation */
- uflen = unfraglen(bp, &nexthdr, 1);
+ uflen = unfraglen(bp, &nexthdr, 1, 0);
+ if(uflen < IP6HDR || nexthdr == FH) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+ goto raise;
+ }
if(uflen > medialen) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
goto raise;
}
@@ -260,7 +137,7 @@
if(seglen < 8) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
goto raise;
}
@@ -271,13 +148,13 @@
xp = bp;
offset = uflen;
- while (xp != nil && offset && offset >= BLEN(xp)) {
+ while (offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
xp->rp += offset;
- fragoff = 0;
+ fragoff = 0;
morefrags = 1;
for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
memmove(nb->wp, eh, uflen);
nb->wp += uflen;
- hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+ hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
fraghdr.offsetRM[1] |= morefrags;
memmove(nb->wp, &fraghdr, IP6FHDR);
nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
/* Copy data */
chunk = seglen;
while (chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -316,10 +193,9 @@
xp->rp += blklen;
chunk -= blklen;
if(xp->rp == xp->wp)
- xp = xp->next;
+ xp = xp->next;
}
-
- ifc->m->bwrite(ifc, nb, V6, gate);
+ ipifcoput(ifc, nb, V6, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
runlock(ifc);
poperror();
free:
- freeblist(bp);
+ freeblist(bp);
return rv;
}
@@ -335,16 +211,10 @@
void
ipiput6(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos;
- uchar proto;
+ int hl, len, hop, tos;
+ IP *ip;
Ip6hdr *h;
Proto *p;
- int notforme;
- int tentative;
- uchar v6dst[IPaddrlen];
- IP *ip;
- Route *r, *sr;
ip = f->ip;
ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
return;
}
- h = (Ip6hdr *)(bp->rp);
-
- memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
- notforme = ipforme(f, v6dst) == 0;
- tentative = iptentative(f, v6dst);
-
- if(tentative && (h->proto != ICMPv6)) {
- print("tentative addr, drop\n");
- freeblist(bp);
- return;
- }
-
/* Check header version */
- if(BLKIPVER(bp) != IP_VER6) {
+ h = (Ip6hdr*)bp->rp;
+ if((h->vcf[0] & 0xF0) != IP_VER6) {
ip->stats[InHdrErrors]++;
netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
- freeblist(bp);
+ goto drop;
+ }
+ len = IP6HDR + nhgets(h->ploadlen);
+ if((bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
return;
}
+ h = (Ip6hdr*)bp->rp;
/* route */
- if(notforme) {
- if(!ip->iprouting){
- freeb(bp);
- return;
+ if(!ipforme(f, h->dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
+
+ if(!ip->iprouting)
+ goto drop;
+
+ /* don't forward to link-local destinations */
+ if(islinklocal(h->dst) ||
+ (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+ ip->stats[OutDiscards]++;
+ goto drop;
}
+
/* don't forward to source's network */
- sr = v6lookup(f, h->src, nil);
- r = v6lookup(f, h->dst, nil);
-
- if(r == nil || sr == r){
+ rh.r = nil;
+ r = v6lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
if(hop < 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded6(f, ifc, bp);
- freeblist(bp);
- return;
+ goto drop;
}
/* process headers & reassemble if the interface expects it */
- bp = procxtns(ip, bp, r->ifc->reassemble);
-
+ bp = procxtns(ip, bp, nifc->reassemble);
if(bp == nil)
return;
ip->stats[ForwDatagrams]++;
- h = (Ip6hdr *) (bp->rp);
- tos = IPV6CLASS(h);
+ h = (Ip6hdr*)bp->rp;
+ tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
hop = h->ttl;
- ipoput6(f, bp, 1, hop-1, tos, nil);
+ ipoput6(f, bp, 1, hop-1, tos, &rh);
return;
}
/* reassemble & process headers if needed */
bp = procxtns(ip, bp, 1);
-
if(bp == nil)
return;
- h = (Ip6hdr *) (bp->rp);
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ h = (Ip6hdr*)bp->rp;
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -447,20 +318,20 @@
/*
* ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
*/
-void
+static void
ipfragfree6(IP *ip, Fragment6 *frag)
{
Fragment6 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- memset(frag->src, 0, IPaddrlen);
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ memset(frag->src, 0, IPaddrlen);
+ memset(frag->dst, 0, IPaddrlen);
l = &ip->flisthead6;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -470,13 +341,12 @@
frag->next = ip->fragfree6;
ip->fragfree6 = frag;
-
}
/*
* ipfragallo6 - copied from ipfragalloc4
*/
-Fragment6*
+static Fragment6*
ipfragallo6(IP *ip)
{
Fragment6 *f;
@@ -483,7 +353,7 @@
while(ip->fragfree6 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead6; f->next; f = f->next)
+ for(f = ip->flisthead6; f->next != nil; f = f->next)
;
ipfragfree6(ip, f);
}
@@ -497,108 +367,109 @@
}
static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
- int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
uchar proto;
- Ip6hdr *h;
+ int offset;
- h = (Ip6hdr *) (bp->rp);
- offset = unfraglen(bp, &proto, 0);
-
- if((proto == FH) && (doreasm != 0)) {
- bp = ip6reassemble(ip, offset, bp, h);
- if(bp == nil)
- return nil;
- offset = unfraglen(bp, &proto, 0);
+ offset = unfraglen(bp, &proto, 0, doreasm);
+ if(offset >= IP6HDR && proto == FH && doreasm) {
+ bp = ip6reassemble(ip, offset, bp);
+ if(bp == nil)
+ return nil;
+ offset = unfraglen(bp, &proto, 0, 0);
+ if(proto == FH)
+ offset = -1;
}
-
- if(proto == DOH || offset > IP6HDR)
+ if(offset < IP6HDR){
+ ip->stats[InHdrErrors]++;
+ ip->stats[InDiscards]++;
+ freeblist(bp);
+ return nil;
+ }
+ if(proto == DOH || offset > IP6HDR)
bp = procopts(bp);
-
return bp;
}
-
-/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- * field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
*/
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
{
- uchar *p, *q;
- int ufl, hs;
+ uchar *e, *p, *q;
+ e = bp->wp;
p = bp->rp;
- q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
*nexthdr = *q;
- ufl = IP6HDR;
- p += ufl;
-
- for(;;) {
- if(*nexthdr == HBH || *nexthdr == RH) {
- *nexthdr = *p;
- hs = ((int)*(p+1) + 1) * 8;
- ufl += hs;
- q = p;
- p += hs;
- }
- else
- break;
+ p += IP6HDR;
+ while(*nexthdr == HBH || *nexthdr == RH){
+ if(p+2 > e)
+ return -1;
+ q = p;
+ *nexthdr = *q;
+ p += ((int)p[1] + 1) * 8;
}
-
- if(*nexthdr == FH)
- *q = *p;
-
- if(setfh)
+ if(p > e)
+ return -1;
+ if(*nexthdr == FH){
+ if(p+IP6FHDR > e || *p == FH)
+ return -1;
+ if(popfh)
+ *q = *p;
+ } else if(setfh)
*q = FH;
-
- return ufl;
+ return p - bp->rp;
}
-Block*
+static Block*
procopts(Block *bp)
{
return bp;
}
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
{
-
- int fend, offset;
+ int offset, ovlap, fragsize, len;
+ uchar src[IPaddrlen], dst[IPaddrlen];
uint id;
- Fragment6 *f, *fnext;
+ Block *bl, **l, *prev;
Fraghdr6 *fraghdr;
- uchar src[IPaddrlen], dst[IPaddrlen];
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Fragment6 *f, *fnext;
+ Ipfrag *fp, *fq;
+ Ip6hdr* ih;
- fraghdr = (Fraghdr6 *) (bp->rp + uflen);
- memmove(src, ih->src, IPaddrlen);
- memmove(dst, ih->dst, IPaddrlen);
- id = nhgetl(fraghdr->id);
- offset = nhgets(fraghdr->offsetRM) & ~7;
-
/*
- * block lists are too hard, pullupblock into a single block
+ * block lists are too hard, concatblock into a single block
*/
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip6hdr *)(bp->rp);
- }
+ bp = concatblock(bp);
+ ih = (Ip6hdr*)bp->rp;
+ fraghdr = (Fraghdr6*)(bp->rp + uflen);
+ id = nhgetl(fraghdr->id);
+ offset = nhgets(fraghdr->offsetRM);
+ fragsize = BLEN(bp) - uflen - IP6FHDR;
+ memmove(src, ih->src, IPaddrlen);
+ memmove(dst, ih->dst, IPaddrlen);
+
qlock(&ip->fraglock6);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead6; f; f = fnext){
+ for(f = ip->flisthead6; f != nil; f = fnext){
fnext = f->next;
- if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+ if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
}
}
-
/*
* if this isn't a fragmented packet, accept it
* and get rid of any fragments that might go
* with it.
*/
- if(nhgets(fraghdr->offsetRM)==0) { // first frag is also the last
+ if((offset & ~6) == 0) { /* 1st frag is also last */
if(f != nil) {
- ipfragfree6(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree6(ip, f);
}
qunlock(&ip->fraglock6);
+
+ /* get rid of frag header */
+ memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+ bp->rp += IP6FHDR;
+ ih = (Ip6hdr*)bp->rp;
+ hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset;
- BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = offset & ~7;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -638,8 +516,9 @@
f->blist = bp;
- qunlock(&ip->fraglock6);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock6);
+
return nil;
}
@@ -649,7 +528,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -656,15 +535,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock6);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -673,29 +553,27 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
-
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
-
- if(ovlap <= 0)
- break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, uflen);
- (*l)->rp += ovlap;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
+ if(ovlap <= 0)
break;
+ if(ovlap < fq->flen) {
+ /* move up ip and frag header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
+ break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -703,45 +581,55 @@
* look for a complete packet. if we get to a fragment
* with the trailing bit of fraghdr->offsetRM[1] set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
-
- fraghdr = (Fraghdr6 *) (bl->rp + uflen);
- if((fraghdr->offsetRM[1] & 1) == 0) {
- bl = f->blist;
+ fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+ if(fraghdr->offsetRM[1] & 1)
+ continue;
- /* get rid of frag header in first fragment */
+ bl = f->blist;
+ fq = (Ipfrag*)bl->base;
- memmove(bl->rp + IP6FHDR, bl->rp, uflen);
- bl->rp += IP6FHDR;
- len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
- bl->wp = bl->rp + len + IP6HDR;
+ /* get rid of frag header in first fragment */
+ memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+ bl->rp += IP6FHDR;
+ len = BLEN(bl);
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += uflen + IP6FHDR;
- bl->wp = bl->rp + fragsize;
- }
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
- bl = f->blist;
- f->blist = nil;
+ if(len >= IP_MAX){
ipfragfree6(ip, f);
- ih = (Ip6hdr*)(bl->rp);
- hnputs(ih->ploadlen, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock6);
- ip->stats[ReasmOKs]++;
- return bl;
+
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree6(ip, f);
+
+ ih = (Ip6hdr*)bl->rp;
+ hnputs(ih->ploadlen, len-IP6HDR);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock6);
+
+ return bl;
}
qunlock(&ip->fraglock6);
return nil;
}
-
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefixes), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
#define isv6mcast(addr) ((addr)[0] == 0xff)
#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
-typedef struct Ip6hdr Ip6hdr;
-typedef struct Opthdr Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6 Fraghdr6;
-
-struct Ip6hdr {
- uchar vcf[4]; // version:4, traffic class:8, flow label:20
- uchar ploadlen[2]; // payload length: packet length - 40
- uchar proto; // next header type
- uchar ttl; // hop limit
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
- uchar nexthdr;
- uchar len;
-};
-
-struct Routinghdr {
- uchar nexthdr;
- uchar len;
- uchar rtetype;
- uchar segrem;
-};
-
-struct Fraghdr6 {
- uchar nexthdr;
- uchar res;
- uchar offsetRM[2]; // Offset, Res, M flag
- uchar id[4];
-};
-
-
-enum { /* Header Types */
- HBH = 0, //?
+enum { /* Header Types */
+ HBH = 0, /* IPv6 hop-by-hop options extension header (rfc2460) */
ICMP = 1,
IGMP = 2,
GGP = 3,
@@ -72,89 +50,113 @@
Maxhdrtype = 256,
};
-
enum {
- // multicast flgs and scop
+ /* multicast flags and scopes */
- well_known_flg = 0,
- transient_flg = 1,
+// Well_known_flg = 0,
+// Transient_flg = 1,
- node_local_scop = 1,
- link_local_scop = 2,
- site_local_scop = 5,
- org_local_scop = 8,
- global_scop = 14,
+// Interface_local_scop = 1,
+ Link_local_scop = 2,
+// Site_local_scop = 5,
+// Org_local_scop = 8,
+ Global_scop = 14,
- // various prefix lengths
+ /* various prefix lengths */
+ SOLN_PREF_LEN = 13,
- SOLN_PREF_LEN = 13,
+ /* icmpv6 unreachability codes */
+ Icmp6_no_route = 0,
+ Icmp6_ad_prohib = 1,
+ Icmp6_out_src_scope = 2,
+ Icmp6_adr_unreach = 3,
+ Icmp6_port_unreach = 4,
+ Icmp6_gress_src_fail = 5,
+ Icmp6_rej_route = 6,
+ Icmp6_unknown = 7, /* our own invention for internal use */
- // icmpv6 unreach codes
- icmp6_no_route = 0,
- icmp6_ad_prohib = 1,
- icmp6_unassigned = 2,
- icmp6_adr_unreach = 3,
- icmp6_port_unreach = 4,
- icmp6_unkn_code = 5,
+ /* various flags & constants */
+ v6MINTU = 1280,
+ HOP_LIMIT = 255,
+ IP6HDR = 40, /* sizeof(Ip6hdr) = 8 + 2*16 */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
- // various flags & constants
+ /* option types */
- v6MINTU = 1280,
- HOP_LIMIT = 255,
- ETHERHDR_LEN = 14,
- IPV6HDR_LEN = 40,
- IPV4HDR_LEN = 20,
+ /* neighbour discovery */
+ SRC_LLADDR = 1,
+ TARGET_LLADDR = 2,
+ PREFIX_INFO = 3,
+ REDIR_HEADER = 4,
+ MTU_OPTION = 5,
+ /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+ V6nd_home = 8,
+ V6nd_srcaddrs = 9, /* rfc3122 */
+ V6nd_ip = 17,
+ /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+ V6nd_rdns = 25,
+ /* plan 9 extensions */
+ V6nd_9fs = 250,
+ V6nd_9auth = 251,
- // option types
+ SRC_UNSPEC = 0,
+ SRC_UNI = 1,
+ TARG_UNI = 2,
+ TARG_MULTI = 3,
- SRC_LLADDRESS = 1,
- TARGET_LLADDRESS = 2,
- PREFIX_INFO = 3,
- REDIR_HEADER = 4,
- MTU_OPTION = 5,
+ Tunitent = 1,
+ Tuniproxy = 2,
+ Tunirany = 3,
- SRC_UNSPEC = 0,
- SRC_UNI = 1,
- TARG_UNI = 2,
- TARG_MULTI = 3,
+ /* Node constants */
+ MAX_MULTICAST_SOLICIT = 3,
+ RETRANS_TIMER = 1000,
+};
- t_unitent = 1,
- t_uniproxy = 2,
- t_unirany = 3,
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
- // Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */\
+ uchar ploadlen[2]; /* payload length: packet length - 40 */ \
+ uchar proto; /* next header type */ \
+ uchar ttl; /* hop limit */ \
+ uchar src[IPaddrlen]; \
+ uchar dst[IPaddrlen]
- MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000,
- MAX_INITIAL_RTR_ADVERTISEMENTS = 3,
- MAX_FINAL_RTR_ADVERTISEMENTS = 3,
- MIN_DELAY_BETWEEN_RAS = 3000,
- MAX_RA_DELAY_TIME = 500,
+struct Ip6hdr {
+ IPV6HDR;
+ uchar payload[];
+};
- // Host constants
+struct Opthdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+};
- MAX_RTR_SOLICITATION_DELAY = 1000,
- RTR_SOLICITATION_INTERVAL = 4000,
- MAX_RTR_SOLICITATIONS = 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
- // Node constants
-
- MAX_MULTICAST_SOLICIT = 3,
- MAX_UNICAST_SOLICIT = 3,
- MAX_ANYCAST_DELAY_TIME = 1000,
- MAX_NEIGHBOR_ADVERTISEMENT = 3,
- REACHABLE_TIME = 30000,
- RETRANS_TIMER = 1000,
- DELAY_FIRST_PROBE_TIME = 5000,
-
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4];
};
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
extern uchar v6allnodesN[IPaddrlen];
extern uchar v6allnodesL[IPaddrlen];
extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
extern uchar v6allroutersL[IPaddrlen];
extern uchar v6allnodesNmask[IPaddrlen];
extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
extern uchar v6solicitednode[IPaddrlen];
extern uchar v6solicitednodemask[IPaddrlen];
extern uchar v6Unspecified[IPaddrlen];
extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
extern uchar v6linklocal[IPaddrlen];
extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
extern uchar v6multicast[IPaddrlen];
extern uchar v6multicastmask[IPaddrlen];
extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
extern int v6mcpreflen;
extern int v6snpreflen;
extern int v6aNpreflen;
@@ -184,3 +179,10 @@
extern int v6aLpreflen;
extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
--- a/os/ip/kernel.h
+++ /dev/null
@@ -1,10 +1,0 @@
-extern int kclose(int);
-extern int kdial(char*, char*, char*, int*);
-extern int kannounce(char*, char*);
-extern void kerrstr(char*);
-extern void kgerrstr(char*);
-extern int kopen(char*, int);
-extern long kread(int, void*, long);
-extern long kseek(int, vlong, int);
-extern long kwrite(int, void*, long);
-extern void kwerrstr(char *, ...);
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -28,13 +28,12 @@
LB *lb;
lb = smalloc(sizeof(*lb));
+ lb->readp = (void*)-1;
lb->f = ifc->conv->p->f;
- /* TO DO: make queue size a function of kernel memory */
- lb->q = qopen(128*1024, Qmsg, nil, nil);
+ lb->q = qopen(1024*1024, Qmsg, nil, nil);
ifc->arg = lb;
- ifc->mbps = 1000;
- kproc("loopbackread", loopbackread, ifc, 0);
+ kproc("loopbackread", loopbackread, ifc);
}
@@ -43,13 +42,29 @@
{
LB *lb = ifc->arg;
- if(lb->readp)
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(lb->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(lb->readp != nil)
postnote(lb->readp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for reader to die */
- while(lb->readp != 0)
+ while(lb->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
/* clean up */
qfree(lb->q);
free(lb);
@@ -76,23 +91,14 @@
ifc = a;
lb = ifc->arg;
lb->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- lb->readp = 0;
- pexit("hangup", 1);
- }
- for(;;){
- bp = qbread(lb->q, Maxtu);
- if(bp == nil)
- continue;
- ifc->in++;
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(!waserror())
+ while((bp = qbread(lb->q, Maxtu)) != nil){
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
+ ifc->in++;
if(ifc->lifc == nil)
freeb(bp);
else
@@ -100,6 +106,8 @@
runlock(ifc);
poperror();
}
+ lb->readp = nil;
+ pexit("hangup", 1);
}
Medium loopbackmedium =
--- a/os/ip/nat.c
+++ /dev/null
@@ -1,549 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-#include "ip.h"
-
-typedef struct NatProto NatProto;
-typedef struct NatAddr NatAddr;
-
-/*
- * NAT.
- */
-struct Nat
-{
- uchar src[IPv4addrlen]; /* Source address */
- uchar sport[2]; /* Source port */
- uchar lport[2]; /* Local port */
- uchar proto; /* Protocol */
- long time; /* Time */
- Conv *conv; /* Conversation */
- Nat *next; /* Next node */
-};
-
-/*
- * Protocol list.
- */
-struct NatProto
-{
- uchar proto; /* Protocol */
- int sport; /* Source port offset */
- int dport; /* Destination port offset */
- int cksum; /* Checksum offset */
- int timeout; /* Timeout */
-};
-
-/*
- * Address list.
- */
-struct NatAddr
-{
- uchar src[IPaddrlen]; /* Source address */
- uchar mask[IPaddrlen]; /* Source address mask */
- uchar net[IPaddrlen]; /* Source network address */
- Iplifc *dst; /* Destination interface */
- NatAddr *next; /* Next node */
-};
-
-static Nat *head = nil;
-static NatAddr *addrhead = nil;
-
-/*
- * Timeouts for ICMP, TCP and UDP are respectively confirmed
- * in RFC 5508, RFC 5382 and RFC 4787.
- */
-static NatProto prototab[] =
-{
- { 1, 4, 4, 2, 60*1000 }, /* ICMP */
- { 6, 0, 2, 16, (2*60*60+4*60)*1000 }, /* TCP */
- { 17, 0, 2, 6, 2*60*1000 }, /* UDP */
- { 40, 6, 8, 0, 10*30*1000 }, /* IL */
- { 255, 0, 2, 6, 2*60*1000 }, /* RUDP */
- { 0 }
-};
-
-NatProto* parseproto(uchar);
-void natprepend(Nat*);
-Nat* natexistout(uchar*, uchar, uchar*);
-Nat* natexistin(uchar, uchar*);
-int natdelete(uchar*, uchar, uchar*);
-int natpurge(uchar);
-Nat* natlport(Proto*, Ip4hdr*, uchar*);
-int natgc(uchar);
-void checksumadjust(uchar*, uchar*, int, uchar*, int);
-Iplifc* natonifco(Ipifc*, Ip4hdr*);
-Iplifc* natonifci(Ipifc*);
-void nataddrprepend(NatAddr*);
-NatAddr* nataddrexist(uchar*, uchar*, Iplifc*);
-int addnataddr(uchar*, uchar*, Iplifc*);
-int removenataddr(uchar*, uchar*, Iplifc*);
-void shownataddr(void);
-void flushnataddr(void);
-
-/*
- * Return protocol attributes if known.
- */
-NatProto*
-parseproto(uchar proto)
-{
- NatProto *np;
-
- for(np = prototab; np->proto; np++)
- if(proto == np->proto)
- return np;
-
- return nil;
-}
-
-/*
- * Output NAT.
- * Return -1 if the packet must be NATed but the protocol is unknown.
- */
-int
-nato(Block *b, Ipifc *ifc, Fs *f)
-{
- Nat *n; /* NAT table */
- NatProto *np; /* Protocol list */
- Iplifc *lifc; /* Logical interface */
- Ip4hdr *h; /* Source IPv4 header */
- Proto *p; /* New protocol */
- uchar *laddr; /* Local address on Iplifc */
- uchar *sport; /* Source port */
- uchar *cksum; /* Source checksum */
-
- h = (Ip4hdr*)(b->rp);
-
- /* Verify on which logical interface NAT is enabled,
- and if this source address must be translated */
- if((lifc=natonifco(ifc, h)) == nil)
- return 0;
-
- laddr = lifc->local+IPv4off;
- p = Fsrcvpcolx(f, h->proto);
-
- if(ip4cmp(h->src, laddr) != 0){
- if((np=parseproto(h->proto)) != nil){
- /* Protocol layer */
- sport = (b->rp)+sizeof(Ip4hdr)+np->sport;
- cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
- if((n = natlport(p, h, sport)) == nil)
- return -1;
- memmove(sport, n->lport, 2);
- checksumadjust(cksum, n->sport, 2, n->lport, 2);
- if(np->proto != 1)
- /* ICMP checksum doesn't include IP header */
- checksumadjust(cksum, n->src, IPv4addrlen,
- laddr, IPv4addrlen);
- /* IP layer */
- ip4move(h->src, laddr);
- checksumadjust(h->cksum, n->src, IPv4addrlen,
- h->src, IPv4addrlen);
- return 0;
- }else{
- netlog(f, Lognat, "nat: unknown protocol %d\n", h->proto);
- return -1;
- }
- }
-
- return 0;
-}
-
-/*
- * Input NAT.
- */
-void
-nati(Block *b, Ipifc *ifc)
-{
- Nat *n; /* NAT table */
- NatProto *np; /* Protocol list */
- Ip4hdr *h; /* Source IPv4 header */
- uchar *lport; /* Our local port, and dst port for the packet */
- uchar *cksum; /* Source checksum */
-
- h = (Ip4hdr*)(b->rp);
-
- /* Verify if NAT is enabled on this interface */
- if(natonifci(ifc) == nil)
- return;
-
- if((np=parseproto(h->proto)) != nil){
- lport = (b->rp)+sizeof(Ip4hdr)+np->dport;
- if((n=natexistin(h->proto, lport)) != nil){
- /* Protocol layer */
- cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
- checksumadjust(cksum, lport, 2, n->sport, 2);
- memmove(lport, n->sport, 2);
- if(np->proto != 1)
- /* ICMP checksum doesn't include IP header */
- checksumadjust(cksum, h->dst, IPv4addrlen,
- n->src, IPv4addrlen);
- /* IP layer */
- checksumadjust(h->cksum, h->dst, IPv4addrlen,
- n->src, IPv4addrlen);
- ip4move(h->dst, n->src);
- }
- }
-}
-
-/*
- * Add Nat to Nat list.
- */
-void
-natprepend(Nat *n)
-{
- n->next = head;
- head = n;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistout(uchar *src, uchar proto, uchar *sport)
-{
- Nat *c; /* Current node */
-
- for(c=head; c!=nil; c=c->next)
- if(ip4cmp(src, c->src) == 0 &&
- memcmp(sport, c->sport, 2) == 0 &&
- proto == c->proto){
- c->time = NOW;
- return c;
- }
-
- return nil;
-}
-
-/*
- * Return Nat if it exists in Nat list.
- */
-Nat*
-natexistin(uchar proto, uchar *lport)
-{
- Nat *c; /* Current node */
-
- for(c=head; c!=nil; c=c->next)
- if(memcmp(lport, c->lport, 2) == 0 &&
- proto == c->proto){
- c->time = NOW;
- return c;
- }
-
- return nil;
-}
-
-/*
- * Delete Nat in Nat list.
- * Return -1 if it doesn't exist.
- */
-int
-natdelete(uchar src[IPv4addrlen], uchar proto, uchar sport[2])
-{
- Nat *p; /* Precedent node */
- Nat *c; /* Current node */
-
- for(p=nil, c=head; c!=nil; p=c, c=c->next)
- if(ip4cmp(src, c->src) == 0 &&
- memcmp(sport, c->sport, 2) == 0 &&
- proto == c->proto)
- break;
-
- if(c == nil)
- return -1;
-
- if(p == nil)
- head = head->next;
- else
- p->next = c->next;
-
- closeconv(c->conv);
- free(c);
-
- return 0;
-}
-
-/*
- * Purge Nat list.
- */
-int
-natpurge(uchar proto)
-{
- Nat *c; /* Current node */
- int n; /* Number of purged connections */
-
- for(n = 0;; n++){
- do{
- if((c = head) == nil)
- return n;
- head = head->next;
- }while(c->proto != proto);
- closeconv(c->conv);
- free(c);
- }
-}
-
-/*
- * Create a new Nat if necessary.
- */
-Nat*
-natlport(Proto *p, Ip4hdr *h, uchar *sport)
-{
- Nat *n; /* New NAT node */
- Conv *s; /* New conversation */
-
- if((n=natexistout(h->src, h->proto, sport)) == nil){
- qlock(p);
- s = Fsprotoclone(p, "network");
- qunlock(p);
- if(s == nil){
- error(Enodev);
- return nil;
- }
- setlport(s);
- n = malloc(sizeof(Nat));
- ip4move(n->src, h->src);
- memmove(n->sport, sport, 2);
- memmove(n->lport, &s->lport, 2);
- n->proto = h->proto;
- n->time = NOW;
- n->conv = s;
- natprepend(n);
- }
-
- return n;
-}
-
-/*
- * Nat list garbage collector.
- */
-int
-natgc(uchar proto){
- Nat *p; /* Precedent node */
- Nat *c; /* Current node */
- NatProto *np; /* Protocol list */
- int n; /* Number of garbage collected connections */
-
- n = 0;
- p = nil;
- c = head;
-
- np = parseproto(proto);
-
- while(c != nil){
- if(NOW - c->time > np->timeout){
- if(p == nil){
- head = head->next;
- if(proto == c->proto)
- n++;
- closeconv(c->conv);
- free(c);
- p = nil;
- c = head;
- }else{
- p->next = c->next;
- if(proto == c->proto)
- n++;
- closeconv(c->conv);
- free(c);
- c = p->next;
- }
- }else{
- p = c;
- c = c->next;
- }
- }
-
- if(n == 0) /* Prevent Conv saturation */
- n = natpurge(proto);
-
- return n;
-}
-
-/*
- * Function checksumadjust from RFC 3022.
- */
-void
-checksumadjust(uchar *chksum, uchar *optr, int olen, uchar *nptr, int nlen)
-{
- long x, old, new;
-
- x=chksum[0]*256+chksum[1];
- x=~x & 0xffff;
- while(olen){
- old=optr[0]*256+optr[1];
- optr+=2;
- x-=old & 0xffff;
- if(x<=0){
- x--;
- x&=0xffff;
- }
- olen-=2;
- }
- while(nlen){
- new=nptr[0]*256+nptr[1];
- nptr+=2;
- x+=new & 0xffff;
- if(x & 0x10000){
- x++;
- x&=0xffff;
- }
- nlen-=2;
- }
- x=~x & 0xffff;
- chksum[0]=x/256;
- chksum[1]=x & 0xff;
-}
-
-/*
- * Add NatAddr to NatAddr list.
- */
-void
-nataddrprepend(NatAddr *na)
-{
- na->next = addrhead;
- addrhead = na;
-}
-
-/*
- * Return NatAddr if it exists in NatAddr list.
- */
-NatAddr*
-nataddrexist(uchar *src, uchar *mask, Iplifc *dst)
-{
- NatAddr *c; /* Current node */
-
- for(c=addrhead; c!=nil; c=c->next)
- if(ipcmp(src, c->src) == 0 &&
- ipcmp(mask, c->mask) == 0 &&
- dst == c->dst)
- return c;
-
- return nil;
-}
-
-/*
- * Create a new NatAddr.
- * Return -1 if it already exist.
- */
-int
-addnataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
- NatAddr *na; /* New address node */
- uchar net[IPaddrlen]; /* Network address */
-
- maskip(src, mask, net);
-
- if(nataddrexist(src, mask, dst) != nil)
- return -1;
-
- na = malloc(sizeof(NatAddr));
- ipmove(na->src, src);
- ipmove(na->mask, mask);
- ipmove(na->net, net);
- na->dst = dst;
-
- nataddrprepend(na);
-
- return 0;
-}
-
-/*
- * Remove a NatAddr.
- * Return -1 if it doesn't exist.
- */
-int
-removenataddr(uchar *src, uchar *mask, Iplifc *dst)
-{
- NatAddr *c; /* Current node */
- NatAddr *p; /* Precedent node */
-
- for(p=nil, c=addrhead; c!=nil; p=c, c=c->next)
- if(ipcmp(src, c->src) == 0 &&
- ipcmp(mask, c->mask) == 0 &&
- dst == c->dst)
- break;
-
- if(c == nil)
- return -1;
-
- if(p == nil)
- addrhead = addrhead->next;
- else
- p->next = c->next;
-
- return 0;
-}
-
-/*
- * Display NatAddr list.
- */
-void
-shownataddr(void)
-{
- NatAddr *c; /* Current node */
-
- for(c=addrhead; c!=nil; c=c->next)
- print("%I %V %I\n", c->src, c->mask+IPv4off, c->dst->local);
-}
-
-/*
- * Flush NatAddr list.
- */
-void
-flushnataddr(void)
-{
- NatAddr *c; /* Current node */
-
- while((c=addrhead) != nil){
- addrhead = addrhead->next;
- free(c);
- }
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface,
- * and the source address must be translated.
- */
-Iplifc*
-natonifco(Ipifc *ifc, Ip4hdr* h)
-{
- NatAddr *na; /* Address list */
- Iplifc *lifc; /* Logical interface */
- uchar src[IPaddrlen]; /* Source address */
- uchar net[IPaddrlen]; /* Source network address */
-
- for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
- for(na=addrhead; na; na=na->next)
- if(lifc == na->dst){
- /* NAT enabled on this logical interface */
- v4tov6(src, h->src);
- maskip(src, na->mask, net);
- if(ipcmp(net, na->net) == 0)
- /* Source address must be translated */
- return lifc;
- }
-
- return nil;
-}
-
-/*
- * Return logical interface if NAT is enabled on this interface.
- */
-Iplifc*
-natonifci(Ipifc *ifc)
-{
- NatAddr *na; /* Address list */
- Iplifc *lifc; /* Logical interface */
-
- for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
- for(na=addrhead; na; na=na->next)
- if(lifc == na->dst){
- /* NAT enabled on this logical interface */
- return lifc;
- }
-
- return nil;
-}
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -49,12 +49,13 @@
mchan = namec(argv[2], Aopen, ORDWR, 0);
er = smalloc(sizeof(*er));
+ er->readp = (void*)-1;
er->mchan = mchan;
er->f = ifc->conv->p->f;
ifc->arg = er;
- kproc("netdevread", netdevread, ifc, 0);
+ kproc("netdevread", netdevread, ifc);
}
/*
@@ -65,13 +66,29 @@
{
Netdevrock *er = ifc->arg;
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(er->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
if(er->readp != nil)
postnote(er->readp, 1, "unbind", 0);
- /* wait for readers to die */
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
+ /* wait for reader to die */
while(er->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan != nil)
cclose(er->mchan);
@@ -86,8 +103,6 @@
{
Netdevrock *er = ifc->arg;
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
@@ -104,34 +119,22 @@
Ipifc *ifc;
Block *bp;
Netdevrock *er;
- char *argv[1];
ifc = a;
er = ifc->arg;
er->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->readp = nil;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
if(bp == nil){
- /*
- * get here if mchan is a pipe and other side hangs up
- * clean up this interface & get out
-ZZZ is this a good idea?
- */
poperror();
- er->readp = nil;
- argv[0] = "unbind";
- if(!waserror())
+ if(!waserror()){
+ static char *argv[] = { "unbind" };
ifc->conv->p->ctl(ifc->conv, argv, 1);
- pexit("hangup", 1);
+ }
+ break;
}
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
@@ -144,6 +147,8 @@
runlock(ifc);
poperror();
}
+ er->readp = nil;
+ pexit("hangup", 1);
}
void
--- a/os/ip/netlog.c
+++ b/os/ip/netlog.c
@@ -7,7 +7,7 @@
#include "../ip/ip.h"
enum {
- Nlog = 4*1024,
+ Nlog = 16*1024,
};
/*
@@ -39,12 +39,12 @@
{ "ppp", Logppp, },
{ "ip", Logip, },
{ "fs", Logfs, },
- { "tcp", Logtcp, },
{ "il", Logil, },
+ { "tcp", Logtcp, },
{ "icmp", Logicmp, },
{ "udp", Logudp, },
{ "compress", Logcompress, },
- { "ilmsg", Logil|Logilmsg, },
+ { "logilmsg", Logilmsg, },
{ "gre", Loggre, },
{ "tcpwin", Logtcp|Logtcpwin, },
{ "tcprxmt", Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
nexterror();
}
if(f->alog->opens == 0){
- if(f->alog->buf == nil)
+ if(f->alog->buf == nil){
f->alog->buf = malloc(Nlog);
+ if(f->alog->buf == nil)
+ error(Enomem);
+ }
f->alog->rptr = f->alog->buf;
f->alog->end = f->alog->buf + Nlog;
}
@@ -202,6 +205,7 @@
else
f->alog->iponlyset = 1;
free(cb);
+ poperror();
return;
default:
@@ -227,7 +231,7 @@
void
netlog(Fs *f, int mask, char *fmt, ...)
{
- char buf[128], *t, *fp;
+ char buf[256], *t, *fp;
int i, n;
va_list arg;
--- a/os/ip/nullmedium.c
+++ b/os/ip/nullmedium.c
@@ -19,8 +19,9 @@
}
static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
{
+ freeb(bp);
error("nullbwrite");
}
--- a/os/ip/pktmedium.c
+++ b/os/ip/pktmedium.c
@@ -16,10 +16,10 @@
Medium pktmedium =
{
.name= "pkt",
-.hsize= 14,
-.mintu= 40,
+.hsize= 0,
+.mintu= 0,
.maxtu= 4*1024,
-.maclen= 6,
+.maclen= 0,
.bind= pktbind,
.unbind= pktunbind,
.bwrite= pktbwrite,
@@ -28,12 +28,13 @@
};
/*
- * called to bind an IP ifc to an ethernet device
+ * called to bind an IP ifc to a packet device
* called with ifc wlock'd
*/
static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
{
+ USED(argc, argv);
}
/*
@@ -51,7 +52,6 @@
pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
{
/* enqueue onto the conversation's rq */
- bp = concatblock(bp);
if(ifc->conv->snoopers.ref > 0)
qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
qpass(ifc->conv->rq, bp);
--- a/os/ip/plan9.c
+++ /dev/null
@@ -1,36 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "ip.h"
-
-/*
- * some hacks for commonality twixt inferno and plan9
- */
-
-char*
-commonuser(void)
-{
- return up->env->user;
-}
-
-Chan*
-commonfdtochan(int fd, int mode, int a, int b)
-{
- return fdtochan(up->env->fgrp, fd, mode, a, b);
-}
-
-char*
-commonerror(void)
-{
- return up->env->errstr;
-}
-
-int
-postnote(Proc *p, int, char *, int)
-{
- swiproc(p, 0);
- return 0;
-}
--- a/os/ip/ppp.c
+++ /dev/null
@@ -1,1656 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include <libcrypt.h>
-#include <kernel.h>
-#include "ip.h"
-#include "ppp.h"
-
-int nocompress;
-Ipaddr pppdns[2];
-
-/*
- * Calculate FCS - rfc 1331
- */
-ushort fcstab[256] =
-{
- 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
- 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
- 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
- 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
- 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
- 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
- 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
- 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
- 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
- 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
- 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
- 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
- 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
- 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
- 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
- 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
- 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
- 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
- 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
- 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
- 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
- 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
- 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
- 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
- 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
- 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
- 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
- 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
- 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
- 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
- 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
- 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
-};
-
-static char *snames[] =
-{
- "Sclosed",
- "Sclosing",
- "Sreqsent",
- "Sackrcvd",
- "Sacksent",
- "Sopened",
-};
-
-static void init(PPP*);
-static void setphase(PPP*, int);
-static void pinit(PPP*, Pstate*);
-static void ppptimer(void*);
-static void ptimer(PPP*, Pstate*);
-static int getframe(PPP*, Block**);
-static Block* putframe(PPP*, int, Block*);
-static uchar* escapebyte(PPP*, ulong, uchar*, ushort*);
-static void config(PPP*, Pstate*, int);
-static int getopts(PPP*, Pstate*, Block*);
-static void rejopts(PPP*, Pstate*, Block*, int);
-static void newstate(PPP*, Pstate*, int);
-static void rcv(PPP*, Pstate*, Block*);
-static void getchap(PPP*, Block*);
-static void getpap(PPP*, Block*);
-static void sendpap(PPP*);
-static void getlqm(PPP*, Block*);
-static void putlqm(PPP*);
-static void hangup(PPP*);
-static void remove(PPP*);
-
-static int validv4(Ipaddr);
-static void invalidate(Ipaddr);
-static void ipconnect(PPP *);
-static void setdefroute(PPP *, Ipaddr);
-static void printopts(PPP *, Pstate*, Block*, int);
-static void sendtermreq(PPP*, Pstate*);
-
-static void
-errlog(PPP *ppp, char *err)
-{
- int n;
- char msg[64];
-
- n = snprint(msg, sizeof(msg), "%s\n", err);
- qproduce(ppp->ifc->conv->eq, msg, n);
-}
-
-static void
-init(PPP* ppp)
-{
- if(ppp->inbuf == nil){
- ppp->inbuf = allocb(4096);
- ppp->outbuf = allocb(4096);
-
- ppp->lcp = malloc(sizeof(Pstate));
- ppp->ipcp = malloc(sizeof(Pstate));
- if(ppp->lcp == nil || ppp->ipcp == nil)
- error("ppp init: malloc");
-
- ppp->lcp->proto = Plcp;
- ppp->lcp->state = Sclosed;
- ppp->ipcp->proto = Pipcp;
- ppp->ipcp->state = Sclosed;
-
- kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG);
- }
-
- pinit(ppp, ppp->lcp);
- setphase(ppp, Plink);
-}
-
-static void
-setphase(PPP *ppp, int phase)
-{
- int oldphase;
-
- oldphase = ppp->phase;
-
- ppp->phase = phase;
- switch(phase){
- default:
- panic("ppp: unknown phase %d", phase);
- case Pdead:
- /* restart or exit? */
- pinit(ppp, ppp->lcp);
- setphase(ppp, Plink);
- break;
- case Plink:
- /* link down */
- switch(oldphase) {
- case Pnet:
- newstate(ppp, ppp->ipcp, Sclosed);
- }
- break;
- case Pauth:
- if(ppp->usepap)
- sendpap(ppp);
- else if(!ppp->usechap)
- setphase(ppp, Pnet);
- break;
- case Pnet:
- pinit(ppp, ppp->ipcp);
- break;
- case Pterm:
- /* what? */
- break;
- }
-}
-
-static void
-pinit(PPP *ppp, Pstate *p)
-{
- p->timeout = 0;
-
- switch(p->proto){
- case Plcp:
- ppp->magic = TK2MS(MACHP(0)->ticks);
- ppp->xctlmap = 0xffffffff;
- ppp->period = 0;
- p->optmask = 0xffffffff;
- ppp->rctlmap = 0;
- ppp->ipcp->state = Sclosed;
- ppp->ipcp->optmask = 0xffffffff;
-
- /* quality goo */
- ppp->timeout = 0;
- memset(&ppp->in, 0, sizeof(ppp->in));
- memset(&ppp->out, 0, sizeof(ppp->out));
- memset(&ppp->pin, 0, sizeof(ppp->pin));
- memset(&ppp->pout, 0, sizeof(ppp->pout));
- memset(&ppp->sin, 0, sizeof(ppp->sin));
- break;
- case Pipcp:
- if(ppp->localfrozen == 0)
- invalidate(ppp->local);
- if(ppp->remotefrozen == 0)
- invalidate(ppp->remote);
- p->optmask = 0xffffffff;
- ppp->ctcp = compress_init(ppp->ctcp);
- ppp->usedns = 3;
- invalidate(ppp->dns1);
- invalidate(ppp->dns2);
- break;
- }
- p->confid = p->rcvdconfid = -1;
- config(ppp, p, 1);
- newstate(ppp, p, Sreqsent);
-}
-
-/*
- * change protocol to a new state.
- */
-static void
-newstate(PPP *ppp, Pstate *p, int state)
-{
- netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto,
- snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags,
- ppp->mtu, ppp->mru);
-
- if(p->proto == Plcp) {
- if(state == Sopened)
- setphase(ppp, Pauth);
- else if(state == Sclosed)
- setphase(ppp, Pdead);
- else if(p->state == Sopened)
- setphase(ppp, Plink);
- }
-
- if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){
- netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote);
- ipmove(pppdns[0], ppp->dns1);
- ipmove(pppdns[1], ppp->dns2);
- ipconnect(ppp);
- /* if this is the only network, set up a default route */
-// if(ppp->ifc->link==nil) /* how??? */
- setdefroute(ppp, ppp->remote);
- errlog(ppp, Enoerror);
- }
-
- p->state = state;
-}
-
-static void
-remove(PPP *ppp)
-{
- free(ppp->ipcp);
- ppp->ipcp = 0;
- free(ppp->ctcp);
- ppp->ctcp = 0;
- free(ppp->lcp);
- ppp->lcp = 0;
- if (ppp->inbuf) {
- freeb(ppp->inbuf);
- ppp->inbuf = nil;
- }
- if (ppp->outbuf) {
- freeb(ppp->outbuf);
- ppp->outbuf = nil;
- }
- free(ppp);
-}
-
-void
-pppclose(PPP *ppp)
-{
- hangup(ppp);
- remove(ppp);
-}
-
-static void
-dumpblock(Block *b)
-{
- char x[256];
- int i;
-
- for(i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++)
- sprint(&x[3*i], "%2.2ux ", b->rp[i]);
- print("%s\n", x);
-}
-
-/* returns (protocol, information) */
-static int
-getframe(PPP *ppp, Block **info)
-{
- uchar *p, *from, *to;
- int n, len, proto;
- ulong c;
- ushort fcs;
- Block *buf, *b;
-
- buf = ppp->inbuf;
- for(;;){
- /* read till we hit a frame byte or run out of room */
- for(p = buf->rp; buf->wp < buf->lim;){
- for(; p < buf->wp; p++)
- if(*p == HDLC_frame)
- goto break2;
-
- len = buf->lim - buf->wp;
- n = 0;
- if(ppp->dchan != nil)
- n = kchanio(ppp->dchan, buf->wp, len, OREAD);
- netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n);
- if(n <= 0){
- buf->wp = buf->rp;
-// if(n < 0)
-// print("ppp kchanio(%s) returned %d: %r",
-// ppp->dchan->path->elem, n);
- *info = nil;
- return 0;
- }
- buf->wp += n;
- }
-break2:
-
- /* copy into block, undoing escapes, and caculating fcs */
- fcs = PPP_initfcs;
- b = allocb(p - buf->rp);
- to = b->wp;
- for(from = buf->rp; from != p;){
- c = *from++;
- if(c == HDLC_esc){
- if(from == p)
- break;
- c = *from++ ^ 0x20;
- } else if((c < 0x20) && (ppp->rctlmap & (1 << c)))
- continue;
- *to++ = c;
- fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
- }
-
- /* copy down what's left in buffer */
- p++;
- memmove(buf->rp, p, buf->wp - p);
- n = p - buf->rp;
- buf->wp -= n;
- b->wp = to - 2;
-
- /* return to caller if checksum matches */
- if(fcs == PPP_goodfcs){
- if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl)
- b->rp += 2;
- proto = *b->rp++;
- if((proto & 0x1) == 0)
- proto = (proto<<8) | *b->rp++;
- if(b->rp < b->wp){
- ppp->in.bytes += n;
- ppp->in.packets++;
- *info = b;
- return proto;
- }
- } else if(BLEN(b) > 0){
- ppp->ifc->inerr++;
- ppp->in.discards++;
- netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n",
- BLEN(b), BLEN(buf), fcs, b->rp[0],
- b->rp[1], b->rp[2], b->rp[3]);
- }
-
- freeblist(b);
- }
- *info = nil;
- return 0;
-}
-
-/* send a PPP frame */
-static Block *
-putframe(PPP *ppp, int proto, Block *b)
-{
- Block *buf;
- uchar *to, *from;
- ushort fcs;
- ulong ctlmap;
- int c;
- Block *bp;
-
- if(ppp->dchan == nil){
- netlog(ppp->f, Logppp, "putframe: dchan down\n");
- errlog(ppp, Ehungup);
- return b;
- }
- netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b));
-
- ppp->out.packets++;
-
- if(proto == Plcp)
- ctlmap = 0xffffffff;
- else
- ctlmap = ppp->xctlmap;
-
- /* make sure we have head room */
- if(b->rp - b->base < 4){
- b = padblock(b, 4);
- b->rp += 4;
- }
-
- /* add in the protocol and address, we'd better have left room */
- from = b->rp;
- *--from = proto;
- if(!(ppp->lcp->flags&Fpc) || proto > 0x100 || proto == Plcp)
- *--from = proto>>8;
- if(!(ppp->lcp->flags&Fac) || proto == Plcp){
- *--from = PPP_ctl;
- *--from = PPP_addr;
- }
-
- qlock(&ppp->outlock);
- buf = ppp->outbuf;
-
- /* escape and checksum the body */
- fcs = PPP_initfcs;
- to = buf->rp;
-
- *to++ = HDLC_frame;
-
- for(bp = b; bp; bp = bp->next){
- if(bp != b)
- from = bp->rp;
- for(; from < bp->wp; from++){
- c = *from;
- if(c == HDLC_frame || c == HDLC_esc
- || (c < 0x20 && ((1<<c) & ctlmap))){
- *to++ = HDLC_esc;
- *to++ = c ^ 0x20;
- } else
- *to++ = c;
- fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
- }
- }
-
- /* add on and escape the checksum */
- fcs = ~fcs;
- c = fcs;
- if(c == HDLC_frame || c == HDLC_esc
- || (c < 0x20 && ((1<<c) & ctlmap))){
- *to++ = HDLC_esc;
- *to++ = c ^ 0x20;
- } else
- *to++ = c;
- c = fcs>>8;
- if(c == HDLC_frame || c == HDLC_esc
- || (c < 0x20 && ((1<<c) & ctlmap))){
- *to++ = HDLC_esc;
- *to++ = c ^ 0x20;
- } else
- *to++ = c;
-
- /* add frame marker and send */
- *to++ = HDLC_frame;
- buf->wp = to;
- if(ppp->dchan == nil){
- netlog(ppp->f, Logppp, "putframe: dchan down\n");
- errlog(ppp, Ehungup);
- }else{
- kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE);
- ppp->out.bytes += BLEN(buf);
- }
-
- qunlock(&ppp->outlock);
- return b;
-}
-
-#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4))
-
-static Block*
-alloclcp(int code, int id, int len)
-{
- Block *b;
- Lcpmsg *m;
-
- /*
- * leave room for header
- */
- b = allocb(len);
-
- m = (Lcpmsg*)b->wp;
- m->code = code;
- m->id = id;
- b->wp += 4;
-
- return b;
-}
-
-static void
-putao(Block *b, int type, int aproto, int alg)
-{
- *b->wp++ = type;
- *b->wp++ = 5;
- hnputs(b->wp, aproto);
- b->wp += 2;
- *b->wp++ = alg;
-}
-
-static void
-putlo(Block *b, int type, ulong val)
-{
- *b->wp++ = type;
- *b->wp++ = 6;
- hnputl(b->wp, val);
- b->wp += 4;
-}
-
-static void
-putv4o(Block *b, int type, Ipaddr val)
-{
- *b->wp++ = type;
- *b->wp++ = 6;
- if(v6tov4(b->wp, val) < 0){
- /*panic("putv4o")*/;
- }
- b->wp += 4;
-}
-
-static void
-putso(Block *b, int type, ulong val)
-{
- *b->wp++ = type;
- *b->wp++ = 4;
- hnputs(b->wp, val);
- b->wp += 2;
-}
-
-static void
-puto(Block *b, int type)
-{
- *b->wp++ = type;
- *b->wp++ = 2;
-}
-
-/*
- * send configuration request
- */
-static void
-config(PPP *ppp, Pstate *p, int newid)
-{
- Block *b;
- Lcpmsg *m;
- int id;
-
- if(newid){
- id = ++(p->id);
- p->confid = id;
- p->timeout = Timeout;
- } else
- id = p->confid;
- b = alloclcp(Lconfreq, id, 256);
- m = IPB2LCP(b);
- USED(m);
-
- switch(p->proto){
- case Plcp:
- if(p->optmask & Fmagic)
- putlo(b, Omagic, ppp->magic);
- if(p->optmask & Fmtu)
- putso(b, Omtu, ppp->mru);
- if(p->optmask & Fac)
- puto(b, Oac);
- if(p->optmask & Fpc)
- puto(b, Opc);
- if(p->optmask & Fctlmap)
- putlo(b, Octlmap, 0); /* we don't want anything escaped */
- break;
- case Pipcp:
- if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/)
- putv4o(b, Oipaddr, ppp->local);
- if(!nocompress && (p->optmask & Fipcompress)){
- *b->wp++ = Oipcompress;
- *b->wp++ = 6;
- hnputs(b->wp, Pvjctcp);
- b->wp += 2;
- *b->wp++ = MAX_STATES-1;
- *b->wp++ = 1;
- }
- if(ppp->usedns & 1)
- putlo(b, Oipdns, 0);
- if(ppp->usedns & 2)
- putlo(b, Oipdns2, 0);
- break;
- }
-
- hnputs(m->len, BLEN(b));
- b = putframe(ppp, p->proto, b);
- freeblist(b);
-}
-
-/*
- * parse configuration request, sends an ack or reject packet
- *
- * returns: -1 if request was syntacticly incorrect
- * 0 if packet was accepted
- * 1 if packet was rejected
- */
-static int
-getopts(PPP *ppp, Pstate *p, Block *b)
-{
- Lcpmsg *m, *repm;
- Lcpopt *o;
- uchar *cp;
- ulong rejecting, nacking, flags, proto;
- ulong mtu, ctlmap, period;
- ulong x;
- Block *repb;
- Ipaddr ipaddr;
-
- rejecting = 0;
- nacking = 0;
- flags = 0;
-
- /* defaults */
- invalidate(ipaddr);
- mtu = ppp->mtu;
-
- ctlmap = 0xffffffff;
- period = 0;
-
- m = (Lcpmsg*)b->rp;
- repb = alloclcp(Lconfack, m->id, BLEN(b));
- repm = IPB2LCP(repb);
-
- /* copy options into ack packet */
- memmove(repm->data, m->data, b->wp - m->data);
- repb->wp += b->wp - m->data;
-
- /* look for options we don't recognize or like */
- for(cp = m->data; cp < b->wp; cp += o->len){
- o = (Lcpopt*)cp;
- if(cp + o->len > b->wp || o->len == 0){
- freeblist(repb);
- netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev,
- o->type);
- return -1;
- }
-
- switch(p->proto){
- case Plcp:
- switch(o->type){
- case Oac:
- flags |= Fac;
- continue;
- case Opc:
- flags |= Fpc;
- continue;
- case Omtu:
- mtu = nhgets(o->data);
- if(mtu < ppp->ifc->m->mintu){
- netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu);
- mtu = ppp->ifc->m->mintu;
- }
- continue;
- case Omagic:
- if(ppp->magic == nhgetl(o->data))
- netlog(ppp->f, Logppp, "ppp: possible loop\n");
- continue;
- case Octlmap:
- ctlmap = nhgetl(o->data);
- continue;
- case Oquality:
- proto = nhgets(o->data);
- if(proto != Plqm)
- break;
- x = nhgetl(o->data+2)*10;
- period = (x+Period-1)/Period;
- continue;
- case Oauth:
- proto = nhgets(o->data);
- if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){
- ppp->usepap = 1;
- netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev);
- continue;
- }
- if(proto != Pchap || o->data[2] != APmd5){
- if(!nacking){
- nacking = 1;
- repb->wp = repm->data;
- repm->code = Lconfnak;
- }
- putao(repb, Oauth, Pchap, APmd5);
- }
- else
- ppp->usechap = 1;
- ppp->usepap = 0;
- continue;
- }
- break;
- case Pipcp:
- switch(o->type){
- case Oipaddr:
- v4tov6(ipaddr, o->data);
- if(!validv4(ppp->remote))
- continue;
- if(!validv4(ipaddr) && !rejecting){
- /* other side requesting an address */
- if(!nacking){
- nacking = 1;
- repb->wp = repm->data;
- repm->code = Lconfnak;
- }
- putv4o(repb, Oipaddr, ppp->remote);
- }
- continue;
- case Oipcompress:
- proto = nhgets(o->data);
- if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0)
- break;
- flags |= Fipcompress;
- continue;
- }
- break;
- }
-
- /* come here if option is not recognized */
- if(!rejecting){
- rejecting = 1;
- repb->wp = repm->data;
- repm->code = Lconfrej;
- }
- netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type);
- memmove(repb->wp, o, o->len);
- repb->wp += o->len;
- }
-
- /* permanent changes only after we know that we liked the packet */
- if(!rejecting && !nacking){
- switch(p->proto){
- case Plcp:
- netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap);
- ppp->period = period;
- ppp->xctlmap = ctlmap;
- if(mtu > Maxmtu)
- mtu = Maxmtu;
- if(mtu < Minmtu)
- mtu = Minmtu;
- ppp->mtu = mtu;
- break;
- case Pipcp:
- if(validv4(ipaddr) && ppp->remotefrozen == 0)
- ipmove(ppp->remote, ipaddr);
- break;
- }
- p->flags = flags;
- }
-
- hnputs(repm->len, BLEN(repb));
- repb = putframe(ppp, p->proto, repb);
- freeblist(repb);
-
- return rejecting || nacking;
-}
-
-/*
- * parse configuration rejection, just stop sending anything that they
- * don't like (except for ipcp address nak).
- */
-static void
-rejopts(PPP *ppp, Pstate *p, Block *b, int code)
-{
- Lcpmsg *m;
- Lcpopt *o;
-
- /* just give up trying what the other side doesn't like */
- m = (Lcpmsg*)b->rp;
- for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){
- o = (Lcpopt*)b->rp;
- if(b->rp + o->len > b->wp || o->len == 0){
- netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev,
- o->type);
- return;
- }
-
- if(code == Lconfrej){
- if(o->type < 8*sizeof(p->optmask))
- p->optmask &= ~(1<<o->type);
- if(o->type == Oipdns)
- ppp->usedns &= ~1;
- else if(o->type == Oipdns2)
- ppp->usedns &= ~2;
- netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto,
- o->type);
- continue;
- }
-
- switch(p->proto){
- case Plcp:
- switch(o->type){
- case Octlmap:
- ppp->rctlmap = nhgetl(o->data);
- break;
- default:
- if(o->type < 8*sizeof(p->optmask))
- p->optmask &= ~(1<<o->type);
- break;
- };
- case Pipcp:
- switch(o->type){
- case Oipaddr:
- if(!validv4(ppp->local))
- v4tov6(ppp->local, o->data);
-// if(o->type < 8*sizeof(p->optmask))
-// p->optmask &= ~(1<<o->type);
- break;
- case Oipdns:
- if(!validv4(ppp->dns1))
- v4tov6(ppp->dns1, o->data);
- ppp->usedns &= ~1;
- break;
- case Oipdns2:
- if(!validv4(ppp->dns2))
- v4tov6(ppp->dns2, o->data);
- ppp->usedns &= ~2;
- break;
- default:
- if(o->type < 8*sizeof(p->optmask))
- p->optmask &= ~(1<<o->type);
- break;
- }
- break;
- }
- }
-}
-
-
-/*
- * put a messages through the lcp or ipcp state machine. They are
- * very similar.
- */
-static void
-rcv(PPP *ppp, Pstate *p, Block *b)
-{
- ulong len;
- int err;
- Lcpmsg *m;
-
- if(BLEN(b) < 4){
- netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
- freeblist(b);
- return;
- }
- m = (Lcpmsg*)b->rp;
- len = nhgets(m->len);
- if(BLEN(b) < len){
- netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
- freeblist(b);
- return;
- }
-
- netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n",
- p->proto, m->code, len, m->id, p->confid, p->id);
-
- if(p->proto != Plcp && ppp->lcp->state != Sopened){
- netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n");
- freeb(b);
- return;
- }
-
- qlock(ppp);
- switch(m->code){
- case Lconfreq:
- /* flush the output queue */
- if(p->state == Sopened && p->proto == Plcp)
- kchanio(ppp->cchan, "f", 1, OWRITE);
-
- printopts(ppp, p, b, 0);
- err = getopts(ppp, p, b);
- if(err < 0)
- break;
-
- if(m->id == p->rcvdconfid)
- break; /* don't change state for duplicates */
- p->rcvdconfid = m->id;
-
- switch(p->state){
- case Sackrcvd:
- if(err)
- break;
- newstate(ppp, p, Sopened);
- break;
- case Sclosed:
- case Sopened:
- config(ppp, p, 1);
- if(err == 0)
- newstate(ppp, p, Sacksent);
- else
- newstate(ppp, p, Sreqsent);
- break;
- break;
- case Sreqsent:
- case Sacksent:
- if(err == 0)
- newstate(ppp, p, Sacksent);
- else
- newstate(ppp, p, Sreqsent);
- break;
- }
- break;
- case Lconfack:
- if(p->confid != m->id){
- /* ignore if it isn't the message we're sending */
- netlog(ppp->f, Logppp, "ppp: dropping confack\n");
- break;
- }
- p->confid = -1; /* ignore duplicates */
- p->id++; /* avoid sending duplicates */
-
- switch(p->state){
- case Sopened:
- case Sackrcvd:
- config(ppp, p, 1);
- newstate(ppp, p, Sreqsent);
- break;
- case Sreqsent:
- newstate(ppp, p, Sackrcvd);
- break;
- case Sacksent:
- newstate(ppp, p, Sopened);
- break;
- }
- break;
- case Lconfrej:
- case Lconfnak:
- if(p->confid != m->id) {
- /* ignore if it isn't the message we're sending */
- netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n");
- break;
- }
- p->confid = -1; /* ignore duplicates */
- p->id++; /* avoid sending duplicates */
-
- switch(p->state){
- case Sopened:
- case Sackrcvd:
- config(ppp, p, 1);
- newstate(ppp, p, Sreqsent);
- break;
- case Sreqsent:
- case Sacksent:
- printopts(ppp, p, b, 0);
- rejopts(ppp, p, b, m->code);
- config(ppp, p, 1);
- break;
- }
- break;
- case Ltermreq:
- m->code = Ltermack;
- b = putframe(ppp, p->proto, b);
-
- switch(p->state){
- case Sackrcvd:
- case Sacksent:
- newstate(ppp, p, Sreqsent);
- break;
- case Sopened:
- newstate(ppp, p, Sclosing);
- break;
- }
- break;
- case Ltermack:
- if(p->termid != m->id) /* ignore if it isn't the message we're sending */
- break;
-
- if(p->proto == Plcp)
- ppp->ipcp->state = Sclosed;
- switch(p->state){
- case Sclosing:
- newstate(ppp, p, Sclosed);
- break;
- case Sackrcvd:
- newstate(ppp, p, Sreqsent);
- break;
- case Sopened:
- config(ppp, p, 0);
- newstate(ppp, p, Sreqsent);
- break;
- }
- break;
- case Lcoderej:
- netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]);
- break;
- case Lprotorej:
- netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data));
- break;
- case Lechoreq:
- m->code = Lechoack;
- b = putframe(ppp, p->proto, b);
- break;
- case Lechoack:
- case Ldiscard:
- /* nothing to do */
- break;
- }
-
- qunlock(ppp);
- freeblist(b);
-}
-
-/*
- * timer for protocol state machine
- */
-static void
-ptimer(PPP *ppp, Pstate *p)
-{
- if(p->state == Sopened || p->state == Sclosed)
- return;
-
- p->timeout--;
- switch(p->state){
- case Sclosing:
- sendtermreq(ppp, p);
- break;
- case Sreqsent:
- case Sacksent:
- if(p->timeout <= 0){
- if(p->proto && ppp->cchan != nil)
- kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
- newstate(ppp, p, Sclosed);
- } else {
- config(ppp, p, 0);
- }
- break;
- case Sackrcvd:
- if(p->timeout <= 0){
- if(p->proto && ppp->cchan != nil)
- kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
- newstate(ppp, p, Sclosed);
- }
- else {
- config(ppp, p, 0);
- newstate(ppp, p, Sreqsent);
- }
- break;
- }
-}
-
-/*
- * timer for ppp
- */
-static void
-ppptimer(void *arg)
-{
- PPP *ppp;
-
- ppp = arg;
- ppp->timep = up;
- if(waserror()){
- netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr);
- ppp->timep = 0;
- pexit("hangup", 1);
- }
- for(;;){
- tsleep(&up->sleep, return0, nil, Period);
- if(ppp->pppup){
- qlock(ppp);
-
- ptimer(ppp, ppp->lcp);
- if(ppp->lcp->state == Sopened)
- ptimer(ppp, ppp->ipcp);
-
- if(ppp->period && --(ppp->timeout) <= 0){
- ppp->timeout = ppp->period;
- putlqm(ppp);
- }
-
- qunlock(ppp);
- }
- }
-}
-
-static void
-setdefroute(PPP *ppp, Ipaddr gate)
-{
- int fd, n;
- char path[128], msg[128];
-
- snprint(path, sizeof path, "#I%d/iproute", ppp->f->dev);
- fd = kopen(path, ORDWR);
- if(fd < 0)
- return;
- n = snprint(msg, sizeof(msg), "add 0 0 %I", gate);
- kwrite(fd, msg, n);
- kclose(fd);
-}
-
-static void
-ipconnect(PPP *ppp)
-{
- int fd, n;
- char path[128], msg[128];
-
- snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x);
- fd = kopen(path, ORDWR);
- if(fd < 0)
- return;
- n = snprint(msg, sizeof(msg), "connect %I 255.255.255.255 %I", ppp->local, ppp->remote);
- if (kwrite(fd, msg, n) != n)
- print("ppp ipconnect: %s: %r\n", msg);
- kclose(fd);
-}
-
-PPP*
-pppopen(PPP *ppp, char *dev,
- Ipaddr ipaddr, Ipaddr remip,
- int mtu, int framing,
- char *chapname, char *secret)
-{
- int fd, cfd;
- char ctl[Maxpath];
-
- invalidate(ppp->remote);
- invalidate(ppp->local);
- invalidate(ppp->dns1);
- invalidate(ppp->dns2);
- ppp->mtu = Defmtu;
- ppp->mru = mtu;
- ppp->framing = framing;
-
- if(remip != nil && validv4(remip)){
- ipmove(ppp->remote, remip);
- ppp->remotefrozen = 1;
- }
- if(ipaddr != nil && validv4(ipaddr)){
- ipmove(ppp->local, ipaddr);
- ppp->localfrozen = 1;
- }
-
- /* authentication goo */
- ppp->secret[0] = 0;
- if(secret != nil)
- strncpy(ppp->secret, secret, sizeof(ppp->secret));
- ppp->chapname[0] = 0;
- if(chapname != nil)
- strncpy(ppp->chapname, chapname, sizeof(ppp->chapname));
-
- if(strchr(dev, '!'))
- fd = kdial(dev, nil, nil, nil);
- else
- fd = kopen(dev, ORDWR);
- if(fd < 0){
- netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev);
- return nil;
- }
- ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
- kclose(fd);
-
- /* set up serial line */
-/* XXX this stuff belongs in application, not driver */
- sprint(ctl, "%sctl", dev);
- cfd = kopen(ctl, ORDWR);
- if(cfd >= 0){
- ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1);
- kclose(cfd);
- kchanio(ppp->cchan, "m1", 2, OWRITE); /* cts/rts flow control/fifo's) on */
- kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increas q size to 64k */
- kchanio(ppp->cchan, "n1", 2, OWRITE); /* nonblocking writes on */
- kchanio(ppp->cchan, "r1", 2, OWRITE); /* rts on */
- kchanio(ppp->cchan, "d1", 2, OWRITE); /* dtr on */
- }
-
- ppp->pppup = 1;
- init(ppp);
- return ppp;
-}
-
-static void
-hangup(PPP *ppp)
-{
- qlock(ppp);
- if(waserror()){
- qunlock(ppp);
- nexterror();
- }
- netlog(ppp->f, Logppp, "PPP Hangup\n");
- errlog(ppp, Ehungup);
- if(ppp->pppup && ppp->cchan != nil){
- kchanio(ppp->cchan, "f", 1, OWRITE); /* flush */
- kchanio(ppp->cchan, "h", 1, OWRITE); /* hangup */
- }
- cclose(ppp->dchan);
- cclose(ppp->cchan);
- ppp->dchan = nil;
- ppp->cchan = nil;
- ppp->pppup = 0;
- qunlock(ppp);
- poperror();
-}
-
-/* return next input IP packet */
-Block*
-pppread(PPP *ppp)
-{
- Block *b;
- int proto;
- Lcpmsg *m;
-
- for(;;){
- proto = getframe(ppp, &b);
- if(b == nil)
- return nil;
- netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b));
- switch(proto){
- case Plcp:
- rcv(ppp, ppp->lcp, b);
- break;
- case Pipcp:
- rcv(ppp, ppp->ipcp, b);
- break;
- case Pip:
- if(ppp->ipcp->state == Sopened)
- return b;
- freeblist(b);
- break;
- case Plqm:
- getlqm(ppp, b);
- break;
- case Pchap:
- getchap(ppp, b);
- break;
- case Ppap:
- getpap(ppp, b);
- break;
- case Pvjctcp:
- case Pvjutcp:
- if(ppp->ipcp->state == Sopened){
- b = tcpuncompress(ppp->ctcp, b, proto, ppp->f);
- if(b != nil)
- return b;
- }
- freeblist(b);
- break;
- default:
- netlog(ppp->f, Logppp, "unknown proto %ux\n", proto);
- if(ppp->lcp->state == Sopened){
- /* reject the protocol */
- b->rp -= 6;
- m = (Lcpmsg*)b->rp;
- m->code = Lprotorej;
- m->id = ++ppp->lcp->id;
- hnputs(m->data, proto);
- hnputs(m->len, BLEN(b));
- b = putframe(ppp, Plcp, b);
- }
- freeblist(b);
- break;
- }
- }
- return nil; /* compiler confused */
-}
-
-/* transmit an IP packet */
-int
-pppwrite(PPP *ppp, Block *b)
-{
- ushort proto;
- int r;
-
- qlock(ppp);
-
- /* can't send ip packets till we're established */
- if(ppp->ipcp->state != Sopened)
- goto ret;
-
- /* link hung up */
- if(ppp->dchan == nil)
- goto ret;
-
- b = concatblock(b); /* or else compression will barf */
-
- proto = Pip;
- if(ppp->ipcp->flags & Fipcompress)
- proto = compress(ppp->ctcp, b, ppp->f);
- b = putframe(ppp, proto, b);
-
-
-ret:
- qunlock(ppp);
-
- r = blocklen(b);
- netlog(ppp->f, Logppp, "ppp wrt len %d\n", r);
-
- freeblist(b);
- return r;
-}
-
-/*
- * link quality management
- */
-static void
-getlqm(PPP *ppp, Block *b)
-{
- Qualpkt *p;
-
- p = (Qualpkt*)b->rp;
- if(BLEN(b) == sizeof(Qualpkt)){
- ppp->in.reports++;
- ppp->pout.reports = nhgetl(p->peeroutreports);
- ppp->pout.packets = nhgetl(p->peeroutpackets);
- ppp->pout.bytes = nhgetl(p->peeroutbytes);
- ppp->pin.reports = nhgetl(p->peerinreports);
- ppp->pin.packets = nhgetl(p->peerinpackets);
- ppp->pin.discards = nhgetl(p->peerindiscards);
- ppp->pin.errors = nhgetl(p->peerinerrors);
- ppp->pin.bytes = nhgetl(p->peerinbytes);
-
- /* save our numbers at time of reception */
- memmove(&ppp->sin, &ppp->in, sizeof(Qualstats));
-
- }
- freeblist(b);
- if(ppp->period == 0)
- putlqm(ppp);
-
-}
-static void
-putlqm(PPP *ppp)
-{
- Qualpkt *p;
- Block *b;
-
- b = allocb(sizeof(Qualpkt));
- b->wp += sizeof(Qualpkt);
- p = (Qualpkt*)b->rp;
- hnputl(p->magic, 0);
-
- /* heresay (what he last told us) */
- hnputl(p->lastoutreports, ppp->pout.reports);
- hnputl(p->lastoutpackets, ppp->pout.packets);
- hnputl(p->lastoutbytes, ppp->pout.bytes);
-
- /* our numbers at time of last reception */
- hnputl(p->peerinreports, ppp->sin.reports);
- hnputl(p->peerinpackets, ppp->sin.packets);
- hnputl(p->peerindiscards, ppp->sin.discards);
- hnputl(p->peerinerrors, ppp->sin.errors);
- hnputl(p->peerinbytes, ppp->sin.bytes);
-
- /* our numbers now */
- hnputl(p->peeroutreports, ppp->out.reports+1);
- hnputl(p->peeroutpackets, ppp->out.packets+1);
- hnputl(p->peeroutbytes, ppp->out.bytes+53/*hack*/);
-
- b = putframe(ppp, Plqm, b);
- freeblist(b);
- ppp->out.reports++;
-}
-
-/*
- * challenge response dialog
- */
-static void
-getchap(PPP *ppp, Block *b)
-{
- Lcpmsg *m;
- int len, vlen, n;
- char md5buf[512];
-
- m = (Lcpmsg*)b->rp;
- len = nhgets(m->len);
- if(BLEN(b) < len){
- netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev);
- freeblist(b);
- return;
- }
-
- switch(m->code){
- case Cchallenge:
- vlen = m->data[0];
- if(vlen > len - 5){
- netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev);
- freeblist(b);
- break;
- }
-
- netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev);
-netlog(ppp->f, Logppp, "(secret %s chapname %s id %d)\n", ppp->secret, ppp->chapname, m->id);
- /* create string to hash */
- md5buf[0] = m->id;
- strcpy(md5buf+1, ppp->secret);
- n = strlen(ppp->secret) + 1;
- memmove(md5buf+n, m->data+1, vlen);
- n += vlen;
- freeblist(b);
-
- /* send reply */
- len = 4 + 1 + 16 + strlen(ppp->chapname);
- b = alloclcp(2, md5buf[0], len);
- m = IPB2LCP(b);
- m->data[0] = 16;
- md5((uchar*)md5buf, n, m->data+1, 0);
- memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname));
- hnputs(m->len, len);
- b->wp += len-4;
- b = putframe(ppp, Pchap, b);
- break;
- case Cresponse:
- netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev);
- break;
- case Csuccess:
- netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev);
- setphase(ppp, Pnet);
- break;
- case Cfailure:
- netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data);
- errlog(ppp, Eperm);
- break;
- default:
- netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code);
- break;
- }
- freeblist(b);
-}
-
-/*
- * password authentication protocol dialog
- * -- obsolete but all we know how to use with NT just now
- */
-static void
-sendpap(PPP *ppp)
-{
- Lcpmsg *m;
- int clen, slen, len;
- Block *b;
- uchar *p;
-
- clen = strlen(ppp->chapname);
- slen = strlen(ppp->secret);
- len = 4 + 1 + clen + 1 + slen;
- ppp->papid = ++ppp->lcp->id;
- b = alloclcp(Cpapreq, ppp->papid, len);
- m = IPB2LCP(b);
- p = m->data;
- p[0] = clen;
- memmove(p+1, ppp->chapname, clen);
- p += clen + 1;
- p[0] = slen;
- memmove(p+1, ppp->secret, slen);
- hnputs(m->len, len);
- b->wp += len-4;
- b = putframe(ppp, Ppap, b);
- netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, len);
- freeblist(b);
-}
-
-static void
-getpap(PPP *ppp, Block *b)
-{
- Lcpmsg *m;
- int len;
-
- m = (Lcpmsg*)b->rp;
- len = nhgets(m->len);
- if(BLEN(b) < len){
- netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev);
- freeblist(b);
- return;
- }
-
- switch(m->code){
- case Cpapreq:
- netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev);
- break;
- case Cpapack:
- netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev);
- setphase(ppp, Pnet);
- break;
- case Cpapnak:
- if(m->data[0])
- netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1);
- else
- netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev);
- errlog(ppp, Eperm);
- break;
- default:
- netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code);
- break;
- }
- freeblist(b);
-}
-
-static void
-printopts(PPP *ppp, Pstate *p, Block *b, int send)
-{
- Lcpmsg *m;
- Lcpopt *o;
- int proto, x, period;
- uchar *cp;
- char *code, *dir;
-
- m = (Lcpmsg*)b->rp;
- switch(m->code) {
- default: code = "<unknown>"; break;
- case Lconfreq: code = "confrequest"; break;
- case Lconfack: code = "confack"; break;
- case Lconfnak: code = "confnak"; break;
- case Lconfrej: code = "confreject"; break;
- }
-
- if(send)
- dir = "send";
- else
- dir = "recv";
-
- netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id);
-
- for(cp = m->data; cp < b->wp; cp += o->len){
- o = (Lcpopt*)cp;
- if(cp + o->len > b->wp || o->len == 0){
- netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type);
- return;
- }
-
- switch(p->proto){
- case Plcp:
- switch(o->type){
- default:
- netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
- break;
- case Omtu:
- netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data));
- break;
- case Octlmap:
- netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data));
- break;
- case Oauth:
- netlog(ppp->f, Logppp, "\tauth = ", nhgetl(o->data));
- proto = nhgets(o->data);
- switch(proto) {
- default:
- netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto);
- break;
- case Ppap:
- netlog(ppp->f, Logppp, "password\n");
- break;
- case Pchap:
- netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]);
- break;
- }
- break;
- case Oquality:
- proto = nhgets(o->data);
- switch(proto) {
- default:
- netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto);
- break;
- case Plqm:
- x = nhgetl(o->data+2)*10;
- period = (x+Period-1)/Period;
- netlog(ppp->f, Logppp, "\tlqm period = %d\n", period);
- break;
- }
- case Omagic:
- netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data));
- break;
- case Opc:
- netlog(ppp->f, Logppp, "\tprotocol compress\n");
- break;
- case Oac:
- netlog(ppp->f, Logppp, "\taddr compress\n");
- break;
- }
- break;
- case Pccp:
- switch(o->type){
- default:
- netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
- break;
- case Ocoui:
- netlog(ppp->f, Logppp, "\tOUI\n");
- break;
- case Ocstac:
- netlog(ppp->f, Logppp, "\tstac LZS\n");
- break;
- case Ocmppc:
- netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data));
- break;
- }
- break;
- case Pecp:
- switch(o->type){
- default:
- netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
- break;
- case Oeoui:
- netlog(ppp->f, Logppp, "\tOUI\n");
- break;
- case Oedese:
- netlog(ppp->f, Logppp, "\tDES\n");
- break;
- }
- break;
- case Pipcp:
- switch(o->type){
- default:
- netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
- break;
- case Oipaddrs:
- netlog(ppp->f, Logppp, "\tip addrs - deprecated\n");
- break;
- case Oipcompress:
- netlog(ppp->f, Logppp, "\tip compress\n");
- break;
- case Oipaddr:
- netlog(ppp->f, Logppp, "\tip addr %V\n", o->data);
- break;
- case Oipdns:
- netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data);
- break;
- case Oipwins:
- netlog(ppp->f, Logppp, "\twins addr %V\n", o->data);
- break;
- case Oipdns2:
- netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data);
- break;
- case Oipwins2:
- netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data);
- break;
- }
- break;
- }
- }
-}
-
-static void
-sendtermreq(PPP *ppp, Pstate *p)
-{
- Block *b;
- Lcpmsg *m;
-
- p->termid = ++(p->id);
- b = alloclcp(Ltermreq, p->termid, 4);
- m = IPB2LCP(b);
- hnputs(m->len, 4);
- putframe(ppp, p->proto, b);
- freeb(b);
- newstate(ppp, p, Sclosing);
-}
-
-static void
-sendechoreq(PPP *ppp, Pstate *p)
-{
- Block *b;
- Lcpmsg *m;
-
- p->termid = ++(p->id);
- b = alloclcp(Lechoreq, p->id, 4);
- m = IPB2LCP(b);
- hnputs(m->len, 4);
- putframe(ppp, p->proto, b);
- freeb(b);
-}
-
-/*
- * return non-zero if this is a valid v4 address
- */
-static int
-validv4(Ipaddr addr)
-{
- return memcmp(addr, v4prefix, IPv4off) == 0;
-}
-
-static void
-invalidate(Ipaddr addr)
-{
- ipmove(addr, IPnoaddr);
-}
--- a/os/ip/ppp.h
+++ /dev/null
@@ -1,258 +1,0 @@
-typedef struct PPP PPP;
-typedef struct Pstate Pstate;
-typedef struct Lcpmsg Lcpmsg;
-typedef struct Lcpopt Lcpopt;
-typedef struct Qualpkt Qualpkt;
-typedef struct Qualstats Qualstats;
-typedef struct Tcpc Tcpc;
-
-typedef uchar Ipaddr[IPaddrlen];
-
-enum
-{
- HDLC_frame= 0x7e,
- HDLC_esc= 0x7d,
-
- /* PPP frame fields */
- PPP_addr= 0xff,
- PPP_ctl= 0x3,
- PPP_initfcs= 0xffff,
- PPP_goodfcs= 0xf0b8,
-
- /* PPP phases */
- Pdead= 0,
- Plink, /* doing LCP */
- Pauth, /* doing chap */
- Pnet, /* doing IPCP, CCP */
- Pterm, /* closing down */
-
- /* PPP protocol types */
- Pip= 0x21, /* internet */
- Pvjctcp= 0x2d, /* compressing van jacobson tcp */
- Pvjutcp= 0x2f, /* uncompressing van jacobson tcp */
- Pcdata= 0xfd, /* compressed datagram */
- Pipcp= 0x8021, /* ip control */
- Pecp= 0x8053, /* encryption control */
- Pccp= 0x80fd, /* compressed datagram control */
- Plcp= 0xc021, /* link control */
- Ppap= 0xc023, /* password auth. protocol */
- Plqm= 0xc025, /* link quality monitoring */
- Pchap= 0xc223, /* challenge/response */
-
- /* LCP codes */
- Lconfreq= 1,
- Lconfack= 2,
- Lconfnak= 3,
- Lconfrej= 4,
- Ltermreq= 5,
- Ltermack= 6,
- Lcoderej= 7,
- Lprotorej= 8,
- Lechoreq= 9,
- Lechoack= 10,
- Ldiscard= 11,
-
- /* Lcp configure options */
- Omtu= 1,
- Octlmap= 2,
- Oauth= 3,
- Oquality= 4,
- Omagic= 5,
- Opc= 7,
- Oac= 8,
- Obad= 12, /* for testing */
-
- /* authentication protocols */
- APmd5= 5,
-
- /* lcp flags */
- Fmtu= 1<<Omtu,
- Fctlmap= 1<<Octlmap,
- Fauth= 1<<Oauth,
- Fquality= 1<<Oquality,
- Fmagic= 1<<Omagic,
- Fpc= 1<<Opc,
- Fac= 1<<Oac,
- Fbad= 1<<Obad,
-
- /* Chap codes */
- Cchallenge= 1,
- Cresponse= 2,
- Csuccess= 3,
- Cfailure= 4,
-
- /* Pap codes */
- Cpapreq= 1,
- Cpapack= 2,
- Cpapnak= 3,
-
- /* link states */
- Sclosed= 0,
- Sclosing,
- Sreqsent,
- Sackrcvd,
- Sacksent,
- Sopened,
-
- /* ccp configure options */
- Ocoui= 0, /* proprietary compression */
- Ocstac= 17, /* stac electronics LZS */
- Ocmppc= 18, /* microsoft ppc */
-
- /* ccp flags */
- Fcoui= 1<<Ocoui,
- Fcstac= 1<<Ocstac,
- Fcmppc= 1<<Ocmppc,
-
- /* ecp configure options */
- Oeoui= 0, /* proprietary compression */
- Oedese= 1, /* DES */
-
- /* ecp flags */
- Feoui= 1<<Oeoui,
- Fedese= 1<<Oedese,
-
- /* ipcp configure options */
- Oipaddrs= 1,
- Oipcompress= 2,
- Oipaddr= 3,
- Oipdns= 129,
- Oipwins= 130,
- Oipdns2= 131,
- Oipwins2= 132,
-
- /* ipcp flags */
- Fipaddrs= 1<<Oipaddrs,
- Fipcompress= 1<<Oipcompress,
- Fipaddr= 1<<Oipaddr,
-
- Period= 3*1000, /* period of retransmit process (in ms) */
- Timeout= 10, /* xmit timeout (in Periods) */
-
- MAX_STATES = 16, /* van jacobson compression states */
- Defmtu= 1450, /* default that we will ask for */
- Minmtu= 128, /* minimum that we will accept */
- Maxmtu= 2000, /* maximum that we will accept */
-};
-
-
-struct Pstate
-{
- int proto; /* protocol type */
- int timeout; /* for current state */
- int rxtimeout; /* for current retransmit */
- ulong flags; /* options received */
- uchar id; /* id of current message */
- uchar confid; /* id of current config message */
- uchar termid; /* id of current termination message */
- uchar rcvdconfid; /* id of last conf message received */
- uchar state; /* PPP link state */
- ulong optmask; /* which options to request */
- int echoack; /* recieved echo ack */
- int echotimeout; /* echo timeout */
-};
-
-struct Qualstats
-{
- ulong reports;
- ulong packets;
- ulong bytes;
- ulong discards;
- ulong errors;
-};
-
-struct PPP
-{
- QLock;
-
- Chan* dchan; /* serial line */
- Chan* cchan; /* serial line control */
- int framing; /* non-zero to use framing characters */
- Ipaddr local;
- int localfrozen;
- Ipaddr remote;
- int remotefrozen;
-
- int pppup;
- Fs *f; /* file system we belong to */
- Ipifc* ifc;
- Proc* readp; /* reading process */
- Proc* timep; /* timer process */
- Block* inbuf; /* input buffer */
- Block* outbuf; /* output buffer */
- QLock outlock; /* and its lock */
-
- ulong magic; /* magic number to detect loop backs */
- ulong rctlmap; /* map of chars to ignore in rcvr */
- ulong xctlmap; /* map of chars to excape in xmit */
- int phase; /* PPP phase */
- Pstate* lcp; /* lcp state */
- Pstate* ipcp; /* ipcp state */
- char secret[256]; /* md5 key */
- char chapname[256]; /* chap system name */
- Tcpc* ctcp;
- ulong mtu; /* maximum xmit size */
- ulong mru; /* maximum recv size */
-
- int baud;
- int usepap; /* authentication is PAP in every sense, not CHAP */
- int papid;
- int usechap;
-
- /* rfc */
- int usedns;
- Ipaddr dns1;
- Ipaddr dns2;
-
- /* link quality monitoring */
- int period; /* lqm period */
- int timeout; /* time to next lqm packet */
- Qualstats in; /* local */
- Qualstats out;
- Qualstats pin; /* peer */
- Qualstats pout;
- Qualstats sin; /* saved */
-};
-
-PPP* pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*);
-Block* pppread(PPP*);
-int pppwrite(PPP*, Block*);
-void pppclose(PPP*);
-
-struct Lcpmsg
-{
- uchar code;
- uchar id;
- uchar len[2];
- uchar data[1];
-};
-
-struct Lcpopt
-{
- uchar type;
- uchar len;
- uchar data[1];
-};
-
-struct Qualpkt
-{
- uchar magic[4];
-
- uchar lastoutreports[4];
- uchar lastoutpackets[4];
- uchar lastoutbytes[4];
- uchar peerinreports[4];
- uchar peerinpackets[4];
- uchar peerindiscards[4];
- uchar peerinerrors[4];
- uchar peerinbytes[4];
- uchar peeroutreports[4];
- uchar peeroutpackets[4];
- uchar peeroutbytes[4];
-};
-
-ushort compress(Tcpc*, Block*, Fs*);
-Tcpc* compress_init(Tcpc*);
-int compress_negotiate(Tcpc*, uchar*);
-ushort tcpcompress(Tcpc*, Block*, Fs*);
-Block* tcpuncompress(Tcpc*, Block*, ushort, Fs*);
--- a/os/ip/pppmedium.c
+++ /dev/null
@@ -1,192 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-
-#include "ip.h"
-#include "kernel.h"
-#include "ppp.h"
-
-static void pppreader(void *a);
-static void pppbind(Ipifc *ifc, int argc, char **argv);
-static void pppunbind(Ipifc *ifc);
-static void pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
-static void deadremote(Ipifc *ifc);
-
-Medium pppmedium =
-{
-.name= "ppp",
-.hsize= 4,
-.mintu= Minmtu,
-.maxtu= Maxmtu,
-.maclen= 0,
-.bind= pppbind,
-.unbind= pppunbind,
-.bwrite= pppbwrite,
-.unbindonclose= 0, /* don't unbind on last close */
-};
-
-/*
- * called to bind an IP ifc to an ethernet device
- * called with ifc wlock'd
- */
-static void
-pppbind(Ipifc *ifc, int argc, char **argv)
-{
- PPP *ppp;
- Ipaddr ipaddr, remip;
- int mtu, framing;
- char *chapname, *secret;
-
- if(argc < 3)
- error(Ebadarg);
-
- ipmove(ipaddr, IPnoaddr);
- ipmove(remip, IPnoaddr);
- mtu = Defmtu;
- framing = 1;
- chapname = nil;
- secret = nil;
-
- switch(argc){
- default:
- case 9:
- if(argv[8][0] != '-')
- secret = argv[8];
- case 8:
- if(argv[7][0] != '-')
- chapname = argv[7];
- case 7:
- if(argv[6][0] != '-')
- framing = strtoul(argv[6], 0, 0);
- case 6:
- if(argv[5][0] != '-')
- mtu = strtoul(argv[5], 0, 0);
- case 5:
- if(argv[4][0] != '-')
- parseip(remip, argv[4]);
- case 4:
- if(argv[3][0] != '-')
- parseip(ipaddr, argv[3]);
- case 3:
- break;
- }
-
- ppp = smalloc(sizeof(*ppp));
- ppp->ifc = ifc;
- ppp->f = ifc->conv->p->f;
- ifc->arg = ppp;
- if(waserror()){
- pppunbind(ifc);
- nexterror();
- }
- if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil)
- error("ppp open failed");
- poperror();
- kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG);
-}
-
-static void
-pppreader(void *a)
-{
- Ipifc *ifc;
- Block *bp;
- PPP *ppp;
-
- ifc = a;
- ppp = ifc->arg;
- ppp->readp = up; /* hide identity under a rock for unbind */
- setpri(PriHi);
-
- if(waserror()){
- netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr);
- ppp->readp = 0;
- deadremote(ifc);
- pexit("hangup", 1);
- }
-
- for(;;){
- bp = pppread(ppp);
- if(bp == nil)
- error("hungup");
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
- if(waserror()){
- runlock(ifc);
- nexterror();
- }
- ifc->in++;
- if(ifc->lifc == nil)
- freeb(bp);
- else
- ipiput(ppp->f, ifc, bp);
- runlock(ifc);
- poperror();
- }
-}
-
-/*
- * called with ifc wlock'd
- */
-static void
-pppunbind(Ipifc *ifc)
-{
- PPP *ppp = ifc->arg;
-
- if(ppp == nil)
- return;
- if(ppp->readp)
- postnote(ppp->readp, 1, "unbind", 0);
- if(ppp->timep)
- postnote(ppp->timep, 1, "unbind", 0);
-
- /* wait for kprocs to die */
- while(ppp->readp != 0 || ppp->timep != 0)
- tsleep(&up->sleep, return0, 0, 300);
-
- pppclose(ppp);
- qclose(ifc->conv->eq);
- ifc->arg = nil;
-}
-
-/*
- * called by ipoput with a single packet to write with ifc rlock'd
- */
-static void
-pppbwrite(Ipifc *ifc, Block *bp, int, uchar*)
-{
- PPP *ppp = ifc->arg;
-
- pppwrite(ppp, bp);
- ifc->out++;
-}
-
-/*
- * If the other end hangs up, we have to unbind the interface. An extra
- * unbind (in the case where we are hanging up) won't do any harm.
- */
-static void
-deadremote(Ipifc *ifc)
-{
- int fd;
- char path[128];
- PPP *ppp;
-
- ppp = ifc->arg;
- snprint(path, sizeof path, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x);
- fd = kopen(path, ORDWR);
- if(fd < 0)
- return;
- kwrite(fd, "unbind", sizeof("unbind")-1);
- kclose(fd);
-}
-
-void
-pppmediumlink(void)
-{
- addipmedium(&pppmedium);
-}
--- a/os/ip/ptclbsum.c
+++ /dev/null
@@ -1,72 +1,0 @@
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "../port/error.h"
-#include "ip.h"
-
-static short endian = 1;
-static uchar* aendian = (uchar*)&endian;
-#define LITTLE *aendian
-
-ushort
-ptclbsum(uchar *addr, int len)
-{
- ulong losum, hisum, mdsum, x;
- ulong t1, t2;
-
- losum = 0;
- hisum = 0;
- mdsum = 0;
-
- x = 0;
- if((uintptr)addr & 1) {
- if(len) {
- hisum += addr[0];
- len--;
- addr++;
- }
- x = 1;
- }
- while(len >= 16) {
- t1 = *(ushort*)(addr+0);
- t2 = *(ushort*)(addr+2); mdsum += t1;
- t1 = *(ushort*)(addr+4); mdsum += t2;
- t2 = *(ushort*)(addr+6); mdsum += t1;
- t1 = *(ushort*)(addr+8); mdsum += t2;
- t2 = *(ushort*)(addr+10); mdsum += t1;
- t1 = *(ushort*)(addr+12); mdsum += t2;
- t2 = *(ushort*)(addr+14); mdsum += t1;
- mdsum += t2;
- len -= 16;
- addr += 16;
- }
- while(len >= 2) {
- mdsum += *(ushort*)addr;
- len -= 2;
- addr += 2;
- }
- if(x) {
- if(len)
- losum += addr[0];
- if(LITTLE)
- losum += mdsum;
- else
- hisum += mdsum;
- } else {
- if(len)
- hisum += addr[0];
- if(LITTLE)
- hisum += mdsum;
- else
- losum += mdsum;
- }
-
- losum += hisum >> 8;
- losum += (hisum & 0xff) << 8;
- while(hisum = losum>>16)
- losum = hisum + (losum & 0xffff);
-
- return losum & 0xffff;
-}
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -1,4 +1,5 @@
/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
* This protocol is compatible with UDP's packet format.
* It could be done over UDP if need be.
*/
@@ -25,20 +26,17 @@
enum
{
- UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
UDP_IPHDR = 8, /* ip header */
IP_UDPPROTO = 254,
- UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
- UDP_USEAD4 = 12,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
Rudprxms = 200,
Rudptickms = 50,
Rudpmaxxmit = 10,
Maxunacked = 100,
-
};
#define Hangupgen 0xffffffff /* used only in hangup messages */
@@ -205,7 +203,7 @@
qlock(&rpriv->apl);
if(rpriv->ackprocstarted == 0){
sprint(kpname, "#I%drudpack", rudp->f->dev);
- kproc(kpname, relackproc, rudp, 0);
+ kproc(kpname, relackproc, rudp);
rpriv->ackprocstarted = 1;
}
qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
qlock(ucb);
for(r = ucb->r; r; r = r->next)
m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+ m += snprint(state+m, n-m, "\n");
qunlock(ucb);
return m;
}
@@ -281,7 +280,7 @@
/* force out any delayed acks */
ucb = (Rudpcb*)c->ptcl;
qlock(ucb);
- for(r = ucb->r; r; r = r->next){
+ for(r = ucb->r; r != nil; r = r->next){
if(r->acksent != r->rcvseq)
relsendack(c, r, 0);
}
@@ -374,27 +373,10 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
-
- bp->rp += 4; /* Igonore local port */
- break;
default:
ipmove(raddr, c->raddr);
ipmove(laddr, c->laddr);
rport = c->rport;
-
break;
}
@@ -402,9 +384,6 @@
/* Make space to fit rudp & ip header */
bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
- if(bp == nil)
- return;
-
uh = (Udphdr *)(bp->rp);
uh->vihl = IP_VER4;
@@ -417,7 +396,6 @@
uh->frag[1] = 0;
hnputs(uh->udpplen, ptcllen);
switch(ucb->headers){
- case 6:
case 7:
v6tov4(uh->udpdst, raddr);
hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.rudpNoPorts++;
qunlock(rudp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
default:
/* connection oriented rudp */
if(ipcmp(c->raddr, IPnoaddr) == 0){
- /* save the src address in the conversation */
+ /* reply with the same ip address (if not broadcast) */
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
+ ipmove(c->laddr, laddr);
ipmove(c->raddr, raddr);
c->rport = rport;
-
- /* reply with the same ip address (if not broadcast) */
- if(ipforme(f, laddr) == Runi)
- ipmove(c->laddr, laddr);
- else
- v4tov6(c->laddr, ifc->lifc->local);
}
break;
}
- if(bp->next)
- bp = concatblock(bp);
if(qfull(c->rq)) {
- netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
+ } else {
+ qpass(c->rq, concatblock(bp));
}
- else
- qpass(c->rq, bp);
-
qunlock(ucb);
}
@@ -629,16 +594,14 @@
if(n < 1)
return rudpunknown;
- if(strcmp(f[0], "headers++4") == 0){
- ucb->headers = 7;
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 6;
- return nil;
} else if(strcmp(f[0], "hangup") == 0){
if(n < 3)
return "bad syntax";
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ return Ebadip;
x = atoi(f[2]);
qlock(ucb);
relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
qunlock(ucb);
return nil;
} else if(strcmp(f[0], "randdrop") == 0){
- x = 10; /* default is 10% */
+ x = 10; /* default is 10% */
if(n > 1)
x = atoi(f[1]);
if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
pdest = nhgets(h->udpdport);
/* Look for a connection */
- for(p = rudp->conv; *p; p++) {
- s = *p;
+ for(p = rudp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -701,12 +665,6 @@
upriv->orders);
}
-int
-rudpgc(Proto *rudp)
-{
- return natgc(rudp->ipproto);
-}
-
void
rudpinit(Fs *fs)
{
@@ -725,9 +683,8 @@
rudp->rcv = rudpiput;
rudp->advise = rudpadvise;
rudp->stats = rudpstats;
- rudp->gc = rudpgc;
rudp->ipproto = IP_UDPPROTO;
- rudp->nc = 16;
+ rudp->nc = 32;
rudp->ptclsize = sizeof(Rudpcb);
Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
rudp = (Proto *)a;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Rudptickms);
@@ -989,8 +948,6 @@
Fs *f;
bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
- if(bp == nil)
- return;
bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
f = c->p->f;
uh = (Udphdr *)(bp->rp);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -41,13 +41,13 @@
EOLOPT = 0,
NOOPOPT = 1,
MSSOPT = 2,
- MSS_LENGTH = 4, /* Mean segment size */
+ MSS_LENGTH = 4, /* Maximum segment size */
WSOPT = 3,
WS_LENGTH = 3, /* Bits to scale window size by */
MSL2 = 10,
MSPTICK = 50, /* Milliseconds per timer tick */
- DEF_MSS = 1460, /* Default mean segment */
- DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+ DEF_MSS = 1460, /* Default maximum segment */
+ DEF_MSS6 = 1220, /* Default maximum segment (min) for v6 */
DEF_RTT = 500, /* Default round trip */
DEF_KAT = 120000, /* Default time (ms) between keep alives */
TCP_LISTEN = 0, /* Listen connection */
@@ -81,7 +81,13 @@
NLHT = 256, /* hash table size, must be a power of 2 */
LHTMASK = NLHT-1,
- HaveWS = 1<<8,
+ /*
+ * window is 64kb · 2ⁿ
+ * these factors determine the ultimate bandwidth-delay product.
+ * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+ */
+ Maxqscale = 4, /* maximum queuing scale */
+ Defadvscale = 4, /* default advertisement */
};
/* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
ulong seq;
ulong ack;
uchar flags;
- ushort ws; /* window scale option (if not zero) */
- ulong wnd;
+ uchar update;
+ ushort ws; /* window scale option */
+ ulong wnd; /* prescaled window*/
ushort urg;
ushort mss; /* max segment size option (if not zero) */
ushort len; /* size of data */
@@ -205,44 +212,53 @@
ulong wnd; /* Tcp send window */
ulong urg; /* Urgent data pointer */
ulong wl2;
- int scale; /* how much to right shift window in xmitted packets */
+ uint scale; /* how much to right shift window in xmitted packets */
/* to implement tahoe and reno TCP */
ulong dupacks; /* number of duplicate acks rcvd */
+ ulong partialack;
int recovery; /* loss recovery flag */
- ulong rxt; /* right window marker for recovery */
+ int retransmit; /* retransmit 1 packet @ una flag */
+ int rto;
+ ulong rxt; /* right window marker for recovery "recover" rfc3782 */
} snd;
struct {
ulong nxt; /* Receive pointer to next uchar slot */
ulong wnd; /* Receive window incoming */
+ ulong wsnt; /* Last wptr sent. important to track for large bdp */
+ ulong wptr;
ulong urg; /* Urgent pointer */
+ ulong ackptr; /* last acked sequence */
int blocked;
- int una; /* unacked data segs */
- int scale; /* how much to left shift window in rcved packets */
+ uint scale; /* how much to left shift window in rcv'd packets */
} rcv;
ulong iss; /* Initial sequence number */
- int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
ulong cwind; /* Congestion window */
- int scale; /* desired snd.scale */
- ushort ssthresh; /* Slow start threshold */
+ ulong abcbytes; /* appropriate byte counting rfc 3465 */
+ uint scale; /* desired snd.scale */
+ ulong ssthresh; /* Slow start threshold */
int resent; /* Bytes just resent */
int irs; /* Initial received squence */
- ushort mss; /* Mean segment size */
+ ushort mss; /* Maximum segment size */
int rerecv; /* Overlap of data rerecevived */
- ulong window; /* Recevive window */
+ ulong window; /* Our receive window (queue) */
+ uint qscale; /* Log2 of our receive window (queue) */
uchar backoff; /* Exponential backoff counter */
int backedoff; /* ms we've backed off for rexmits */
uchar flags; /* State flags */
Reseq *reseq; /* Resequencing queue */
+ int nreseq;
+ int reseqlen;
Tcptimer timer; /* Activity timer */
Tcptimer acktimer; /* Acknowledge timer */
Tcptimer rtt_timer; /* Round trip timer */
Tcptimer katimer; /* keep alive timer */
ulong rttseq; /* Round trip sequence */
- int srtt; /* Shortened round trip */
+ int srtt; /* Smoothed round trip */
int mdev; /* Mean deviation of round trip */
int kacounter; /* count down for keep alive */
uint sndsyntime; /* time syn sent */
ulong time; /* time Finwait2 or Syn_received was sent */
+ ulong timeuna; /* snd.una when time was set */
int nochecksum; /* non-zero means don't send checksums */
int flgcnt; /* number of flags in the sequence (FIN,SEQ) */
@@ -285,11 +301,11 @@
};
int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
-ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
enum {
/* MIB stats */
MaxConn,
+ Mss,
ActiveOpens,
PassiveOpens,
EstabResets,
@@ -297,6 +313,7 @@
InSegs,
OutSegs,
RetransSegs,
+ RetransSegsSent,
RetransTimeouts,
InErrs,
OutRsts,
@@ -305,14 +322,27 @@
CsumErrs,
HlenErrs,
LenErrs,
+ Resequenced,
OutOfOrder,
+ ReseqBytelim,
+ ReseqPktlim,
+ Delayack,
+ Wopenack,
+ Recovery,
+ RecoveryDone,
+ RecoveryRTO,
+ RecoveryNoSeq,
+ RecoveryCwind,
+ RecoveryPA,
+
Nstats
};
-static char *statnames[] =
+static char *statnames[Nstats] =
{
[MaxConn] "MaxConn",
+[Mss] "MaxSegment",
[ActiveOpens] "ActiveOpens",
[PassiveOpens] "PassiveOpens",
[EstabResets] "EstabResets",
@@ -320,6 +350,7 @@
[InSegs] "InSegs",
[OutSegs] "OutSegs",
[RetransSegs] "RetransSegs",
+[RetransSegsSent] "RetransSegsSent",
[RetransTimeouts] "RetransTimeouts",
[InErrs] "InErrs",
[OutRsts] "OutRsts",
@@ -327,6 +358,19 @@
[HlenErrs] "HlenErrs",
[LenErrs] "LenErrs",
[OutOfOrder] "OutOfOrder",
+[Resequenced] "Resequenced",
+[ReseqBytelim] "ReseqBytelim",
+[ReseqPktlim] "ReseqPktlim",
+[Delayack] "Delayack",
+[Wopenack] "Wopenack",
+
+[Recovery] "Recovery",
+[RecoveryDone] "RecoveryDone",
+[RecoveryRTO] "RecoveryRTO",
+
+[RecoveryNoSeq] "RecoveryNoSeq",
+[RecoveryCwind] "RecoveryCwind",
+[RecoveryPA] "RecoveryPA",
};
typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
QLock apl;
int ackprocstarted;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
};
/*
@@ -356,34 +400,34 @@
* of DoS attack.
*
* To avoid stateless Conv hogs, we pick a sequence number at random. If
- * it that number gets acked by the other end, we shut down the connection.
- * Look for tcpporthogedefense in the code.
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
*/
int tcpporthogdefense = 0;
-int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void localclose(Conv*, char*);
-void procsyn(Conv*, Tcp*);
-void tcpiput(Proto*, Ipifc*, Block*);
-void tcpoutput(Conv*);
-int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void tcpstart(Conv*, int);
-void tcptimeout(void*);
-void tcpsndsyn(Conv*, Tcpctl*);
-void tcprcvwin(Conv*);
-void tcpacktimer(void*);
-void tcpkeepalive(void*);
-void tcpsetkacounter(Tcpctl*);
-void tcprxmit(Conv*);
-void tcpsettimer(Tcpctl*);
-void tcpsynackrtt(Conv*);
-void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static int addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static int dumpreseq(Tcpctl*);
+static void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static void limborexmit(Proto*);
+static void localclose(Conv*, char*);
+static void procsyn(Conv*, Tcp*);
+static void tcpacktimer(void*);
+static void tcpiput(Proto*, Ipifc*, Block*);
+static void tcpkeepalive(void*);
+static void tcpoutput(Conv*);
+static void tcprcvwin(Conv*);
+static void tcprxmit(Conv*);
+static void tcpsetkacounter(Tcpctl*);
+static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static void tcpsettimer(Tcpctl*);
+static void tcpsndsyn(Conv*, Tcpctl*);
+static void tcpstart(Conv*, int);
+static void tcpsynackrtt(Conv*);
+static void tcptimeout(void*);
+static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
tcpsetstate(Conv *s, uchar newstate)
{
Tcpctl *tcb;
@@ -403,11 +447,6 @@
if(newstate == Established)
tpriv->stats[CurrEstab]++;
- /**
- print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
- tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
- **/
-
switch(newstate) {
case Closed:
qclose(s->rq);
@@ -430,7 +469,12 @@
tcpconnect(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
@@ -447,12 +491,14 @@
s = (Tcpctl*)(c->ptcl);
return snprint(state, n,
- "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
tcpstates[s->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
- s->srtt, s->mdev,
- s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+ s->nreseq, s->reseqlen,
+ s->srtt, s->mdev, s->ssthresh,
+ s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+ s->qscale,
s->timer.start, s->timer.count, s->rerecv,
s->katimer.start, s->katimer.count);
}
@@ -470,7 +516,12 @@
tcpannounce(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdannounce(c, argv, argc);
if(e != nil)
return e;
@@ -524,7 +575,7 @@
}
}
-void
+static void
tcpkick(void *x)
{
Conv *s = x;
@@ -546,7 +597,6 @@
/*
* Push data
*/
- tcprcvwin(s);
tcpoutput(s);
break;
default:
@@ -558,7 +608,9 @@
poperror();
}
-void
+static int seq_lt(ulong, ulong);
+
+static void
tcprcvwin(Conv *s) /* Call with tcb locked */
{
int w;
@@ -568,12 +620,20 @@
w = tcb->window - qlen(s->rq);
if(w < 0)
w = 0;
- tcb->rcv.wnd = w;
- if(w == 0)
+ /* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+ if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+ w = tcb->rcv.wptr - tcb->rcv.nxt;
+ if(w != tcb->rcv.wnd)
+ if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
tcb->rcv.blocked = 1;
+ netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+ tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+ }
+ tcb->rcv.wnd = w;
+ tcb->rcv.wptr = tcb->rcv.nxt + w;
}
-void
+static void
tcpacktimer(void *v)
{
Tcpctl *tcb;
@@ -589,7 +649,6 @@
qlock(s);
if(tcb->state != Closed){
tcb->flags |= FORCE;
- tcprcvwin(s);
tcpoutput(s);
}
qunlock(s);
@@ -597,10 +656,52 @@
}
static void
+tcpcongestion(Tcpctl *tcb)
+{
+ ulong inflight;
+
+ inflight = tcb->snd.nxt - tcb->snd.una;
+ if(inflight > tcb->cwind)
+ inflight = tcb->cwind;
+ tcb->ssthresh = inflight / 2;
+ if(tcb->ssthresh < 2*tcb->mss)
+ tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+ L = 2, /* aggressive slow start; legal values ∈ (1.0, 2.0) */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+ uint limit;
+
+ tcb->abcbytes += acked;
+ if(tcb->cwind < tcb->ssthresh){
+ /* slow start */
+ if(tcb->snd.rto)
+ limit = 1*tcb->mss;
+ else
+ limit = L*tcb->mss;
+ tcb->cwind += MIN(tcb->abcbytes, limit);
+ tcb->abcbytes = 0;
+ }
+ else{
+ tcb->snd.rto = 0;
+ /* avoidance */
+ if(tcb->abcbytes >= tcb->cwind){
+ tcb->abcbytes -= tcb->cwind;
+ tcb->cwind += tcb->mss;
+ }
+ }
+}
+
+static void
tcpcreate(Conv *c)
{
c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
- c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+ c->wq = qopen(QMAX, Qkick, tcpkick, c);
}
static void
@@ -608,7 +709,7 @@
{
if(newstate != TcptimerON){
if(t->state == TcptimerON){
- // unchain
+ /* unchain */
if(priv->timers == t){
priv->timers = t->next;
if(t->prev != nil)
@@ -622,7 +723,7 @@
}
} else {
if(t->state != TcptimerON){
- // chain
+ /* chain */
if(t->prev != nil || t->next != nil)
panic("timerstate2");
t->prev = nil;
@@ -635,7 +736,7 @@
t->state = newstate;
}
-void
+static void
tcpackproc(void *a)
{
Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
tcp = a;
priv = tcp->priv;
+ while(waserror())
+ ;
+
for(;;) {
tsleep(&up->sleep, return0, 0, MSPTICK);
@@ -681,7 +785,7 @@
}
}
-void
+static void
tcpgo(Tcppriv *priv, Tcptimer *t)
{
if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
qunlock(&priv->tl);
}
-void
+static void
tcphalt(Tcppriv *priv, Tcptimer *t)
{
if(t == nil)
@@ -704,17 +808,16 @@
qunlock(&priv->tl);
}
-int
+static int
backoff(int n)
{
return 1 << n;
}
-void
+static void
localclose(Conv *s, char *reason) /* called with tcb locked */
{
Tcpctl *tcb;
- Reseq *rp,*rp1;
Tcppriv *tpriv;
tpriv = s->p->priv;
@@ -728,12 +831,7 @@
tcphalt(tpriv, &tcb->katimer);
/* Flush reassembly queue; nothing more can arrive */
- for(rp = tcb->reseq; rp != nil; rp = rp1) {
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
+ dumpreseq(tcb);
if(tcb->state == Syn_sent)
Fsconnected(s, reason);
@@ -747,45 +845,46 @@
}
/* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
{
Ipifc *ifc;
int mtu;
- ifc = findipifc(tcp->f, addr, 0);
- switch(version){
- default:
- case V4:
- mtu = DEF_MSS;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
- break;
- case V6:
- mtu = DEF_MSS6;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
- break;
- }
- if(ifc != nil){
- if(ifc->mbps > 100)
- *scale = HaveWS | 3;
- else if(ifc->mbps > 10)
- *scale = HaveWS | 1;
- else
- *scale = HaveWS | 0;
- } else
- *scale = HaveWS | 0;
+ /*
+ * set the ws. it doesn't commit us to anything.
+ * ws is the ultimate limit to the bandwidth-delay product.
+ */
+ *scale = Defadvscale;
- return mtu;
+ /*
+ * currently we do not implement path MTU discovery
+ * so use interface MTU *only* if directly reachable
+ * or when we use V4 which allows routers to fragment.
+ * otherwise, we use the default MSS which assumes a
+ * safe minimum MTU of 1280 bytes for V6.
+ */
+ if(r != nil && (ifc = r->ifc) != nil){
+ mtu = ifc->maxtu - ifc->m->hsize;
+ if(version == V4)
+ return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+ mtu -= TCP6_PKT + TCP6_HDRSIZE;
+ if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+ return mtu;
+ }
+ if(version == V6)
+ return DEF_MSS6;
+ else
+ return DEF_MSS;
}
-void
+static void
inittcpctl(Conv *s, int mode)
{
Tcpctl *tcb;
Tcp4hdr* h4;
Tcp6hdr* h6;
+ Tcppriv *tpriv;
int mss;
tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
memset(tcb, 0, sizeof(Tcpctl));
- tcb->ssthresh = 65535;
+ tcb->ssthresh = QMAX; /* reset by tcpsetscale() */
tcb->srtt = tcp_irtt<<LOGAGAIN;
tcb->mdev = 0;
@@ -841,19 +940,18 @@
}
tcb->mss = tcb->cwind = mss;
+ tcb->abcbytes = 0;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
/* default is no window scaling */
- tcb->window = QMAX;
- tcb->rcv.wnd = QMAX;
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- qsetlimit(s->rq, QMAX);
+ tcpsetscale(s, tcb, 0, 0);
}
/*
* called with s qlocked
*/
-void
+static void
tcpstart(Conv *s, int mode)
{
Tcpctl *tcb;
@@ -865,8 +963,8 @@
if(tpriv->ackprocstarted == 0){
qlock(&tpriv->apl);
if(tpriv->ackprocstarted == 0){
- sprint(kpname, "#I%dtcpack", s->p->f->dev);
- kproc(kpname, tcpackproc, s->p, 0);
+ snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+ kproc(kpname, tcpackproc, s->p);
tpriv->ackprocstarted = 1;
}
qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
}
static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
{
- static char buf[128];
+ char *p;
- sprint(buf, "%d", flag>>10); /* Head len */
+ p = seprint(buf, e, "%d", flag>>10); /* Head len */
if(flag & URG)
- strcat(buf, " URG");
+ p = seprint(p, e, " URG");
if(flag & ACK)
- strcat(buf, " ACK");
+ p = seprint(p, e, " ACK");
if(flag & PSH)
- strcat(buf, " PSH");
+ p = seprint(p, e, " PSH");
if(flag & RST)
- strcat(buf, " RST");
+ p = seprint(p, e, " RST");
if(flag & SYN)
- strcat(buf, " SYN");
+ p = seprint(p, e, " SYN");
if(flag & FIN)
- strcat(buf, " FIN");
-
+ p = seprint(p, e, " FIN");
+ USED(p);
return buf;
}
-Block *
+static Block*
htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -940,14 +1038,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP6_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP6_PKT;
}
@@ -1000,7 +1094,7 @@
return data;
}
-Block *
+static Block*
htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -1013,7 +1107,7 @@
if(tcph->flags & SYN){
if(tcph->mss)
hdrlen += MSS_LENGTH;
- if(tcph->ws)
+ if(1)
hdrlen += WS_LENGTH;
optpad = hdrlen & 3;
if(optpad)
@@ -1024,14 +1118,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP4_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP4_PKT;
}
@@ -1055,7 +1145,8 @@
hnputs(opt, tcph->mss);
opt += 2;
}
- if(tcph->ws != 0){
+ /* always offer. rfc1323 §2.2 */
+ if(1){
*opt++ = WSOPT;
*opt++ = WS_LENGTH;
*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
return data;
}
-int
+static int
ntohtcp6(Tcp *tcph, Block **bpp)
{
Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->ploadlen) - hdrlen;
*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1136,7 +1228,7 @@
return hdrlen;
}
-int
+static int
ntohtcp4(Tcp *tcph, Block **bpp)
{
Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1200,16 +1293,19 @@
}
/*
- * For outgiing calls, generate an initial sequence
+ * For outgoing calls, generate an initial sequence
* number and put a SYN on the send queue
*/
-void
+static void
tcpsndsyn(Conv *s, Tcpctl *tcb)
{
+ Tcppriv *tpriv;
+
tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
tcb->rttseq = tcb->iss;
tcb->snd.wl2 = tcb->iss;
tcb->snd.una = tcb->iss;
+ tcb->snd.rxt = tcb->iss;
tcb->snd.ptr = tcb->rttseq;
tcb->snd.nxt = tcb->rttseq;
tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
tcb->sndsyntime = NOW;
/* set desired mss and scale */
- tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+ tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
}
void
@@ -1229,7 +1327,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
- netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
tpriv = tcp->priv;
@@ -1307,7 +1405,7 @@
* send a reset to the remote side and close the conversation
* called with s qlocked
*/
-char*
+static char*
tcphangup(Conv *s)
{
Tcp seg;
@@ -1322,7 +1420,7 @@
memset(&seg, 0, sizeof seg);
seg.flags = RST | ACK;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
seg.seq = tcb->snd.ptr;
seg.wnd = 0;
seg.urg = 0;
@@ -1353,7 +1451,7 @@
/*
* (re)send a SYN ACK
*/
-int
+static int
sndsynack(Proto *tcp, Limbo *lp)
{
Block *hbp;
@@ -1360,7 +1458,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
Tcp seg;
- int scale;
+ uint scale;
/* make pseudo header */
switch(lp->version) {
@@ -1388,11 +1486,12 @@
panic("sndrst: version %d", lp->version);
}
+ memset(&seg, 0, sizeof seg);
seg.seq = lp->iss;
seg.ack = lp->irs+1;
seg.flags = SYN|ACK;
seg.urg = 0;
- seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+ seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
seg.wnd = QMAX;
/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
}
}
+static void
+initialwindow(Tcpctl *tcb)
+{
+ /* RFC 3390 initial window */
+ if(tcb->mss < 1095)
+ tcb->cwind = 4*tcb->mss;
+ else if(tcb->mss < 2190)
+ tcb->cwind = 4380;
+ else
+ tcb->cwind = 2*tcb->mss;
+}
+
/*
* come here when we finally get an ACK to our SYN-ACK.
* lookup call in limbo. if found, create a new conversation
@@ -1596,7 +1707,7 @@
/* find a call in limbo */
h = hashipa(src, segp->source);
for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
- netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+ netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
src, segp->source, lp->raddr, lp->rport,
dst, segp->dest, lp->laddr, lp->lport,
version, lp->version
@@ -1611,7 +1722,7 @@
/* we're assuming no data with the initial SYN */
if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
- netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+ netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
segp->seq, lp->irs+1, segp->ack, lp->iss+1);
lp = nil;
} else {
@@ -1641,6 +1752,8 @@
tcb->irs = lp->irs;
tcb->rcv.nxt = tcb->irs+1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
tcb->snd.una = tcb->iss+1;
tcb->snd.ptr = tcb->iss+1;
tcb->snd.nxt = tcb->iss+1;
+ tcb->snd.rxt = tcb->iss+1;
tcb->flgcnt = 0;
tcb->flags |= SYNACK;
+ /* set desired mss and scale */
+ tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
/* our sending max segment size cannot be bigger than what he asked for */
if(lp->mss != 0 && lp->mss < tcb->mss)
tcb->mss = lp->mss;
+ tpriv->stats[Mss] = tcb->mss;
/* window scaling */
tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
- /* the congestion window always starts out as a single segment */
+ /* congestion window */
tcb->snd.wnd = segp->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
/* set initial round trip time */
tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
return new;
}
-int
+static int
seq_within(ulong x, ulong low, ulong high)
{
if(low <= high){
@@ -1714,25 +1832,25 @@
return 0;
}
-int
+static int
seq_lt(ulong x, ulong y)
{
return (int)(x-y) < 0;
}
-int
+static int
seq_le(ulong x, ulong y)
{
return (int)(x-y) <= 0;
}
-int
+static int
seq_gt(ulong x, ulong y)
{
return (int)(x-y) > 0;
}
-int
+static int
seq_ge(ulong x, ulong y)
{
return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
* use the time between the first SYN and it's ack as the
* initial round trip time
*/
-void
+static void
tcpsynackrtt(Conv *s)
{
Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
tcphalt(tpriv, &tcb->rtt_timer);
}
-void
+static void
update(Conv *s, Tcp *seg)
{
int rtt, delta;
Tcpctl *tcb;
ulong acked;
- ulong expand;
Tcppriv *tpriv;
+ if(seg->update)
+ return;
+ seg->update = 1;
+
tpriv = s->p->priv;
tcb = (Tcpctl*)s->ptcl;
- /* if everything has been acked, force output(?) */
- if(seq_gt(seg->ack, tcb->snd.nxt)) {
- tcb->flags |= FORCE;
- return;
+ /* catch zero-window updates, update window & recover */
+ if(tcb->snd.wnd == 0 && seg->wnd > 0)
+ if(seq_lt(seg->ack, tcb->snd.ptr)){
+ netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+ seg->ack, tcb->snd.una, tcb->snd.ptr, seg->wnd);
+ tcb->snd.wnd = seg->wnd;
+ goto recovery;
}
- /* added by Dong Lin for fast retransmission */
- if(seg->ack == tcb->snd.una
- && tcb->snd.una != tcb->snd.nxt
- && seg->len == 0
- && seg->wnd == tcb->snd.wnd) {
-
- /* this is a pure ack w/o window update */
- netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
- tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
- if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
- /*
- * tahoe tcp rxt the packet, half sshthresh,
- * and set cwnd to one packet
- */
+ /* newreno fast retransmit */
+ if(seg->ack == tcb->snd.una)
+ if(tcb->snd.una != tcb->snd.nxt)
+ if(++tcb->snd.dupacks == 3){
+recovery:
+ if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
+ }else if(seq_le(tcb->snd.rxt, seg->ack)){
+ tpriv->stats[Recovery]++;
+ tcb->abcbytes = 0;
tcb->snd.recovery = 1;
+ tcb->snd.partialack = 0;
tcb->snd.rxt = tcb->snd.nxt;
- netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcpcongestion(tcb);
+ tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+ netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+ tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
tcprxmit(s);
- } else {
- /* do reno tcp here. */
+ }else{
+ tpriv->stats[RecoveryNoSeq]++;
+ netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+ tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+ /* do not enter fast retransmit */
+ /* do not change ssthresh */
}
+ }else if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
}
/*
@@ -1807,6 +1938,9 @@
*/
if(seq_gt(seg->ack, tcb->snd.wl2)
|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ /* clear dupack if we advance wl2 */
+ if(tcb->snd.wl2 != seg->ack)
+ tcb->snd.dupacks = 0;
tcb->snd.wnd = seg->wnd;
tcb->snd.wl2 = seg->ack;
}
@@ -1816,22 +1950,11 @@
* don't let us hangup if sending into a closed window and
* we're still getting acks
*/
- if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
tcb->backedoff = MAXBACKMS/4;
- }
return;
}
- /*
- * any positive ack turns off fast rxt,
- * (should we do new-reno on partial acks?)
- */
- if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
- tcb->snd.dupacks = 0;
- tcb->snd.recovery = 0;
- } else
- netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
/* Compute the new send window size */
acked = seg->ack - tcb->snd.una;
@@ -1843,24 +1966,41 @@
goto done;
}
- /* slow start as long as we're not recovering from lost packets */
- if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
- if(tcb->cwind < tcb->ssthresh) {
- expand = tcb->mss;
- if(acked < expand)
- expand = acked;
+ /*
+ * congestion control
+ */
+ if(tcb->snd.recovery){
+ if(seq_ge(seg->ack, tcb->snd.rxt)){
+ /* recovery finished; deflate window */
+ tpriv->stats[RecoveryDone]++;
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+ if(tcb->ssthresh < tcb->cwind)
+ tcb->cwind = tcb->ssthresh;
+ netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+ tcb->cwind, tcb->ssthresh);
+ } else {
+ /* partial ack; we lost more than one segment */
+ tpriv->stats[RecoveryPA]++;
+ if(tcb->cwind > acked)
+ tcb->cwind -= acked;
+ else{
+ netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+ tcb->cwind = tcb->mss;
+ }
+ netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+ acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+ if(acked >= tcb->mss)
+ tcb->cwind += tcb->mss;
+ tcb->snd.partialack++;
}
- else
- expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+ } else
+ tcpabcincr(tcb, acked);
- if(tcb->cwind + expand < tcb->cwind)
- expand = tcb->snd.wnd - tcb->cwind;
- if(tcb->cwind + expand > tcb->snd.wnd)
- expand = tcb->snd.wnd - tcb->cwind;
- tcb->cwind += expand;
- }
-
/* Adjust the timers according to the round trip time */
+ /* todo: fix sloppy treatment of overflow cases here. */
if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
tcphalt(tpriv, &tcb->rtt_timer);
if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
done:
if(qdiscard(s->wq, acked) < acked)
tcb->flgcnt--;
-
tcb->snd.una = seg->ack;
+
+ /* newreno fast recovery */
+ if(tcb->snd.recovery)
+ tcprxmit(s);
+
if(seq_gt(seg->ack, tcb->snd.urg))
tcb->snd.urg = seg->ack;
- if(tcb->snd.una != tcb->snd.nxt)
- tcpgo(tpriv, &tcb->timer);
+ if(tcb->snd.una != tcb->snd.nxt){
+ /* “impatient” variant */
+ if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
+ tcpgo(tpriv, &tcb->timer);
+ }
+ }
else
tcphalt(tpriv, &tcb->timer);
@@ -1904,12 +2054,13 @@
if(seq_lt(tcb->snd.ptr, tcb->snd.una))
tcb->snd.ptr = tcb->snd.una;
- tcb->flags &= ~RETRAN;
+ if(!tcb->snd.recovery)
+ tcb->flags &= ~RETRAN;
tcb->backoff = 0;
tcb->backedoff = 0;
}
-void
+static void
tcpiput(Proto *tcp, Ipifc*, Block *bp)
{
Tcp seg;
@@ -1917,7 +2068,7 @@
Tcp6hdr *h6;
int hdrlen;
Tcpctl *tcb;
- ushort length;
+ ushort length, csum;
uchar source[IPaddrlen], dest[IPaddrlen];
Conv *s;
Fs *f;
@@ -1980,10 +2131,12 @@
h6->ttl = proto;
hnputl(h6->vcf, length);
if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
- ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp proto cksum\n");
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
freeblist(bp);
return;
}
@@ -1995,7 +2148,7 @@
if(hdrlen < 0){
tpriv->stats[HlenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp hdr len\n");
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
return;
}
@@ -2005,7 +2158,7 @@
if(bp == nil){
tpriv->stats[LenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
return;
}
}
@@ -2016,7 +2169,8 @@
/* Look for a matching conversation */
s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
if(s == nil){
- netlog(f, Logtcp, "iphtlook failed");
+ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+ source, seg.source, dest, seg.dest);
reset:
qunlock(tcp);
sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
}
/* Cut the data to fit the receive window */
+ tcprcvwin(s);
if(tcptrim(tcb, &seg, &bp, &length) == -1) {
- netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+ netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n",
+ seg.seq, seg.seq + length - 1,
+ tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
update(s, &seg);
if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
if(seg.seq != tcb->rcv.nxt)
if(length != 0 || (seg.flags & (SYN|FIN))) {
update(s, &seg);
- if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
- tcb->flags |= FORCE;
+ tcb->flags |= FORCE; /* force duplicate ack; RFC 5681 §3.2 */
goto output;
}
+ if(tcb->nreseq > 0)
+ tcb->flags |= FORCE; /* filled hole in sequence space; RFC 5681 §3.2 */
+
/*
* keep looping till we've processed this packet plus any
* adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
goto raise;
}
case Time_wait:
- tcb->flags |= FORCE;
+ if(seg.flags & FIN)
+ tcb->flags |= FORCE;
if(tcb->timer.state != TcptimerON)
tcpgo(tpriv, &tcb->timer);
}
@@ -2272,34 +2434,12 @@
* receive queue
*/
if(bp) {
- bp = packblock(bp);
- if(bp == nil)
- panic("tcp packblock");
- qpassnolim(s->rq, bp);
+ qpassnolim(s->rq, packblock(bp));
bp = nil;
-
- /*
- * Force an ack every 2 data messages. This is
- * a hack for rob to make his home system run
- * faster.
- *
- * this also keeps the standard TCP congestion
- * control working since it needs an ack every
- * 2 max segs worth. This is not quite that,
- * but under a real stream is equivalent since
- * every packet has a max seg in it.
- */
- if(++(tcb->rcv.una) >= 2)
- tcb->flags |= FORCE;
}
tcb->rcv.nxt += length;
/*
- * update our rcv window
- */
- tcprcvwin(s);
-
- /*
* turn on the acktimer if there's something
* to ack
*/
@@ -2373,8 +2513,11 @@
getreseq(tcb, &seg, &bp, &length);
- if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ tcprcvwin(s);
+ if(tcptrim(tcb, &seg, &bp, &length) == 0){
+ tcb->flags |= FORCE;
break;
+ }
}
}
output:
@@ -2394,15 +2537,15 @@
* the lock to ipoput the packet so some care has to be
* taken by callers.
*/
-void
+static void
tcpoutput(Conv *s)
{
Tcp seg;
- int msgs;
+ uint msgs;
Tcpctl *tcb;
Block *hbp, *bp;
- int sndcnt, n;
- ulong ssize, dsize, usable, sent;
+ int sndcnt;
+ ulong ssize, dsize, sent;
Fs *f;
Tcppriv *tpriv;
uchar version;
@@ -2411,9 +2554,26 @@
tpriv = s->p->priv;
version = s->ipversion;
- for(msgs = 0; msgs < 100; msgs++) {
- tcb = (Tcpctl*)s->ptcl;
+ tcb = (Tcpctl*)s->ptcl;
+ /* force ack every 2*mss */
+ if((tcb->flags & FORCE) == 0)
+ if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+ tpriv->stats[Delayack]++;
+ tcb->flags |= FORCE;
+ }
+
+ /* force ack if window opening */
+ if(0)
+ if((tcb->flags & FORCE) == 0){
+ tcprcvwin(s);
+ if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+ tpriv->stats[Wopenack]++;
+ tcb->flags |= FORCE;
+ }
+ }
+
+ for(msgs = 0; msgs < 100; msgs++) {
switch(tcb->state) {
case Listen:
case Closed:
@@ -2421,7 +2581,12 @@
return;
}
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
/* force an ack when a window has opened up */
+ tcprcvwin(s);
if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
tcb->rcv.blocked = 0;
tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
sndcnt = qlen(s->wq)+tcb->flgcnt;
sent = tcb->snd.ptr - tcb->snd.una;
-
- /* Don't send anything else until our SYN has been acked */
- if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
- break;
-
- /* Compute usable segment based on offered window and limit
- * window probes to one
- */
+ ssize = sndcnt;
if(tcb->snd.wnd == 0){
- if(sent != 0) {
- if((tcb->flags&FORCE) == 0)
- break;
-// tcb->snd.ptr = tcb->snd.una;
+ /* zero window probe */
+ if(sent > 0)
+ if(!(tcb->flags & FORCE))
+ break; /* already probing, rto re-probes */
+ if(ssize < sent)
+ ssize = 0;
+ else{
+ ssize -= sent;
+ if(ssize > 0)
+ ssize = 1;
}
- usable = 1;
+ } else {
+ /* calculate usable segment size */
+ if(ssize > tcb->cwind)
+ ssize = tcb->cwind;
+ if(ssize > tcb->snd.wnd)
+ ssize = tcb->snd.wnd;
+
+ if(ssize < sent)
+ ssize = 0;
+ else {
+ ssize -= sent;
+ if(ssize > tcb->mss)
+ ssize = tcb->mss;
+ }
}
- else {
- usable = tcb->cwind;
- if(tcb->snd.wnd < usable)
- usable = tcb->snd.wnd;
- usable -= sent;
- }
- ssize = sndcnt-sent;
- if(ssize && usable < 2)
- netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
- tcb->snd.wnd, tcb->cwind);
- if(usable < ssize)
- ssize = usable;
- if(tcb->mss < ssize)
- ssize = tcb->mss;
+
dsize = ssize;
seg.urg = 0;
- if(ssize == 0)
- if((tcb->flags&FORCE) == 0)
- break;
+ if(!(tcb->flags & FORCE)){
+ if(ssize == 0)
+ break;
+ if(ssize < tcb->mss)
+ if(tcb->snd.nxt == tcb->snd.ptr)
+ if(sent > TCPREXMTTHRESH*tcb->mss)
+ break;
+ }
tcb->flags &= ~FORCE;
- tcprcvwin(s);
/* By default we will generate an ack */
tcphalt(tpriv, &tcb->acktimer);
- tcb->rcv.una = 0;
seg.source = s->lport;
seg.dest = s->rport;
seg.flags = ACK;
seg.mss = 0;
seg.ws = 0;
+ seg.update = 0;
switch(tcb->state){
case Syn_sent:
seg.flags = 0;
@@ -2516,20 +2684,9 @@
}
}
- if(sent+dsize == sndcnt)
+ if(sent+dsize == sndcnt && dsize)
seg.flags |= PSH;
- /* keep track of balance of resent data */
- if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
- n = tcb->snd.nxt - tcb->snd.ptr;
- if(ssize < n)
- n = ssize;
- tcb->resent += n;
- netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
- s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
- tpriv->stats[RetransSegs]++;
- }
-
tcb->snd.ptr += ssize;
/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
* expect acknowledges
*/
if(ssize != 0){
- if(tcb->timer.state != TcptimerON)
+ if(tcb->timer.state != TcptimerON){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
tcpgo(tpriv, &tcb->timer);
+ }
/* If round trip timer isn't running, start it.
* measure the longest packet only in case the
* transmission time dominates RTT
*/
+ if(tcb->snd.retransmit == 0)
if(tcb->rtt_timer.state != TcptimerON)
if(ssize == tcb->mss) {
tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
}
tpriv->stats[OutSegs]++;
+ if(tcb->snd.retransmit)
+ tpriv->stats[RetransSegsSent]++;
+ tcb->rcv.ackptr = seg.ack;
+ tcb->rcv.wsnt = tcb->rcv.wptr;
/* put off the next keep alive */
tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
default:
panic("tcpoutput2: version %d", version);
}
- if((msgs%4) == 1){
+ if((msgs%4) == 3){
qunlock(s);
- sched();
qlock(s);
}
}
@@ -2611,7 +2775,7 @@
/*
* the BSD convention (hack?) for keep alives. resend last uchar acked.
*/
-void
+static void
tcpsendka(Conv *s)
{
Tcp seg;
@@ -2621,6 +2785,7 @@
tcb = (Tcpctl*)s->ptcl;
dbp = nil;
+ memset(&seg, 0, sizeof seg);
seg.urg = 0;
seg.source = s->lport;
seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
else
seg.seq = tcb->snd.una-1;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
+ tcprcvwin(s);
seg.wnd = tcb->rcv.wnd;
if(tcb->state == Finwait2){
seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
/*
* set connection to time out after 12 minutes
*/
-void
+static void
tcpsetkacounter(Tcpctl *tcb)
{
tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
* if we've timed out, close the connection
* otherwise, send a keepalive and restart the timer
*/
-void
+static void
tcpkeepalive(void *v)
{
Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
/*
* start keepalive timer
*/
-char*
+static char*
tcpstartka(Conv *s, char **f, int n)
{
Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
/*
* turn checksums on/off
*/
-char*
+static char*
tcpsetchecksum(Conv *s, char **f, int)
{
Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
return nil;
}
-void
+/*
+ * retransmit (at most) one segment at snd.una.
+ * preserve cwind & snd.ptr
+ */
+static void
tcprxmit(Conv *s)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
+ ulong tcwind, tptr;
tcb = (Tcpctl*)s->ptcl;
-
tcb->flags |= RETRAN|FORCE;
- tcb->snd.ptr = tcb->snd.una;
- /*
- * We should be halving the slow start threshhold (down to one
- * mss) but leaving it at mss seems to work well enough
- */
- tcb->ssthresh = tcb->mss;
-
- /*
- * pull window down to a single packet
- */
+ tptr = tcb->snd.ptr;
+ tcwind = tcb->cwind;
+ tcb->snd.ptr = tcb->snd.una;
tcb->cwind = tcb->mss;
+ tcb->snd.retransmit = 1;
tcpoutput(s);
+ tcb->snd.retransmit = 0;
+ tcb->cwind = tcwind;
+ tcb->snd.ptr = tptr;
+
+ tpriv = s->p->priv;
+ tpriv->stats[RetransSegs]++;
}
-void
+/*
+ * todo: RFC 4138 F-RTO
+ */
+static void
tcptimeout(void *arg)
{
Conv *s;
@@ -2792,11 +2966,29 @@
localclose(s, Etimedout);
break;
}
- netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+ netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+ tcb->srtt, tcb->mdev, NOW-tcb->time,
+ tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+ tcpstates[s->state]);
tcpsettimer(tcb);
+ if(tcb->snd.rto == 0)
+ tcpcongestion(tcb);
tcprxmit(s);
+ tcb->snd.ptr = tcb->snd.una;
+ tcb->cwind = tcb->mss;
+ tcb->snd.rto = 1;
tpriv->stats[RetransTimeouts]++;
- tcb->snd.dupacks = 0;
+
+ if(tcb->snd.recovery){
+ tcb->snd.dupacks = 0; /* reno rto */
+ tcb->snd.recovery = 0;
+ tpriv->stats[RecoveryRTO]++;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcpwin,
+ "rto recovery rxt @%lud\n", tcb->snd.nxt);
+ }
+
+ tcb->abcbytes = 0;
break;
case Time_wait:
localclose(s, nil);
@@ -2808,7 +3000,7 @@
poperror();
}
-int
+static int
inwindow(Tcpctl *tcb, int seq)
{
return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
/*
* set up state for a received SYN (or SYN ACK) packet
*/
-void
+static void
procsyn(Conv *s, Tcp *seg)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
tcb = (Tcpctl*)s->ptcl;
tcb->flags |= FORCE;
tcb->rcv.nxt = seg->seq + 1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->irs = seg->seq;
/* our sending max segment size cannot be bigger than what he asked for */
- if(seg->mss != 0 && seg->mss < tcb->mss)
+ if(seg->mss != 0 && seg->mss < tcb->mss) {
tcb->mss = seg->mss;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
+ }
- /* the congestion window always starts out as a single segment */
+ /* if the server does not support ws option, disable window scaling */
+ if(seg->ws == 0){
+ tcb->scale = 0;
+ tcb->snd.scale = 0;
+ }
+
tcb->snd.wnd = seg->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
}
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
{
- Reseq *rp, *rp1;
- int i, rqlen, qmax;
+ Reseq *r, *next;
+ for(r = tcb->reseq; r != nil; r = next){
+ next = r->next;
+ freeblist(r->bp);
+ free(r);
+ }
+ tcb->reseq = nil;
+ tcb->nreseq = 0;
+ tcb->reseqlen = 0;
+ return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+ char *s;
+
+ for(; r != nil; r = r->next){
+ s = nil;
+ if(r->next == nil && r->seg.seq != n)
+ s = "hole/end";
+ else if(r->next == nil)
+ s = "end";
+ else if(r->seg.seq != n)
+ s = "hole";
+ if(s != nil)
+ netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+ n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+ n = r->seg.seq + r->seg.len;
+ }
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+ Reseq *rp, **rr;
+ int qmax;
+
rp = malloc(sizeof(Reseq));
if(rp == nil){
- freeblist(bp); /* bp always consumed by add_reseq */
+ freeblist(bp); /* bp always consumed by addreseq */
return 0;
}
@@ -2854,56 +3093,39 @@
rp->bp = bp;
rp->length = length;
- /* Place on reassembly list sorting by starting seq number */
- rp1 = tcb->reseq;
- if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
- rp->next = rp1;
- tcb->reseq = rp;
- if(rp->next != nil)
- tpriv->stats[OutOfOrder]++;
- return 0;
- }
+ tcb->reseqlen += length;
+ tcb->nreseq++;
- rqlen = 0;
- for(i = 0;; i++) {
- rqlen += rp1->length;
- if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
- rp->next = rp1->next;
- rp1->next = rp;
+ /* Place on reassembly list sorting by starting seq number */
+ for(rr = &tcb->reseq;; rr = &(*rr)->next)
+ if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+ rp->next = *rr;
+ *rr = rp;
+ tpriv->stats[Resequenced]++;
if(rp->next != nil)
tpriv->stats[OutOfOrder]++;
break;
}
- rp1 = rp1->next;
- }
- qmax = QMAX<<tcb->rcv.scale;
- if(rqlen > qmax){
- print("resequence queue > window: %d > %d\n", rqlen, qmax);
- i = 0;
- for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
- print("%#lux %#lux %#ux\n", rp1->seg.seq,
- rp1->seg.ack, rp1->seg.flags);
- if(i++ > 10){
- print("...\n");
- break;
- }
- }
- // delete entire reassembly queue; wait for retransmit.
- // - should we be smarter and only delete the tail?
- for(rp = tcb->reseq; rp != nil; rp = rp1){
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
-
- return -1;
+ qmax = tcb->window;
+ if(tcb->reseqlen > qmax){
+ netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqBytelim]++;
+ return dumpreseq(tcb);
}
+ qmax = tcb->window / tcb->mss; /* ~190 for qscale==2, 390 for qscale=3 */
+ if(tcb->nreseq > qmax){
+ netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqPktlim]++;
+ return dumpreseq(tcb);
+ }
+
return 0;
}
-void
+static void
getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
Reseq *rp;
@@ -2918,10 +3140,13 @@
*bp = rp->bp;
*length = rp->length;
+ tcb->nreseq--;
+ tcb->reseqlen -= rp->length;
+
free(rp);
}
-int
+static int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
ushort len;
@@ -2992,7 +3217,7 @@
return 0;
}
-void
+static void
tcpadvise(Proto *tcp, Block *bp, char *msg)
{
Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
v4tov6(source, h4->tcpsrc);
psource = nhgets(h4->tcpsport);
pdest = nhgets(h4->tcpdport);
- }
- else {
+ } else {
ipmove(dest, h6->tcpdst);
ipmove(source, h6->tcpsrc);
psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
/* Look for a connection */
qlock(tcp);
- for(p = tcp->conv; *p; p++) {
- s = *p;
+ for(p = tcp->conv; (s = *p) != nil; p++) {
tcb = (Tcpctl*)s->ptcl;
if(s->rport == pdest)
if(s->lport == psource)
@@ -3029,6 +3252,8 @@
if(tcb->state != Closed)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qlock(s);
qunlock(tcp);
switch(tcb->state){
@@ -3058,9 +3283,11 @@
}
/* called with c qlocked */
-char*
+static char*
tcpctl(Conv* c, char** f, int n)
{
+ if(n == 1 && strcmp(f[0], "close") == 0)
+ return tcpclose(c), nil;
if(n == 1 && strcmp(f[0], "hangup") == 0)
return tcphangup(c);
if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
return "unknown control request";
}
-int
+static int
tcpstats(Proto *tcp, char *buf, int len)
{
Tcppriv *priv;
@@ -3083,7 +3310,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -3096,7 +3323,7 @@
* of questionable validity so we try to use them only when we're
* up against the wall.
*/
-int
+static int
tcpgc(Proto *tcp)
{
Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
Tcpctl *tcb;
- n = natgc(tcp->ipproto);
+ n = 0;
ep = &tcp->conv[tcp->nc];
for(pp = tcp->conv; pp < ep; pp++) {
c = *pp;
@@ -3116,13 +3343,13 @@
switch(tcb->state){
case Syn_received:
if(NOW - tcb->time > 5000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
case Finwait2:
if(NOW - tcb->time > 5*60*1000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
@@ -3132,7 +3359,7 @@
return n;
}
-void
+static void
tcpsettimer(Tcpctl *tcb)
{
int x;
@@ -3141,9 +3368,9 @@
x = backoff(tcb->backoff) *
(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
- /* bounded twixt 1/2 and 64 seconds */
- if(x < 500/MSPTICK)
- x = 500/MSPTICK;
+ /* bounded twixt 0.3 and 64 seconds */
+ if(x < 300/MSPTICK)
+ x = 300/MSPTICK;
else if(x > (64000/MSPTICK))
x = 64000/MSPTICK;
tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
Fsproto(fs, tcp);
}
-void
+static void
tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
{
- if(rcvscale){
- tcb->rcv.scale = rcvscale & 0xff;
- tcb->snd.scale = sndscale & 0xff;
- tcb->window = QMAX<<tcb->snd.scale;
- qsetlimit(s->rq, tcb->window);
- } else {
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- tcb->window = QMAX;
- qsetlimit(s->rq, tcb->window);
- }
+ /*
+ * guess at reasonable queue sizes. there's no current way
+ * to know how many nic receive buffers we can safely tie up in the
+ * tcp stack, and we don't adjust our queues to maximize throughput
+ * and minimize bufferbloat. n.b. the offer (rcvscale) needs to be
+ * respected, but we still control our own buffer commitment by
+ * keeping a separate qscale.
+ */
+ tcb->rcv.scale = rcvscale & 0xff;
+ tcb->snd.scale = sndscale & 0xff;
+ tcb->qscale = rcvscale & 0xff;
+ if(rcvscale > Maxqscale)
+ tcb->qscale = Maxqscale;
+
+ if(rcvscale != tcb->rcv.scale)
+ netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+ tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+ tcb->window = QMAX<<tcb->qscale;
+ tcb->ssthresh = tcb->window;
+
+ /*
+ * it's important to set wq large enough to cover the full
+ * bandwidth-delay product. it's possible to be in loss
+ * recovery with a big window, and we need to keep sending
+ * into the inflated window. the difference can be huge
+ * for even modest (70ms) ping times.
+ */
+ qsetlimit(s->rq, QMAX<<tcb->qscale);
+ qsetlimit(s->wq, QMAX<<tcb->qscale);
+ tcprcvwin(s);
}
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -24,7 +24,6 @@
IP_UDPPROTO = 17,
UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
Udprxms = 200,
Udptickms = 100,
@@ -40,7 +39,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar udpproto; /* Protocol */
uchar udpplen[2]; /* Header plus data length */
uchar udpsrc[IPv4addrlen]; /* Ip source */
@@ -73,10 +72,10 @@
typedef struct Udpstats Udpstats;
struct Udpstats
{
- ulong udpInDatagrams;
+ uvlong udpInDatagrams;
ulong udpNoPorts;
ulong udpInErrors;
- ulong udpOutDatagrams;
+ uvlong udpOutDatagrams;
};
typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
typedef struct Udpcb Udpcb;
struct Udpcb
{
- QLock;
uchar headers;
};
@@ -125,7 +123,7 @@
static int
udpstate(Conv *c, char *state, int n)
{
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
c->inuse ? "Open" : "Closed",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
static void
udpcreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->rq = qopen(512*1024, Qmsg, 0, 0);
c->wq = qbypass(udpkick, c);
}
@@ -175,8 +173,6 @@
ucb = (Udpcb*)c->ptcl;
ucb->headers = 0;
-
- qunlock(c);
}
void
@@ -192,12 +188,13 @@
Udppriv *upriv;
Fs *f;
int version;
- Conv *rc;
+ Routehint *rh;
+ ushort csum;
upriv = c->p->priv;
f = c->p->f;
- netlog(c->p->f, Logudp, "udp: kick\n");
+// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */
if(bp == nil)
return;
@@ -219,21 +216,6 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
- bp->rp += 2+2; /* Ignore local port */
- break;
default:
rport = 0;
break;
@@ -240,18 +222,12 @@
}
if(ucb->headers) {
- if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
- ipcmp(laddr, IPnoaddr) == 0)
+ if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
version = V4;
else
version = V6;
} else {
- if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- version = V4;
- else
- version = V6;
+ version = convipvers(c);
}
dlen = blocklen(bp);
@@ -260,9 +236,6 @@
switch(version){
case V4:
bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
uh4 = (Udp4hdr *)(bp->rp);
ptcllen = dlen + UDP_UDPHDR_SZ;
uh4->Unused = 0;
@@ -274,7 +247,7 @@
v6tov4(uh4->udpdst, raddr);
hnputs(uh4->udpdport, rport);
v6tov4(uh4->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
v6tov4(uh4->udpdst, c->raddr);
hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
v6tov4(uh4->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh4->udpsport, c->lport);
hnputs(uh4->udplen, ptcllen);
uh4->udpcksum[0] = 0;
uh4->udpcksum[1] = 0;
- hnputs(uh4->udpcksum,
- ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh4->udpcksum, csum);
uh4->vihl = IP_VER4;
- ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput4(f, bp, 0, c->ttl, c->tos, rh);
break;
case V6:
+ /*
+ * using the v6 ip header to create pseudo header
+ * first then reset it to the normal ip header
+ */
bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
- // using the v6 ip header to create pseudo header
- // first then reset it to the normal ip header
uh6 = (Udp6hdr *)(bp->rp);
memset(uh6, 0, 8);
ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
ipmove(uh6->udpdst, raddr);
hnputs(uh6->udpdport, rport);
ipmove(uh6->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
ipmove(uh6->udpdst, c->raddr);
hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
ipmove(uh6->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh6->udpsport, c->lport);
hnputs(uh6->udplen, ptcllen);
uh6->udpcksum[0] = 0;
uh6->udpcksum[1] = 0;
- hnputs(uh6->udpcksum,
- ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh6->udpcksum, csum);
memset(uh6, 0, 8);
uh6->viclfl[0] = IP_VER6;
hnputs(uh6->len, ptcllen);
uh6->nextheader = IP_UDPPROTO;
- ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput6(f, bp, 0, c->ttl, c->tos, rh);
break;
default:
@@ -360,10 +336,8 @@
uh4 = (Udp4hdr*)(bp->rp);
version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
- /*
- * Put back pseudo header for checksum
- * (remember old values for icmpnoconv())
- */
+ /* Put back pseudo header for checksum
+ * (remember old values for icmpnoconv()) */
switch(version) {
case V4:
ottl = uh4->Unused;
@@ -423,7 +397,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.udpNoPorts++;
qunlock(udp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
icmpnoconv(f, bp);
break;
case V6:
- icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+ icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
break;
default:
panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
if(c->state == Announced){
if(ucb->headers == 0){
/* create a new conversation */
- if(ipforme(f, laddr) != Runi) {
- switch(version){
- case V4:
- v4tov6(laddr, ifc->lifc->local);
- break;
- case V6:
- ipmove(laddr, ifc->lifc->local);
- break;
- default:
- panic("udpiput3: version %d", version);
- }
- }
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
c = Fsnewcall(c, raddr, rport, laddr, lport, version);
if(c == nil){
qunlock(udp);
@@ -507,33 +471,21 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
}
- if(bp->next)
- bp = concatblock(bp);
-
if(qfull(c->rq)){
- qunlock(c);
- netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
- return;
+ } else {
+ qpass(c->rq, concatblock(bp));
}
-
- qpass(c->rq, bp);
qunlock(c);
}
@@ -545,11 +497,13 @@
ucb = (Udpcb*)c->ptcl;
if(n == 1){
- if(strcmp(f[0], "oldheaders") == 0){
- ucb->headers = 6;
+ if(strcmp(f[0], "hangup") == 0){
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 7;
+ }
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
}
}
@@ -564,34 +518,25 @@
uchar source[IPaddrlen], dest[IPaddrlen];
ushort psource, pdest;
Conv *s, **p;
- int version;
h4 = (Udp4hdr*)(bp->rp);
- version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+ h6 = (Udp6hdr*)(bp->rp);
- switch(version) {
- case V4:
+ if((h4->vihl&0xF0)==IP_VER4) {
v4tov6(dest, h4->udpdst);
v4tov6(source, h4->udpsrc);
psource = nhgets(h4->udpsport);
pdest = nhgets(h4->udpdport);
- break;
- case V6:
- h6 = (Udp6hdr*)(bp->rp);
+ } else {
ipmove(dest, h6->udpdst);
ipmove(source, h6->udpsrc);
psource = nhgets(h6->udpsport);
pdest = nhgets(h6->udpdport);
- break;
- default:
- panic("udpadvise: version %d", version);
- return; /* to avoid a warning */
}
/* Look for a connection */
qlock(udp);
- for(p = udp->conv; *p; p++) {
- s = *p;
+ for(p = udp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
Udppriv *upriv;
upriv = udp->priv;
- return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+ return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+ "InErrors: %lud\nOutDatagrams: %llud\n",
upriv->ustats.udpInDatagrams,
upriv->ustats.udpNoPorts,
upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
upriv->ustats.udpOutDatagrams);
}
-int
-udpgc(Proto *udp)
-{
- return natgc(udp->ipproto);
-}
-
void
udpinit(Fs *fs)
{
@@ -647,7 +587,6 @@
udp->rcv = udpiput;
udp->advise = udpadvise;
udp->stats = udpstats;
- udp->gc = udpgc;
udp->ipproto = IP_UDPPROTO;
udp->nc = Nchans;
udp->ptclsize = sizeof(Udpcb);
diff -u a/os/ip//arp.c b/os/ip//arp.c
--- a/os/ip//arp.c
+++ b/os/ip//arp.c
@@ -47,7 +47,8 @@
#define haship(s) ((s)[IPaddrlen-1]%NHASH)
-extern int ReTransTimer = RETRANS_TIMER;
+int ReTransTimer = RETRANS_TIMER;
+
static void rxmitproc(void *v);
void
@@ -57,145 +58,121 @@
f->arp->f = f;
f->arp->rxmt = nil;
f->arp->dropf = f->arp->dropl = nil;
- kproc("rxmitproc", rxmitproc, f->arp, 0);
+ kproc("rxmitproc", rxmitproc, f->arp);
}
-/*
- * create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
{
- uint t;
- Block *next, *xp;
- Arpent *a, *e, *f, **l;
- Medium *m = ifc->m;
- int empty;
+ Block *next;
- /* find oldest entry */
- e = &arp->cache[NCACHE];
- a = arp->cache;
- t = a->utime;
- for(f = a; f < e; f++){
- if(f->utime < t){
- t = f->utime;
- a = f;
- }
+ while(bp != nil){
+ next = bp->list;
+ freeblist(bp);
+ bp = next;
}
+}
- /* dump waiting packets */
- xp = a->hold;
- a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+ Arpent **l;
- if(isv4(a->ip)){
- while(xp){
- next = xp->list;
- freeblist(xp);
- xp = next;
+ for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+ if(*l == a){
+ *l = a->nextrxt;
+ break;
}
}
- else { // queue icmp unreachable for rxmitproc later on, w/o arp lock
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
+ a->nextrxt = nil;
+ return l;
+}
- for(next = xp->list; next; next = next->list)
- xp = next;
- arp->dropl = xp;
- wakeup(&arp->rxmtq);
- }
- }
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent **l;
+ Block *bp;
/* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
+ for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+ if(*l == a){
*l = a->hash;
break;
}
- l = &f->hash;
}
+ a->hash = nil;
- /* insert into new chain */
- l = &arp->hash[haship(ip)];
- a->hash = *l;
- *l = a;
+ /* dump waiting packets */
+ bp = a->hold;
+ a->hold = nil;
+ if(isv4(a->ip))
+ freeblistchain(bp);
+ else {
+ rxmtunchain(arp, a);
- memmove(a->ip, ip, sizeof(a->ip));
- a->utime = NOW;
- a->ctime = 0;
- a->type = m;
+ /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(bp != nil){
+ if(arp->dropf == nil)
+ arp->dropf = bp;
+ else
+ arp->dropl->list = bp;
+ arp->dropl = a->last;
- a->rtime = NOW + ReTransTimer;
- a->rxtsrem = MAX_MULTICAST_SOLICIT;
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
-
- /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
- if(!ipismulticast(a->ip) && addrxt){
- l = &arp->rxmt;
- empty = (*l==nil);
-
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
+ if(bp == arp->dropf)
+ wakeup(&arp->rxmtq);
}
- for(f = *l; f; f = f->nextrxt){
- l = &f->nextrxt;
- }
- *l = a;
- if(empty)
- wakeup(&arp->rxmtq);
}
+ a->last = nil;
- a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
- return a;
-}
+ a->state = 0;
+ a->rxtsrem = 0;
-/* called with arp qlocked */
+ a->utime = 0;
+ a->ctime = 0;
-void
-cleanarpent(Arp *arp, Arpent *a)
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ * create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
{
- Arpent *f, **l;
+ Arpent *a, *e, *f, **l;
+ ulong t;
- a->utime = 0;
- a->ctime = 0;
- a->type = 0;
- a->state = 0;
-
- /* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
- *l = a->hash;
- break;
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
}
- l = &f->hash;
}
+ cleanarpent(arp, a);
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
+ ipmove(a->ip, ip);
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ return a;
}
+
/*
* fill in the media address if we have it. Otherwise return an
* Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
{
int hash;
Arpent *a;
- Medium *type = ifc->m;
uchar v6ip[IPaddrlen];
if(version == V4){
@@ -217,30 +193,28 @@
qlock(arp);
hash = haship(ip);
- for(a = arp->hash[hash]; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
- if(type == a->type)
+ for(a = arp->hash[hash]; a != nil; a = a->hash){
+ if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
break;
}
-
if(a == nil){
- a = newarp6(arp, ip, ifc, (version != V4));
+ a = newarpent(arp, ip, ifc);
a->state = AWAIT;
}
a->utime = NOW;
if(a->state == AWAIT){
if(bp != nil){
- if(a->hold)
- a->last->list = bp;
- else
+ bp->list = nil;
+ if(a->hold == nil)
a->hold = bp;
+ else
+ a->last->list = bp;
a->last = bp;
- bp->list = nil;
}
return a; /* return with arp qlocked */
}
- memmove(mac, a->mac, a->type->maclen);
+ memmove(mac, a->mac, ifc->m->maclen);
/* remove old entries */
if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
{
Block *bp;
- Arpent *f, **l;
- if(!isv4(a->ip)){
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
memmove(a->mac, mac, type->maclen);
- a->type = type;
+ if(a->state == AWAIT && !isv4(a->ip)){
+ rxmtunchain(arp, a);
+ a->rxtsrem = 0;
+ }
a->state = AOK;
- a->utime = NOW;
+ a->ctime = a->utime = NOW;
bp = a->hold;
- a->hold = nil;
+ a->hold = a->last = nil;
qunlock(arp);
return bp;
}
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
{
- Arp *arp;
- Route *r;
- Arpent *a, *f, **l;
- Ipifc *ifc;
- Medium *type;
- Block *bp, *next;
uchar v6ip[IPaddrlen];
+ Block *bp, *next;
+ Arpent *a;
+ Route *r;
+ Arp *arp;
- arp = fs->arp;
+ if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+ return -1;
- if(n != 6){
-// print("arp: len = %d\n", n);
- return;
- }
-
switch(version){
case V4:
- r = v4lookup(fs, ip, nil);
+ r = v4lookup(fs, ip, ia, nil);
v4tov6(v6ip, ip);
ip = v6ip;
break;
case V6:
- r = v6lookup(fs, ip, nil);
+ r = v6lookup(fs, ip, ia, nil);
break;
default:
panic("arpenter: version %d", version);
- return; /* to supress warnings */
+ return -1; /* to supress warnings */
}
- if(r == nil){
-// print("arp: no route for entry\n");
- return;
- }
+ if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+ return -1;
- ifc = r->ifc;
- type = ifc->m;
-
+ arp = fs->arp;
qlock(arp);
- for(a = arp->hash[haship(ip)]; a; a = a->hash){
- if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+ if(a->ifc != ifc || a->ifcid != ifc->ifcid)
continue;
-
if(ipcmp(a->ip, ip) == 0){
- a->state = AOK;
- memmove(a->mac, mac, type->maclen);
-
- if(version == V6){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
- bp = a->hold;
- a->hold = nil;
if(version == V4)
ip += IPv4off;
- a->utime = NOW;
- a->ctime = a->utime;
- qunlock(arp);
-
- while(bp){
+ bp = arpresolve(arp, a, ifc->m, mac); /* unlocks arp */
+ for(; bp != nil; bp = next){
next = bp->list;
- if(ifc != nil){
- if(waserror()){
- runlock(ifc);
- nexterror();
- }
- rlock(ifc);
- if(ifc->m != nil)
- ifc->m->bwrite(ifc, bp, version, ip);
- else
- freeb(bp);
- runlock(ifc);
- poperror();
- } else
- freeb(bp);
- bp = next;
+ bp->list = nil;
+ if(waserror()){
+ freeblistchain(next);
+ break;
+ }
+ ipifcoput(ifc, bp, version, ip);
+ poperror();
}
- return;
+ return 1;
}
}
if(refresh == 0){
- a = newarp6(arp, ip, ifc, 0);
+ a = newarpent(arp, ip, ifc);
a->state = AOK;
- a->type = type;
- a->ctime = NOW;
- memmove(a->mac, mac, type->maclen);
+ a->ctime = a->utime = NOW;
+ memmove(a->mac, mac, n);
}
-
qunlock(arp);
+
+ return refresh == 0;
}
int
@@ -401,13 +325,12 @@
arpwrite(Fs *fs, char *s, int len)
{
int n;
- Route *r;
Arp *arp;
- Block *bp;
- Arpent *a, *fl, **l;
+ Arpent *a, *x;
Medium *m;
- char *f[4], buf[256];
- uchar ip[IPaddrlen], mac[MAClen];
+ Ipifc *ifc;
+ char *f[5], buf[256];
+ uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
arp = fs->arp;
@@ -420,7 +343,7 @@
if(len > 0 && buf[len-1] == '\n')
buf[len-1] = 0;
- n = getfields(buf, f, 4, 1, " ");
+ n = getfields(buf, f, nelem(f), 1, " ");
if(strcmp(f[0], "flush") == 0){
qlock(arp);
for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
memset(a->ip, 0, sizeof(a->ip));
memset(a->mac, 0, sizeof(a->mac));
a->hash = nil;
+ a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
a->state = 0;
+ a->rxtsrem = 0;
+ a->ctime = 0;
a->utime = 0;
- while(a->hold != nil){
- bp = a->hold->list;
- freeblist(a->hold);
- a->hold = bp;
- }
+ freeblistchain(a->hold);
+ a->hold = a->last = nil;
}
memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+ freeblistchain(arp->dropf);
+ arp->dropf = arp->dropl = nil;
arp->rxmt = nil;
- arp->dropf = nil;
- arp->dropl = nil;
qunlock(arp);
} else if(strcmp(f[0], "add") == 0){
switch(n){
@@ -446,64 +370,53 @@
default:
error(Ebadarg);
case 3:
- parseip(ip, f[1]);
- if(isv4(ip))
- r = v4lookup(fs, ip+IPv4off, nil);
- else
- r = v6lookup(fs, ip, nil);
- if(r == nil)
- error("Destination unreachable");
- m = r->ifc->m;
- n = parsemac(mac, f[2], m->maclen);
+ if(parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
case 4:
m = ipfindmedium(f[1]);
- if(m == nil)
+ if(m == nil || m->maclen == 0)
error(Ebadarp);
- parseip(ip, f[2]);
- n = parsemac(mac, f[3], m->maclen);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
+ case 5:
+ m = ipfindmedium(f[1]);
+ if(m == nil || m->maclen == 0)
+ error(Ebadarp);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ if(parseip(ia, f[4]) == -1)
+ error(Ebadip);
+ break;
}
-
- if(m->ares == nil)
- error(Ebadarp);
-
- m->ares(fs, V6, ip, mac, n, 0);
+ if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+ error("no interface");
+ rlock(ifc);
+ if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+ runlock(ifc);
+ error("destination unreachable");
+ }
+ runlock(ifc);
} else if(strcmp(f[0], "del") == 0){
- if(n != 2)
+ if (n != 2)
error(Ebadarg);
-
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
qlock(arp);
-
- l = &arp->hash[haship(ip)];
- for(a = *l; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
- *l = a->hash;
- break;
- }
- l = &a->hash;
+ for(a = arp->hash[haship(ip)]; a != nil; a = x){
+ x = a->hash;
+ if(ipcmp(ip, a->ip) == 0)
+ cleanarpent(arp, a);
}
-
- if(a){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(fl = *l; fl; fl = fl->nextrxt){
- if(fl == a){
- *l = a->nextrxt;
- break;
- }
- l = &fl->nextrxt;
- }
-
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
- memset(a->ip, 0, sizeof(a->ip));
- memset(a->mac, 0, sizeof(a->mac));
- }
qunlock(arp);
} else
error(Ebadarp);
@@ -511,13 +424,6 @@
return len;
}
-enum
-{
- Alinelen= 90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
static void
convmac(char *p, uchar *mac, int n)
{
@@ -526,136 +432,136 @@
}
int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
{
+ char mac[2*MAClen+1], *state, *mname, *p;
+ uchar ip[IPaddrlen], ia[IPaddrlen];
+ Ipifc *ifc;
Arpent *a;
- int n;
- char mac[2*MAClen+1];
+ long n, o;
- if(offset % Alinelen)
- return 0;
-
- offset = offset/Alinelen;
- len = len/Alinelen;
-
- n = 0;
+ p = s;
+ o = -offset;
for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
- if(a->state == 0)
+ if(a->state == 0 || (ifc = a->ifc) == nil)
continue;
- if(offset > 0){
- offset--;
+
+ rlock(ifc);
+ qlock(arp);
+ state = arpstate[a->state];
+ ipmove(ip, a->ip);
+ if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+ qunlock(arp);
+ runlock(ifc);
continue;
}
- len--;
- qlock(arp);
- convmac(mac, a->mac, a->type->maclen);
- n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ mname = ifc->m->name;
+ convmac(mac, a->mac, ifc->m->maclen);
qunlock(arp);
+ runlock(ifc);
+
+ n = snprint(up->genbuf, sizeof up->genbuf,
+ "%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+ mname, state, ip, mac, ia);
+ o += n;
+ if(o <= 0)
+ continue;
+ if(n > len)
+ break;
+ memmove(p, up->genbuf, n);
+ len -= n;
+ p += n;
}
- return n;
+ return p - s;
}
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
{
- uint sflag;
- Block *next, *xp;
- Arpent *a, *b, **l;
- Fs *f;
- uchar ipsrc[IPaddrlen];
- Ipifc *ifc = nil;
- long nrxt;
+ uchar targ[IPaddrlen], src[IPaddrlen];
+ Arpent **l;
- qlock(arp);
- f = arp->f;
+ a->ctime = NOW;
+ if(a->rxtsrem == 0)
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ else
+ a->rxtsrem--;
- a = arp->rxmt;
- if(a==nil){
- nrxt = 0;
- goto dodrops; //return nrxt;
- }
- nrxt = a->rtime - NOW;
- if(nrxt > 3*ReTransTimer/4)
- goto dodrops; //return nrxt;
+ /* put on end of re-transmit chain */
+ for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+ ;
+ *l = a;
- for(; a; a = a->nextrxt){
- ifc = a->ifc;
- assert(ifc != nil);
- if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
- xp = a->hold;
- a->hold = nil;
+ if(l == &f->arp->rxmt)
+ wakeup(&f->arp->rxmtq);
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
- }
+ /* try to use source address of original packet */
+ ipmove(targ, a->ip);
+ if(a->last != nil){
+ ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+ arprelease(f->arp, a);
- cleanarpent(arp, a);
- }
- else
- break;
+ if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+ goto send;
+ } else {
+ arprelease(f->arp, a);
}
- if(a == nil)
- goto dodrops;
+ if(!ipv6local(ifc, src, 0, targ))
+ return;
+send:
+ if(!waserror()){
+ icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+ poperror();
+ }
+}
+static void
+rxmitsols(Arp *arp)
+{
+ Block *next, *bp;
+ Arpent *a;
+ Ipifc *ifc;
+ Route *r;
- qunlock(arp); /* for icmpns */
- if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
- icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
-
- runlock(ifc);
- qlock(arp);
-
- /* put to the end of re-transmit chain */
- l = &arp->rxmt;
- for(b = *l; b; b = b->nextrxt){
- if(b == a){
- *l = a->nextrxt;
- break;
+ qlock(arp);
+ while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+ if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+ if(a->ifcid == ifc->ifcid){
+ ndpsendsol(arp->f, ifc, a); /* unlocks arp */
+ runlock(ifc);
+ qlock(arp);
+ continue;
+ }
+ runlock(ifc);
}
- l = &b->nextrxt;
+ cleanarpent(arp, a);
}
- for(b = *l; b; b = b->nextrxt){
- l = &b->nextrxt;
- }
- *l = a;
- a->rxtsrem--;
- a->nextrxt = nil;
- a->rtime = NOW + ReTransTimer;
-
- a = arp->rxmt;
- if(a==nil)
- nrxt = 0;
- else
- nrxt = a->rtime - NOW;
-
-dodrops:
- xp = arp->dropf;
- arp->dropf = nil;
- arp->dropl = nil;
+ bp = arp->dropf;
+ arp->dropf = arp->dropl = nil;
qunlock(arp);
- for(; xp; xp = next){
- next = xp->list;
- icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+ for(; bp != nil; bp = next){
+ next = bp->list;
+ bp->list = nil;
+ r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+ if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+ if(!waserror()){
+ icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+ poperror();
+ }
+ runlock(ifc);
+ }
+ freeblist(bp);
}
-
- return nrxt;
-
}
static int
rxready(void *v)
{
- Arp *arp = (Arp *) v;
- int x;
+ Arp *arp = (Arp *)v;
- x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
- return x;
+ return arp->rxmt != nil || arp->dropf != nil;
}
static void
@@ -662,20 +568,15 @@
rxmitproc(void *v)
{
Arp *arp = v;
- long wakeupat;
arp->rxmitp = up;
- //print("arp rxmitproc started\n");
if(waserror()){
- arp->rxmitp = 0;
+ arp->rxmitp = nil;
pexit("hangup", 1);
}
for(;;){
- wakeupat = rxmitsols(arp);
- if(wakeupat == 0)
- sleep(&arp->rxmtq, rxready, v);
- else if(wakeupat > ReTransTimer/4)
- tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ sleep(&arp->rxmtq, rxready, v);
+ rxmitsols(arp);
+ tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
}
}
-
diff -u a/os/ip//devip.c b/os/ip//devip.c
--- a/os/ip//devip.c
+++ b/os/ip//devip.c
@@ -14,7 +14,6 @@
Qbootp,
Qndb,
Qiproute,
- Qiprouter,
Qipselftab,
Qlog,
@@ -43,11 +42,11 @@
Maskproto= (1<<Logproto)-1,
Shiftproto= Logtype + Logconv,
- Nfs= 32,
+ Nfs= 128,
};
-#define TYPE(x) ( ((u32)(x).path) & Masktype )
-#define CONV(x) ( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) ( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) ( ((ulong)(x).path) & Masktype )
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
static char network[] = "network";
@@ -58,8 +57,7 @@
extern void nullmediumlink(void);
extern void pktmediumlink(void);
-static long ndbwrite(Fs*, char*, ulong, int);
-extern void closeconv(Conv*);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
static int
ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
devdir(c, q, "stats", 0, network, 0444, dp);
return 1;
- }
+ }
return -1;
}
@@ -144,11 +142,10 @@
return -1;
case Qarp:
p = "arp";
+ prot = 0664;
break;
case Qbootp:
p = "bootp";
- if(bootp == nil)
- return 0;
break;
case Qndb:
p = "ndb";
@@ -157,14 +154,12 @@
break;
case Qiproute:
p = "iproute";
+ prot = 0664;
break;
case Qipselftab:
p = "ipselftab";
prot = 0444;
break;
- case Qiprouter:
- p = "iprouter";
- break;
case Qlog:
p = "log";
break;
@@ -188,7 +183,7 @@
case Qtopdir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -206,19 +201,18 @@
case Qndb:
case Qlog:
case Qiproute:
- case Qiprouter:
case Qipselftab:
return ip1gen(c, TYPE(c->qid), dp);
case Qprotodir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
if(s < f->p[PROTO(c->qid)]->ac) {
cv = f->p[PROTO(c->qid)]->conv[s];
- sprint(up->genbuf, "%d", s);
+ snprint(up->genbuf, sizeof up->genbuf, "%d", s);
mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
return 1;
@@ -262,45 +256,14 @@
fmtinstall('M', eipfmt);
}
-static Fs*
-ipgetfs(int dev)
-{
- extern void (*ipprotoinit[])(Fs*);
- Fs *f;
- int i;
-
- if(dev >= Nfs)
- return nil;
-
- qlock(&fslock);
- if(ipfs[dev] == nil){
- f = smalloc(sizeof(Fs));
- ip_init(f);
- arpinit(f);
- netloginit(f);
- for(i = 0; ipprotoinit[i]; i++)
- ipprotoinit[i](f);
- f->dev = dev;
- ipfs[dev] = f;
- }
- qunlock(&fslock);
-
- return ipfs[dev];
-}
-
IPaux*
newipaux(char *owner, char *tag)
{
IPaux *a;
- int n;
a = smalloc(sizeof(*a));
kstrdup(&a->owner, owner);
- memset(a->tag, ' ', sizeof(a->tag));
- n = strlen(tag);
- if(n > sizeof(a->tag))
- n = sizeof(a->tag);
- memmove(a->tag, tag, n);
+ strncpy(a->tag, tag, sizeof(a->tag));
return a;
}
@@ -310,13 +273,29 @@
ipattach(char* spec)
{
Chan *c;
- int dev;
+ ulong dev;
- dev = atoi(spec);
+ dev = strtoul(spec, nil, 10);
if(dev >= Nfs)
- error("bad specification");
+ error(Enodev);
- ipgetfs(dev);
+ qlock(&fslock);
+ if(ipfs[dev] == nil){
+ extern void (*ipprotoinit[])(Fs*);
+ Fs *f;
+ int i;
+
+ f = smalloc(sizeof(Fs));
+ ip_init(f);
+ arpinit(f);
+ netloginit(f);
+ for(i = 0; ipprotoinit[i]; i++)
+ ipprotoinit[i](f);
+ f->dev = dev;
+ ipfs[dev] = f;
+ }
+ qunlock(&fslock);
+
c = devattach('I', spec);
mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
c->dev = dev;
@@ -327,7 +306,7 @@
}
static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
{
IPaux *a = c->aux;
Walkqid* w;
@@ -338,8 +317,9 @@
return w;
}
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
{
return devstat(c, db, n, nil, 0, ipgen);
}
@@ -360,7 +340,7 @@
};
static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
{
Conv *cv, *nc;
Proto *p;
@@ -375,7 +355,7 @@
default:
break;
case Qndb:
- if(omode & (OWRITE|OTRUNC) && !iseve())
+ if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
error(Eperm);
if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
f->ndb[0] = 0;
@@ -383,10 +363,10 @@
case Qlog:
netlogopen(f);
break;
- case Qiprouter:
- iprouteropen(f);
- break;
case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve())
+ error(Eperm);
break;
case Qtopdir:
case Qprotodir:
@@ -412,13 +392,8 @@
case Qclone:
p = f->p[PROTO(c->qid)];
qlock(p);
- if(waserror()){
- qunlock(p);
- nexterror();
- }
cv = Fsprotoclone(p, ATTACHER(c));
qunlock(p);
- poperror();
if(cv == nil) {
error(Enodev);
break;
@@ -437,15 +412,12 @@
qunlock(p);
nexterror();
}
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
- }
- cv->inuse++;
- if(cv->inuse == 1){
+ if(++cv->inuse == 1){
kstrdup(&cv->owner, ATTACHER(c));
cv->perm = 0660;
}
@@ -455,24 +427,26 @@
break;
case Qlisten:
cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
-
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
}
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
if(cv->state != Announced)
error("not announced");
+ cv->inuse++;
+ qunlock(cv);
+ poperror();
if(waserror()){
closeconv(cv);
nexterror();
}
- qlock(cv);
- cv->inuse++;
- qunlock(cv);
nc = nil;
while(nc == nil) {
@@ -494,7 +468,6 @@
if(nc != nil){
cv->incall = nc->next;
mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
- kstrdup(&cv->owner, ATTACHER(c));
}
qunlock(cv);
@@ -511,13 +484,25 @@
return c;
}
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
{
- Dir *d;
+ error(Eperm);
+ return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+ error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+ Dir *dir;
Conv *cv;
Fs *f;
- Proto *p;
f = ipfs[c->dev];
switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
break;
}
- d = smalloc(sizeof(*d)+n);
+ dir = smalloc(sizeof(Dir)+n);
if(waserror()){
- free(d);
+ free(dir);
nexterror();
}
- n = convM2D(dp, n, d, (char*)&d[1]);
+ n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
if(n == 0)
error(Eshortstat);
- p = f->p[PROTO(c->qid)];
- cv = p->conv[CONV(c->qid)];
- if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
+ }
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
error(Eperm);
- if(!emptystr(d->uid))
- kstrdup(&cv->owner, d->uid);
- if(d->mode != ~0UL)
- cv->perm = d->mode & 0777;
+ if(!emptystr(dir->uid)){
+ if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+ error(Eperm);
+ kstrdup(&cv->owner, dir->uid);
+ }
+ if(dir->mode != ~0UL)
+ cv->perm = dir->mode & 0666;
+ qunlock(cv);
poperror();
- free(d);
+
+ free(dir);
+ poperror();
+
return n;
}
-extern void
+void
closeconv(Conv *cv)
{
Conv *nc;
@@ -564,7 +561,7 @@
}
/* close all incoming calls since no listen will ever happen */
- for(nc = cv->incall; nc; nc = cv->incall){
+ for(nc = cv->incall; nc != nil; nc = cv->incall){
cv->incall = nc->next;
closeconv(nc);
}
@@ -576,9 +573,9 @@
while((mp = cv->multi) != nil)
ipifcremmulti(cv, mp->ma, mp->ia);
- cv->r = nil;
- cv->rgen = 0;
- cv->p->close(cv);
+ if(cv->p->close != nil)
+ (*cv->p->close)(cv);
+
cv->state = Idle;
qunlock(cv);
}
@@ -596,10 +593,6 @@
if(c->flag & COPEN)
netlogclose(f);
break;
- case Qiprouter:
- if(c->flag & COPEN)
- iprouterclose(f);
- break;
case Qdata:
case Qctl:
case Qerr:
@@ -620,13 +613,13 @@
Statelen= 32*1024,
};
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
{
Conv *c;
Proto *x;
char *buf, *p;
- s32 rv;
+ long rv;
Fs *f;
ulong offset = off;
@@ -648,21 +641,22 @@
return readstr(offset, a, n, f->ndb);
case Qiproute:
return routeread(f, a, offset, n);
- case Qiprouter:
- return iprouterread(f, a, n);
case Qipselftab:
return ipselftabread(f, a, offset, n);
case Qlog:
return netlogread(f, a, offset, n);
case Qctl:
- sprint(up->genbuf, "%ud", CONV(ch->qid));
- return readstr(offset, p, n, up->genbuf);
+ buf = smalloc(16);
+ snprint(buf, 16, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
case Qremote:
buf = smalloc(Statelen);
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->remote == nil) {
- sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
} else {
(*x->remote)(c, buf, Statelen-2);
}
@@ -674,7 +668,7 @@
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->local == nil) {
- sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
} else {
(*x->local)(c, buf, Statelen-2);
}
@@ -711,7 +705,7 @@
}
static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
{
Conv *c;
Proto *x;
@@ -740,7 +734,7 @@
/*
* set a local port making sure the quad of raddr,rport,laddr,lport is unique
*/
-static char*
+char*
setluniqueport(Conv* c, int lport)
{
Proto *p;
@@ -771,51 +765,63 @@
}
/*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+ int x;
+
+ for(x = 0; x < p->nc && p->conv[x]; x++)
+ if(p->conv[x]->lport == lport)
+ return 1;
+ return 0;
+}
+
+/*
* pick a local port and set it
*/
-extern void
+char *
setlport(Conv* c)
{
Proto *p;
- ushort *pp;
- int x, found;
+ int i, port;
p = c->p;
- if(c->restricted)
- pp = &p->nextrport;
- else
- pp = &p->nextport;
qlock(p);
- for(;;(*pp)++){
+ if(c->restricted){
+ /* Restricted ports cycle between 600 and 1024. */
+ for(i=0; i<1024-600; i++){
+ if(p->nextrport >= 1024 || p->nextrport < 600)
+ p->nextrport = 600;
+ port = p->nextrport++;
+ if(!lportinuse(p, port))
+ goto chosen;
+ }
+ }else{
/*
- * Fsproto initialises p->nextport to 0 and the restricted
- * ports (p->nextrport) to 600.
- * Restricted ports must lie between 600 and 1024.
- * For the initial condition or if the unrestricted port number
- * has wrapped round, select a random port between 5000 and 1<<15
- * to start at.
+ * Unrestricted ports are chosen randomly
+ * between 2^15 and 2^16. There are at most
+ * 4*Nchan = 4096 ports in use at any given time,
+ * so even in the worst case, a random probe has a
+ * 1 - 4096/2^15 = 87% chance of success.
+ * If 64 successive probes fail, there is a bug somewhere
+ * (or a once in 10^58 event has happened, but that's
+ * less likely than a venti collision).
*/
- if(c->restricted){
- if(*pp >= 1024)
- *pp = 600;
+ for(i=0; i<64; i++){
+ port = (1<<15) + nrand(1<<15);
+ if(!lportinuse(p, port))
+ goto chosen;
}
- else while(*pp < 5000)
- *pp = nrand(1<<15);
-
- found = 0;
- for(x = 0; x < p->nc; x++){
- if(p->conv[x] == nil)
- break;
- if(p->conv[x]->lport == *pp){
- found = 1;
- break;
- }
- }
- if(found == 0)
- break;
}
- c->lport = (*pp)++;
qunlock(p);
+ return "no ports available";
+
+chosen:
+ c->lport = port;
+ qunlock(p);
+ return nil;
}
/*
@@ -822,7 +828,7 @@
* set a local address and port from a string of the form
* [address!]port[!r]
*/
-static char*
+char*
setladdrport(Conv* c, char* str, int announcing)
{
char *p;
@@ -830,8 +836,6 @@
ushort lport;
uchar addr[IPaddrlen];
- rv = nil;
-
/*
* ignore restricted part if it exists. it's
* meaningless on local ports.
@@ -854,8 +858,9 @@
if(strcmp(str, "*") == 0)
ipmove(c->laddr, IPnoaddr);
else {
- parseip(addr, str);
- if(ipforme(c->p->f, addr))
+ if(parseip(addr, str) == -1)
+ return Ebadip;
+ if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
ipmove(c->laddr, addr);
else
return "not a local IP address";
@@ -869,9 +874,13 @@
return setluniqueport(c, 0);
}
- lport = atoi(p);
+ str = p;
+ lport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
if(lport <= 0)
- setlport(c);
+ rv = setlport(c);
else
rv = setluniqueport(c, lport);
return rv;
@@ -886,13 +895,17 @@
if(p == nil)
return "malformed address";
*p++ = 0;
- parseip(c->raddr, str);
- c->rport = atoi(p);
- p = strchr(p, '!');
- if(p){
- if(strstr(p, "!r") != nil)
- c->restricted = 1;
- }
+ if(parseip(c->raddr, str) == -1)
+ return Ebadip;
+
+ str = p;
+ c->rport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
+ if(strstr(p, "!r") != nil)
+ c->restricted = 1;
+
return nil;
}
@@ -912,7 +925,9 @@
if(p != nil)
return p;
setladdr(c);
- setlport(c);
+ p = setlport(c);
+ if (p != nil)
+ return p;
break;
case 3:
p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
return p;
}
- if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- c->ipversion = V4;
- else
- c->ipversion = V6;
+ c->ipversion = convipvers(c);
return nil;
}
@@ -978,10 +988,11 @@
c->rport = 0;
switch(argc){
default:
- return "bad args to announce";
+ break;
case 2:
return setladdrport(c, argv[1], 1);
}
+ return "bad args to announce";
}
/*
@@ -1028,10 +1039,11 @@
{
switch(argc){
default:
- return "bad args to bind";
+ break;
case 2:
return setladdrport(c, argv[1], 0);
}
+ return "bad args to bind";
}
static void
@@ -1042,7 +1054,7 @@
if(x->bind == nil)
p = Fsstdbind(c, cb->f, cb->nf);
else
- p = x->bind(c, cb->f, cb->nf);
+ p = (*x->bind)(c, cb->f, cb->nf);
if(p != nil)
error(p);
}
@@ -1065,8 +1077,8 @@
c->ttl = atoi(cb->f[1]);
}
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
{
Conv *c;
Proto *x;
@@ -1075,6 +1087,7 @@
uchar ia[IPaddrlen], ma[IPaddrlen];
Fs *f;
char *a;
+ ulong offset = off;
a = v;
f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
netlogctl(f, a, n);
return n;
case Qndb:
- return ndbwrite(f, a, off, n);
+ return ndbwrite(f, a, offset, n);
+ break;
case Qctl:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
if(cb->nf == 2){
if(!ipismulticast(c->raddr))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcaddmulti(c, c->raddr, ia);
} else {
- parseip(ma, cb->f[2]);
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
if(!ipismulticast(ma))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
ipifcaddmulti(c, ma, ia);
}
} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
error("remmulti needs interface address");
if(!ipismulticast(c->raddr))
error("remmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcremmulti(c, c->raddr, ia);
} else if(x->ctl != nil) {
- p = x->ctl(c, cb->f, cb->nf);
+ p = (*x->ctl)(c, cb->f, cb->nf);
if(p != nil)
error(p);
} else
@@ -1160,13 +1177,12 @@
return n;
}
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
{
Conv *c;
Proto *x;
Fs *f;
- int n;
switch(TYPE(ch->qid)){
case Qdata:
@@ -1177,11 +1193,7 @@
if(c->wq == nil)
error(Eperm);
- if(bp->next)
- bp = concatblock(bp);
- n = BLEN(bp);
- qbwrite(c->wq, bp);
- return n;
+ return qbwrite(c->wq, bp);
default:
return devbwrite(ch, bp, offset);
}
@@ -1198,13 +1210,13 @@
ipwalk,
ipstat,
ipopen,
- devcreate,
+ ipcreate,
ipclose,
ipread,
ipbread,
ipwrite,
ipbwrite,
- devremove,
+ ipremove,
ipwstat,
};
@@ -1224,12 +1236,15 @@
p->qid.type = QTDIR;
p->qid.path = QID(f->np, 0, Qprotodir);
+ if(p->nc > Maskconv+1){
+ print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+ p->nc = Maskconv+1;
+ }
p->conv = malloc(sizeof(Conv*)*(p->nc+1));
if(p->conv == nil)
panic("Fsproto");
p->x = f->np;
- p->nextport = 0;
p->nextrport = 600;
f->p[f->np++] = p;
@@ -1262,21 +1277,33 @@
if(c == nil){
c = malloc(sizeof(Conv));
if(c == nil)
- error(Enomem);
- qlock(c);
+ return nil;
+ if(waserror()){
+ qfree(c->rq);
+ qfree(c->wq);
+ qfree(c->eq);
+ qfree(c->sq);
+ free(c->ptcl);
+ free(c);
+ return nil;
+ }
c->p = p;
c->x = pp - p->conv;
if(p->ptclsize != 0){
c->ptcl = malloc(p->ptclsize);
- if(c->ptcl == nil) {
- free(c);
+ if(c->ptcl == nil)
error(Enomem);
- }
}
- *pp = c;
- p->ac++;
c->eq = qopen(1024, Qmsg, 0, 0);
+ if(c->eq == nil)
+ error(Enomem);
(*p->create)(c);
+ if(c->rq == nil || c->wq == nil)
+ error(Enomem);
+ poperror();
+ qlock(c);
+ *pp = c;
+ p->ac++;
break;
}
if(canqlock(c)){
@@ -1291,8 +1318,11 @@
}
}
if(pp >= ep) {
- if(p->gc != nil && (*p->gc)(p))
- goto retry;
+ if(p->gc != nil){
+ print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+ if((*p->gc)(p))
+ goto retry;
+ }
return nil;
}
@@ -1307,8 +1337,9 @@
c->lport = 0;
c->rport = 0;
c->restricted = 0;
+ c->ignoreadvice = 0;
c->ttl = MAXTTL;
- c->tos = DFLTTOS;
+ c->tos = 0;
qreopen(c->rq);
qreopen(c->wq);
qreopen(c->eq);
@@ -1321,7 +1352,7 @@
Fsconnected(Conv* c, char* msg)
{
if(msg != nil && *msg != '\0')
- kstrcpy(c->cerr, msg, sizeof(c->cerr));
+ strncpy(c->cerr, msg, ERRMAX-1);
switch(c->state){
@@ -1368,12 +1399,19 @@
for(l = &c->incall; *l; l = &(*l)->next)
i++;
if(i >= Maxincall) {
+ static int beenhere;
+
qunlock(c);
+ if (!beenhere) {
+ beenhere = 1;
+ print("Fsnewcall: incall queue full (%d) on port %d\n",
+ i, c->lport);
+ }
return nil;
}
/* find a free conversation */
- nc = Fsprotoclone(c->p, network);
+ nc = Fsprotoclone(c->p, c->owner);
if(nc == nil) {
qunlock(c);
return nil;
@@ -1394,12 +1432,12 @@
return nc;
}
-static long
+long
ndbwrite(Fs *f, char *a, ulong off, int n)
{
if(off > strlen(f->ndb))
error(Eio);
- if(off+n >= sizeof(f->ndb)-1)
+ if(off+n >= sizeof(f->ndb))
error(Eio);
memmove(f->ndb+off, a, n);
f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
ulong
scalednconv(void)
{
- if(conf.npage*BY2PG >= 128*MB)
+ if(cpuserver && conf.npage*BY2PG >= 128*MB)
return Nchans*4;
return Nchans;
}
diff -u a/os/ip//esp.c b/os/ip//esp.c
--- a/os/ip//esp.c
+++ b/os/ip//esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ * currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ * transport mode (host-to-host)
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,47 +14,79 @@
#include "../port/error.h"
#include "ip.h"
+#include "ipv6.h"
+#include <libsec.h>
-#include "libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by) ((by) * BI2BY)
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
typedef struct Esptail Esptail;
typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
-#define DPRINT if(0)print
+enum {
+ Encrypt,
+ Decrypt,
-enum
-{
- IP_ESPPROTO = 50,
- EsphdrSize = 28, // includes IP header
- IphdrSize = 20, // options have been striped
- EsptailSize = 2, // does not include pad or auth data
- UserhdrSize = 4, // user visable header size - if enabled
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8,
+ Esp6hdrlen = IP6HDR + 8,
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+
+ Desblk = BITS2BYTES(64),
+ Des3keysz = BITS2BYTES(192),
+
+ Aesblk = BITS2BYTES(128),
+ Aeskeysz = BITS2BYTES(128),
};
struct Esphdr
{
- /* ip header */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+ uchar payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ * enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ * orig IP hdrs | ESP hdr |
+ * enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar espproto; /* Protocol */
uchar espplen[2]; /* Header plus data length */
uchar espsrc[4]; /* Ip source */
uchar espdst[4]; /* Ip destination */
- /* esp header */
- uchar espspi[4]; /* Security parameter index */
- uchar espseq[4]; /* Sequence number */
+ Esphdr;
};
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+ IPV6HDR;
+ Esphdr;
+};
+
struct Esptail
{
uchar pad;
@@ -53,16 +93,28 @@
uchar nexthdr;
};
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+ ulong version;
+ ulong iphdrlen;
+ ulong hdrlen; /* iphdrlen + esp hdr len */
+ ulong spi;
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+};
+
/* header as seen by the user */
struct Userhdr
{
- uchar nexthdr; // next protocol
+ uchar nexthdr; /* next protocol */
uchar unused[3];
};
struct Esppriv
{
- ulong in;
+ uvlong in;
ulong inerrors;
};
@@ -72,77 +124,68 @@
struct Espcb
{
int incoming;
- int header; // user user level header
+ int header; /* user-level header */
ulong spi;
- ulong seq; // last seq sent
- ulong window; // for replay attacks
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+
char *espalg;
- void *espstate; // other state for esp
- int espivlen; // in bytes
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
int espblklen;
int (*cipher)(Espcb*, uchar *buf, int len);
+
char *ahalg;
- void *ahstate; // other state for esp
- int ahlen; // auth data length in bytes
+ void *ahstate; /* other state for ah */
+ int ahlen; /* auth data length in bytes */
int ahblklen;
int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+ DigestState *ds;
};
struct Algorithm
{
char *name;
- int keylen; // in bits
- void (*init)(Espcb*, char* name, uchar *key, int keylen);
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, unsigned keylen);
};
-
-enum {
- RC4forward = 10*1024*1024, // maximum skip forward
- RC4back = 100*1024, // maximum look back
-};
-
-struct Esprc4
-{
- ulong cseq; // current byte sequence number
- RC4state current;
-
- int ovalid; // old is valid
- ulong lgseq; // last good sequence
- ulong oseq; // old byte sequence number
- RC4state old;
-};
-
static Conv* convlookup(Proto *esp, ulong spi);
static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
static void espkick(void *x);
+static void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
static Algorithm espalg[] =
{
- "null", 0, nullespinit,
- "des_56_cbc", 64, desespinit,
- "rc4_128", 128, rc4espinit,
- nil, 0, nil,
+ "null", 0, nullespinit,
+ "des3_cbc", 192, des3espinit, /* new rfc2451, des-ede3 */
+ "aes_128_cbc", 128, aescbcespinit, /* new rfc3602 */
+ "aes_ctr", 128, aesctrespinit, /* new rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+ nil, 0, nil,
};
static Algorithm ahalg[] =
{
- "null", 0, nullahinit,
- "hmac_sha1_96", 128, shaahinit,
- "hmac_md5_96", 128, md5ahinit,
- nil, 0, nil,
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
};
static char*
espconnect(Conv *c, char **argv, int argc)
{
- char *p, *pp;
- char *e = nil;
+ char *p, *pp, *e = nil;
ulong spi;
Espcb *ecb = (Espcb*)c->ptcl;
@@ -157,7 +200,10 @@
break;
}
*p++ = 0;
- parseip(c->raddr, argv[1]);
+ if (parseip(c->raddr, argv[1]) == -1) {
+ e = Ebadip;
+ break;
+ }
findlocalip(c->p->f, c->laddr, c->raddr);
ecb->incoming = 0;
ecb->seq = 0;
@@ -215,26 +261,86 @@
ipmove(c->raddr, IPnoaddr);
ecb = (Espcb*)c->ptcl;
- free(ecb->espstate);
- free(ecb->ahstate);
+ secfree(ecb->espstate);
+ secfree(ecb->ahstate);
memset(ecb, 0, sizeof(Espcb));
}
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+ if (*bpp == nil || BLEN(*bpp) == 0) {
+ /* get enough to identify the IP version */
+ *bpp = pullupblock(*bpp, IP4HDR);
+ if(*bpp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return 0;
+ }
+ }
+ return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
static void
+getverslens(int version, Versdep *vp)
+{
+ vp->version = version;
+ switch(vp->version) {
+ case V4:
+ vp->iphdrlen = IP4HDR;
+ vp->hdrlen = Esp4hdrlen;
+ break;
+ case V6:
+ vp->iphdrlen = IP6HDR;
+ vp->hdrlen = Esp6hdrlen;
+ break;
+ default:
+ panic("esp: getverslens version %d wrong", version);
+ }
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+
+ switch(vp->version) {
+ case V4:
+ eh4 = (Esp4hdr*)pkt;
+ v4tov6(vp->raddr, eh4->espsrc);
+ v4tov6(vp->laddr, eh4->espdst);
+ vp->spi = nhgetl(eh4->espspi);
+ break;
+ case V6:
+ eh6 = (Esp6hdr*)pkt;
+ ipmove(vp->raddr, eh6->src);
+ ipmove(vp->laddr, eh6->dst);
+ vp->spi = nhgetl(eh6->espspi);
+ break;
+ default:
+ panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+ }
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
espkick(void *x)
{
+ int nexthdr, payload, pad, align;
+ uchar *auth;
+ Block *bp;
Conv *c = x;
- Esphdr *eh;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Espcb *ecb;
Esptail *et;
Userhdr *uh;
- Espcb *ecb;
- Block *bp;
- int nexthdr;
- int payload;
- int pad;
- int align;
- uchar *auth;
+ Versdep vers;
+ getverslens(convipvers(c), &vers);
bp = qget(c->wq);
if(bp == nil)
return;
@@ -244,7 +350,7 @@
if(ecb->header) {
/* make sure the message has a User header */
- bp = pullupblock(bp, UserhdrSize);
+ bp = pullupblock(bp, Userhdrlen);
if(bp == nil) {
qunlock(c);
return;
@@ -251,15 +357,16 @@
}
uh = (Userhdr*)bp->rp;
nexthdr = uh->nexthdr;
- bp->rp += UserhdrSize;
+ bp->rp += Userhdrlen;
} else {
- nexthdr = 0; // what should this be?
+ nexthdr = 0; /* what should this be? */
}
payload = BLEN(bp) + ecb->espivlen;
/* Make space to fit ip header */
- bp = padblock(bp, EsphdrSize + ecb->espivlen);
+ bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+ getpktspiaddrs(bp->rp, &vers);
align = 4;
if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
align = ecb->espblklen;
if(align % ecb->ahblklen != 0)
panic("espkick: ahblklen is important after all");
- pad = (align-1) - (payload + EsptailSize-1)%align;
+ pad = (align-1) - (payload + Esptaillen-1)%align;
/*
* Make space for tail
@@ -273,70 +380,88 @@
* this is done by calling padblock with a negative size
* Padblock does not change bp->wp!
*/
- bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
- bp->wp += pad+EsptailSize+ecb->ahlen;
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
- eh = (Esphdr *)(bp->rp);
- et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
- // fill in tail
+ /* fill in tail */
et->pad = pad;
et->nexthdr = nexthdr;
- ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
- auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+ /* encrypt the payload */
+ ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
- // fill in head
- eh->vihl = IP_VER4;
- hnputl(eh->espspi, ecb->spi);
- hnputl(eh->espseq, ++ecb->seq);
- v6tov4(eh->espsrc, c->laddr);
- v6tov4(eh->espdst, c->raddr);
- eh->espproto = IP_ESPPROTO;
- eh->frag[0] = 0;
- eh->frag[1] = 0;
+ /* fill in head; construct a new IP header and an ESP header */
+ if (vers.version == V4) {
+ eh4 = (Esp4hdr *)bp->rp;
+ eh4->vihl = IP_VER4;
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
- ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ } else {
+ eh6 = (Esp6hdr *)bp->rp;
+ eh6->vcf[0] = IP_VER6;
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ }
+
+ /* compute secure hash */
+ ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
qunlock(c);
- //print("esp: pass down: %uld\n", BLEN(bp));
- ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (vers.version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
}
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
void
espiput(Proto *esp, Ipifc*, Block *bp)
{
- Esphdr *eh;
- Esptail *et;
- Userhdr *uh;
+ int payload, nexthdr;
+ uchar *auth, *espspi;
Conv *c;
Espcb *ecb;
- uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Esptail *et;
Fs *f;
- uchar *auth;
- ulong spi;
- int payload, nexthdr;
+ Userhdr *uh;
+ Versdep vers;
f = esp->f;
- bp = pullupblock(bp, EsphdrSize+EsptailSize);
+ getverslens(pktipvers(f, &bp), &vers);
+
+ bp = pullupblock(bp, vers.hdrlen + Esptaillen);
if(bp == nil) {
netlog(f, Logesp, "esp: short packet\n");
return;
}
+ getpktspiaddrs(bp->rp, &vers);
- eh = (Esphdr*)(bp->rp);
- spi = nhgetl(eh->espspi);
- v4tov6(raddr, eh->espsrc);
- v4tov6(laddr, eh->espdst);
-
qlock(esp);
/* Look for a conversation structure for this port */
- c = convlookup(esp, spi);
+ c = convlookup(esp, vers.spi);
if(c == nil) {
qunlock(esp);
- netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
icmpnoconv(f, bp);
freeblist(bp);
return;
@@ -346,76 +471,83 @@
qunlock(esp);
ecb = c->ptcl;
- // too hard to do decryption/authentication on block lists
- if(bp->next)
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next != nil)
bp = concatblock(bp);
- if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+ if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
qunlock(c);
- netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- eh = (Esphdr*)(bp->rp);
auth = bp->wp - ecb->ahlen;
- if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+ espspi = vers.version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+
+ /* compute secure hash and authenticate */
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
- laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
- if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+ payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
qunlock(c);
- netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
- laddr, spi, payload, BLEN(bp));
+ netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+ vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
freeb(bp);
return;
}
- if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+ /* decrypt payload */
+ if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
- laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+ vers.raddr, vers.laddr, vers.spi, up->errstr);
freeb(bp);
return;
}
- payload -= EsptailSize;
- et = (Esptail*)(bp->rp + EsphdrSize + payload);
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload);
payload -= et->pad + ecb->espivlen;
nexthdr = et->nexthdr;
if(payload <= 0) {
qunlock(c);
- netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+ vers.raddr, vers.laddr, vers.spi);
freeb(bp);
return;
}
- // trim packet
- bp->rp += EsphdrSize + ecb->espivlen;
+ /* trim packet */
+ bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
bp->wp = bp->rp + payload;
if(ecb->header) {
- // assume UserhdrSize < EsphdrSize
- bp->rp -= UserhdrSize;
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ bp->rp -= Userhdrlen;
uh = (Userhdr*)bp->rp;
- memset(uh, 0, UserhdrSize);
+ memset(uh, 0, Userhdrlen);
uh->nexthdr = nexthdr;
}
+ /* ingress filtering here? */
+
if(qfull(c->rq)){
- netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+ vers.laddr, vers.spi);
freeblist(bp);
}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
- qpass(c->rq, bp);
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp); /* pass packet up the read queue */
}
qunlock(c);
@@ -440,19 +572,19 @@
return e;
}
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
void
espadvise(Proto *esp, Block *bp, char *msg)
{
- Esphdr *h;
Conv *c;
- ulong spi;
+ Versdep vers;
- h = (Esphdr*)(bp->rp);
+ getverslens(pktipvers(esp->f, &bp), &vers);
+ getpktspiaddrs(bp->rp, &vers);
- spi = nhgets(h->espspi);
qlock(esp);
- c = convlookup(esp, spi);
- if(c != nil) {
+ c = convlookup(esp, vers.spi);
+ if(c != nil && !c->ignoreadvice) {
qhangup(c->rq, msg);
qhangup(c->wq, msg);
}
@@ -466,7 +598,7 @@
Esppriv *upriv;
upriv = esp->priv;
- return snprint(buf, len, "%lud %lud\n",
+ return snprint(buf, len, "%llud %lud\n",
upriv->in,
upriv->inerrors);
}
@@ -520,10 +652,10 @@
setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
{
uchar *key;
- int i, nbyte, nchar;
- int c;
+ int c, nbyte, nchar;
+ uint i;
- if(n < 2)
+ if(n < 2 || n > 3)
return "bad format";
for(; alg->name; alg++)
if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
if(alg->name == nil)
return "unknown algorithm";
- if(n != 3)
- return "bad format";
nbyte = (alg->keylen + 7) >> 3;
- nchar = strlen(f[2]);
+ if (n == 2)
+ nchar = 0;
+ else
+ nchar = strlen(f[2]);
+ if(nchar != 2 * nbyte) /* TODO: maybe < is ok */
+ return "key not required length";
+ /* convert hex digits from ascii, in place */
for(i=0; i<nchar; i++) {
c = f[2][i];
if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
else if(c >= 'A' && c <= 'F')
f[2][i] -= 'A'-10;
else
- return "bad character in key";
+ return "non-hex character in key";
}
- key = smalloc(nbyte);
- for(i=0; i<nchar && i*2<nbyte; i++) {
+ /* collapse hex digits into complete bytes in reverse order in key */
+ key = secalloc(nbyte);
+ for(i = 0; i < nchar && i/2 < nbyte; i++) {
c = f[2][nchar-i-1];
if(i&1)
c <<= 4;
- key[i>>1] |= c;
+ key[i/2] |= c;
}
-
+ memset(f[2], 0, nchar);
alg->init(ecb, alg->name, key, alg->keylen);
- free(key);
+ secfree(key);
return nil;
}
+
+/*
+ * null encryption
+ */
+
static int
nullcipher(Espcb*, uchar*, int)
{
@@ -566,7 +708,7 @@
}
static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->espalg = name;
ecb->espblklen = 1;
@@ -581,7 +723,7 @@
}
static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->ahalg = name;
ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
ecb->auth = nullauth;
}
-void
+
+/*
+ * sha1
+ */
+
+static void
seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
DigestState *digest;
- uchar innerhash[SHA1dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = sha1(ipad, 64, nil, nil);
+ digest = sha1(ipad, Hmacblksz, nil, nil);
sha1(t, tlen, innerhash, digest);
- digest = sha1(opad, 64, nil, nil);
+ digest = sha1(opad, Hmacblksz, nil, nil);
sha1(innerhash, SHA1dlen, hash, digest);
}
@@ -615,11 +759,11 @@
static int
shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
{
- uchar hash[SHA1dlen];
int r;
+ uchar hash[SHA1dlen];
memset(hash, 0, SHA1dlen);
- seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -626,40 +770,162 @@
}
static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("shaahinit: bad keylen");
- klen >>= 8; // convert to bytes
+ klen /= BI2BY;
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = shaauth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aeskeysz], ivec[Aeskeysz];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aeskeysz);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aescbccipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aesblk], ivec[Aesblk];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aesblk);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aesctrcipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
DigestState *digest;
- uchar innerhash[MD5dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = md5(ipad, 64, nil, nil);
+ digest = md5(ipad, Hmacblksz, nil, nil);
md5(t, tlen, innerhash, digest);
- digest = md5(opad, 64, nil, nil);
+ digest = md5(opad, Hmacblksz, nil, nil);
md5(innerhash, MD5dlen, hash, digest);
}
@@ -670,7 +936,7 @@
int r;
memset(hash, 0, MD5dlen);
- seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -677,168 +943,102 @@
}
static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("md5ahinit: bad keylen");
- klen >>= 3; // convert to bytes
-
-
+ klen = BITS2BYTES(klen);
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = md5auth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
+
+/*
+ * des, single and triple
+ */
+
static int
descipher(Espcb *ecb, uchar *p, int n)
{
- uchar tmp[8];
- uchar *pp, *tp, *ip, *eip, *ep;
DESstate *ds = ecb->espstate;
- ep = p + n;
if(ecb->incoming) {
- memmove(ds->ivec, p, 8);
- p += 8;
- while(p < ep){
- memmove(tmp, p, 8);
- block_cipher(ds->expanded, p, 1);
- tp = tmp;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; ){
- *p++ ^= *ip;
- *ip++ = *tp++;
- }
- }
+ memmove(ds->ivec, p, Desblk);
+ desCBCdecrypt(p + Desblk, n - Desblk, ds);
} else {
- memmove(p, ds->ivec, 8);
- for(p += 8; p < ep; p += 8){
- pp = p;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; )
- *pp++ ^= *ip++;
- block_cipher(ds->expanded, p, 0);
- memmove(ds->ivec, p, 8);
- }
+ memmove(p, ds->ivec, Desblk);
+ desCBCencrypt(p + Desblk, n - Desblk, ds);
}
return 1;
}
-
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+ DES3state *ds = ecb->espstate;
+
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, Desblk);
+ des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+ } else {
+ memmove(p, ds->ivec, Desblk);
+ des3CBCencrypt(p + Desblk, n - Desblk, ds);
+ }
+ return 1;
+}
+
static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- uchar key[8];
- uchar ivec[8];
- int i;
-
- // bits to bytes
- n = (n+7)>>3;
- if(n > 8)
- n = 8;
+ uchar key[Desblk], ivec[Desblk];
+
+ n = BITS2BYTES(n);
+ if(n > Desblk)
+ n = Desblk;
memset(key, 0, sizeof(key));
memmove(key, k, n);
- for(i=0; i<8; i++)
- ivec[i] = nrand(256);
+ prng(ivec, Desblk);
ecb->espalg = name;
- ecb->espblklen = 8;
- ecb->espivlen = 8;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
+
ecb->cipher = descipher;
- ecb->espstate = smalloc(sizeof(DESstate));
+ ecb->espstate = secalloc(sizeof(DESstate));
setupDESstate(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- Esprc4 *esprc4;
- RC4state tmpstate;
- ulong seq;
- long d, dd;
+ uchar key[3][Desblk], ivec[Desblk];
- if(n < 4)
- return 0;
+ n = BITS2BYTES(n);
+ if(n > Des3keysz)
+ n = Des3keysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Desblk);
+ ecb->espalg = name;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
- esprc4 = ecb->espstate;
- if(ecb->incoming) {
- seq = nhgetl(p);
- p += 4;
- n -= 4;
- d = seq-esprc4->cseq;
- if(d == 0) {
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- if(esprc4->ovalid) {
- dd = esprc4->cseq - esprc4->lgseq;
- if(dd > RC4back)
- esprc4->ovalid = 0;
- }
- } else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
- // this link is hosed
- if(d > RC4forward) {
- strcpy(up->errstr, "rc4cipher: skipped too much");
- return 0;
- }
- esprc4->lgseq = seq;
- if(!esprc4->ovalid) {
- esprc4->ovalid = 1;
- esprc4->oseq = esprc4->cseq;
- memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
- }
- rc4skip(&esprc4->current, d);
- rc4(&esprc4->current, p, n);
- esprc4->cseq = seq+n;
- } else {
-print("reordered packet: %uld %ld\n", seq, d);
- dd = seq - esprc4->oseq;
- if(!esprc4->ovalid || -d > RC4back || dd < 0) {
- strcpy(up->errstr, "rc4cipher: too far back");
- return 0;
- }
- memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
- rc4skip(&tmpstate, dd);
- rc4(&tmpstate, p, n);
- return 1;
- }
-
- // move old state up
- if(esprc4->ovalid) {
- dd = esprc4->cseq - RC4back - esprc4->oseq;
- if(dd > 0) {
- rc4skip(&esprc4->old, dd);
- esprc4->oseq += dd;
- }
- }
- } else {
- hnputl(p, esprc4->cseq);
- p += 4;
- n -= 4;
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- }
- return 1;
+ ecb->cipher = des3cipher;
+ ecb->espstate = secalloc(sizeof(DES3state));
+ setupDES3state(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{
- Esprc4 *esprc4;
- // bits to bytes
- n = (n+7)>>3;
- esprc4 = smalloc(sizeof(Esprc4));
- memset(esprc4, 0, sizeof(Esprc4));
- setupRC4state(&esprc4->current, k, n);
- ecb->espalg = name;
- ecb->espblklen = 4;
- ecb->espivlen = 4;
- ecb->cipher = rc4cipher;
- ecb->espstate = esprc4;
-}
-
+/*
+ * interfacing to devip
+ */
void
espinit(Fs *fs)
{
diff -u a/os/ip//ethermedium.c b/os/ip//ethermedium.c
--- a/os/ip//ethermedium.c
+++ b/os/ip//ethermedium.c
@@ -5,9 +5,9 @@
#include "fns.h"
#include "../port/error.h"
+#include "../port/netif.h"
#include "ip.h"
#include "ipv6.h"
-#include "kernel.h"
typedef struct Etherhdr Etherhdr;
struct Etherhdr
@@ -18,10 +18,10 @@
};
static uchar ipbroadcast[IPaddrlen] = {
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
};
static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
static void sendarp(Ipifc *ifc, Arpent *a);
-static void sendgarp(Ipifc *ifc, uchar*);
+static void sendndp(Ipifc *ifc, Arpent *a);
static int multicastea(uchar *ea, uchar *ip);
static void recvarpproc(void*);
-static void resolveaddr6(Ipifc *ifc, Arpent *a);
static void etherpref2addr(uchar *pref, uchar *ea);
Medium ethermedium =
@@ -53,8 +53,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -70,8 +69,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -94,9 +92,6 @@
*/
enum
{
- ETARP = 0x0806,
- ETIP4 = 0x0800,
- ETIP6 = 0x86DD,
ARPREQUEST = 1,
ARPREPLY = 2,
};
@@ -127,128 +122,92 @@
static void
etherbind(Ipifc *ifc, int argc, char **argv)
{
- Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
- char addr[Maxpath]; //char addr[2*KNAMELEN];
- char dir[Maxpath]; //char dir[2*KNAMELEN];
- char *buf;
- int fd, cfd, n;
- char *ptr;
+ char addr[Maxpath], dir[Maxpath];
Etherrock *er;
+ Chan *c;
+ int n;
if(argc < 2)
error(Ebadarg);
- mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
- buf = nil;
+ /*
+ * get mac address
+ */
+ snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+ c = namec(addr, Aopen, OREAD, 0);
if(waserror()){
- if(mchan4 != nil)
- cclose(mchan4);
- if(cchan4 != nil)
- cclose(cchan4);
- if(achan != nil)
- cclose(achan);
- if(mchan6 != nil)
- cclose(mchan6);
- if(cchan6 != nil)
- cclose(cchan6);
- if(buf != nil)
- free(buf);
- nexterror();
+ cclose(c);
+ nexterror();
}
+ n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+ if(n < 0)
+ error(Eio);
+ addr[n] = 0;
+ if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+ error("could not find mac address");
+ cclose(c);
+ poperror();
+ er = smalloc(sizeof(*er));
+ er->read4p = er->read6p = er->arpp = (void*)-1;
+ er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+ er->f = ifc->conv->p->f;
+
+ if(waserror()){
+ if(er->mchan4 != nil)
+ cclose(er->mchan4);
+ if(er->cchan4 != nil)
+ cclose(er->cchan4);
+ if(er->mchan6 != nil)
+ cclose(er->mchan6);
+ if(er->cchan6 != nil)
+ cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
+ free(er);
+ nexterror();
+ }
+
/*
- * open ip converstation
+ * open ipv4 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x800 failed: %s", up->env->errstr);
- mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x800", argv[2]); /* ETIP4 */
+ er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
/*
* make it non-blocking
*/
- devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
/*
- * get mac address and speed
- */
- snprint(addr, sizeof(addr), "%s/stats", dir);
- fd = kopen(addr, OREAD);
- if(fd < 0)
- errorf("can't open ether stats: %s", up->env->errstr);
-
- buf = smalloc(512);
- n = kread(fd, buf, 511);
- kclose(fd);
- if(n <= 0)
- error(Eio);
- buf[n] = 0;
-
- ptr = strstr(buf, "addr: ");
- if(!ptr)
- error(Eio);
- ptr += 6;
- parsemac(ifc->mac, ptr, 6);
-
- ptr = strstr(buf, "mbps: ");
- if(ptr){
- ptr += 6;
- ifc->mbps = atoi(ptr);
- } else
- ifc->mbps = 100;
-
- /*
- * open arp conversation
- */
- snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
- fd = kdial(addr, nil, nil, nil);
- if(fd < 0)
- errorf("dial 0x806 failed: %s", up->env->errstr);
- achan = commonfdtochan(fd, ORDWR, 0, 1);
- kclose(fd);
-
- /*
- * open ip conversation
+ * open ipv6 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x86DD failed: %s", up->env->errstr);
- mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); /* ETIP6 */
+ er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
/*
* make it non-blocking
*/
- devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
- er = smalloc(sizeof(*er));
- er->mchan4 = mchan4;
- er->cchan4 = cchan4;
- er->achan = achan;
- er->mchan6 = mchan6;
- er->cchan6 = cchan6;
- er->f = ifc->conv->p->f;
- ifc->arg = er;
-
- free(buf);
+ /*
+ * open arp conversation
+ */
+ snprint(addr, sizeof(addr), "%s!0x806", argv[2]); /* ETARP */
+ er->achan = chandial(addr, nil, nil, nil);
poperror();
- kproc("etherread4", etherread4, ifc, 0);
- kproc("recvarpproc", recvarpproc, ifc, 0);
- kproc("etherread6", etherread6, ifc, 0);
+ ifc->arg = er;
+
+ kproc("etherread4", etherread4, ifc);
+ kproc("etherread6", etherread6, ifc);
+ kproc("recvarpproc", recvarpproc, ifc);
}
/*
@@ -259,21 +218,35 @@
{
Etherrock *er = ifc->arg;
- if(er->read4p)
+ while(waserror())
+ ;
+
+ /* wait for readers to start */
+ while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->read4p != nil)
postnote(er->read4p, 1, "unbind", 0);
- if(er->read6p)
+ if(er->read6p != nil)
postnote(er->read6p, 1, "unbind", 0);
- if(er->arpp)
+ if(er->arpp != nil)
postnote(er->arpp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for readers to die */
- while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+ while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan4 != nil)
cclose(er->mchan4);
- if(er->achan != nil)
- cclose(er->achan);
if(er->cchan4 != nil)
cclose(er->cchan4);
if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
cclose(er->mchan6);
if(er->cchan6 != nil)
cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
free(er);
}
@@ -297,16 +272,16 @@
/* get mac address of destination */
a = arpget(er->f->arp, bp, version, ifc, ip, mac);
- if(a){
+ if(a != nil){
/* check for broadcast or multicast */
bp = multicastarp(er->f, a, ifc->m, mac);
- if(bp==nil){
+ if(bp == nil){
switch(version){
case V4:
sendarp(ifc, a);
break;
- case V6:
- resolveaddr6(ifc, a);
+ case V6:
+ sendndp(ifc, a);
break;
default:
panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
/* make it a single block with space for the ether header */
bp = padblock(bp, ifc->m->hsize);
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
ifc = a;
er = ifc->arg;
er->read4p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read4p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput4(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read4p = nil;
+ pexit("hangup", 1);
}
@@ -397,29 +369,28 @@
ifc = a;
er = ifc->arg;
er->read6p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read6p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput6(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read6p = nil;
+ pexit("hangup", 1);
}
static void
@@ -477,6 +448,7 @@
Block *bp;
Etherarp *e;
Etherrock *er = ifc->arg;
+ uchar targ[IPv4addrlen], src[IPv4addrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
return;
}
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+
/* remove all but the last message */
while((bp = a->hold) != nil){
if(bp == a->last)
@@ -492,18 +467,20 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
+ memmove(targ, a->ip+IPv4off, IPv4addrlen);
arprelease(er->f->arp, a);
+ if(!ipv4local(ifc, src, 0, targ))
+ return;
+
n = sizeof(Etherarp);
- if(n < a->type->mintu)
- n = a->type->mintu;
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
bp = allocb(n);
memset(bp->rp, 0, n);
e = (Etherarp*)bp->rp;
- memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
- ipv4local(ifc, e->spa);
+ memmove(e->tpa, targ, sizeof(e->tpa));
+ memmove(e->spa, src, sizeof(e->spa));
memmove(e->sha, ifc->mac, sizeof(e->sha));
memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("arp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
{
- int sflag;
Block *bp;
Etherrock *er = ifc->arg;
- uchar ipsrc[IPaddrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
- a->rtime = NOW + ReTransTimer;
- if(a->rxtsrem <= 0) {
- arprelease(er->f->arp, a);
- return;
- }
-
- a->rxtsrem--;
- arprelease(er->f->arp, a);
-
- if(sflag = ipv6anylocal(ifc, ipsrc))
- icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+ ndpsendsol(er->f, ifc, a); /* unlocks arp */
}
/*
@@ -569,10 +530,6 @@
Etherarp *e;
Etherrock *er = ifc->arg;
- /* don't arp for our initial non address */
- if(ipcmp(ip, IPnoaddr) == 0)
- return;
-
n = sizeof(Etherarp);
if(n < ifc->m->mintu)
n = ifc->m->mintu;
@@ -593,15 +550,13 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("garp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
recvarp(Ipifc *ifc)
{
- int n;
+ int n, forme;
Block *ebp, *rbp;
Etherarp *e, *r;
uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
Etherrock *er = ifc->arg;
ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
- if(ebp == nil) {
- print("arp: rcv: %r\n");
+ if(ebp == nil)
return;
- }
+ rlock(ifc);
+
e = (Etherarp*)ebp->rp;
switch(nhgets(e->op)) {
default:
@@ -620,9 +575,13 @@
break;
case ARPREPLY:
+ /* make sure not to enter multi/broadcast address */
+ if(e->sha[0] & 1)
+ break;
+
/* check for machine using my ip address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
print("arprep: 0x%E/0x%E also has ip addr %V\n",
e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
}
}
- /* make sure we're not entering broadcast addresses */
- if(ipcmp(ip, ipbroadcast) == 0 ||
- !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
- print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
- e->s, e->sha, e->spa);
- break;
- }
-
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+ /* refresh what we know about sender */
+ arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
break;
case ARPREQUEST:
+ /* don't reply to multi/broadcast addresses */
+ if(e->sha[0] & 1)
+ break;
+
/* don't answer arps till we know who we are */
- if(ifc->lifc == 0)
+ if(ifc->lifc == nil)
break;
/* check for machine using my ip or ether address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
- if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+ if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
/* print only once */
- print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+ print("arpreq: 0x%E also has ip addr %V\n",
+ e->sha, e->spa);
memmove(eprinted, e->spa, sizeof(e->spa));
}
+ break;
}
} else {
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
- print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+ print("arpreq: %V also has ether addr %E\n",
+ e->spa, e->sha);
break;
}
}
- /* refresh what we know about sender */
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
- /* answer only requests for our address or systems we're proxying for */
+ /*
+ * when request is for our address or systems we're proxying for,
+ * enter sender's address into arp table and reply, otherwise just
+ * refresh the sender's address.
+ */
v4tov6(ip, e->tpa);
- if(!iplocalonifc(ifc, ip))
- if(!ipproxyifc(er->f, ifc, ip))
+ forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+ if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
break;
n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
memmove(r->s, ifc->mac, sizeof(r->s));
rbp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
- if(n < 0)
- print("arp: write: %r\n");
+ runlock(ifc);
+ freeb(ebp);
+
+ devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+ return;
}
+
+ runlock(ifc);
freeb(ebp);
}
@@ -707,7 +671,7 @@
er->arpp = up;
if(waserror()){
- er->arpp = 0;
+ er->arpp = nil;
pexit("hangup", 1);
}
for(;;)
@@ -749,14 +713,9 @@
multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
{
/* is it broadcast? */
- switch(ipforme(f, a->ip)){
- case Runi:
- return nil;
- case Rbcast:
- memset(mac, 0xff, 6);
+ if(ipforme(f, a->ip) == Rbcast){
+ memset(mac, 0xff, medium->maclen);
return arpresolve(f->arp, a, medium, mac);
- default:
- break;
}
/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
}
-static void
+static void
etherpref2addr(uchar *pref, uchar *ea)
{
- pref[8] = ea[0] | 0x2;
- pref[9] = ea[1];
+ pref[8] = ea[0] ^ 0x2;
+ pref[9] = ea[1];
pref[10] = ea[2];
pref[11] = 0xFF;
pref[12] = 0xFE;
@@ -789,4 +748,41 @@
pref[13] = ea[3];
pref[14] = ea[4];
pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+ static char tdad[] = "dad6";
+ uchar a[IPaddrlen];
+
+ if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+ return;
+
+ if(isv4(ip)){
+ sendgarp(ifc, ip);
+ return;
+ }
+
+ if((lifc->type&Rv4) != 0)
+ return;
+
+ if(!lifc->tentative){
+ icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+ return;
+ }
+
+ if(ipcmp(lifc->local, ip) != 0)
+ return;
+
+ /* temporarily add route for duplicate address detection */
+ ipv62smcast(a, ip);
+ addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ if(waserror()){
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ nexterror();
+ }
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
}
diff -u a/os/ip//gre.c b/os/ip//gre.c
--- a/os/ip//gre.c
+++ b/os/ip//gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,10 +10,7 @@
#include "ip.h"
-#define DPRINT if(0)print
-
-enum
-{
+enum {
GRE_IPONLY = 12, /* size of ip header */
GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
IP_GREPROTO = 47,
@@ -18,10 +18,33 @@
GRErxms = 200,
GREtickms = 100,
GREmaxxmit = 10,
+
+ K = 1024,
+ GREqlen = 256 * K,
+
+ GRE_cksum = 0x8000,
+ GRE_routing = 0x4000,
+ GRE_key = 0x2000,
+ GRE_seq = 0x1000,
+
+ Nring = 1 << 10, /* power of two, please */
+ Ringmask = Nring - 1,
+
+ GREctlraw = 0,
+ GREctlcooked,
+ GREctlretunnel,
+ GREctlreport,
+ GREctldlsuspend,
+ GREctlulsuspend,
+ GREctldlresume,
+ GREctlulresume,
+ GREctlforward,
+ GREctlulkey,
+ Ncmds,
};
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
/* ip header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
@@ -28,7 +51,7 @@
uchar len[2]; /* packet length (including headers) */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl;
uchar proto; /* Protocol */
uchar cksum[2]; /* checksum */
uchar src[4]; /* Ip source */
@@ -37,21 +60,115 @@
/* gre header */
uchar flags[2];
uchar eproto[2]; /* encapsulation protocol */
-} GREhdr;
+};
typedef struct GREpriv GREpriv;
-struct GREpriv
-{
- int raw; /* Raw GRE mode */
-
+struct GREpriv{
/* non-MIB stats */
- ulong csumerr; /* checksum errors */
- ulong lenerr; /* short packet */
+ uvlong lenerr; /* short packet */
};
+typedef struct Bring Bring;
+struct Bring{
+ Block *ring[Nring];
+ long produced;
+ long consumed;
+};
+
+typedef struct GREconv GREconv;
+struct GREconv{
+ int raw;
+
+ /* Retunnelling information. v4 only */
+ uchar north[4]; /* HA */
+ uchar south[4]; /* Base station */
+ uchar hoa[4]; /* Home address */
+ uchar coa[4]; /* Care-of address */
+ ulong seq; /* Current sequence # */
+ int dlsusp; /* Downlink suspended? */
+ int ulsusp; /* Uplink suspended? */
+ ulong ulkey; /* GRE key */
+
+ QLock lock; /* Lock for rings */
+ Bring dlpending; /* Ring of pending packets */
+ Bring dlbuffered; /* Received while suspended */
+ Bring ulbuffered; /* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+ uchar *rp;
+ ulong seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+ char *cmd;
+ int argc;
+ char *(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw] = { "raw", 1, grectlraw, },
+[GREctlcooked] = { "cooked", 1, grectlcooked, },
+[GREctlretunnel]= { "retunnel", 5, grectlretunnel, },
+[GREctlreport] = { "report", 2, grectlreport, },
+[GREctldlsuspend]= { "dlsuspend", 1, grectldlsuspend,},
+[GREctlulsuspend]= { "ulsuspend", 1, grectlulsuspend,},
+[GREctldlresume]= { "dlresume", 1, grectldlresume, },
+[GREctlulresume]= { "ulresume", 1, grectlulresume, },
+[GREctlforward] = { "forward", 2, grectlforward, },
+[GREctlulkey] = { "ulkey", 2, grectlulkey, },
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)
+{
+ Block *bp;
+
+ if(r->consumed == r->produced)
+ return nil;
+
+ bp = r->ring[r->consumed & Ringmask];
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+ return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)
+{
+ Block *tbp;
+
+ if(r->produced - r->consumed > Ringmask){
+ /* Full! */
+ tbp = r->ring[r->produced & Ringmask];
+ assert(tbp);
+ freeb(tbp);
+ r->consumed++;
+ }
+ r->ring[r->produced & Ringmask] = bp;
+ r->produced++;
+}
+
+static char *
greconnect(Conv *c, char **argv, int argc)
{
Proto *p;
@@ -91,7 +208,7 @@
static void
grecreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->rq = qopen(GREqlen, Qmsg, 0, c);
c->wq = qbypass(grekick, c);
}
@@ -98,44 +215,88 @@
static int
grestate(Conv *c, char *state, int n)
{
- USED(c);
- return snprint(state, n, "%s", "Datagram");
+ GREconv *grec;
+ char *ep, *p;
+
+ grec = c->ptcl;
+ p = state;
+ ep = p + n;
+ p = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+ "pending %uld %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+ c->inuse? "Open ": "Closed ",
+ grec->raw? "raw ": "",
+ grec->dlsusp? "DL suspended ": "",
+ grec->ulsusp? "UL suspended ": "",
+ grec->hoa, grec->north, grec->south, grec->seq,
+ grec->dlpending.consumed, grec->dlpending.produced,
+ grec->dlbuffered.consumed, grec->dlbuffered.produced,
+ grec->ulbuffered.consumed, grec->ulbuffered.produced,
+ grec->ulkey);
+ return p - state;
}
static char*
greannounce(Conv*, char**, int)
{
- return "pktifc does not support announce";
+ return "gre does not support announce";
}
static void
greclose(Conv *c)
{
- qclose(c->rq);
- qclose(c->wq);
- qclose(c->eq);
+ GREconv *grec;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ /* Make sure we don't forward any more packets */
+ memset(grec->hoa, 0, sizeof grec->hoa);
+ memset(grec->north, 0, sizeof grec->north);
+ memset(grec->south, 0, sizeof grec->south);
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->dlpending)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->dlbuffered)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->ulbuffered)) != nil)
+ freeb(bp);
+
+ grec->dlpending.produced = grec->dlpending.consumed = 0;
+ grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+ grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+ qunlock(&grec->lock);
+
+ grec->raw = 0;
+ grec->seq = 0;
+ grec->dlsusp = grec->ulsusp = 1;
+
+ qhangup(c->rq, sessend);
+ qhangup(c->wq, sessend);
+ qhangup(c->eq, sessend);
ipmove(c->laddr, IPnoaddr);
ipmove(c->raddr, IPnoaddr);
- c->lport = 0;
- c->rport = 0;
+ c->lport = c->rport = 0;
}
-int drop;
-
static void
grekick(void *x, Block *bp)
{
- Conv *c = x;
- GREhdr *ghp;
+ Conv *c;
+ GREconv *grec;
+ GREhdr *gre;
uchar laddr[IPaddrlen], raddr[IPaddrlen];
if(bp == nil)
return;
+ c = x;
+ grec = c->ptcl;
+
/* Make space to fit ip header (gre header already there) */
bp = padblock(bp, GRE_IPONLY);
- if(bp == nil)
- return;
/* make sure the message has a GRE header */
bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
if(bp == nil)
return;
- ghp = (GREhdr *)(bp->rp);
- ghp->vihl = IP_VER4;
+ gre = (GREhdr *)bp->rp;
+ gre->vihl = IP_VER4;
- if(!((GREpriv*)c->p->priv)->raw){
- v4tov6(raddr, ghp->dst);
+ if(grec->raw == 0){
+ v4tov6(raddr, gre->dst);
if(ipcmp(raddr, v4prefix) == 0)
- memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
- v4tov6(laddr, ghp->src);
+ memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, gre->src);
if(ipcmp(laddr, v4prefix) == 0){
if(ipcmp(c->laddr, IPnoaddr) == 0)
- findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
- memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ /* pick interface closest to dest */
+ findlocalip(c->p->f, c->laddr, raddr);
+ memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
}
- hnputs(ghp->eproto, c->rport);
+ hnputs(gre->eproto, c->rport);
}
- ghp->proto = IP_GREPROTO;
- ghp->frag[0] = 0;
- ghp->frag[1] = 0;
+ gre->proto = IP_GREPROTO;
+ gre->frag[0] = gre->frag[1] = 0;
+ grepdout++;
+ grebdout += BLEN(bp);
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
{
- int len;
- GREhdr *ghp;
- Conv *c, **p;
- ushort eproto;
+ Metablock *m;
+ GREconv *grec;
+ GREhdr *gre;
+ int hdrlen, suspended, extra;
+ ushort flags;
+ ulong seq;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1){
+ freeb(bp);
+ return;
+ }
+
+ /*
+ * We've received a packet with a GRE header and we need to
+ * re-adjust the packet header to strip all unwanted parts
+ * but leave room for only a sequence number.
+ */
+ grec = c->ptcl;
+ flags = nhgets(gre->flags);
+ hdrlen = 0;
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%V routing info present. Discarding packet", gre->src);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ /*
+ * The outgoing packet only has the sequence number set. Make room
+ * for the sequence number.
+ */
+ if(hdrlen != sizeof(ulong)){
+ extra = hdrlen - sizeof(ulong);
+ if(extra < 0 && bp->rp - bp->base < -extra){
+ print("gredownlink: cannot add sequence number\n");
+ freeb(bp);
+ return;
+ }
+ memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+ bp->rp += extra;
+ assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+ gre = (GREhdr *)bp->rp;
+ }
+ seq = grec->seq++;
+ hnputs(gre->flags, GRE_seq);
+ hnputl(bp->rp + sizeof(GREhdr), seq);
+
+ /*
+ * Keep rp and seq at the base. ipoput4 consumes rp for
+ * refragmentation.
+ */
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ m->rp = bp->rp;
+ m->seq = seq;
+
+ /*
+ * Here we make a decision what we're doing with the packet. We're
+ * doing this w/o holding a lock which means that later on in the
+ * process we may discover we've done the wrong thing. I don't want
+ * to call ipoput with the lock held.
+ */
+restart:
+ suspended = grec->dlsusp;
+ if(suspended){
+ if(!canqlock(&grec->lock)){
+ /*
+ * just give up. too bad, we lose a packet. this
+ * is just too hard and my brain already hurts.
+ */
+ freeb(bp);
+ return;
+ }
+
+ if(!grec->dlsusp){
+ /*
+ * suspend race. We thought we were suspended, but
+ * we really weren't.
+ */
+ qunlock(&grec->lock);
+ goto restart;
+ }
+
+ /* Undo the incorrect ref count addition */
+ addring(&grec->dlbuffered, bp);
+ qunlock(&grec->lock);
+ return;
+ }
+
+ /*
+ * When we get here, we're not suspended. Proceed to send the
+ * packet.
+ */
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+ grepdout++;
+ grebdout += BLEN(bp);
+
+ /*
+ * Now make sure we didn't do the wrong thing.
+ */
+ if(!canqlock(&grec->lock)){
+ freeb(bp); /* The packet just goes away */
+ return;
+ }
+
+ /* We did the right thing */
+ addring(&grec->dlpending, bp);
+ qunlock(&grec->lock);
+}
+
+static void
+greuplink(Conv *c, Block *bp)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ ushort flags;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1)
+ return;
+
+ grec = c->ptcl;
+ memmove(gre->src, grec->coa, sizeof gre->src);
+ memmove(gre->dst, grec->north, sizeof gre->dst);
+
+ /*
+ * Add a key, if needed.
+ */
+ if(grec->ulkey){
+ flags = nhgets(gre->flags);
+ if(flags & (GRE_cksum|GRE_routing)){
+ print("%V routing info present. Discarding packet\n",
+ gre->src);
+ freeb(bp);
+ return;
+ }
+
+ if((flags & GRE_key) == 0){
+ /* Make room for the key */
+ if(bp->rp - bp->base < sizeof(ulong)){
+ print("%V can't add key\n", gre->src);
+ freeb(bp);
+ return;
+ }
+
+ bp->rp -= 4;
+ memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+ gre = (GREhdr *)bp->rp;
+ hnputs(gre->flags, flags | GRE_key);
+ }
+
+ /* Add the key */
+ hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+ }
+
+ if(!canqlock(&grec->lock)){
+ freeb(bp);
+ return;
+ }
+
+ if(grec->ulsusp)
+ addring(&grec->ulbuffered, bp);
+ else{
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ grepuout++;
+ grebuout += BLEN(bp);
+ }
+ qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+ int len, hdrlen;
+ ushort eproto, flags;
uchar raddr[IPaddrlen];
+ Conv *c, **p;
+ GREconv *grec;
+ GREhdr *gre;
GREpriv *gpriv;
+ Ip4hdr *ip;
- gpriv = gre->priv;
- ghp = (GREhdr*)(bp->rp);
+ /*
+ * We don't want to deal with block lists. Ever. The problem is
+ * that when the block is forwarded, devether.c puts the block into
+ * a queue that also uses ->next. Just do not use ->next here!
+ */
+ if(bp->next != nil)
+ bp = pullupblock(bp, blocklen(bp));
- v4tov6(raddr, ghp->src);
- eproto = nhgets(ghp->eproto);
- qlock(gre);
+ gre = (GREhdr *)bp->rp;
+ if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+ freeb(bp);
+ return;
+ }
- /* Look for a conversation structure for this port and address */
- c = nil;
- for(p = gre->conv; *p; p++) {
+ v4tov6(raddr, gre->src);
+ eproto = nhgets(gre->eproto);
+ flags = nhgets(gre->flags);
+ hdrlen = sizeof(GREhdr);
+
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%I routing info present. Discarding packet\n", raddr);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ qlock(proto);
+
+ if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+ ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+ /*
+ * Look for a conversation structure for this port and address, or
+ * match the retunnel part, or match on the raw flag.
+ */
+ for(p = proto->conv; *p; p++) {
+ c = *p;
+
+ if(c->inuse == 0)
+ continue;
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+ grepdin++;
+ grebdin += BLEN(bp);
+ gredownlink(c, bp);
+ qunlock(proto);
+ return;
+ }
+
+ if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+ grepuin++;
+ grebuin += BLEN(bp);
+ greuplink(c, bp);
+ qunlock(proto);
+ return;
+ }
+ }
+ }
+
+
+ /*
+ * when we get here, none of the forwarding tunnels matched. now
+ * try to match on raw and conversational sessions.
+ */
+ for(c = nil, p = proto->conv; *p; p++) {
c = *p;
+
if(c->inuse == 0)
continue;
- if(c->rport == eproto &&
- (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(c->rport == eproto &&
+ (grec->raw || ipcmp(c->raddr, raddr) == 0))
break;
}
- if(*p == nil) {
- qunlock(gre);
- freeblist(bp);
+ qunlock(proto);
+
+ if(*p == nil){
+ freeb(bp);
return;
}
- qunlock(gre);
-
/*
* Trim the packet down to data size
*/
- len = nhgets(ghp->len) - GRE_IPONLY;
+ len = nhgets(gre->len) - GRE_IPONLY;
if(len < GRE_IPPLUSGRE){
- freeblist(bp);
+ freeb(bp);
return;
}
+
bp = trimblock(bp, GRE_IPONLY, len);
if(bp == nil){
+ gpriv = proto->priv;
gpriv->lenerr++;
return;
}
- /*
- * Can't delimit packet so pull it all into one block.
- */
- if(qlen(c->rq) > 64*1024)
- freeblist(bp);
- else{
- bp = concatblock(bp);
- if(bp == 0)
- panic("greiput");
- qpass(c->rq, bp);
- }
+ qpass(c->rq, bp);
}
int
@@ -234,29 +649,258 @@
GREpriv *gpriv;
gpriv = gre->priv;
+ return snprint(buf, len,
+ "gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+ grepdin, grepdout, grepuin, grepuout,
+ grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
- return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->raw = 1;
+ return nil;
}
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
{
- GREpriv *gpriv;
+ GREconv *grec;
- gpriv = c->p->priv;
- if(n == 1){
- if(strcmp(f[0], "raw") == 0){
- gpriv->raw = 1;
- return nil;
- }
- else if(strcmp(f[0], "cooked") == 0){
- gpriv->raw = 0;
- return nil;
- }
+ grec = c->ptcl;
+ grec->raw = 0;
+ return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+ uchar ipaddr[4];
+
+ grec = c->ptcl;
+ if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+ return "tunnel already set up";
+
+ v4parseip(ipaddr, argv[1]);
+ if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+ return "bad hoa";
+ memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+ v4parseip(ipaddr, argv[2]);
+ memmove(grec->north, ipaddr, sizeof grec->north);
+ v4parseip(ipaddr, argv[3]);
+ memmove(grec->south, ipaddr, sizeof grec->south);
+ v4parseip(ipaddr, argv[4]);
+ memmove(grec->coa, ipaddr, sizeof grec->coa);
+ grec->ulsusp = 1;
+ grec->dlsusp = 0;
+
+ return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+ ulong seq;
+ Block *bp;
+ Bring *r;
+ GREconv *grec;
+ Metablock *m;
+
+ grec = c->ptcl;
+ seq = strtoul(argv[1], nil, 0);
+
+ qlock(&grec->lock);
+ r = &grec->dlpending;
+ while(r->produced - r->consumed > 0){
+ bp = r->ring[r->consumed & Ringmask];
+
+ assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ if((long)(seq - m->seq) <= 0)
+ break;
+
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+
+ freeb(bp);
}
- return "unknown control request";
+ qunlock(&grec->lock);
+ return nil;
}
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->dlsusp)
+ return "already suspended";
+
+ grec->dlsusp = 1;
+ return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->ulsusp)
+ return "already suspended";
+
+ grec->ulsusp = 1;
+ return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ qunlock(&grec->lock);
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+ qlock(&grec->lock);
+ addring(&grec->dlpending, bp);
+ }
+ grec->dlsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ grec->ulsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+ Block *bp;
+ GREconv *grec;
+ GREhdr *gre;
+ Metablock *m;
+
+ grec = c->ptcl;
+
+ v4parseip(grec->south, argv[1]);
+ memmove(grec->north, grec->south, sizeof grec->north);
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+ grec->dlsusp = 0;
+ grec->ulsusp = 0;
+
+ while((bp = getring(&grec->dlpending)) != nil){
+
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ assert(m->rp >= bp->base && m->rp < bp->lim);
+
+ bp->rp = m->rp;
+
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->ulkey = strtoul(argv[1], nil, 0);
+ return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+ int i;
+
+ if(n < 1)
+ return "too few arguments";
+
+ for(i = 0; i < Ncmds; i++)
+ if(strcmp(f[0], grectls[i].cmd) == 0)
+ break;
+
+ if(i == Ncmds)
+ return "no such command";
+ if(grectls[i].argc != 0 && grectls[i].argc != n)
+ return "incorrect number of arguments";
+
+ return grectls[i].f(c, n, f);
+}
+
void
greinit(Fs *fs)
{
@@ -276,7 +920,7 @@
gre->stats = grestats;
gre->ipproto = IP_GREPROTO;
gre->nc = 64;
- gre->ptclsize = 0;
+ gre->ptclsize = sizeof(GREconv);
Fsproto(fs, gre);
}
diff -u a/os/ip//icmp.c b/os/ip//icmp.c
--- a/os/ip//icmp.c
+++ b/os/ip//icmp.c
@@ -44,11 +44,6 @@
Maxtype = 18,
};
-enum
-{
- MinAdvise = 24, /* minimum needed for us to advise another protocol */
-};
-
char *icmpnames[Maxtype+1] =
{
[EchoReply] "EchoReply",
@@ -70,6 +65,8 @@
IP_ICMPPROTO = 1,
ICMP_IPSIZE = 20,
ICMP_HDRSIZE = 8,
+
+ MinAdvise = ICMP_IPSIZE+4, /* minimum needed for us to advise another protocol */
};
enum
@@ -113,7 +110,7 @@
c->wq = qbypass(icmpkick, c);
}
-extern char*
+char*
icmpconnect(Conv *c, char **argv, int argc)
{
char *e;
@@ -126,11 +123,11 @@
return nil;
}
-extern int
+int
icmpstate(Conv *c, char *state, int n)
{
USED(c);
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
"Datagram",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
);
}
-extern char*
+char*
icmpannounce(Conv *c, char **argv, int argc)
{
char *e;
@@ -150,7 +147,7 @@
return nil;
}
-extern void
+void
icmpclose(Conv *c)
{
qclose(c->rq);
@@ -169,8 +166,7 @@
if(bp == nil)
return;
-
- if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+ if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
freeblist(bp);
return;
}
@@ -190,21 +186,50 @@
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
{
+ uchar addr[IPaddrlen];
+ int i;
+
+ v4tov6(addr, ip4);
+ if(ipismulticast(addr))
+ return 0;
+ i = ipforme(f, addr);
+ return i == 0 || i == Runi;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+ uchar addr[IPaddrlen];
+
+ v4tov6(addr, ip4);
+ if(ipismulticast(addr))
+ return 0;
+ return ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
Block *nbp;
Icmp *p, *np;
+ uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
+ if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+ return;
- netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+ ia, p->src, p->dst);
+
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
np = (Icmp *)nbp->rp;
np->vihl = IP_VER4;
+ memmove(np->src, ia, sizeof(np->src));
memmove(np->dst, p->src, sizeof(np->dst));
- v6tov4(np->src, ia);
memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
np->type = TimeExceed;
np->code = 0;
@@ -214,7 +239,6 @@
memset(np->cksum, 0, sizeof(np->cksum));
hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
}
static void
@@ -222,20 +246,10 @@
{
Block *nbp;
Icmp *p, *np;
- int i;
- uchar addr[IPaddrlen];
p = (Icmp *)bp->rp;
-
- /* only do this for unicast sources and destinations */
- v4tov6(addr, p->dst);
- i = ipforme(f, addr);
- if((i&Runi) == 0)
+ if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
return;
- v4tov6(addr, p->src);
- i = ipforme(f, addr);
- if(i != 0 && (i&Runi) == 0)
- return;
netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmpnoconv(Fs *f, Block *bp)
{
icmpunreachable(f, bp, 3, 0);
}
-extern void
+void
icmpcantfrag(Fs *f, Block *bp, int mtu)
{
icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
static void
goticmpkt(Proto *icmp, Block *bp)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
- v4tov6(dst, p->src);
+ v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
- if(ipcmp(s->raddr, dst) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
freeblist(bp);
}
static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
{
Icmp *q;
uchar ip[4];
q = (Icmp *)bp->rp;
+ if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+ return nil;
+
q->vihl = IP_VER4;
memmove(ip, q->src, sizeof(q->dst));
memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
[3] "port unreachable",
[4] "fragmentation needed and DF set",
[5] "source route failed",
+[6] "destination network unknown",
+[7] "destination host unknown",
+[8] "source host isolated",
+[9] "network administratively prohibited",
+[10] "host administratively prohibited",
+[11] "network unreachable for tos",
+[12] "host unreachable for tos",
+[13] "communication administratively prohibited",
+[14] "host precedence violation",
+[15] "precedence cutoff in effect",
};
static void
icmpiput(Proto *icmp, Ipifc*, Block *bp)
{
- int n, iplen;
+ int n;
Icmp *p;
Block *r;
Proto *pr;
@@ -332,12 +355,10 @@
Icmppriv *ipriv;
ipriv = icmp->priv;
-
ipriv->stats[InMsgs]++;
- p = (Icmp *)bp->rp;
- netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
- n = blocklen(bp);
+ bp = concatblock(bp);
+ n = BLEN(bp);
if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
ipriv->stats[InErrors]++;
ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
goto raise;
}
- iplen = nhgets(p->length);
- if(iplen > n || (iplen % 1)){
- ipriv->stats[LenErrs]++;
+ if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
ipriv->stats[InErrors]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto raise;
- }
- if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
- ipriv->stats[InErrors]++;
ipriv->stats[CsumErrs]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto raise;
}
+ p = (Icmp *)bp->rp;
+ netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+ (p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+ p->type, p->code);
if(p->type <= Maxtype)
ipriv->in[p->type]++;
switch(p->type) {
case EchoRequest:
- if (iplen < n)
- bp = trimblock(bp, 0, iplen);
- r = mkechoreply(bp);
+ r = mkechoreply(bp, icmp->f);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case Unreachable:
- if(p->code > 5)
- msg = unreachcode[1];
- else
+ if(p->code >= nelem(unreachcode)) {
+ snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+ p->src, p->dst, p->code);
+ msg = m2;
+ } else
msg = unreachcode[p->code];
+ Advise:
bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs]++;
goto raise;
}
p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
+ if((nhgets(p->frag) & IP_FO) == 0){
+ pr = Fsrcvpcolx(icmp->f, p->proto);
+ if(pr != nil && pr->advise != nil) {
+ (*pr->advise)(pr, bp, msg);
+ return;
+ }
}
-
bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
goticmpkt(icmp, bp);
break;
case TimeExceed:
if(p->code == 0){
- sprint(m2, "ttl exceeded at %V", p->src);
-
- bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
- ipriv->stats[LenErrs]++;
- goto raise;
- }
- p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
- return;
- }
- bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+ goto Advise;
}
-
goticmpkt(icmp, bp);
break;
default:
@@ -419,22 +428,25 @@
freeblist(bp);
}
-void
+static void
icmpadvise(Proto *icmp, Block *bp, char *msg)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, src) == 0)
if(ipcmp(s->raddr, dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -443,7 +455,7 @@
freeblist(bp);
}
-int
+static int
icmpstats(Proto *icmp, char *buf, int len)
{
Icmppriv *priv;
@@ -456,7 +468,7 @@
for(i = 0; i < Nstats; i++)
p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
for(i = 0; i <= Maxtype; i++){
- if(icmpnames[i])
+ if(icmpnames[i] != nil)
p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
else
p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
}
return p - buf;
}
-
-int
-icmpgc(Proto *icmp)
-{
- return natgc(icmp->ipproto);
-}
-
+
void
icmpinit(Fs *fs)
{
@@ -487,7 +493,7 @@
icmp->stats = icmpstats;
icmp->ctl = nil;
icmp->advise = icmpadvise;
- icmp->gc = icmpgc;
+ icmp->gc = nil;
icmp->ipproto = IP_ICMPPROTO;
icmp->nc = 128;
icmp->ptclsize = 0;
diff -u a/os/ip//icmp6.c b/os/ip//icmp6.c
--- a/os/ip//icmp6.c
+++ b/os/ip//icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,41 +10,36 @@
#include "ip.h"
#include "ipv6.h"
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+ InMsgs6,
+ InErrors6,
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6,
+ IcmpCodeErrs6,
+ TargetErrs6,
+ OptlenErrs6,
+ AddrmxpErrs6,
+ RouterAddrErrs6,
-struct ICMPpkt {
- uchar type;
- uchar code;
- uchar cksum[2];
- uchar icmpid[2];
- uchar seq[2];
+ Nstats6,
};
-struct IPICMP {
- Ip6hdr;
- ICMPpkt;
+enum {
+ ICMP_USEAD6 = 40,
};
-struct NdiscC
-{
- IPICMP;
- uchar target[IPaddrlen];
+enum {
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
};
-struct Ndpkt
-{
- NdiscC;
- uchar otype;
- uchar olen; // length in units of 8 octets(incl type, code),
- // 1 for IEEE 802 addresses
- uchar lnaddr[6]; // link-layer address
-};
-
-enum {
- // ICMPv6 types
+enum {
+ /* ICMPv6 types */
EchoReply = 0,
UnreachableV6 = 1,
PacketTooBigV6 = 2,
@@ -69,6 +67,65 @@
Maxtype6 = 137,
};
+enum {
+ MinAdvise = IP6HDR+4, /* minimum needed for us to advise another protocol */
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding */
+#define ICMPHDR \
+ IPV6HDR; \
+ uchar type; \
+ uchar code; \
+ uchar cksum[2]; \
+ uchar icmpid[2]; \
+ uchar seq[2]
+
+struct IPICMP {
+ ICMPHDR;
+ uchar payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar otype;
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+ uchar payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+ ulong stats[Nstats6];
+
+ /* message counts */
+ ulong in[Maxtype6+1];
+ ulong out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+ uchar headers;
+} Icmpcb6;
+
char *icmpnames6[Maxtype6+1] =
{
[EchoReply] "EchoReply",
@@ -95,24 +152,6 @@
[RedirectV6] "RedirectV6",
};
-enum
-{
- InMsgs6,
- InErrors6,
- OutMsgs6,
- CsumErrs6,
- LenErrs6,
- HlenErrs6,
- HoplimErrs6,
- IcmpCodeErrs6,
- TargetErrs6,
- OptlenErrs6,
- AddrmxpErrs6,
- RouterAddrErrs6,
-
- Nstats6,
-};
-
static char *statnames6[Nstats6] =
{
[InMsgs6] "InMsgs",
@@ -129,49 +168,18 @@
[RouterAddrErrs6] "RouterAddrErrs",
};
-typedef struct Icmppriv6
-{
- ulong stats[Nstats6];
-
- /* message counts */
- ulong in[Maxtype6+1];
- ulong out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6
-{
- QLock;
- uchar headers;
-} Icmpcb6;
-
static char *unreachcode[] =
{
-[icmp6_no_route] "no route to destination",
-[icmp6_ad_prohib] "comm with destination administratively prohibited",
-[icmp6_unassigned] "icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach] "address unreachable",
-[icmp6_port_unreach] "port unreachable",
-[icmp6_unkn_code] "icmp unreachable: unknown code",
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code",
};
-enum {
- ICMP_USEAD6 = 40,
-};
-
-enum {
- Oflag = 1<<5,
- Sflag = 1<<6,
- Rflag = 1<<7,
-};
-
-enum {
- slladd = 1,
- tlladd = 2,
- prfinfo = 3,
- redhdr = 4,
- mtuopt = 5,
-};
-
static void icmpkick6(void *x, Block *bp);
static void
@@ -185,13 +193,14 @@
set_cksum(Block *bp)
{
IPICMP *p = (IPICMP *)(bp->rp);
+ int n = blocklen(bp);
- hnputl(p->vcf, 0); // borrow IP header as pseudoheader
- hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+ hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */
+ hnputs(p->ploadlen, n - IP6HDR);
p->proto = 0;
- p->ttl = ICMPv6; // ttl gets set later
+ p->ttl = ICMPv6; /* ttl gets set later */
hnputs(p->cksum, 0);
- hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ hnputs(p->cksum, ptclcsum(bp, 0, n));
p->proto = ICMPv6;
}
@@ -198,7 +207,8 @@
static Block *
newIPICMP(int packetlen)
{
- Block *nbp;
+ Block *nbp;
+
nbp = allocb(packetlen);
nbp->wp += packetlen;
memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
return nbp;
}
-void
+static void
icmpadvise6(Proto *icmp, Block *bp, char *msg)
{
- Conv **c, *s;
- IPICMP *p;
- ushort recid;
+ ushort recid;
+ Conv **c, *s;
+ IPICMP *p;
- p = (IPICMP *) bp->rp;
+ p = (IPICMP *)bp->rp;
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, p->src) == 0)
if(ipcmp(s->raddr, p->dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -230,9 +242,9 @@
static void
icmpkick6(void *x, Block *bp)
{
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
Conv *c = x;
IPICMP *p;
- uchar laddr[IPaddrlen], raddr[IPaddrlen];
Icmppriv6 *ipriv = c->p->priv;
Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
@@ -249,10 +261,10 @@
bp->rp += IPaddrlen;
ipmove(raddr, bp->rp);
bp->rp += IPaddrlen;
- bp = padblock(bp, sizeof(Ip6hdr));
+ bp = padblock(bp, IP6HDR);
}
- if(blocklen(bp) < sizeof(IPICMP)){
+ if(BLEN(bp) < IPICMPSZ){
freeblist(bp);
return;
}
@@ -268,23 +280,20 @@
set_cksum(bp);
p->vcf[0] = 0x06 << 4;
- if(p->type <= Maxtype6)
+ if(p->type <= Maxtype6)
ipriv->out[p->type]++;
ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-char*
+static char*
icmpctl6(Conv *c, char **argv, int argc)
{
Icmpcb6 *icb;
icb = (Icmpcb6*) c->ptcl;
-
- if(argc==1) {
- if(strcmp(argv[0], "headers")==0) {
- icb->headers = 6;
- return nil;
- }
+ if(argc==1 && strcmp(argv[0], "headers")==0) {
+ icb->headers = 6;
+ return nil;
}
return "unknown control request";
}
@@ -292,41 +301,39 @@
static void
goticmpkt6(Proto *icmp, Block *bp, int muxkey)
{
- Conv **c, *s;
- IPICMP *p = (IPICMP *)bp->rp;
- ushort recid;
- uchar *addr;
+ ushort recid;
+ uchar *addr;
+ Conv **c, *s;
+ IPICMP *p = (IPICMP *)bp->rp;
if(muxkey == 0) {
recid = nhgets(p->icmpid);
addr = p->src;
- }
- else {
+ } else {
recid = muxkey;
addr = p->dst;
}
-
- for(c = icmp->conv; *c; c++){
- s = *c;
- if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ for(c = icmp->conv; (s = *c) != nil; c++){
+ if(s->lport == recid)
+ if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
-
freeblist(bp);
}
static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
{
+ uchar addr[IPaddrlen];
IPICMP *p = (IPICMP *)(bp->rp);
- uchar addr[IPaddrlen];
+ if(isv6mcast(p->src))
+ return nil;
ipmove(addr, p->src);
- ipmove(p->src, p->dst);
+ if(!isv6mcast(p->dst))
+ ipmove(p->src, p->dst);
+ else if (!ipv6local(ifc, p->src, 0, addr))
+ return nil;
ipmove(p->dst, addr);
p->type = EchoReplyV6;
set_cksum(bp);
@@ -335,49 +342,43 @@
/*
* sends out an ICMPv6 neighbor solicitation
- * suni == SRC_UNSPEC or SRC_UNI,
+ * suni == SRC_UNSPEC or SRC_UNI,
* tuni == TARG_MULTI => multicast for address resolution,
* and tuni == TARG_UNI => neighbor reachability.
*/
-
-extern void
+void
icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
-
- nbp = newIPICMP(sizeof(Ndpkt));
+ nbp = newIPICMP(NDPKTSZ);
np = (Ndpkt*) nbp->rp;
+ if(suni == SRC_UNSPEC)
+ ipmove(np->src, v6Unspecified);
+ else
+ ipmove(np->src, src);
- if(suni == SRC_UNSPEC)
- memmove(np->src, v6Unspecified, IPaddrlen);
- else
- memmove(np->src, src, IPaddrlen);
-
if(tuni == TARG_UNI)
- memmove(np->dst, targ, IPaddrlen);
+ ipmove(np->dst, targ);
else
ipv62smcast(np->dst, targ);
np->type = NbrSolicit;
np->code = 0;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
if(suni != SRC_UNSPEC) {
- np->otype = SRC_LLADDRESS;
- np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ np->otype = SRC_LLADDR;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
- }
- else {
- int r = sizeof(Ndpkt)-sizeof(NdiscC);
- nbp->wp -= r;
- }
+ } else
+ nbp->wp -= NDPKTSZ - NDISCSZ;
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
/*
* sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
*/
-extern void
+void
icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
- nbp = newIPICMP(sizeof(Ndpkt));
- np = (Ndpkt*) nbp->rp;
+ nbp = newIPICMP(NDPKTSZ);
+ np = (Ndpkt*)nbp->rp;
- memmove(np->src, src, IPaddrlen);
- memmove(np->dst, dst, IPaddrlen);
+ ipmove(np->src, src);
+ ipmove(np->dst, dst);
np->type = NbrAdvert;
np->code = 0;
np->icmpid[0] = flags;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
- np->otype = TARGET_LLADDRESS;
- np->olen = 1;
+ np->otype = TARGET_LLADDR;
+ np->olen = 1;
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrAdvert]++;
- netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+ return;
- if(isv6mcast(p->src))
- goto clean;
+ netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
-
- rlock(ifc);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
- freeblist(nbp);
- if(free)
- goto clean;
- else
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = UnreachableV6;
np->code = code;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[UnreachableV6]++;
- if(free)
+ if(tome)
ipiput6(f, ifc, nbp);
- else {
+ else
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
- return;
- }
-
-clean:
- runlock(ifc);
- freeblist(bp);
}
-extern void
+void
icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
+ netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
+
nbp = newIPICMP(sz);
np = (IPICMP *) nbp->rp;
-
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = TimeExceedV6;
np->code = 0;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
- nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
+ netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ nbp = newIPICMP(sz);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = PacketTooBigV6;
np->code = 0;
hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
* RFC 2461, pages 39-40, pages 57-58.
*/
static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
- int sz, osz, unsp, n, ttl, iplen;
- int pktsz = BLEN(bp);
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *) packet;
- Ndpkt *np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+ int sz, osz, unsp, ttl;
+ int pktsz = BLEN(bp);
+ uchar *packet = bp->rp;
+ IPICMP *p = (IPICMP *) packet;
+ Ndpkt *np;
- USED(ifc);
- n = blocklen(bp);
- if(n < sizeof(IPICMP)) {
+ if(pktsz < IPICMPSZ) {
ipriv->stats[HlenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+ netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
goto err;
}
- iplen = nhgets(p->ploadlen);
- if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
- ipriv->stats[LenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto err;
- }
-
- // Rather than construct explicit pseudoheader, overwrite IPv6 header
+ /* Rather than construct explicit pseudoheader, overwrite IPv6 header */
if(p->proto != ICMPv6) {
- // This code assumes no extension headers!!!
+ /* This code assumes no extension headers!!! */
netlog(icmp->f, Logicmp, "icmp error: extension header\n");
goto err;
}
@@ -586,7 +553,7 @@
ttl = p->ttl;
p->ttl = p->proto;
p->proto = 0;
- if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+ if(ptclcsum(bp, 0, pktsz)) {
ipriv->stats[CsumErrs6]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto err;
@@ -595,19 +562,16 @@
p->ttl = ttl;
/* additional tests for some pkt types */
- if( (p->type == NbrSolicit) ||
- (p->type == NbrAdvert) ||
- (p->type == RouterAdvert) ||
- (p->type == RouterSolicit) ||
- (p->type == RedirectV6) ) {
-
+ if (p->type == NbrSolicit || p->type == NbrAdvert ||
+ p->type == RouterAdvert || p->type == RouterSolicit ||
+ p->type == RedirectV6) {
if(p->ttl != HOP_LIMIT) {
- ipriv->stats[HoplimErrs6]++;
- goto err;
+ ipriv->stats[HoplimErrs6]++;
+ goto err;
}
if(p->code != 0) {
- ipriv->stats[IcmpCodeErrs6]++;
- goto err;
+ ipriv->stats[IcmpCodeErrs6]++;
+ goto err;
}
switch (p->type) {
@@ -615,82 +579,78 @@
case NbrAdvert:
np = (Ndpkt*) p;
if(isv6mcast(np->target)) {
- ipriv->stats[TargetErrs6]++;
- goto err;
+ ipriv->stats[TargetErrs6]++;
+ goto err;
}
- if(optexsts(np) && (np->olen == 0)) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ if(optexsts(np) && np->olen == 0) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
-
- if(p->type == NbrSolicit) {
- if(ipcmp(np->src, v6Unspecified) == 0) {
- if(!issmcast(np->dst) || optexsts(np)) {
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
- }
+
+ if (p->type == NbrSolicit &&
+ ipcmp(np->src, v6Unspecified) == 0)
+ if(!issmcast(np->dst) || optexsts(np)) {
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
-
- if(p->type == NbrAdvert) {
- if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
+
+ if(p->type == NbrAdvert)
+ if(isv6mcast(np->dst) &&
+ (nhgets(np->icmpid) & Sflag)){
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
break;
-
+
case RouterAdvert:
- if(pktsz - sizeof(Ip6hdr) < 16) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 16) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
if(!islinklocal(p->src)) {
- ipriv->stats[RouterAddrErrs6]++;
- goto err;
+ ipriv->stats[RouterAddrErrs6]++;
+ goto err;
}
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
if(osz <= 0) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
- }
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
+ }
sz += 8*osz;
}
break;
-
+
case RouterSolicit:
- if(pktsz - sizeof(Ip6hdr) < 8) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 8) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
unsp = (ipcmp(p->src, v6Unspecified) == 0);
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
- if((osz <= 0) ||
- (unsp && (*(packet+sz) == slladd)) ) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
+ if(osz <= 0 ||
+ (unsp && packet[sz] == SRC_LLADDR)) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
sz += 8*osz;
}
break;
-
+
case RedirectV6:
- //to be filled in
+ /* to be filled in */
break;
-
+
default:
goto err;
}
}
-
return 1;
-
err:
- ipriv->stats[InErrors6]++;
+ ipriv->stats[InErrors6]++;
return 0;
}
@@ -700,169 +660,162 @@
Iplifc *lifc;
int t;
- rlock(ifc);
- if(ipproxyifc(f, ifc, target)) {
- runlock(ifc);
- return t_uniproxy;
- }
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, target) == 0) {
- t = (lifc->tentative) ? t_unitent : t_unirany;
- runlock(ifc);
- return t;
- }
- }
-
- runlock(ifc);
- return 0;
+ if((lifc = iplocalonifc(ifc, target)) != nil)
+ t = lifc->tentative? Tunitent: Tunirany;
+ else if(ipproxyifc(f, ifc, target))
+ t = Tuniproxy;
+ else
+ t = 0;
+ return t;
}
static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
{
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *)packet;
- Icmppriv6 *ipriv = icmp->priv;
- Block *r;
- Proto *pr;
- char *msg, m2[128];
- Ndpkt* np;
+ char *msg, m2[128];
uchar pktflags;
- uchar lsrc[IPaddrlen];
- int refresh = 1;
+ uchar ia[IPaddrlen];
+ Block *r;
+ IPICMP *p;
+ Icmppriv6 *ipriv = icmp->priv;
Iplifc *lifc;
+ Ndpkt* np;
+ Proto *pr;
- if(!valid(icmp, ipifc, bp, ipriv))
- goto raise;
+ bp = concatblock(bp);
+ p = (IPICMP*)bp->rp;
- if(p->type <= Maxtype6)
- ipriv->in[p->type]++;
- else
+ if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
goto raise;
+ ipriv->in[p->type]++;
+
switch(p->type) {
case EchoRequestV6:
- r = mkechoreply6(bp);
+ r = mkechoreply6(bp, ifc);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case UnreachableV6:
- if(p->code > 4)
- msg = unreachcode[icmp6_unkn_code];
+ if(p->code >= nelem(unreachcode))
+ msg = unreachcode[Icmp6_unknown];
else
msg = unreachcode[p->code];
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
+ Advise:
+ bp->rp += IPICMPSZ;
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs6]++;
goto raise;
}
p = (IPICMP *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
- }
- bp->rp -= sizeof(IPICMP);
- goticmpkt6(icmp, bp, 0);
- break;
-
- case TimeExceedV6:
- if(p->code == 0){
- sprint(m2, "ttl exceeded at %I", p->src);
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
- ipriv->stats[LenErrs6]++;
- goto raise;
+ /* get rid of fragment header if this is the first fragment */
+ if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+ Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+ if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */
+ p->proto = fh->nexthdr;
+ /* copy down payload over fragment header */
+ bp->rp += IP6HDR;
+ bp->wp -= IP6FHDR;
+ memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+ hnputs(p->ploadlen, BLEN(bp));
+ bp->rp -= IP6HDR;
}
- p = (IPICMP *)bp->rp;
+ }
+ if(p->proto != FH){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
+ (*pr->advise)(pr, bp, msg);
return;
}
- bp->rp -= sizeof(IPICMP);
}
+ bp->rp -= IPICMPSZ;
+ goticmpkt6(icmp, bp, 0);
+ break;
+ case TimeExceedV6:
+ if(p->code == 0){
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+ goto Advise;
+ }
+ if(p->code == 1){
+ snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+ goto Advise;
+ }
goticmpkt6(icmp, bp, 0);
break;
+ case PacketTooBigV6:
+ snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+ (ulong)nhgetl(p->icmpid), p->src);
+ goto Advise;
+
case RouterAdvert:
case RouterSolicit:
- /* using lsrc as a temp, munge hdr for goticmp6
- memmove(lsrc, p->src, IPaddrlen);
- memmove(p->src, p->dst, IPaddrlen);
- memmove(p->dst, lsrc, IPaddrlen); */
-
goticmpkt6(icmp, bp, p->type);
break;
case NbrSolicit:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
pktflags = 0;
- switch (targettype(icmp->f, ipifc, np->target)) {
- case t_unirany:
+ if(ifc->sendra6)
+ pktflags |= Rflag;
+ switch (targettype(icmp->f, ifc, np->target)) {
+ case Tunirany:
pktflags |= Oflag;
/* fall through */
- case t_uniproxy:
- if(ipcmp(np->src, v6Unspecified) != 0) {
- arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+ case Tuniproxy:
+ if(ipv6local(ifc, ia, 0, np->src)) {
+ if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+ break;
pktflags |= Sflag;
- }
- if(ipv6local(ipifc, lsrc)) {
- icmpna(icmp->f, lsrc,
- (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
- np->target, ipifc->mac, pktflags);
- }
- else
- freeblist(bp);
+ } else
+ ipmove(ia, np->target);
+ icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+ np->target, ifc->mac, pktflags);
break;
-
- case t_unitent:
- /* not clear what needs to be done. send up
- * an icmp mesg saying don't use this address? */
-
- default:
- freeblist(bp);
+ case Tunitent:
+ /*
+ * not clear what needs to be done. send up
+ * an icmp mesg saying don't use this address?
+ */
+ break;
}
-
+ freeblist(bp);
break;
case NbrAdvert:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
- /* if the target address matches one of the local interface
- * address and the local interface address has tentative bit set,
- * then insert into ARP table. this is so the duplication address
- * detection part of ipconfig can discover duplication through
- * the arp table
+ /*
+ * if the target address matches one of the local interface
+ * addresses and the local interface address has tentative bit
+ * set, insert into ARP table. this is so the duplicate address
+ * detection part of ipconfig can discover duplication through
+ * the arp table.
*/
- lifc = iplocalonifc(ipifc, np->target);
- if(lifc && lifc->tentative)
- refresh = 0;
- arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+ lifc = iplocalonifc(ifc, np->target);
+ if(lifc != nil && lifc->tentative)
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+ else if(ipv6local(ifc, ia, 0, np->target))
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
freeblist(bp);
break;
- case PacketTooBigV6:
-
default:
goticmpkt6(icmp, bp, 0);
break;
}
return;
-
raise:
freeblist(bp);
-
}
-int
+static int
icmpstats6(Proto *icmp6, char *buf, int len)
{
Icmppriv6 *priv;
@@ -874,23 +827,28 @@
e = p+len;
for(i = 0; i < Nstats6; i++)
p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
- for(i = 0; i <= Maxtype6; i++){
+ for(i = 0; i <= Maxtype6; i++)
if(icmpnames6[i])
- p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/* else
- p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
- }
+ p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+ priv->in[i], priv->out[i]);
return p - buf;
}
-// need to import from icmp.c
+/* import from icmp.c */
extern int icmpstate(Conv *c, char *state, int n);
extern char* icmpannounce(Conv *c, char **argv, int argc);
extern char* icmpconnect(Conv *c, char **argv, int argc);
extern void icmpclose(Conv *c);
+static void
+icmpclose6(Conv *c)
+{
+ Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+ icb->headers = 0;
+ icmpclose(c);
+}
+
void
icmp6init(Fs *fs)
{
@@ -902,7 +860,7 @@
icmp6->announce = icmpannounce;
icmp6->state = icmpstate;
icmp6->create = icmpcreate6;
- icmp6->close = icmpclose;
+ icmp6->close = icmpclose6;
icmp6->rcv = icmpiput6;
icmp6->stats = icmpstats6;
icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
Fsproto(fs, icmp6);
}
-
diff -u a/os/ip//igmp.c b/os/ip//igmp.c
--- a/os/ip//igmp.c
+++ b/os/ip//igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -40,8 +44,12 @@
uchar unused;
uchar igmpcksum[2]; /* checksum of igmp portion */
uchar group[IPaddrlen]; /* multicast group */
+
+ uchar payload[];
};
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
/*
* lists for group reports
*/
@@ -49,7 +57,7 @@
struct IGMPrep
{
IGMPrep *next;
- Media *m;
+ Medium *m;
int ticks;
Multicast *multi;
};
@@ -76,19 +84,17 @@
} stats;
void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
{
IGMPpkt *p;
Block *bp;
bp = allocb(sizeof(IGMPpkt));
- if(bp == nil)
- return;
p = (IGMPpkt*)bp->wp;
p->vihl = IP_VER4;
- bp->wp += sizeof(IGMPpkt);
- memset(bp->rp, 0, sizeof(IGMPpkt));
- hnputl(p->src, Mediagetaddr(m));
+ bp->wp += IGMPPKTSZ;
+ memset(bp->rp, 0, IGMPPKTSZ);
+ hnputl(p->src, Mediumgetaddr(m));
hnputl(p->dst, Ipallsys);
p->vertype = (1<<4) | IGMPreport;
p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
}
void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
{
int n;
IGMPpkt *ghp;
@@ -206,7 +212,7 @@
if(rp != nil)
break; /* already reporting */
- mp = Mediacopymulti(m);
+ mp = Mediumcopymulti(m);
if(mp == nil)
break;
@@ -285,7 +291,7 @@
igmp.ptclsize = 0;
igmpreportfn = igmpsendreport;
- kproc("igmpproc", igmpproc, 0, 0);
+ kproc("igmpproc", igmpproc, 0);
Fsproto(fs, &igmp);
}
diff -u a/os/ip//il.c b/os/ip//il.c
--- a/os/ip//il.c
+++ b/os/ip//il.c
@@ -189,7 +189,7 @@
{
Ipht ht;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
ulong csumerr; /* checksum errors */
ulong hlenerr; /* header length error */
@@ -208,7 +208,7 @@
void ilrcvmsg(Conv*, Block*);
-void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
void ilackq(Ilcb*, Block*);
void ilprocess(Conv*, Ilhdr*, Block*);
void ilpullup(Conv*);
@@ -251,6 +251,9 @@
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
+ if(c->ipversion != V4)
+ return "only IP version 4 supported";
+
return ilstart(c, IL_CONNECT, fast);
}
@@ -260,7 +263,7 @@
Ilcb *ic;
ic = (Ilcb*)(c->ptcl);
- return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
ilstates[ic->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -548,6 +551,9 @@
ih = (Ilhdr *)bp->rp;
plen = blocklen(bp);
+ if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+ goto raise; /* ignore non V4 packets */
+
if(plen < IL_IPSIZE+IL_HDRSIZE){
netlog(il->f, Logil, "il: hlenerr\n");
ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
else
st = iltype[ih->iltype];
ipriv->stats[CsumErrs]++;
- netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+ netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %ud ack %ud %I/%d->%d)\n",
csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -595,7 +601,7 @@
else
st = iltype[ih->iltype];
ilreject(il->f, ih); /* no channel and not sync */
- netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+ netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -829,7 +835,7 @@
c = ic->conv;
id = nhgetl(h->ilid);
- netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+ netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
ic->rexmit, ic->timeout,
c->raddr, c->lport, c->rport);
@@ -852,7 +858,7 @@
ic = (Ilcb*)s->ptcl;
USED(ic);
- netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %ud ack %ud %ud->%ud) ",
ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
ic->next, iltype[h->iltype], nhgetl(h->ilid),
nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
_ilprocess(s, h, bp);
- netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
}
void
@@ -917,17 +923,12 @@
bp->list = nil;
dlen = nhgets(oh->illen)-IL_HDRSIZE;
bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+
/*
* Upper levels don't know about multiple-block
* messages so copy all into one (yick).
*/
- bp = concatblock(bp);
- if(bp == 0)
- panic("ilpullup");
- bp = packblock(bp);
- if(bp == 0)
- panic("ilpullup2");
- qpass(s->rq, bp);
+ qpass(s->rq, packblock(concatblock(bp)));
}
qunlock(&ic->outo);
}
@@ -948,7 +949,7 @@
id = nhgetl(h->ilid);
/* Window checks */
if(id <= ic->recvd || id > ic->recvd+ic->window) {
- netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
freeblist(bp);
return;
@@ -983,7 +984,7 @@
qunlock(&ic->outo);
}
-void
+int
ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
{
Ilhdr *ih;
@@ -1034,7 +1035,7 @@
hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
if(ipc==nil)
- panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+ panic("ipc is nil caller is %#p", getcallerpc(&ipc));
if(ipc->p==nil)
panic("ipc->p is nil");
@@ -1042,7 +1043,7 @@
iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
nhgets(ih->ilsrc), nhgets(ih->ildst));
- ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+ return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
}
void
@@ -1145,6 +1146,8 @@
il = x;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Iltickms);
for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
qlock(&ipriv->apl);
if(ipriv->ackprocstarted == 0){
sprint(kpname, "#I%dilack", c->p->f->dev);
- kproc(kpname, ilackproc, c->p, 0);
+ kproc(kpname, ilackproc, c->p);
ipriv->ackprocstarted = 1;
}
qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
case IL_CONNECT:
ic->state = Ilsyncer;
iphtadd(&ipriv->ht, c);
- ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+ if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+ ilhangup(c, "no route");
break;
}
@@ -1332,6 +1336,8 @@
if(s->lport == psource)
if(ipcmp(s->laddr, source) == 0)
if(ipcmp(s->raddr, dest) == 0){
+ if(s->ignoreadvice)
+ break;
qunlock(il);
ic = (Ilcb*)s->ptcl;
switch(ic->state){
@@ -1380,12 +1386,6 @@
}
}
-int
-ilgc(Proto *il)
-{
- return natgc(il->ipproto);
-}
-
void
ilinit(Fs *f)
{
@@ -1406,7 +1406,7 @@
il->advise = iladvise;
il->stats = ilxstats;
il->inuse = ilinuse;
- il->gc = ilgc;
+ il->gc = nil;
il->ipproto = IP_ILPROTO;
il->nc = scalednconv();
il->ptclsize = sizeof(Ilcb);
diff -u a/os/ip//ip.c b/os/ip//ip.c
--- a/os/ip//ip.c
+++ b/os/ip//ip.c
@@ -7,94 +7,6 @@
#include "ip.h"
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = 64*1024, /* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
-
- Nstats,
-};
-
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
-
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
-
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
static char *statnames[] =
{
[Forwarding] "Forwarding",
@@ -118,45 +30,11 @@
[FragCreates] "FragCreates",
};
-#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
+static Block* ip4reassemble(IP*, int, Block*);
+static void ipfragfree4(IP*, Fragment4*);
+static Fragment4* ipfragallo4(IP*);
-ushort ipcsum(uchar*);
-Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void ipfragfree4(IP*, Fragment4*);
-Fragment4* ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
- V6params *v6p;
-
- v6p = smalloc(sizeof(V6params));
-
- v6p->rp.mflag = 0; // default not managed
- v6p->rp.oflag = 0;
- v6p->rp.maxraint = 600000; // millisecs
- v6p->rp.minraint = 200000;
- v6p->rp.linkmtu = 0; // no mtu sent
- v6p->rp.reachtime = 0;
- v6p->rp.rxmitra = 0;
- v6p->rp.ttl = MAXTTL;
- v6p->rp.routerlt = 3*(v6p->rp.maxraint);
-
- v6p->hp.rxmithost = 1000; // v6 RETRANS_TIMER
-
- v6p->cdrouter = -1;
-
- f->v6p = v6p;
-
-}
-
-void
+static void
initfrag(IP *ip, int size)
{
Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
IP *ip;
ip = smalloc(sizeof(IP));
+ ip->stats[DefaultTTL] = MAXTTL;
initfrag(ip, 100);
f->ip = ip;
@@ -202,11 +81,11 @@
if(f->ip->iprouting==0)
f->ip->stats[Forwarding] = 2;
else
- f->ip->stats[Forwarding] = 1;
+ f->ip->stats[Forwarding] = 1;
}
int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
Ipifc *ifc;
uchar *gate;
@@ -213,66 +92,41 @@
ulong fragoff;
Block *xp, *nb;
Ip4hdr *eh, *feh;
- int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
- Route *r, *sr;
+ int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+ Route *r;
IP *ip;
int rv = 0;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip4hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)bp->rp;
+ assert(BLEN(bp) >= IP4HDR);
len = blocklen(bp);
-
- if(gating){
- chunk = nhgets(eh->length);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk < len)
- len = chunk;
- }
if(len >= IP_MAX){
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v4lookup(f, eh->dst, c);
- if(r == nil){
+ r = v4lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v4lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v4.gate;
- if(!gating)
- eh->vihl = IP_VER4|IP_HLEN4;
- eh->ttl = ttl;
- if(!gating)
- eh->tos = tos;
-
- if(!canrlock(ifc))
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
+ }
if(waserror()){
runlock(ifc);
nexterror();
@@ -280,17 +134,18 @@
if(ifc->m == nil)
goto raise;
- /* Output NAT */
- if(nato(bp, ifc, f) != 0)
- goto raise;
+ if(!gating){
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->tos = tos;
+ }
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- if(!gating)
- hnputs(eh->id, incref(&ip->id4));
hnputs(eh->length, len);
if(!gating){
+ hnputs(eh->id, incref(&ip->id4));
eh->frag[0] = 0;
eh->frag[1] = 0;
}
@@ -297,31 +152,31 @@
eh->cksum[0] = 0;
eh->cksum[1] = 0;
hnputs(eh->cksum, ipcsum(&eh->vihl));
- ifc->m->bwrite(ifc, bp, V4, gate);
+
+ ipifcoput(ifc, bp, V4, gate);
runlock(ifc);
poperror();
return 0;
}
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
if(eh->frag[0] & (IP_DF>>8)){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
icmpcantfrag(f, bp, medialen);
- netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
goto raise;
}
- seglen = (medialen - IP4HDR) & ~7;
+ hlen = (eh->vihl & 0xF)<<2;
+ seglen = (medialen - hlen) & ~7;
if(seglen < 8){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
goto raise;
}
- dlen = len - IP4HDR;
+ dlen = len - hlen;
xp = bp;
if(gating)
lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
else
lid = incref(&ip->id4);
- offset = IP4HDR;
- while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset = hlen;
+ while(offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
@@ -341,30 +196,30 @@
fragoff = 0;
dlen += fragoff;
for(; fragoff < dlen; fragoff += seglen) {
- nb = allocb(IP4HDR+seglen);
- feh = (Ip4hdr*)(nb->rp);
+ nb = allocb(hlen+seglen);
+ feh = (Ip4hdr*)nb->rp;
- memmove(nb->wp, eh, IP4HDR);
- nb->wp += IP4HDR;
+ memmove(nb->wp, eh, hlen);
+ nb->wp += hlen;
if((fragoff + seglen) >= dlen) {
seglen = dlen - fragoff;
hnputs(feh->frag, fragoff>>3);
}
- else
+ else
hnputs(feh->frag, (fragoff>>3)|IP_MF);
- hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->length, seglen + hlen);
hnputs(feh->id, lid);
/* Copy up the data area */
chunk = seglen;
while(chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -376,12 +231,13 @@
chunk -= blklen;
if(xp->rp == xp->wp)
xp = xp->next;
- }
+ }
feh->cksum[0] = 0;
feh->cksum[1] = 0;
hnputs(feh->cksum, ipcsum(&feh->vihl));
- ifc->m->bwrite(ifc, nb, V4, gate);
+
+ ipifcoput(ifc, nb, V4, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
void
ipiput4(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos, proto, olen;
+ int hl, len, hop, tos;
+ uchar v6dst[IPaddrlen];
+ ushort frag;
Ip4hdr *h;
Proto *p;
- ushort frag;
- int notforme;
- uchar *dp, v6dst[IPaddrlen];
IP *ip;
- Route *r;
- if(BLKIPVER(bp) != IP_VER4) {
+ if((bp->rp[0]&0xF0) != IP_VER4) {
ipiput6(f, ifc, bp);
return;
}
@@ -430,58 +283,45 @@
return;
}
- h = (Ip4hdr*)(bp->rp);
-
- /* Input NAT */
- nati(bp, ifc);
-
- /* dump anything that whose header doesn't checksum */
+ h = (Ip4hdr*)bp->rp;
+ hl = (h->vihl & 0xF)<<2;
+ if(hl < IP4HDR || hl > BLEN(bp)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+ goto drop;
+ }
if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: checksum error %V\n", h->src);
- freeblist(bp);
+ netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+ goto drop;
+ }
+ len = nhgets(h->length);
+ if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+ if(bp != nil)
+ goto drop;
return;
}
- v4tov6(v6dst, h->dst);
- notforme = ipforme(f, v6dst) == 0;
+ h = (Ip4hdr*)bp->rp;
- /* Check header length and version */
- if((h->vihl&0x0F) != IP_HLEN4) {
- hl = (h->vihl&0xF)<<2;
- if(hl < (IP_HLEN4<<2)) {
- ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
- freeblist(bp);
- return;
- }
- /* If this is not routed strip off the options */
- if(notforme == 0) {
- olen = nhgets(h->length);
- dp = bp->rp + (hl - (IP_HLEN4<<2));
- memmove(dp, h, IP_HLEN4<<2);
- bp->rp = dp;
- h = (Ip4hdr*)(bp->rp);
- h->vihl = (IP_VER4|IP_HLEN4);
- hnputs(h->length, olen-hl+(IP_HLEN4<<2));
- }
- }
-
/* route */
- if(notforme) {
- Conv conv;
+ v4tov6(v6dst, h->dst);
+ if(!ipforme(f, v6dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
- if(!ip->iprouting){
- freeb(bp);
- return;
- }
+ if(!ip->iprouting)
+ goto drop;
/* don't forward to source's network */
- conv.r = nil;
- r = v4lookup(f, h->dst, &conv);
- if(r == nil || r->ifc == ifc){
+ rh.r = nil;
+ r = v4lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
hop = h->ttl;
if(hop < 1) {
ip->stats[InHdrErrors]++;
- icmpttlexceeded(f, ifc->lifc->local, bp);
- freeblist(bp);
- return;
+ icmpttlexceeded(f, ifc, bp);
+ goto drop;
}
/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
- if(r->ifc->reassemble){
+ if(nifc->reassemble){
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
}
@@ -511,27 +346,30 @@
ip->stats[ForwDatagrams]++;
tos = h->tos;
hop = h->ttl;
- ipoput4(f, bp, 1, hop - 1, tos, &conv);
+ ipoput4(f, bp, 1, hop - 1, tos, &rh);
return;
}
+ /* If this is not routed strip off the options */
+ if(hl > IP4HDR) {
+ hl -= IP4HDR;
+ len -= hl;
+ bp->rp += hl;
+ memmove(bp->rp, h, IP4HDR);
+ h = (Ip4hdr*)bp->rp;
+ h->vihl = IP_VER4|IP_HLEN4;
+ hnputs(h->length, len);
+ }
+
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
- /* don't let any frag info go up the stack */
- h->frag[0] = 0;
- h->frag[1] = 0;
-
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
}
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -550,45 +389,43 @@
int i;
ip = f->ip;
- ip->stats[DefaultTTL] = MAXTTL;
-
p = buf;
e = p+len;
- for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+ for(i = 0; i < Nipstats; i++)
+ p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
return p - buf;
}
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
{
- int fend;
+ int ovlap, fragsize, len;
+ ulong src, dst;
ushort id;
+ Block *bl, **l, *prev;
Fragment4 *f, *fnext;
- ulong src, dst;
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Ipfrag *fp, *fq;
+ Ip4hdr *ih;
+ /*
+ * block lists are too hard, concatblock into a single block
+ */
+ bp = concatblock(bp);
+
+ ih = (Ip4hdr*)bp->rp;
src = nhgetl(ih->src);
dst = nhgetl(ih->dst);
id = nhgets(ih->id);
+ fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
- /*
- * block lists are too hard, pullupblock into a single block
- */
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip4hdr*)(bp->rp);
- }
-
qlock(&ip->fraglock4);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead4; f; f = fnext){
+ for(f = ip->flisthead4; f != nil; f = fnext){
fnext = f->next; /* because ipfragfree4 changes the list */
- if(f->src == src && f->dst == dst && f->id == id)
+ if(f->id == id && f->src == src && f->dst == dst)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
* and get rid of any fragments that might go
* with it.
*/
- if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+ if((offset & (IP_MF|IP_FO)) == 0) {
if(f != nil) {
- ipfragfree4(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree4(ip, f);
}
qunlock(&ip->fraglock4);
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset<<3;
- BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = (offset & IP_FO)<<3;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -627,8 +465,9 @@
f->blist = bp;
- qunlock(&ip->fraglock4);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock4);
+
return nil;
}
@@ -638,7 +477,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -645,15 +484,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock4);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -662,26 +502,26 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
if(ovlap <= 0)
break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
- (*l)->rp += ovlap;
+ if(ovlap < fq->flen) {
+ /* move up ip header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -689,35 +529,50 @@
* look for a complete packet. if we get to a fragment
* without IP_MF set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
- if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
- bl = f->blist;
- len = nhgets(BLKIP(bl)->length);
- bl->wp = bl->rp + len;
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += IP4HDR;
- bl->wp = bl->rp + fragsize;
- }
+ ih = (Ip4hdr*)bl->rp;
+ if(ih->frag[0]&(IP_MF>>8))
+ continue;
- bl = f->blist;
- f->blist = nil;
+ bl = f->blist;
+ len = BLEN(bl);
+
+ /*
+ * Strip the headers from every fragment after the
+ * first and return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
+
+ if(len >= IP_MAX){
ipfragfree4(ip, f);
- ih = BLKIP(bl);
- hnputs(ih->length, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock4);
- ip->stats[ReasmOKs]++;
- return bl;
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree4(ip, f);
+
+ ih = (Ip4hdr*)bl->rp;
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputs(ih->length, len);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock4);
+
+ return bl;
}
qunlock(&ip->fraglock4);
return nil;
@@ -726,20 +581,20 @@
/*
* ipfragfree4 - Free a list of fragments - assume hold fraglock4
*/
-void
+static void
ipfragfree4(IP *ip, Fragment4 *frag)
{
Fragment4 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- frag->src = 0;
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ frag->src = 0;
+ frag->dst = 0;
l = &ip->flisthead4;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -755,7 +610,7 @@
/*
* ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
*/
-Fragment4 *
+static Fragment4*
ipfragallo4(IP *ip)
{
Fragment4 *f;
@@ -762,7 +617,7 @@
while(ip->fragfree4 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead4; f->next; f = f->next)
+ for(f = ip->flisthead4; f->next != nil; f = f->next)
;
ipfragfree4(ip, f);
}
diff -u a/os/ip//ip.h b/os/ip//ip.h
--- a/os/ip//ip.h
+++ b/os/ip//ip.h
@@ -1,35 +1,33 @@
typedef struct Conv Conv;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
typedef struct Fs Fs;
typedef union Hwaddr Hwaddr;
typedef struct IP IP;
typedef struct IPaux IPaux;
+typedef struct Ip4hdr Ip4hdr;
+typedef struct Ipfrag Ipfrag;
typedef struct Ipself Ipself;
typedef struct Ipselftab Ipselftab;
typedef struct Iplink Iplink;
typedef struct Iplifc Iplifc;
typedef struct Ipmulti Ipmulti;
-typedef struct IProuter IProuter;
typedef struct Ipifc Ipifc;
typedef struct Iphash Iphash;
typedef struct Ipht Ipht;
typedef struct Netlog Netlog;
-typedef struct Ifclog Ifclog;
typedef struct Medium Medium;
typedef struct Proto Proto;
typedef struct Arpent Arpent;
typedef struct Arp Arp;
typedef struct Route Route;
+typedef struct Routehint Routehint;
typedef struct Routerparams Routerparams;
typedef struct Hostparams Hostparams;
-typedef struct V6router V6router;
-typedef struct V6params V6params;
+typedef struct v6params v6params;
-typedef struct Ip4hdr Ip4hdr;
-typedef struct Nat Nat;
-
#pragma incomplete Arp
-#pragma incomplete Ifclog
#pragma incomplete Ipself
#pragma incomplete Ipselftab
#pragma incomplete IP
@@ -39,10 +37,9 @@
{
Addrlen= 64,
Maxproto= 20,
- Nhash= 64,
- Maxincall= 5,
- Nchans= 16383,
- MAClen= 16, /* longest mac address */
+ Maxincall= 10,
+ Nchans= 1024,
+ MAClen= 8, /* longest mac address */
MAXTTL= 255,
DFLTTOS= 0,
@@ -57,6 +54,12 @@
V6= 6,
IP_VER4= 0x40,
IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP_FO= 0x1fff, /* v4: Fragment offset */
+ IP4HDR= IP_HLEN4<<2, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
/* 2^Lroot trees in the root table */
Lroot= 10,
@@ -73,6 +76,79 @@
Connected= 4,
};
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nipstats,
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+ uchar payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+ uvlong stats[Nipstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
/* on the wire packet header */
struct Ip4hdr
{
@@ -86,9 +162,14 @@
uchar cksum[2]; /* Header checksum */
uchar src[4]; /* IP source */
uchar dst[4]; /* IP destination */
- uchar data[1]; /* start of data */
};
+struct Routehint
+{
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
/*
* one per conversation directory
*/
@@ -100,9 +181,9 @@
Proto* p;
int restricted; /* remote port is restricted */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
uint ttl; /* max time to live */
uint tos; /* type of service */
- int ignoreadvice; /* don't terminate connection on icmp errors */
uchar ipversion;
uchar laddr[IPaddrlen]; /* local IP address */
@@ -139,8 +220,7 @@
void* ptcl; /* protocol specific stuff */
- Route *r; /* last route used */
- ulong rgen; /* routetable generation for *r */
+ Routehint;
};
struct Medium
@@ -161,18 +241,8 @@
/* process packets written to 'data' */
void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
- /* routes for router boards */
- void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
- void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
- void (*flushroutes)(Ipifc *ifc);
-
- /* for routing multicast groups */
- void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
- void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
/* address resolution */
- void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
- void (*areg)(Ipifc*, uchar*); /* register */
+ void (*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
/* v6 address generation */
void (*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
uchar mask[IPaddrlen];
uchar remote[IPaddrlen];
uchar net[IPaddrlen];
+ uchar type; /* route type */
uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
uchar onlink; /* =1 => onlink, =0 offlink. */
uchar autoflag; /* v6 autonomous flag */
- long validlt; /* v6 valid lifetime */
- long preflt; /* v6 preferred lifetime */
- long origint; /* time when addr was added */
+ ulong validlt; /* v6 valid lifetime */
+ ulong preflt; /* v6 preferred lifetime */
+ ulong origint; /* time when addr was added */
Iplink *link; /* addresses linked to this lifc */
Iplifc *next;
};
@@ -203,25 +274,25 @@
Ipself *self;
Iplifc *lifc;
Iplink *selflink; /* next link for this local address */
- Iplink *lifclink; /* next link for this ifc */
- ulong expire;
+ Iplink *lifclink; /* next link for this lifc */
Iplink *next; /* free list */
+ ulong expire;
int ref;
};
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
/* default values, one per stack */
struct Routerparams {
- int mflag;
- int oflag;
- int maxraint;
- int minraint;
- int linkmtu;
- int reachtime;
- int rxmitra;
- int ttl;
- int routerlt;
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
};
struct Hostparams {
@@ -231,22 +302,18 @@
struct Ipifc
{
RWlock;
-
+
Conv *conv; /* link to its conversation structure */
char dev[64]; /* device we're attached to */
Medium *m; /* Media pointer */
int maxtu; /* Maximum transfer unit */
int mintu; /* Minumum tranfer unit */
- int mbps; /* megabits per second */
void *arg; /* medium specific */
- int reassemble; /* reassemble IP packets before forwarding */
- /* these are used so that we can unbind on the fly */
- Lock idlock;
+ uchar reflect; /* allow forwarded packets to go out the same interface */
+ uchar reassemble; /* reassemble IP packets before forwarding to this interface */
+
uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
- int ref; /* number of proc's using this ipifc */
- Rendez wait; /* where unbinder waits for ref == 0 */
- int unbinding;
uchar mac[MAClen]; /* MAC address */
@@ -255,10 +322,16 @@
ulong in, out; /* message statistics */
ulong inerr, outerr; /* ... */
- uchar sendra6; /* == 1 => send router advs on this ifc */
- uchar recvra6; /* == 1 => recv router advs on this ifc */
- Routerparams rp; /* router parameters as in RFC 2461, pp.40--43.
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
used only if node is router */
+
+ int speed; /* link speed in bits per second */
+ int delay; /* burst delay in ms */
+ int burst; /* burst size in bytes (delay * speed / 8000, at least maxtu) */
+ int load; /* bytes in flight */
+ ulong ticks;
};
/*
@@ -330,20 +403,11 @@
int nc; /* number of conversations */
int ac;
Qid qid; /* qid for protocol directory */
- ushort nextport;
ushort nextrport;
void *priv;
};
-/*
- * Stream for sending packets to user level
- */
-struct IProuter {
- QLock;
- int opens;
- Queue *q;
-};
/*
* one per IP protocol stack
@@ -362,8 +426,7 @@
IP *ip;
Ipselftab *self;
Arp *arp;
- V6params *v6p;
- IProuter iprouter;
+ v6params *v6p;
Route *v4root[1<<Lroot]; /* v4 routing forest */
Route *v6root[1<<Lroot]; /* v6 routing forest */
@@ -370,7 +433,6 @@
Route *queue; /* used as temp when reinjecting routes */
Netlog *alog;
- Ifclog *ilog;
char ndb[1024]; /* an ndb entry for this interface */
int ndbvers;
@@ -377,23 +439,10 @@
long ndbmtime;
};
-/* one per default router known to host */
-struct V6router {
- uchar inuse;
- Ipifc *ifc;
- int ifcid;
- uchar routeraddr[IPaddrlen];
- long ltorigin;
- Routerparams rp;
-};
-
-struct V6params
+struct v6params
{
Routerparams rp; /* v6 params, one copy per node now */
Hostparams hp;
- V6router v6rlist[3]; /* max 3 default routers, currently */
- int cdrouter; /* uses only v6rlist[cdrouter] if */
- /* cdrouter >= 0. */
};
@@ -410,8 +459,7 @@
char* Fsstdbind(Conv*, char**, int);
ulong scalednconv(void);
void closeconv(Conv*);
-
-/*
+/*
* logging
*/
enum
@@ -434,7 +482,6 @@
Logrudpmsg= 1<<16,
Logesp= 1<<17,
Logtcpwin= 1<<18,
- Lognat= 1<<19,
};
void netloginit(Fs*);
@@ -449,17 +496,17 @@
void ifclogopen(Fs*, Chan*);
void ifclogclose(Fs*, Chan*);
+#pragma varargck argpos netlog 3
+
/*
* iproute.c
*/
typedef struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
typedef struct V4route V4route;
typedef struct V6route V6route;
enum
{
-
/* type bits */
Rv4= (1<<0), /* this is a version 4 route */
Rifc= (1<<1), /* this route is a directly connected interface */
@@ -468,27 +515,18 @@
Rbcast= (1<<4), /* a broadcast self address */
Rmulti= (1<<5), /* a multicast self address */
Rproxy= (1<<6), /* this route should be proxied */
+ Rsrc= (1<<7), /* source specific route */
};
-struct Routewalk
-{
- int o;
- int h;
- char* p;
- char* e;
- void* state;
- void (*walk)(Route*, Routewalk*);
-};
-
struct RouteTree
{
- Route* right;
- Route* left;
- Route* mid;
+ Route *mid;
+ Route *left;
+ Route *right;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->ifcid */
uchar depth;
uchar type;
- uchar ifcid; /* must match ifc->id */
- Ipifc *ifc;
char tag[4];
int ref;
};
@@ -497,6 +535,10 @@
{
ulong address;
ulong endaddress;
+
+ ulong source;
+ ulong endsource;
+
uchar gate[IPv4addrlen];
};
@@ -504,6 +546,10 @@
{
ulong address[IPllen];
ulong endaddress[IPllen];
+
+ ulong source[IPllen];
+ ulong endsource[IPllen];
+
uchar gate[IPaddrlen];
};
@@ -516,17 +562,16 @@
V4route v4;
};
};
-extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route* v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v4source(Fs *f, uchar *a, uchar *s);
+extern Route* v6source(Fs *f, uchar *a, uchar *s);
extern long routeread(Fs *f, char*, ulong, int);
extern long routewrite(Fs *f, Chan*, char*, int);
-extern void routetype(int, char*);
-extern void ipwalkroutes(Fs*, Routewalk*);
-extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void routetype(int type, char p[8]);
/*
* devip.c
@@ -543,7 +588,6 @@
};
extern IPaux* newipaux(char*, char*);
-extern void setlport(Conv*);
/*
* arp.c
@@ -552,18 +596,16 @@
{
uchar ip[IPaddrlen];
uchar mac[MAClen];
- Medium *type; /* media type */
- Arpent* hash;
- Block* hold;
- Block* last;
- uint ctime; /* time entry was created or refreshed */
- uint utime; /* time entry was last used */
- uchar state;
+ Arpent *hash;
Arpent *nextrxt; /* re-transmit chain */
- uint rtime; /* time for next retransmission */
- uchar rxtsrem;
+ Block *hold;
+ Block *last;
Ipifc *ifc;
uchar ifcid; /* must match ifc->id */
+ uchar state;
+ uchar rxtsrem; /* re-transmissions remaining */
+ ulong ctime; /* time entry was created or refreshed */
+ ulong utime; /* time entry was last used */
};
extern void arpinit(Fs*);
@@ -572,15 +614,17 @@
extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
extern void arprelease(Arp*, Arpent *a);
extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void ndpsendsol(Fs*, Ipifc*, Arpent*);
/*
* ipaux.c
*/
-extern int myetheraddr(uchar*, char*);
-extern ulong parseip(uchar*, char*);
-extern ulong parseipmask(uchar*, char*);
+extern int parseether(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*, int);
+extern vlong parseipandmask(uchar*, uchar*, char*, char*);
extern char* v4parseip(uchar*, char*);
extern void maskip(uchar *from, uchar *mask, uchar *to);
extern int parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
extern void v4tov6(uchar *v6, uchar *v4);
extern int v6tov4(uchar *v4, uchar *v6);
extern int eipfmt(Fmt*);
+extern int convipvers(Conv *c);
#define ipmove(x, y) memmove(x, y, IPaddrlen)
#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
-
-#define ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
extern uchar IPv4bcast[IPaddrlen];
extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
extern Medium ethermedium;
extern Medium nullmedium;
extern Medium pktmedium;
-extern Medium tripmedium;
/*
* ipifc.c
@@ -619,33 +660,24 @@
*/
extern Medium* ipfindmedium(char *name);
extern void addipmedium(Medium *med);
+extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
extern int ipforme(Fs*, uchar *addr);
-extern int iptentative(Fs*, uchar *addr);
-extern int ipisbm(uchar *);
-extern int ipismulticast(uchar *);
-extern Ipifc* findipifc(Fs*, uchar *remote, int type);
-extern void findprimaryip(Fs*, uchar*);
+extern int ipismulticast(uchar *ip);
+extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc* findipifcstr(Fs *f, char *s);
extern void findlocalip(Fs*, uchar *local, uchar *remote);
-extern int ipv4local(Ipifc *ifc, uchar *addr);
-extern int ipv6local(Ipifc *ifc, uchar *addr);
-extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc* ipremoteonifc(Ipifc *ifc, uchar *ip);
extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int ipismulticast(uchar *ip);
-extern int ipisbooting(void);
-extern int ipifccheckin(Ipifc *ifc, Medium *med);
-extern void ipifccheckout(Ipifc *ifc);
-extern int ipifcgrab(Ipifc *ifc);
-extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void ipifcremroute(Fs*, int, uchar*, uchar*);
extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
extern long ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void ipsendra6(Fs *f, int on);
-
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char* ipifcremove6(Ipifc *ifc, char**argv, int argc);
/*
* ip.c
*/
@@ -652,37 +684,26 @@
extern void iprouting(Fs*, int);
extern void icmpnoconv(Fs*, Block*);
extern void icmpcantfrag(Fs*, Block*, int);
-extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern void icmpttlexceeded(Fs*, Ipifc*, Block*);
extern ushort ipcsum(uchar*);
extern void ipiput4(Fs*, Ipifc*, Block*);
extern void ipiput6(Fs*, Ipifc*, Block*);
-extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int ipoput6(Fs*, Block*, int, int, int, Routehint*);
extern int ipstats(Fs*, char*, int);
extern ushort ptclbsum(uchar*, int);
extern ushort ptclcsum(Block*, int, int);
extern void ip_init(Fs*);
-extern void update_mtucache(uchar*, ulong);
-extern ulong restrict_mtu(uchar*, ulong);
+extern void ip_init_6(Fs*);
/*
* bootp.c
*/
-char* (*bootp)(Ipifc*);
-int (*bootpread)(char*, ulong, int);
+extern int bootpread(char*, ulong, int);
/*
- * iprouter.c
- */
-void useriprouter(Fs*, Ipifc*, Block*);
-void iprouteropen(Fs*);
-void iprouterclose(Fs*);
-long iprouterread(Fs*, void*, int);
-
-/*
* resolving inferno/plan9 differences
*/
-Chan* commonfdtochan(int, int, int, int);
char* commonuser(void);
char* commonerror(void);
@@ -695,15 +716,3 @@
* global to all of the stack
*/
extern void (*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int nato(Block*, Ipifc*, Fs*);
-extern void nati(Block*, Ipifc*);
-extern int natgc(uchar);
-
-extern int addnataddr(uchar*, uchar*, Iplifc*);
-extern int removenataddr(uchar*, uchar*, Iplifc*);
-extern void shownataddr(void);
-extern void flushnataddr(void);
diff -u a/os/ip//ipaux.c b/os/ip//ipaux.c
--- a/os/ip//ipaux.c
+++ b/os/ip//ipaux.c
@@ -5,49 +5,8 @@
#include "fns.h"
#include "../port/error.h"
#include "ip.h"
-#include "ipv6.h"
+#include "ipv6.h"
-/*
- * well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- * prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0, 0, 0, 0
-};
-
-
char *v6hdrtypes[Maxhdrtype] =
{
[HBH] "HopbyHop",
@@ -54,7 +13,7 @@
[ICMP] "ICMP",
[IGMP] "IGMP",
[GGP] "GGP",
- [IPINIP] "IP",
+ [IPINIP] "IP",
[ST] "ST",
[TCP] "TCP",
[UDP] "UDP",
@@ -87,6 +46,7 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+
uchar v6linklocal[IPaddrlen] = {
0xfe, 0x80, 0, 0,
0, 0, 0, 0,
@@ -99,26 +59,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6llpreflen = 8; // link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
- 0xfe, 0xc0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6slpreflen = 6; // site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
- 0x08, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
uchar v6multicast[IPaddrlen] = {
0xff, 0, 0, 0,
0, 0, 0, 0,
@@ -131,7 +73,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6mcpreflen = 1; // multicast prefix length
+int v6mcpreflen = 1; /* multicast prefix length */
+
uchar v6allnodesN[IPaddrlen] = {
0xff, 0x01, 0, 0,
0, 0, 0, 0,
@@ -138,6 +81,12 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+uchar v6allroutersN[IPaddrlen] = {
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
uchar v6allnodesNmask[IPaddrlen] = {
0xff, 0xff, 0, 0,
0, 0, 0, 0,
@@ -144,7 +93,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6aNpreflen = 2; // all nodes (N) prefix
+int v6aNpreflen = 2; /* all nodes (N) prefix */
+
uchar v6allnodesL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -151,19 +101,6 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
-uchar v6allnodesLmask[IPaddrlen] = {
- 0xff, 0xff, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6aLpreflen = 2; // all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
- 0xff, 0x01, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0x02
-};
uchar v6allroutersL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -170,12 +107,14 @@
0, 0, 0, 0,
0, 0, 0, 0x02
};
-uchar v6allroutersS[IPaddrlen] = {
- 0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, 0x02
+ 0, 0, 0, 0
};
+int v6aLpreflen = 2; /* all nodes (L) prefix */
+
uchar v6solicitednode[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -190,9 +129,6 @@
};
int v6snpreflen = 13;
-
-
-
ushort
ptclcsum(Block *bp, int offset, int len)
{
@@ -215,7 +151,7 @@
if(bp->next == nil) {
if(blocklen < len)
len = blocklen;
- return ~ptclbsum(addr, len) & 0xffff;
+ return ptclbsum(addr, len) ^ 0xffff;
}
losum = 0;
@@ -247,7 +183,7 @@
while((csum = losum>>16) != 0)
losum = csum + (losum & 0xffff);
- return ~losum & 0xffff;
+ return losum ^ 0xffff;
}
enum
@@ -255,306 +191,9 @@
Isprefix= 16,
};
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
- char buf[5*8];
- static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
- static char *ifmt = "%d.%d.%d.%d";
- uchar *p, ip[16];
- ulong *lp;
- ushort s;
- int i, j, n, eln, eli;
-
- switch(f->r) {
- case 'E': /* Ethernet address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
- case 'I': /* Ip address */
- p = va_arg(f->args, uchar*);
-common:
- if(memcmp(p, v4prefix, 12) == 0)
- return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
- /* find longest elision */
- eln = eli = -1;
- for(i = 0; i < 16; i += 2){
- for(j = i; j < 16; j += 2)
- if(p[j] != 0 || p[j+1] != 0)
- break;
- if(j > i && j - i > eln){
- eli = i;
- eln = j - i;
- }
- }
-
- /* print with possible elision */
- n = 0;
- for(i = 0; i < 16; i += 2){
- if(i == eli){
- n += sprint(buf+n, "::");
- i += eln;
- if(i >= 16)
- break;
- } else if(i != 0)
- n += sprint(buf+n, ":");
- s = (p[i]<<8) + p[i+1];
- n += sprint(buf+n, "%ux", s);
- }
- return fmtstrcpy(f, buf);
-
- case 'i': /* v6 address as 4 longs */
- lp = va_arg(f->args, ulong*);
- for(i = 0; i < 4; i++)
- hnputl(ip+4*i, *lp++);
- p = ip;
- goto common;
-
- case 'V': /* v4 ip address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
- case 'M': /* ip mask */
- p = va_arg(f->args, uchar*);
-
- /* look for a prefix mask */
- for(i = 0; i < 16; i++)
- if(p[i] != 0xff)
- break;
- if(i < 16){
- if((prefixvals[p[i]] & Isprefix) == 0)
- goto common;
- for(j = i+1; j < 16; j++)
- if(p[j] != 0)
- goto common;
- n = 8*i + (prefixvals[p[i]] & ~Isprefix);
- } else
- n = 8*16;
-
- /* got one, use /xx format */
- return fmtprint(f, "/%d", n);
- }
- return fmtstrcpy(f, "(eipfmt)");
-}
-
#define CLASS(p) ((*(uchar*)(p))>>6)
-extern char*
-v4parseip(uchar *to, char *from)
-{
- int i;
- char *p;
-
- p = from;
- for(i = 0; i < 4 && *p; i++){
- to[i] = strtoul(p, &p, 0);
- if(*p == '.')
- p++;
- }
- switch(CLASS(to)){
- case 0: /* class A - 1 uchar net */
- case 1:
- if(i == 3){
- to[3] = to[2];
- to[2] = to[1];
- to[1] = 0;
- } else if(i == 2){
- to[3] = to[1];
- to[1] = 0;
- }
- break;
- case 2: /* class B - 2 uchar net */
- if(i == 3){
- to[3] = to[2];
- to[2] = 0;
- }
- break;
- }
- return p;
-}
-
-int
-isv4(uchar *ip)
-{
- return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- * the following routines are unrolled with no memset's to speed
- * up the usual case
- */
void
-v4tov6(uchar *v6, uchar *v4)
-{
- v6[0] = 0;
- v6[1] = 0;
- v6[2] = 0;
- v6[3] = 0;
- v6[4] = 0;
- v6[5] = 0;
- v6[6] = 0;
- v6[7] = 0;
- v6[8] = 0;
- v6[9] = 0;
- v6[10] = 0xff;
- v6[11] = 0xff;
- v6[12] = v4[0];
- v6[13] = v4[1];
- v6[14] = v4[2];
- v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
- if(v6[0] == 0
- && v6[1] == 0
- && v6[2] == 0
- && v6[3] == 0
- && v6[4] == 0
- && v6[5] == 0
- && v6[6] == 0
- && v6[7] == 0
- && v6[8] == 0
- && v6[9] == 0
- && v6[10] == 0xff
- && v6[11] == 0xff)
- {
- v4[0] = v6[12];
- v4[1] = v6[13];
- v4[2] = v6[14];
- v4[3] = v6[15];
- return 0;
- } else {
- memset(v4, 0, 4);
- return -1;
- }
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
- int i, elipsis = 0, v4 = 1;
- ulong x;
- char *p, *op;
-
- memset(to, 0, IPaddrlen);
- p = from;
- for(i = 0; i < 16 && *p; i+=2){
- op = p;
- x = strtoul(p, &p, 16);
- if(*p == '.' || (*p == 0 && i == 0)){
- p = v4parseip(to+i, op);
- i += 4;
- break;
- } else {
- to[i] = x>>8;
- to[i+1] = x;
- }
- if(*p == ':'){
- v4 = 0;
- if(*++p == ':'){
- elipsis = i+2;
- p++;
- }
- }
- }
- if(i < 16){
- memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
- memset(&to[elipsis], 0, 16-i);
- }
- if(v4){
- to[10] = to[11] = 0xff;
- return nhgetl(to+12);
- } else
- return 6;
-}
-
-/*
- * hack to allow ip v4 masks to be entered in the old
- * style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
- ulong x;
- int i;
- uchar *p;
-
- if(*from == '/'){
- /* as a number of prefix bits */
- i = atoi(from+1);
- if(i < 0)
- i = 0;
- if(i > 128)
- i = 128;
- memset(to, 0, IPaddrlen);
- for(p = to; i >= 8; i -= 8)
- *p++ = 0xff;
- if(i > 0)
- *p = ~((1<<(8-i))-1);
- x = nhgetl(to+IPv4off);
- } else {
- /* as a straight bit mask */
- x = parseip(to, from);
- if(memcmp(to, v4prefix, IPv4off) == 0)
- memset(to, 0xff, IPv4off);
- }
- return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
- int i;
-
- for(i = 0; i < IPaddrlen; i++)
- to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
- if(isv4(ip))
- return classmask[ip[IPv4off]>>6];
- else {
- if(ipcmp(ip, v6loopback) == 0)
- return IPallbits;
- else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
- return v6linklocalmask;
- else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
- return v6sitelocalmask;
- else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
- return v6solicitednodemask;
- else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
- return v6multicastmask;
- return IPallbits;
- }
-}
-
-void
ipv62smcast(uchar *smcast, uchar *a)
{
assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
ulong
iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
{
- return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+ return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
}
void
@@ -678,7 +317,7 @@
return c;
}
}
-
+
/* match local address and port */
hv = iphash(IPnoaddr, 0, da, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
return c;
}
}
-
+
/* match just port */
hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
return c;
}
}
-
+
/* match local address */
hv = iphash(IPnoaddr, 0, da, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
return c;
}
}
-
+
/* look for something that matches anything */
hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
}
unlock(ht);
return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+ if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+ return V4;
+ else
+ return V6;
}
diff -u a/os/ip//ipifc.c b/os/ip//ipifc.c
--- a/os/ip//ipifc.c
+++ b/os/ip//ipifc.c
@@ -11,17 +11,14 @@
#define DPRINT if(0)print
enum {
- Maxmedia = 32,
- Nself = Maxmedia*5,
- NHASH = (1<<6),
- NCACHE = 256,
- QMAX = 64*1024-1,
+ Maxmedia = 32,
+ Nself = Maxmedia*5,
+ NHASH = 1<<6,
+ NCACHE = 256,
+ QMAX = 192*1024-1,
};
-Medium *media[Maxmedia] =
-{
- 0
-};
+Medium *media[Maxmedia] = { 0 };
/*
* cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
struct Ipself
{
uchar a[IPaddrlen];
- Ipself *hnext; /* next address in the hash table */
+ Ipself *next; /* next address in the hash table */
Iplink *link; /* binding twixt Ipself and Ipifc */
ulong expire;
uchar type; /* type of address */
- int ref;
- Ipself *next; /* free list */
};
struct Ipselftab
@@ -64,11 +59,47 @@
static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char* ipifcremlifc(Ipifc*, Iplifc*);
+static void ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char* ipifcremlifc(Ipifc*, Iplifc**);
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+ unknownv6, /* UGH */
+ unspecifiedv6,
+ linklocalv6,
+ globalv6,
+};
+
+static int
+v6addrtype(uchar *addr)
+{
+ if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0)
+ return unknownv6;
+ else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+ isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop)
+ return linklocalv6;
+ else
+ return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n)
+{
+ int i, c;
+
+ for(i = 0; i < n; i++){
+ if((c = a[i] ^ b[i]) == 0)
+ continue;
+ for(i <<= 3; (c & 0x80) == 0; i++)
+ c <<= 1;
+ return i;
+ }
+ return i << 3;
+}
+
/*
* link in a new medium
*/
@@ -121,7 +152,7 @@
wlock(ifc);
if(ifc->m != nil){
wunlock(ifc);
- return "interface already bound";
+ return Ebound;
}
if(waserror()){
wunlock(ifc);
@@ -142,18 +173,14 @@
ifc->m = m;
ifc->mintu = ifc->m->mintu;
ifc->maxtu = ifc->m->maxtu;
+ ifc->delay = 40;
+ ifc->speed = 0;
if(ifc->m->unbindonclose == 0)
ifc->conv->inuse++;
- ifc->rp.mflag = 0; // default not managed
- ifc->rp.oflag = 0;
- ifc->rp.maxraint = 600000; // millisecs
- ifc->rp.minraint = 200000;
- ifc->rp.linkmtu = 0; // no mtu sent
- ifc->rp.reachtime = 0;
- ifc->rp.rxmitra = 0;
- ifc->rp.ttl = MAXTTL;
- ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+ /* default router parameters */
+ ifc->rp = c->p->f->v6p->rp;
+
/* any ancillary structures (like routes) no longer pertain */
ifc->ifcid++;
@@ -170,29 +197,44 @@
/*
* detach a device from an interface, close the interface
- * called with ifc->conv closed
*/
static char*
ipifcunbind(Ipifc *ifc)
{
- char *err;
+ Medium *m;
- if(waserror()){
+ wlock(ifc);
+ m = ifc->m;
+ if(m == nil){
wunlock(ifc);
- nexterror();
+ return Eunbound;
}
- wlock(ifc);
- /* dissociate routes */
- if(ifc->m != nil && ifc->m->unbindonclose == 0)
- ifc->conv->inuse--;
- ifc->ifcid++;
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
/* disassociate device */
- if(ifc->m != nil && ifc->m->unbind)
- (*ifc->m->unbind)(ifc);
+ if(m->unbind != nil){
+ extern Medium nullmedium;
+
+ /*
+ * unbind() might unlock the ifc, so change the medium
+ * to the nullmedium to prevent packets from getting
+ * sent while the medium is shutting down.
+ */
+ ifc->m = &nullmedium;
+
+ if(!waserror()){
+ (*m->unbind)(ifc);
+ poperror();
+ }
+ }
+
memset(ifc->dev, 0, sizeof(ifc->dev));
ifc->arg = nil;
+
+ ifc->reflect = 0;
ifc->reassemble = 0;
/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
qclose(ifc->conv->wq);
qclose(ifc->conv->sq);
- /* disassociate logical interfaces */
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
-
+ /* dissociate routes */
+ ifc->ifcid++;
+ if(m->unbindonclose == 0)
+ ifc->conv->inuse--;
ifc->m = nil;
wunlock(ifc);
- poperror();
+
return nil;
}
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
-
static int
ipifcstate(Conv *c, char *state, int n)
{
@@ -228,19 +266,18 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
m = snprint(state, n, sfixedformat,
ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
- ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+ ifc->speed, ifc->delay);
rlock(ifc);
- for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
- m += snprint(state+m, n - m, slineformat,
- lifc->local, lifc->mask, lifc->remote,
- lifc->validlt, lifc->preflt);
+ for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat, lifc->local,
+ lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
if(ifc->lifc == nil)
m += snprint(state+m, n - m, "\n");
runlock(ifc);
@@ -256,13 +293,11 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
- m = 0;
-
rlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m = 0;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
- for(link = lifc->link; link; link = link->lifclink)
+ for(link = lifc->link; link != nil; link = link->lifclink)
m += snprint(state+m, n - m, " %-40.40I", link->self->a);
m += snprint(state+m, n - m, "\n");
}
@@ -279,6 +314,59 @@
return ifc->m != nil;
}
+static void
+ipifcadjustburst(Ipifc *ifc)
+{
+ int burst;
+
+ burst = ((vlong)ifc->delay * ifc->speed) / 8000;
+ if(burst < ifc->maxtu)
+ burst = ifc->maxtu;
+ ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+ if(delay < 0)
+ delay = 0;
+ else if(delay > 1000)
+ delay = 1000;
+ ifc->delay = delay;
+ ipifcadjustburst(ifc);
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+ if(speed < 0)
+ speed = 0;
+ ifc->speed = speed;
+ ifc->load = 0;
+ ipifcadjustburst(ifc);
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ if(ifc->speed){
+ ulong now = MACHP(0)->ticks;
+ int dt = TK2MS(now - ifc->ticks);
+ ifc->ticks = now;
+ ifc->load -= ((vlong)dt * ifc->speed) / 8000;
+ if(ifc->load < 0 || dt < 0 || dt > 1000)
+ ifc->load = 0;
+ else if(ifc->load > ifc->burst){
+ freeblist(bp);
+ return;
+ }
+ }
+ bp = concatblock(bp);
+ ifc->load += BLEN(bp);
+ ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
/*
* called when a process writes to an interface's 'data'
*/
@@ -294,18 +382,15 @@
return;
ifc = (Ipifc*)c->ptcl;
- if(!canrlock(ifc)){
- freeb(bp);
- return;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil || ifc->m->pktin == nil)
- freeb(bp);
- else
+ if(ifc->m != nil && ifc->m->pktin != nil)
(*ifc->m->pktin)(c->p->f, ifc, bp);
+ else
+ freeb(bp);
runlock(ifc);
poperror();
}
@@ -319,27 +404,26 @@
Ipifc *ifc;
c->rq = qopen(QMAX, 0, 0, 0);
- c->sq = qopen(2*QMAX, 0, 0, 0);
c->wq = qopen(QMAX, Qkick, ipifckick, c);
+ c->sq = qopen(QMAX, 0, 0, 0);
+ if(c->rq == nil || c->wq == nil || c->sq == nil)
+ error(Enomem);
ifc = (Ipifc*)c->ptcl;
ifc->conv = c;
- ifc->unbinding = 0;
ifc->m = nil;
+ ifc->reflect = 0;
ifc->reassemble = 0;
}
/*
* called after last close of ipifc data or ctl
- * called with c locked, we must unlock
*/
static void
ipifcclose(Conv *c)
{
- Ipifc *ifc;
- Medium *m;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
+ Medium *m = ifc->m;
- ifc = (Ipifc*)c->ptcl;
- m = ifc->m;
if(m != nil && m->unbindonclose)
ipifcunbind(ifc);
}
@@ -347,19 +431,17 @@
/*
* change an interface's mtu
*/
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
{
- int mtu;
+ Medium *m = ifc->m;
- if(argc < 2)
+ if(m == nil)
+ return Eunbound;
+ if(mtu < m->mintu || mtu > m->maxtu)
return Ebadarg;
- if(ifc->m == nil)
- return Ebadarg;
- mtu = strtoul(argv[1], 0, 0);
- if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
- return Ebadarg;
ifc->maxtu = mtu;
+ ipifcadjustburst(ifc);
return nil;
}
@@ -374,13 +456,8 @@
Iplifc *lifc, **l;
int i, type, mtu;
Fs *f;
- int sendnbrdisc = 0;
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- f = ifc->conv->p->f;
-
+ mtu = 0;
type = Rifc;
memset(ip, 0, IPaddrlen);
memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
/* fall through */
case 5:
mtu = strtoul(argv[4], 0, 0);
- if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
- ifc->maxtu = mtu;
/* fall through */
case 4:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
- parseip(rem, argv[3]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
maskip(rem, mask, net);
break;
case 3:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
maskip(ip, mask, rem);
maskip(rem, mask, net);
break;
case 2:
- parseip(ip, argv[1]);
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
memmove(mask, defmask(ip), IPaddrlen);
maskip(ip, mask, rem);
maskip(rem, mask, net);
@@ -415,26 +490,55 @@
break;
default:
return Ebadarg;
- break;
}
- if(isv4(ip))
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+ type |= Rv4;
tentative = 0;
+ }
+
wlock(ifc);
+ if(ifc->m == nil){
+ wunlock(ifc);
+ return Eunbound;
+ }
+ f = ifc->conv->p->f;
+ if(waserror()){
+ wunlock(ifc);
+ return up->errstr;
+ }
+ if(mtu > 0)
+ ipifcsetmtu(ifc, mtu);
+
/* ignore if this is already a local address for this ifc */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, ip) == 0) {
- if(lifc->tentative != tentative)
- lifc->tentative = tentative;
- if(lifcp != nil) {
- lifc->onlink = lifcp->onlink;
- lifc->autoflag = lifcp->autoflag;
- lifc->validlt = lifcp->validlt;
- lifc->preflt = lifcp->preflt;
- lifc->origint = lifcp->origint;
+ if((lifc = iplocalonifc(ifc, ip)) != nil){
+ if(lifcp != nil) {
+ if(!lifc->onlink && lifcp->onlink){
+ lifc->onlink = 1;
+ addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
}
- goto out;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
}
+ if(lifc->tentative != tentative){
+ lifc->tentative = tentative;
+ goto done;
+ }
+ wunlock(ifc);
+ poperror();
+ return nil;
}
/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
ipmove(lifc->mask, mask);
ipmove(lifc->remote, rem);
ipmove(lifc->net, net);
+ lifc->type = type;
lifc->tentative = tentative;
if(lifcp != nil) {
lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
lifc->validlt = lifcp->validlt;
lifc->preflt = lifcp->preflt;
lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0UL;
+ lifc->origint = NOW / 1000;
}
- else { // default values
- lifc->onlink = 1;
- lifc->autoflag = 1;
- lifc->validlt = 0xffffffff;
- lifc->preflt = 0xffffffff;
- lifc->origint = NOW / 10^3;
- }
lifc->next = nil;
- for(l = &ifc->lifc; *l; l = &(*l)->next)
+ for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
;
*l = lifc;
- /* check for point-to-point interface */
- if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
- if(ipcmp(mask, IPallbits) == 0)
- type |= Rptpt;
+ /* add route for this logical interface */
+ if(lifc->onlink){
+ addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+ }
- /* add local routes */
- if(isv4(ip))
- v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
- else
- v6addroute(f, tifc, rem, mask, rem, type);
-
addselfcache(f, ifc, lifc, ip, Runi);
- if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
- ipifcregisterproxy(f, ifc, rem);
- goto out;
+ /* register proxy */
+ if(type & Rptpt){
+ if(type & Rproxy)
+ ipifcregisterproxy(f, ifc, rem, 1);
+ goto done;
}
- if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ if(type & Rv4) {
/* add subnet directed broadcast address to the self cache */
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) & mask[i];
addselfcache(f, ifc, lifc, bcast, Rbcast);
-
+
addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
- }
- else {
+ } else {
if(ipcmp(ip, v6loopback) == 0) {
/* add node-local mcast address */
addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
/* add route for all node multicast */
- v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+ addroute(f, v6allnodesN, v6allnodesNmask,
+ ip, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
}
/* add all nodes multicast address */
addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-
+
/* add route for all nodes multicast */
- v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-
+ addroute(f, v6allnodesL, v6allnodesLmask,
+ ip, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
+
/* add solicited-node multicast address */
ipv62smcast(bcast, ip);
addselfcache(f, ifc, lifc, bcast, Rmulti);
-
- sendnbrdisc = 1;
}
- /* register the address on this network for address resolution */
- if(isv4(ip) && ifc->m->areg != nil)
- (*ifc->m->areg)(ifc, ip);
-
-out:
+done:
wunlock(ifc);
- if(tentative && sendnbrdisc)
- icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+
+ rlock(ifc);
+ ipifcregisteraddr(f, ifc, lifc, ip);
+ runlock(ifc);
+
return nil;
}
/*
* remove a logical interface from an ifc
- * always called with ifc wlock'd
+ * called with ifc wlock'd
*/
static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
{
- Iplifc **l;
- Fs *f;
+ Iplifc *lifc = *l;
+ Fs *f = ifc->conv->p->f;
- f = ifc->conv->p->f;
-
- /*
- * find address on this interface and remove from chain.
- * for pt to pt we actually specify the remote address as the
- * addresss to remove.
- */
- for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
- ;
- if(*l == nil)
+ if(lifc == nil)
return "address not on this interface";
*l = lifc->next;
/* disassociate any addresses */
- while(lifc->link)
+ while(lifc->link != nil)
remselfcache(f, ifc, lifc, lifc->link->self->a);
/* remove the route for this logical interface */
- if(isv4(lifc->local))
- v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
- else {
- v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(lifc->onlink){
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(lifc->local) != linklocalv6)
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
+ }
+
+ /* unregister proxy */
+ if(lifc->type & Rptpt){
+ if(lifc->type & Rproxy)
+ ipifcregisterproxy(f, ifc, lifc->remote, 0);
+ goto done;
+ }
+
+ /* remove route for all nodes multicast */
+ if((lifc->type & Rv4) == 0){
if(ipcmp(lifc->local, v6loopback) == 0)
- /* remove route for all node multicast */
- v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
- else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
- /* remove route for all link multicast */
- v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ remroute(f, v6allnodesN, v6allnodesNmask,
+ lifc->local, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
+
+ remroute(f, v6allnodesL, v6allnodesLmask,
+ lifc->local, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
}
+done:
free(lifc);
return nil;
-
}
/*
* remove an address from an interface.
- * called with c locked
*/
char*
ipifcrem(Ipifc *ifc, char **argv, int argc)
{
- uchar ip[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar rem[IPaddrlen];
- Iplifc *lifc;
- char *rv;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ Iplifc *lifc, **l;
+ char *err;
if(argc < 3)
return Ebadarg;
-
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
if(argc < 4)
maskip(ip, mask, rem);
- else
- parseip(rem, argv[3]);
+ else if(parseip(rem, argv[3]) == -1)
+ return Ebadip;
- wlock(ifc);
-
/*
* find address on this interface and remove from chain.
* for pt to pt we actually specify the remote address as the
* addresss to remove.
*/
+ wlock(ifc);
+ l = &ifc->lifc;
for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
- if (memcmp(ip, lifc->local, IPaddrlen) == 0
- && memcmp(mask, lifc->mask, IPaddrlen) == 0
- && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ if(ipcmp(ip, lifc->local) == 0
+ && ipcmp(mask, lifc->mask) == 0
+ && ipcmp(rem, lifc->remote) == 0)
break;
+ l = &lifc->next;
}
-
- rv = ipifcremlifc(ifc, lifc);
+ err = ipifcremlifc(ifc, l);
wunlock(ifc);
- return rv;
+ return err;
}
/*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->addroute != nil)
- m->addroute(ifc, vers, addr, mask, gate, type);
- }
- }
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->remroute != nil)
- m->remroute(ifc, vers, addr, mask);
- }
- }
-}
-
-/*
* associate an address with the interface. This wipes out any previous
* addresses. This is a macro that means, remove all the old interfaces
* and add a new one.
@@ -679,170 +740,89 @@
static char*
ipifcconnect(Conv* c, char **argv, int argc)
{
+ Ipifc *ifc = (Ipifc*)c->ptcl;
char *err;
- Ipifc *ifc;
- ifc = (Ipifc*)c->ptcl;
-
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
wlock(ifc);
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
wunlock(ifc);
- poperror();
err = ipifcadd(ifc, argv, argc, 0, nil);
- if(err)
+ if(err != nil)
return err;
Fsconnected(c, nil);
-
return nil;
}
char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
{
- int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+ int i, argsleft;
+ uchar sendra, recvra;
+ Routerparams rp;
- argsleft = argc - 1;
i = 1;
-
- if(argsleft % 2 != 0)
+ argsleft = argc - 1;
+ if((argsleft % 2) != 0)
return Ebadarg;
+ sendra = ifc->sendra6;
+ recvra = ifc->recvra6;
+ rp = ifc->rp;
+
while (argsleft > 1) {
- if(strcmp(argv[i],"recvra")==0)
- ifc->recvra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"sendra")==0)
- ifc->sendra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"mflag")==0)
- ifc->rp.mflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"oflag")==0)
- ifc->rp.oflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"maxraint")==0)
- ifc->rp.maxraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"minraint")==0)
- ifc->rp.minraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"linkmtu")==0)
- ifc->rp.linkmtu = atoi(argv[i+1]);
- else if(strcmp(argv[i],"reachtime")==0)
- ifc->rp.reachtime = atoi(argv[i+1]);
- else if(strcmp(argv[i],"rxmitra")==0)
- ifc->rp.rxmitra = atoi(argv[i+1]);
- else if(strcmp(argv[i],"ttl")==0)
- ifc->rp.ttl = atoi(argv[i+1]);
- else if(strcmp(argv[i],"routerlt")==0)
- ifc->rp.routerlt = atoi(argv[i+1]);
+ if(strcmp(argv[i], "recvra") == 0)
+ recvra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "sendra") == 0)
+ sendra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "mflag") == 0)
+ rp.mflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "oflag") == 0)
+ rp.oflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "maxraint") == 0)
+ rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "minraint") == 0)
+ rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "linkmtu") == 0)
+ rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "reachtime") == 0)
+ rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "rxmitra") == 0)
+ rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "ttl") == 0)
+ rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "routerlt") == 0)
+ rp.routerlt = atoi(argv[i+1]);
else
- return Ebadarg;
+ return Ebadarg;
argsleft -= 2;
i += 2;
}
- // consistency check
- if(ifc->rp.maxraint < ifc->rp.minraint) {
- ifc->rp.maxraint = vmax;
- ifc->rp.minraint = vmin;
+ /* consistency check */
+ if(rp.maxraint < rp.minraint)
return Ebadarg;
- }
- return nil;
-}
+ ifc->rp = rp;
+ ifc->sendra6 = sendra;
+ ifc->recvra6 = recvra;
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->sendra6 = (i!=0);
return nil;
}
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->recvra6 = (i!=0);
- return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
- uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
- Iplifc *lifc;
-
- if(argc == 2){
- if((strcmp(argv[1], "show") == 0)){
- shownataddr();
- return nil;
- }else if((strcmp(argv[1], "flush") == 0)){
- flushnataddr();
- return nil;
- }else
- return Ebadarg;
- }
-
- if(argc != 5)
- return Ebadarg;
-
- if (parseip(src, argv[2]) == -1)
- return Ebadip;
-
- if (parseipmask(mask, argv[3]) == -1)
- return Ebadip;
-
- if (parseip(dst, argv[4]) == -1)
- return Ebadip;
-
- if((lifc=iplocalonifc(ifc, dst)) == nil)
- return Ebadip;
-
- if(strcmp(argv[1], "add") == 0){
- if(addnataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else if(strcmp(argv[1], "remove") == 0){
- if(removenataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else
- return Ebadarg;
-
- return nil;
-}
-
/*
* non-standard control messages.
- * called with c locked.
*/
static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
{
- Ipifc *ifc;
- int i;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
- ifc = (Ipifc*)c->ptcl;
if(strcmp(argv[0], "add") == 0)
return ipifcadd(ifc, argv, argc, 0, nil);
- else if(strcmp(argv[0], "bootp") == 0)
- return bootp(ifc);
else if(strcmp(argv[0], "try") == 0)
return ipifcadd(ifc, argv, argc, 1, nil);
else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
return ipifcrem(ifc, argv, argc);
else if(strcmp(argv[0], "unbind") == 0)
return ipifcunbind(ifc);
- else if(strcmp(argv[0], "joinmulti") == 0)
- return ipifcjoinmulti(ifc, argv, argc);
- else if(strcmp(argv[0], "leavemulti") == 0)
- return ipifcleavemulti(ifc, argv, argc);
else if(strcmp(argv[0], "mtu") == 0)
- return ipifcsetmtu(ifc, argv, argc);
- else if(strcmp(argv[0], "reassemble") == 0){
- ifc->reassemble = 1;
+ return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+ else if(strcmp(argv[0], "speed") == 0){
+ ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
return nil;
}
+ else if(strcmp(argv[0], "delay") == 0){
+ ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
else if(strcmp(argv[0], "iprouting") == 0){
- i = 1;
- if(argc > 1)
- i = atoi(argv[1]);
- iprouting(c->p->f, i);
+ iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
return nil;
}
- else if(strcmp(argv[0], "addpref6") == 0)
- return ipifcaddpref6(ifc, argv, argc);
- else if(strcmp(argv[0], "setpar6") == 0)
- return ipifcsetpar6(ifc, argv, argc);
- else if(strcmp(argv[0], "sendra6") == 0)
- return ipifcsendra6(ifc, argv, argc);
- else if(strcmp(argv[0], "recvra6") == 0)
- return ipifcrecvra6(ifc, argv, argc);
- else if(strcmp(argv[0], "nat") == 0)
- return ipifcnat(ifc, argv, argc);
+ else if(strcmp(argv[0], "reflect") == 0){
+ ifc->reflect = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "reassemble") == 0){
+ ifc->reassemble = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "add6") == 0)
+ return ipifcadd6(ifc, argv, argc);
+ else if(strcmp(argv[0], "remove6") == 0)
+ return ipifcremove6(ifc, argv, argc);
+ else if(strcmp(argv[0], "ra6") == 0)
+ return ipifcra6(ifc, argv, argc);
return "unsupported ctl";
}
+int
ipifcstats(Proto *ipifc, char *buf, int len)
{
return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
ipifc->nc = Maxmedia;
ipifc->ptclsize = sizeof(Ipifc);
- f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
/*
* add to self routing cache
- * called with c locked
*/
static void
addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
{
- Ipself *p;
Iplink *lp;
+ Ipself *p;
int h;
+ type |= (lifc->type & Rv4);
qlock(f->self);
+ if(waserror()){
+ qunlock(f->self);
+ nexterror();
+ }
/* see if the address already exists */
h = hashipa(a);
- for(p = f->self->hash[h]; p; p = p->next)
- if(memcmp(a, p->a, IPaddrlen) == 0)
+ for(p = f->self->hash[h]; p != nil; p = p->next)
+ if(ipcmp(a, p->a) == 0)
break;
/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
}
/* look for a link for this lifc */
- for(lp = p->link; lp; lp = lp->selflink)
+ for(lp = p->link; lp != nil; lp = lp->selflink)
if(lp->lifc == lifc)
break;
@@ -962,18 +948,19 @@
lifc->link = lp;
/* add to routing table */
- if(isv4(a))
- v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
- else
- v6addroute(f, tifc, a, IPallbits, a, type);
+ addroute(f, a, IPallbits,
+ lifc->local,
+ ((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, type, ifc, tifc);
if((type & Rmulti) && ifc->m->addmulti != nil)
(*ifc->m->addmulti)(ifc, a, lifc->local);
- } else {
+ } else
lp->ref++;
- }
qunlock(f->self);
+ poperror();
}
/*
@@ -992,8 +979,8 @@
ulong now = NOW;
l = &freeiplink;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1000,10 +987,11 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
+
static void
ipselffree(Ipself *p)
{
@@ -1011,8 +999,8 @@
ulong now = NOW;
l = &freeipself;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1019,7 +1007,7 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
@@ -1027,7 +1015,6 @@
/*
* Decrement reference for this address on this link.
* Unlink from selftab if this is the last ref.
- * called with c locked
*/
static void
remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
/* find the unique selftab entry */
l = &f->self->hash[hashipa(a)];
- for(p = *l; p; p = *l){
+ for(p = *l; p != nil; p = *l){
if(ipcmp(p->a, a) == 0)
break;
l = &p->next;
@@ -1053,7 +1040,7 @@
* that matches the selftab entry
*/
l_lifc = &lifc->link;
- for(link = *l_lifc; link; link = *l_lifc){
+ for(link = *l_lifc; link != nil; link = *l_lifc){
if(link->self == p)
break;
l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
* the one we just found
*/
l_self = &p->link;
- for(link = *l_self; link; link = *l_self){
- if(link == *(l_lifc))
+ for(link = *l_self; link != nil; link = *l_self){
+ if(link == *l_lifc)
break;
l_self = &link->selflink;
}
@@ -1079,9 +1066,20 @@
if(--(link->ref) != 0)
goto out;
- if((p->type & Rmulti) && ifc->m->remmulti != nil)
- (*ifc->m->remmulti)(ifc, a, lifc->local);
+ /* remove from routing table */
+ remroute(f, a, IPallbits,
+ lifc->local,
+ ((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, p->type, ifc, tifc);
+ if((p->type & Rmulti) && ifc->m->remmulti != nil){
+ if(!waserror()){
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+ poperror();
+ }
+ }
+
/* ref == 0, remove from both chains and free the link */
*l_lifc = link->lifclink;
*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
if(p->link != nil)
goto out;
- /* remove from routing table */
- if(isv4(a))
- v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
- else
- v6delroute(f, a, IPallbits, 1);
-
+ /* if null address, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
/* no more links, remove from hash and free */
*l = p->next;
ipselffree(p);
- /* if IPnoaddr, forget */
- if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
- f->self->acceptall = 0;
-
out:
qunlock(f->self);
}
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
- Nstformat= 41,
-};
-
long
ipselftabread(Fs *f, char *cp, ulong offset, int n)
{
@@ -1124,14 +1110,14 @@
m = 0;
off = offset;
- qlock(f->self);
for(i = 0; i < NHASH && m < n; i++){
for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
nifc = 0;
- for(link = p->link; link; link = link->selflink)
+ for(link = p->link; link != nil; link = link->selflink)
nifc++;
routetype(p->type, state);
- m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+ p->a, nifc, state);
if(off > 0){
off -= m;
m = 0;
@@ -1138,30 +1124,15 @@
}
}
}
- qunlock(f->self);
return m;
}
-int
-iptentative(Fs *f, uchar *addr)
-{
- Ipself *p;
-
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
- if(ipcmp(addr, p->a) == 0) {
- return p->link->lifc->tentative;
- }
- }
- return 0;
-}
-
/*
* returns
* 0 - no match
* Runi
* Rbcast
- * Rmcast
+ * Rmulti
*/
int
ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
{
Ipself *p;
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
+ for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
if(ipcmp(addr, p->a) == 0)
- return p->type;
- }
+ return p->type & (Runi|Rbcast|Rmulti);
/* hack to say accept anything */
if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
* return nil.
*/
Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
{
+ uchar gnet[IPaddrlen];
+ int spec, xspec;
Ipifc *ifc, *x;
Iplifc *lifc;
- Conv **cp, **e;
- uchar gnet[IPaddrlen];
- uchar xmask[IPaddrlen];
+ Conv **cp;
- x = nil; memset(xmask, 0, IPaddrlen);
-
- /* find most specific match */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
-
+ x = nil;
+ xspec = 0;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!canrlock(ifc))
+ continue;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(type & Runi){
+ if(ipcmp(remote, lifc->local) == 0){
+ Found:
+ runlock(ifc);
+ return ifc;
+ }
+ } else if(type & (Rbcast|Rmulti)) {
+ if(ipcmp(local, lifc->local) == 0)
+ goto Found;
+ }
maskip(remote, lifc->mask, gnet);
if(ipcmp(gnet, lifc->net) == 0){
- if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+ spec = comprefixlen(remote, lifc->local, IPaddrlen);
+ if(spec > xspec){
x = ifc;
- ipmove(xmask, lifc->mask);
+ xspec = spec;
}
}
}
+ runlock(ifc);
}
- if(x != nil)
- return x;
+ return x;
+}
- /* for now for broadcast and multicast, just use first interface */
- if(type & (Rbcast|Rmulti)){
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
- ifc = (Ipifc*)(*cp)->ptcl;
- if(ifc->lifc != nil)
- return ifc;
- }
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+ uchar ip[IPaddrlen];
+ Conv *c;
+ char *p;
+ long x;
+
+ x = strtol(s, &p, 10);
+ if(p > s && *p == '\0'){
+ if(x < 0)
+ return nil;
+ if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+ return (Ipifc*)c->ptcl;
}
-
+ if(parseip(ip, s) != -1)
+ return findipifc(f, ip, ip, Runi);
return nil;
}
-enum {
- unknownv6,
- multicastv6,
- unspecifiedv6,
- linklocalv6,
- sitelocalv6,
- globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
- if(isv6global(addr))
- return globalv6;
- if(islinklocal(addr))
- return linklocalv6;
- if(isv6mcast(addr))
- return multicastv6;
- if(issitelocal(addr))
- return sitelocalv6;
- return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ * find "best" (global > link local > unspecified)
+ * local address; address must be current.
+ */
static void
findprimaryipv6(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
- Iplifc *lifc;
+ ulong now = NOW/1000;
int atype, atypel;
+ Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
ipmove(local, v6Unspecified);
atype = unspecifiedv6;
- /* find "best" (global > sitelocal > link local > unspecified)
- * local address; address must be current */
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
atypel = v6addrtype(lifc->local);
if(atypel > atype)
- if(v6addrcurr(lifc)) {
+ if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
ipmove(local, lifc->local);
atype = atypel;
- if(atype == globalv6)
+ if(atype == globalv6){
+ runlock(ifc);
return;
+ }
}
}
+ runlock(ifc);
}
}
/*
- * returns first ip address configured
+ * returns first v4 address configured
*/
static void
findprimaryipv4(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
/* find first ifc local address */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- if((lifc = ifc->lifc) != nil){
- ipmove(local, lifc->local);
- return;
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) != 0){
+ ipmove(local, lifc->local);
+ runlock(ifc);
+ return;
+ }
}
+ runlock(ifc);
}
+ ipmove(local, IPnoaddr);
}
/*
- * find the local address 'closest' to the remote system, copy it to
- * local and return the ifc for that address
+ * ipv4local, ipv6local:
+ * return a local address associated with an interface close to remote.
+ * prefixlen is the number of leading bits in the local address that
+ * have to match an interface address to be considered. this is used
+ * by source specific routes to filter on the source address.
+ * return non-zero on success or zero when no address was found.
+ *
+ * for ipv4local, all addresses are 4 byte format.
*/
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
- Ipifc *ifc;
Iplifc *lifc;
- Route *r;
- uchar gate[IPaddrlen];
- uchar gnet[IPaddrlen];
- int version;
- int atype = unspecifiedv6, atypel = unknownv6;
+ int a, b;
- USED(atype);
- USED(atypel);
- qlock(f->ipifc);
- r = v6lookup(f, remote, nil);
- version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-
- if(r != nil){
- ifc = r->ifc;
- if(r->type & Rv4)
- v4tov6(gate, r->v4.gate);
- else {
- ipmove(gate, r->v6.gate);
- ipmove(local, v6Unspecified);
- }
+ b = -1;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+ continue;
- /* find ifc address closest to the gateway to use */
- switch(version) {
- case V4:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0){
- ipmove(local, lifc->local);
- goto out;
- }
- }
- break;
- case V6:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- atypel = v6addrtype(lifc->local);
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0)
- if(atypel > atype)
- if(v6addrcurr(lifc)) {
- ipmove(local, lifc->local);
- atype = atypel;
- if(atype == globalv6)
- break;
- }
- }
- if(atype > unspecifiedv6)
- goto out;
- break;
- default:
- panic("findlocalip: version %d", version);
+ if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+ continue;
+
+ a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+ if(a > b){
+ b = a;
+ memmove(local, lifc->local+IPv4off, IPv4addrlen);
}
}
-
- switch(version){
- case V4:
- findprimaryipv4(f, local);
- break;
- case V6:
- findprimaryipv6(f, local);
- break;
- default:
- panic("findlocalip2: version %d", version);
- }
-
-out:
- qunlock(f->ipifc);
+ return b >= 0;
}
-/*
- * return first v4 address associated with an interface
- */
int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
+ struct {
+ int atype;
+ int deprecated;
+ int comprefixlen;
+ } a, b;
+ int atype;
+ ulong now;
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(isv4(lifc->local)){
- memmove(addr, lifc->local+IPv4off, IPv4addrlen);
- return 1;
- }
+ if(isv4(remote)){
+ memmove(local, v4prefix, IPv4off);
+ if((prefixlen -= IPv4off*8) < 0)
+ prefixlen = 0;
+ return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
}
- return 0;
-}
-/*
- * return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
- Iplifc *lifc;
+ atype = v6addrtype(remote);
+ b.atype = unknownv6;
+ b.deprecated = 1;
+ b.comprefixlen = 0;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local) && !(lifc->tentative)){
- ipmove(addr, lifc->local);
- return 1;
+ now = NOW/1000;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(lifc->tentative)
+ continue;
+
+ if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+ continue;
+
+ a.atype = v6addrtype(lifc->local);
+ a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+ a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+ /* prefer appropriate scope */
+ if(a.atype != b.atype){
+ if(a.atype > b.atype && b.atype < atype ||
+ a.atype < b.atype && b.atype > atype)
+ goto Good;
+ continue;
}
+ /* prefer non-deprecated addresses */
+ if(a.deprecated != b.deprecated){
+ if(b.deprecated)
+ goto Good;
+ continue;
+ }
+ /* prefer longer common prefix */
+ if(a.comprefixlen != b.comprefixlen){
+ if(a.comprefixlen > b.comprefixlen)
+ goto Good;
+ continue;
+ }
+ continue;
+ Good:
+ b = a;
+ ipmove(local, lifc->local);
}
- return 0;
+
+ return b.atype >= atype;
}
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ * find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
{
- Iplifc *lifc;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local)){
- ipmove(addr, lifc->local);
- return SRC_UNI;
- }
+ if(isv4(remote)) {
+ memmove(local, v4prefix, IPv4off);
+ if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+ findprimaryipv4(f, local);
+ } else {
+ if(v6source(f, remote, local) == nil)
+ findprimaryipv6(f, local);
}
- return SRC_UNSPEC;
}
/*
@@ -1444,13 +1396,28 @@
{
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
if(ipcmp(ip, lifc->local) == 0)
return lifc;
+
return nil;
}
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+ uchar net[IPaddrlen];
+ Iplifc *lifc;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0)
+ return lifc;
+ }
+ return nil;
+}
+
+
/*
* See if we're proxying for this address on this interface
*/
@@ -1458,24 +1425,13 @@
ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
{
Route *r;
- uchar net[IPaddrlen];
- Iplifc *lifc;
/* see if this is a direct connected pt to pt address */
- r = v6lookup(f, ip, nil);
- if(r == nil)
+ r = v6lookup(f, ip, ip, nil);
+ if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
return 0;
- if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
- return 0;
- /* see if this is on the right interface */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0)
- return 1;
- }
-
- return 0;
+ return ipremoteonifc(ifc, ip) != nil;
}
/*
@@ -1487,73 +1443,53 @@
if(isv4(ip)){
if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
}
+ else if(ip[0] == 0xff)
+ return V6;
return 0;
}
-int
-ipisbm(uchar *ip)
-{
- if(isv4(ip)){
- if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
- return V4;
- if(ipcmp(ip, IPv4bcast) == 0)
- return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
- }
- return 0;
-}
-
-
/*
- * add a multicast address to an interface, called with c locked
+ * add a multicast address to an interface.
*/
void
ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
{
- Ipifc *ifc;
- Iplifc *lifc;
- Conv **p;
Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ if(isv4(ma) != isv4(ia))
+ error("incompatible multicast/interface ip address");
+
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
return; /* it's already there */
- multi = *l = smalloc(sizeof(*multi));
- ipmove(multi->ma, ma);
- ipmove(multi->ia, ia);
- multi->next = nil;
-
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
- ifc = (Ipifc*)(*p)->ptcl;
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
if(waserror()){
- wunlock(ifc);
+ runlock(ifc);
nexterror();
}
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
- addselfcache(f, ifc, lifc, ma, Rmulti);
- wunlock(ifc);
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ runlock(ifc);
poperror();
}
+
+ multi = smalloc(sizeof(*multi));
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+ *l = multi;
}
/*
- * remove a multicast address from an interface, called with c locked
+ * remove a multicast address from an interface.
*/
void
ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
{
Ipmulti *multi, **l;
Iplifc *lifc;
- Conv **p;
Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
break;
multi = *l;
@@ -1576,161 +1508,101 @@
return; /* we don't have it open */
*l = multi->next;
+ multi->next = nil;
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
-
- ifc = (Ipifc*)(*p)->ptcl;
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
+ if(!waserror()){
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
remselfcache(f, ifc, lifc, ma);
- wunlock(ifc);
- poperror();
+ poperror();
+ }
+ runlock(ifc);
}
-
free(multi);
}
-/*
- * make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
{
- USED(ifc, argv, argc);
- return nil;
+ if(waserror()){
+ print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+ return;
+ }
+ if(ifc->m != nil && ifc->m->areg != nil)
+ (*ifc->m->areg)(f, ifc, lifc, ip);
+ poperror();
}
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
- USED(ifc, argv, argc);
- return nil;
-}
-
static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
{
- Conv **cp, **e;
- Ipifc *nifc;
+ uchar a[IPaddrlen];
Iplifc *lifc;
- Medium *m;
- uchar net[IPaddrlen];
+ Ipifc *nifc;
+ Conv **cp;
- /* register the address on any network that will proxy for us */
- e = &f->ipifc->conv[f->ipifc->nc];
+ /* register the address on any interface that will proxy for the ip */
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc || !canrlock(nifc))
+ continue;
- if(!isv4(ip)) { // V6
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->addmulti == nil) {
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
- ipv62smcast(net, ip);
- addselfcache(f, nifc, lifc, net, Rmulti);
- arpenter(f, V6, ip, nifc->mac, 6, 0);
- //(*m->addmulti)(nifc, net, ip);
- break;
- }
- }
+ if(nifc->m == nil
+ || (lifc = ipremoteonifc(nifc, ip)) == nil
+ || (lifc->type & Rptpt) != 0
+ || waserror()){
runlock(nifc);
+ continue;
}
- return;
- }
- else { // V4
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->areg == nil){
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0){
- (*m->areg)(nifc, ip);
- break;
- }
- }
- runlock(nifc);
+ if((lifc->type & Rv4) == 0){
+ /* add solicited-node multicast addr */
+ ipv62smcast(a, ip);
+ if(add)
+ addselfcache(f, nifc, lifc, a, Rmulti);
+ else
+ remselfcache(f, nifc, lifc, a);
}
+ if(add)
+ ipifcregisteraddr(f, nifc, lifc, ip);
+ runlock(nifc);
+ poperror();
}
}
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
- Route *r;
-
- r = v6lookup(f, v6Unspecified, nil);
- if(r!=nil)
- if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated
- return; // by all other means take
- // precedence over router annc
-
- v6delroute(f, v6Unspecified, v6Unspecified, 1);
- v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
- Ngates = 3,
-};
-
char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
{
- uchar onlink = 1;
- uchar autoflag = 1;
- long validlt = 0xffffffff;
- long preflt = 0xffffffff;
- long origint = NOW / 10^3;
- uchar prefix[IPaddrlen];
- int plen = 64;
- Iplifc *lifc;
- char addr[40], preflen[6];
- char *params[3];
+ int plen = 64;
+ char addr[40], preflen[6];
+ char *params[3];
+ uchar prefix[IPaddrlen];
+ Iplifc lifc;
+ Medium *m;
+ lifc.onlink = 1;
+ lifc.autoflag = 1;
+ lifc.validlt = lifc.preflt = ~0UL;
+ lifc.origint = NOW / 1000;
+
switch(argc) {
case 7:
- preflt = atoi(argv[6]);
+ lifc.preflt = strtoul(argv[6], 0, 10);
/* fall through */
case 6:
- validlt = atoi(argv[5]);
+ lifc.validlt = strtoul(argv[5], 0, 10);
/* fall through */
case 5:
- autoflag = atoi(argv[4]);
+ lifc.autoflag = atoi(argv[4]) != 0;
/* fall through */
case 4:
- onlink = atoi(argv[3]);
+ lifc.onlink = atoi(argv[3]) != 0;
/* fall through */
case 3:
plen = atoi(argv[2]);
+ /* fall through */
case 2:
break;
default:
@@ -1737,25 +1609,16 @@
return Ebadarg;
}
- if((parseip(prefix, argv[1])!=6) ||
- (validlt < preflt) ||
- (plen < 0) || (plen > 64) ||
- (islinklocal(prefix))
- )
+ if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+ plen > 64 || islinklocal(prefix))
return Ebadarg;
- lifc = smalloc(sizeof(Iplifc));
- lifc->onlink = (onlink!=0);
- lifc->autoflag = (autoflag!=0);
- lifc->validlt = validlt;
- lifc->preflt = preflt;
- lifc->origint = origint;
+ /* issue "add" ctl msg for v6 link-local addr and prefix len */
+ m = ifc->m;
+ if(m == nil || m->pref2addr == nil)
+ return Eunbound;
+ (*m->pref2addr)(prefix, ifc->mac); /* mac → v6 link-local addr */
- if(ifc->m->pref2addr!=nil)
- ifc->m->pref2addr(prefix, ifc->mac);
- else
- return Ebadarg;
-
sprint(addr, "%I", prefix);
sprint(preflen, "/%d", plen);
params[0] = "add";
@@ -1762,6 +1625,28 @@
params[1] = addr;
params[2] = preflen;
- return ipifcadd(ifc, params, 3, 0, lifc);
+ return ipifcadd(ifc, params, 3, 0, &lifc);
}
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+ Iplifc *lifc, **l;
+ ulong now;
+
+ if(argc != 1)
+ return Ebadarg;
+
+ wlock(ifc);
+ now = NOW/1000;
+ for(l = &ifc->lifc; (lifc = *l) != nil;) {
+ if((lifc->type & Rv4) == 0)
+ if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+ if(ipifcremlifc(ifc, l) == nil)
+ continue;
+ l = &lifc->next;
+ }
+ wunlock(ifc);
+
+ return nil;
+}
diff -u a/os/ip//ipmux.c b/os/ip//ipmux.c
--- a/os/ip//ipmux.c
+++ b/os/ip//ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,30 +9,14 @@
#include "../port/error.h"
#include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
typedef struct Ipmuxrock Ipmuxrock;
typedef struct Ipmux Ipmux;
-typedef struct Ip6hdr Ip6hdr;
enum
{
- IPHDR = 20, /* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
- uchar vcf[4]; /* version, class label, and flow label */
- uchar ploadlen[2]; /* payload length */
- uchar proto; /* next header, i.e. proto */
- uchar ttl; /* hop limit, i.e. ttl */
- uchar src[16]; /* IP source */
- uchar dst[16]; /* IP destination */
-};
-
-
-enum
-{
+ Tver,
Tproto,
Tdata,
Tiph,
@@ -36,28 +23,8 @@
Tdst,
Tsrc,
Tifc,
-
- Cother = 0,
- Cbyte, /* single byte */
- Cmbyte, /* single byte with mask */
- Cshort, /* single short */
- Cmshort, /* single short with mask */
- Clong, /* single long */
- Cmlong, /* single long with mask */
- Cifc,
- Cmifc,
};
-char *ftname[] =
-{
-[Tproto] "proto",
-[Tdata] "data",
-[Tiph] "iph",
-[Tdst] "dst",
-[Tsrc] "src",
-[Tifc] "ifc",
-};
-
/*
* a node in the decision tree
*/
@@ -66,16 +33,12 @@
Ipmux *yes;
Ipmux *no;
uchar type; /* type of field(Txxxx) */
- uchar ctype; /* tupe of comparison(Cxxxx) */
uchar len; /* length in bytes of item to compare */
uchar n; /* number of items val points to */
- short off; /* offset of comparison */
- short eoff; /* end offset of comparison */
- uchar skiphdr; /* should offset start after ipheader */
+ int off; /* offset of comparison */
uchar *val;
uchar *mask;
uchar *e; /* val+n*len*/
-
int ref; /* so we can garbage collect */
Conv *conv;
};
@@ -90,6 +53,7 @@
static int ipmuxsprint(Ipmux*, int, char*, int);
static void ipmuxkick(void *x);
+static void ipmuxfree(Ipmux *f);
static char*
skipwhite(char *p)
@@ -122,27 +86,33 @@
Ipmux *f;
p = skipwhite(p);
- if(strncmp(p, "dst", 3) == 0){
+ if(strncmp(p, "ver", 3) == 0){
+ type = Tver;
+ off = 0;
+ len = 1;
+ p += 3;
+ }
+ else if(strncmp(p, "dst", 3) == 0){
type = Tdst;
- off = offsetof(Ip4hdr, dst[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, dst[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "src", 3) == 0){
type = Tsrc;
- off = offsetof(Ip4hdr, src[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, src[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "ifc", 3) == 0){
type = Tifc;
- off = -IPv4addrlen;
- len = IPv4addrlen;
+ off = -IPaddrlen;
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "proto", 5) == 0){
type = Tproto;
- off = offsetof(Ip4hdr, proto);
+ off = offsetof(Ip6hdr, proto);
len = 1;
p += 5;
}
@@ -160,7 +130,7 @@
return nil;
p++;
off = strtoul(p, &p, 0);
- if(off < 0 || off > (64-IPHDR))
+ if(off < 0)
return nil;
p = skipwhite(p);
if(*p != ':')
@@ -189,11 +159,6 @@
f->mask = nil;
f->n = 1;
f->ref = 1;
- if(type == Tdata)
- f->skiphdr = 1;
- else
- f->skiphdr = 0;
-
return f;
}
@@ -229,7 +194,7 @@
static Ipmux*
parsemux(char *p)
{
- int n, nomask;
+ int n;
Ipmux *f;
char *val;
char *mask;
@@ -247,7 +212,7 @@
goto parseerror;
/* parse mask */
- mask = follows(val, '&');
+ mask = follows(p, '&');
if(mask != nil){
switch(f->type){
case Tsrc:
@@ -254,7 +219,7 @@
case Tdst:
case Tifc:
f->mask = smalloc(f->len);
- v4parseip(f->mask, mask);
+ parseipmask(f->mask, mask, 0);
break;
case Tdata:
case Tiph:
@@ -264,15 +229,13 @@
default:
goto parseerror;
}
- nomask = 0;
- } else {
- nomask = 1;
+ } else if(f->type == Tver){
f->mask = smalloc(f->len);
- memset(f->mask, 0xff, f->len);
+ f->mask[0] = 0xF0;
}
/* parse vals */
- f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ f->n = getfields(val, vals, nelem(vals), 1, "|");
if(f->n == 0)
goto parseerror;
f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
v = f->val;
for(n = 0; n < f->n; n++){
switch(f->type){
+ case Tver:
+ if(f->n != 1)
+ goto parseerror;
+ if(strcmp(vals[n], "6") == 0)
+ *v = IP_VER6;
+ else if(strcmp(vals[n], "4") == 0)
+ *v = IP_VER4;
+ else
+ goto parseerror;
+ break;
case Tsrc:
case Tdst:
case Tifc:
- v4parseip(v, vals[n]);
+ if(parseip(v, vals[n]) == -1)
+ goto parseerror;
break;
case Tproto:
case Tdata:
@@ -292,34 +266,11 @@
}
v += f->len;
}
-
- f->eoff = f->off + f->len;
f->e = f->val + f->n*f->len;
- f->ctype = Cother;
- if(f->n == 1){
- switch(f->len){
- case 1:
- f->ctype = nomask ? Cbyte : Cmbyte;
- break;
- case 2:
- f->ctype = nomask ? Cshort : Cmshort;
- break;
- case 4:
- if(f->type == Tifc)
- f->ctype = nomask ? Cifc : Cmifc;
- else
- f->ctype = nomask ? Clong : Cmlong;
- break;
- }
- }
return f;
parseerror:
- if(f->mask)
- free(f->mask);
- if(f->val)
- free(f->val);
- free(f);
+ ipmuxfree(f);
return nil;
}
@@ -342,8 +293,7 @@
return n;
/* compare offsets, call earlier ones more specific */
- n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) -
- (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+ n = a->off - b->off;
if(n != 0)
return n;
@@ -413,6 +363,10 @@
*nf = *f;
nf->no = ipmuxcopy(f->no);
nf->yes = ipmuxcopy(f->yes);
+ if(f->mask != nil){
+ nf->mask = smalloc(f->len);
+ memmove(nf->mask, f->mask, f->len);
+ }
nf->val = smalloc(f->n*f->len);
nf->e = nf->val + f->len*f->n;
memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
static void
ipmuxfree(Ipmux *f)
{
- if(f->val != nil)
- free(f->val);
+ if(f == nil)
+ return;
+ free(f->val);
+ free(f->mask);
free(f);
}
@@ -432,10 +388,8 @@
{
if(f == nil)
return;
- if(f->no != nil)
- ipmuxfree(f->no);
- if(f->yes != nil)
- ipmuxfree(f->yes);
+ ipmuxfree(f->no);
+ ipmuxfree(f->yes);
ipmuxfree(f);
}
@@ -510,6 +464,8 @@
return ipmuxremove(&ft->no, f);
}
+ ipmuxremove(&ft->no, f->no);
+
/* we found a match */
if(--(ft->ref) == 0){
/*
@@ -531,8 +487,55 @@
}
/*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+ int i, n;
+
+ if(f == nil)
+ return nil;
+
+ switch(f->type){
+ case Tproto:
+ f->off = offsetof(Ip4hdr, proto);
+ break;
+ case Tdst:
+ f->off = offsetof(Ip4hdr, dst[0]);
+ if(0){
+ case Tsrc:
+ f->off = offsetof(Ip4hdr, src[0]);
+ }
+ if(f->len != IPaddrlen)
+ break;
+ n = 0;
+ for(i = 0; i < f->n; i++){
+ if(isv4(f->val + i*IPaddrlen)){
+ memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+ n++;
+ }
+ }
+ if(n == 0){
+ ipmuxtreefree(f);
+ return nil;
+ }
+ f->n = n;
+ f->len = IPv4addrlen;
+ if(f->mask != nil)
+ memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+ }
+ f->e = f->val + f->n*f->len;
+
+ f->yes = ipmuxconv4(f->yes);
+ f->no = ipmuxconv4(f->no);
+
+ return f;
+}
+
+/*
* connection request is a semi separated list of filters
- * e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ * e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
*
* there's no protection against overlapping specs.
*/
@@ -568,6 +571,18 @@
return Ebadarg;
mux->conv = c;
+ if(chain->type != Tver) {
+ char ver6[] = "ver=6";
+ mux = parsemux(ver6);
+ mux->yes = chain;
+ mux->no = ipmuxcopy(chain);
+ chain = mux;
+ }
+ if(*chain->val == IP_VER4)
+ chain->yes = ipmuxconv4(chain->yes);
+ else
+ chain->no = ipmuxconv4(chain->no);
+
/* save a copy of the chain so we can later remove it */
mux = ipmuxcopy(chain);
r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
Block *bp;
bp = qget(c->wq);
- if(bp == nil)
- return;
- else {
+ if(bp != nil) {
Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
- if((ih4->vihl)&0xF0 != 0x60)
+
+ if((ih4->vihl & 0xF0) != IP_VER6)
ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
- else {
- Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
- ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
- }
+ else
+ ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
}
}
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+ int i;
+
+ if(m == nil)
+ return memcmp(v, c, n) != 0;
+
+ for(i = 0; i < n; i++)
+ if((v[i] & m[i]) != c[i])
+ return 1;
+ return 0;
+}
+
static void
ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
{
- int len, hl;
Fs *f = p->f;
- uchar *m, *h, *v, *e, *ve, *hp;
Conv *c;
+ Iplifc *lifc;
Ipmux *mux;
- Ip4hdr *ip;
+ uchar *v;
+ Ip4hdr *ip4;
Ip6hdr *ip6;
+ int off, hl;
- ip = (Ip4hdr*)bp->rp;
- hl = (ip->vihl&0x0F)<<2;
+ ip4 = (Ip4hdr*)bp->rp;
+ if((ip4->vihl & 0xF0) == IP_VER4) {
+ hl = (ip4->vihl&0x0F)<<2;
+ ip6 = nil;
+ } else {
+ hl = IP6HDR;
+ ip6 = (Ip6hdr*)ip4;
+ }
if(p->priv == nil)
goto nomatch;
- h = bp->rp;
- len = BLEN(bp);
+ c = nil;
+ lifc = nil;
- /* run the v4 filter */
+ /* run the filter */
rlock(f);
- c = nil;
mux = f->ipmux->priv;
while(mux != nil){
- if(mux->eoff > len){
- mux = mux->no;
- continue;
- }
- hp = h + mux->off + ((int)mux->skiphdr)*hl;
- switch(mux->ctype){
- case Cbyte:
- if(*mux->val == *hp)
- goto yes;
+ switch(mux->type){
+ case Tifc:
+ if(mux->len != IPaddrlen)
+ goto no;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+ for(v = mux->val; v < mux->e; v += IPaddrlen)
+ if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+ goto yes;
+ goto no;
+ case Tdata:
+ off = hl;
break;
- case Cmbyte:
- if((*hp & *mux->mask) == *mux->val)
- goto yes;
- break;
- case Cshort:
- if(*((ushort*)mux->val) == *(ushort*)hp)
- goto yes;
- break;
- case Cmshort:
- if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
- goto yes;
- break;
- case Clong:
- if(*((ulong*)mux->val) == *(ulong*)hp)
- goto yes;
- break;
- case Cmlong:
- if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
- case Cifc:
- if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
- goto yes;
- break;
- case Cmifc:
- if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
default:
- v = mux->val;
- for(e = mux->e; v < e; v = ve){
- m = mux->mask;
- hp = h + mux->off;
- for(ve = v + mux->len; v < ve; v++){
- if((*hp++ & *m++) != *v)
- break;
- }
- if(v == ve)
- goto yes;
- }
+ off = 0;
+ break;
}
+ off += mux->off;
+ if(off < 0 || off + mux->len > BLEN(bp))
+ goto no;
+ for(v = mux->val; v < mux->e; v += mux->len)
+ if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+ goto yes;
+no:
mux = mux->no;
continue;
yes:
@@ -743,28 +747,24 @@
if(c != nil){
/* tack on interface address */
bp = padblock(bp, IPaddrlen);
- ipmove(bp->rp, ifc->lifc->local);
- bp = concatblock(bp);
- if(bp != nil)
- if(qpass(c->rq, bp) < 0)
- print("Q");
+ if(lifc == nil)
+ lifc = ifc->lifc;
+ ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+ qpass(c->rq, concatblock(bp));
return;
}
nomatch:
/* doesn't match any filter, hand it to the specific protocol handler */
- ip = (Ip4hdr*)bp->rp;
- if((ip->vihl&0xF0)==0x40) {
- p = f->t2p[ip->proto];
- } else {
- ip6 = (Ip6hdr*)bp->rp;
+ if(ip6 != nil)
p = f->t2p[ip6->proto];
- }
- if(p && p->rcv)
- (*p->rcv)(p, ifc, bp);
else
- freeblist(bp);
- return;
+ p = f->t2p[ip4->proto];
+ if(p != nil && p->rcv != nil){
+ (*p->rcv)(p, ifc, bp);
+ return;
+ }
+ freeblist(bp);
}
static int
@@ -780,11 +780,14 @@
n += snprint(buf+n, len-n, "\n");
return n;
}
- n += snprint(buf+n, len-n, "h[%d:%d]&",
- mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])),
- mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
- for(i = 0; i < mux->len; i++)
- n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "%s[%d:%d]",
+ mux->type == Tdata ? "data": "iph",
+ mux->off, mux->off+mux->len-1);
+ if(mux->mask != nil){
+ n += snprint(buf+n, len-n, "&");
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ }
n += snprint(buf+n, len-n, "=");
v = mux->val;
for(j = 0; j < mux->n; j++){
diff -u a/os/ip//iproute.c b/os/ip//iproute.c
--- a/os/ip//iproute.c
+++ b/os/ip//iproute.c
@@ -12,10 +12,10 @@
static void calcd(Route*);
/* these are used for all instances of IP */
-Route* v4freelist;
-Route* v6freelist;
-RWlock routelock;
-ulong v4routegeneration, v6routegeneration;
+static Route* v4freelist;
+static Route* v6freelist;
+static RWlock routelock;
+static ulong v4routegeneration, v6routegeneration;
static void
freeroute(Route *r)
@@ -22,6 +22,7 @@
{
Route **l;
+ r->ref = 0;
r->left = nil;
r->right = nil;
if(r->type & Rv4)
@@ -35,9 +36,8 @@
static Route*
allocroute(int type)
{
- Route *r;
+ Route *r, **l;
int n;
- Route **l;
if(type & Rv4){
n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
return;
l = allocroute(r->type);
+ l->left = r;
l->mid = *q;
*q = l;
- l->left = r;
}
/*
@@ -99,11 +99,11 @@
*/
enum
{
- Rpreceeds,
- Rfollows,
- Requals,
- Rcontains,
- Rcontained,
+ Rpreceeds, /* a left of b */
+ Rfollows, /* a right of b */
+ Requals, /* a equals b */
+ Rcontains, /* a contains b */
+ Roverlaps, /* a overlaps b */
};
static int
@@ -112,44 +112,88 @@
if(a->type & Rv4){
if(a->v4.endaddress < b->v4.address)
return Rpreceeds;
-
if(a->v4.address > b->v4.endaddress)
return Rfollows;
-
if(a->v4.address <= b->v4.address
&& a->v4.endaddress >= b->v4.endaddress){
if(a->v4.address == b->v4.address
- && a->v4.endaddress == b->v4.endaddress)
- return Requals;
+ && a->v4.endaddress == b->v4.endaddress){
+ if(a->v4.source <= b->v4.source
+ && a->v4.endsource >= b->v4.endsource){
+ if(a->v4.source == b->v4.source
+ && a->v4.endsource == b->v4.endsource)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
- return Rcontained;
+ return Roverlaps;
}
if(lcmp(a->v6.endaddress, b->v6.address) < 0)
return Rpreceeds;
-
if(lcmp(a->v6.address, b->v6.endaddress) > 0)
return Rfollows;
-
if(lcmp(a->v6.address, b->v6.address) <= 0
&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
if(lcmp(a->v6.address, b->v6.address) == 0
- && lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
- return Requals;
+ && lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+ if(lcmp(a->v6.source, b->v6.source) <= 0
+ && lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+ if(lcmp(a->v6.source, b->v6.source) == 0
+ && lcmp(a->v6.endsource, b->v6.endsource) == 0)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
+ return Roverlaps;
+}
- return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+ if(a == b)
+ return 1;
+
+ if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+ return 0;
+
+ if(a->type & Rv4){
+ if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+ && memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+ return 0;
+ } else {
+ if(ipcmp(a->v6.gate, IPnoaddr) != 0
+ && ipcmp(a->v6.gate, b->v6.gate) != 0)
+ return 0;
+ }
+
+ if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+ return 0;
+
+ if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+ return 0;
+
+ return 1;
}
static void
copygate(Route *old, Route *new)
{
+ old->type = new->type;
+ old->ifc = new->ifc;
+ old->ifcid = new->ifcid;
if(new->type & Rv4)
memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
else
- memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+ ipmove(old->v6.gate, new->v6.gate);
+ strncpy(old->tag, new->tag, sizeof(new->tag));
}
/*
@@ -162,12 +206,12 @@
l = p->left;
r = p->right;
- p->left = 0;
- p->right = 0;
+ p->left = nil;
+ p->right = nil;
addnode(f, root, p);
- if(l)
+ if(l != nil)
walkadd(f, root, l);
- if(r)
+ if(r != nil)
walkadd(f, root, r);
}
@@ -180,16 +224,16 @@
Route *q;
int d;
- if(p) {
+ if(p != nil) {
d = 0;
q = p->left;
- if(q)
+ if(q != nil)
d = q->depth;
q = p->right;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
q = p->mid;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
p->depth = d+1;
}
@@ -210,8 +254,8 @@
* rotate tree node
*/
p = *cur;
- dl = 0; if(l = p->left) dl = l->depth;
- dr = 0; if(r = p->right) dr = r->depth;
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
if(dl > dr+1) {
p->left = l->right;
@@ -239,7 +283,7 @@
Route *p;
p = *cur;
- if(p == 0) {
+ if(p == nil) {
*cur = new;
new->depth = 1;
return;
@@ -269,15 +313,13 @@
* supercede the old entry if the old one isn't
* a local interface.
*/
- if((p->type & Rifc) == 0){
- p->type = new->type;
- p->ifcid = -1;
+ if((p->type & Rifc) == 0)
copygate(p, new);
- } else if(new->type & Rifc)
+ else if(new->type & Rifc)
p->ref++;
freeroute(new);
break;
- case Rcontained:
+ case Roverlaps:
addnode(f, &p->mid, new);
break;
}
@@ -285,241 +327,316 @@
balancetree(cur);
}
-#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ * find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
{
Route *p;
- ulong sa;
- ulong m;
- ulong ea;
- int h, eh;
- m = nhgetl(mask);
- sa = nhgetl(a) & m;
- ea = sa | ~m;
+ for(;;){
+ p = *cur;
+ if(p == nil)
+ return nil;
+ switch(rangecompare(r, p)){
+ case Rcontains:
+ return nil;
+ case Rpreceeds:
+ cur = &p->left;
+ break;
+ case Rfollows:
+ cur = &p->right;
+ break;
+ case Roverlaps:
+ cur = &p->mid;
+ break;
+ case Requals:
+ if((p->type & Rifc) == 0 && !matchroute(r, p))
+ return nil;
+ return cur;
+ }
+ }
+}
- eh = V4H(ea);
- for(h=V4H(sa); h<=eh; h++) {
- p = allocroute(Rv4 | type);
- p->v4.address = sa;
- p->v4.endaddress = ea;
- memmove(p->v4.gate, gate, sizeof(p->v4.gate));
- memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+ Route *x;
- wlock(&routelock);
- addnode(f, &f->v4root[h], p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- wunlock(&routelock);
+ if(r == nil)
+ return nil;
+
+ if((x = looknodetag(r->mid, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->left, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->right, tag)) != nil)
+ return x;
+
+ if((r->type & Rifc) == 0){
+ if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+ return r;
}
- v4routegeneration++;
- ipifcaddroute(f, Rv4, a, mask, gate, type);
+ return nil;
}
-#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
+#define V6H(a) (((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
{
- Route *p;
- ulong sa[IPllen], ea[IPllen];
- ulong x, y;
- int h, eh;
+ Route **h, **e, *p;
- /*
- if(ISDFLT(a, mask, tag))
- f->v6p->cdrouter = -1;
- */
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+ for(; h <= e; h++) {
+ p = allocroute(r->type);
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- sa[h] = x & y;
- ea[h] = x | ~y;
- }
+ p->ifc = r->ifc;
+ p->ifcid = r->ifcid;
- eh = V6H(ea);
- for(h = V6H(sa); h <= eh; h++) {
- p = allocroute(type);
- memmove(p->v6.address, sa, IPaddrlen);
- memmove(p->v6.endaddress, ea, IPaddrlen);
- memmove(p->v6.gate, gate, IPaddrlen);
- memmove(p->tag, tag, sizeof(p->tag));
+ if(r->type & Rv4)
+ memmove(&p->v4, &r->v4, sizeof(r->v4));
+ else
+ memmove(&p->v6, &r->v6, sizeof(r->v6));
- wlock(&routelock);
- addnode(f, &f->v6root[h], p);
- while(p = f->queue) {
+ memmove(p->tag, r->tag, sizeof(r->tag));
+
+ addnode(f, h, p);
+ while((p = f->queue) != nil) {
f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
+ walkadd(f, h, p->left);
freeroute(p);
}
- wunlock(&routelock);
}
- v6routegeneration++;
- ipifcaddroute(f, 0, a, mask, gate, type);
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
{
- Route *p;
+ Route **h, **e, **l, *p;
- for(;;){
- p = *cur;
- if(p == 0)
- return 0;
-
- switch(rangecompare(r, p)){
- case Rcontains:
- return 0;
- case Rpreceeds:
- cur = &p->left;
- break;
- case Rfollows:
- cur = &p->right;
- break;
- case Rcontained:
- cur = &p->mid;
- break;
- case Requals:
- return cur;
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+
+ for(; h <= e; h++) {
+ if((l = looknode(h, r)) == nil)
+ continue;
+ p = *l;
+ if(--(p->ref) != 0)
+ continue;
+ *l = nil;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, h, p->left);
+ freeroute(p);
}
}
+
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong m;
+ ulong x, y;
+ Route r;
+ int h;
- m = nhgetl(mask);
- rt.v4.address = nhgetl(a) & m;
- rt.v4.endaddress = rt.v4.address | ~m;
- rt.type = Rv4;
+ memset(&r, 0, sizeof(r));
- eh = V4H(rt.v4.endaddress);
- for(h=V4H(rt.v4.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v4root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- }
+ r.type = type;
+
+ if(type & Rv4){
+ x = nhgetl(a+IPv4off);
+ y = nhgetl(mask+IPv4off);
+ r.v4.address = x & y;
+ r.v4.endaddress = x | ~y;
+
+ x = nhgetl(s+IPv4off);
+ y = nhgetl(smask+IPv4off);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v4.source = x & y;
+ r.v4.endsource = x | ~y;
+
+ memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+ } else {
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ r.v6.address[h] = x & y;
+ r.v6.endaddress[h] = x | ~y;
+
+ x = nhgetl(s+4*h);
+ y = nhgetl(smask+4*h);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v6.source[h] = x & y;
+ r.v6.endsource[h] = x | ~y;
}
- if(dolock)
- wunlock(&routelock);
+
+ memmove(r.v6.gate, gate, IPaddrlen);
}
- v4routegeneration++;
- ipifcremroute(f, Rv4, a, mask);
+ if(ifc != nil){
+ r.ifc = ifc;
+ r.ifcid = ifc->ifcid;
+ }
+
+ if(tag != nil)
+ strncpy(r.tag, tag, sizeof(r.tag));
+
+ return r;
}
void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong x, y;
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routeadd(f, &r);
+ wunlock(&routelock);
+}
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- rt.v6.address[h] = x & y;
- rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routerem(f, &r);
+ wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+ uchar local[IPaddrlen], gate[IPaddrlen];
+ Ipifc *ifc;
+ int i;
+
+ ifc = r->ifc;
+ if(ifc != nil && ifc->ifcid == r->ifcid)
+ return ifc;
+
+ if(r->type & Rsrc) {
+ if(r->type & Rv4) {
+ hnputl(local+IPv4off, r->v4.source);
+ memmove(local, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(local+4*i, r->v6.source[i]);
+ }
+ } else {
+ ipmove(local, IPnoaddr);
}
- rt.type = 0;
- eh = V6H(rt.v6.endaddress);
- for(h=V6H(rt.v6.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v6root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
- freeroute(p);
- }
- }
+ if(r->type & Rifc) {
+ if(r->type & Rv4) {
+ hnputl(gate+IPv4off, r->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(gate+4*i, r->v6.address[i]);
}
- if(dolock)
- wunlock(&routelock);
+ } else {
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else
+ ipmove(gate, r->v6.gate);
}
- v6routegeneration++;
- ipifcremroute(f, 0, a, mask);
+ if((ifc = findipifc(f, local, gate, r->type)) == nil)
+ return nil;
+
+ r->ifc = ifc;
+ r->ifcid = ifc->ifcid;
+ return ifc;
}
+/*
+ * v4lookup, v6lookup:
+ * lookup a route to destination address a from source address s
+ * and return the route. returns nil if no route was found.
+ * an optional Routehint can be passed in rh to cache the lookup.
+ *
+ * for v4lookup, addresses are in 4 byte format.
+ */
Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
+ ulong la, ls;
Route *p, *q;
- ulong la;
- uchar gate[IPaddrlen];
Ipifc *ifc;
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v4routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
la = nhgetl(a);
+ ls = nhgetl(s);
q = nil;
- for(p=f->v4root[V4H(la)]; p;)
- if(la >= p->v4.address) {
- if(la <= p->v4.endaddress) {
- q = p;
- p = p->mid;
- } else
- p = p->right;
- } else
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
p = p->left;
-
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- hnputl(gate+IPv4off, q->v4.address);
- memmove(gate, v4prefix, IPv4off);
- } else
- v4tov6(gate, q->v4.gate);
- ifc = findipifc(f, gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ if(p->type & Rsrc){
+ if(ls < p->v4.source){
+ p = p->mid;
+ continue;
+ }
+ if(ls > p->v4.endsource){
+ p = p->mid;
+ continue;
+ }
+ }
+ q = p;
+ p = p->mid;
}
- if(c != nil){
- c->r = q;
- c->rgen = v4routegeneration;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v4routegeneration;
}
return q;
@@ -526,29 +643,35 @@
}
Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
- Route *p, *q;
- ulong la[IPllen];
- int h;
+ ulong la[IPllen], ls[IPllen];
ulong x, y;
- uchar gate[IPaddrlen];
+ Route *p, *q;
Ipifc *ifc;
+ int h;
- if(memcmp(a, v4prefix, IPv4off) == 0){
- q = v4lookup(f, a+IPv4off, c);
- if(q != nil)
- return q;
+ if(isv4(s)){
+ if(isv4(a))
+ return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+ return nil;
}
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v6routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
- for(h = 0; h < IPllen; h++)
+ for(h = 0; h < IPllen; h++){
la[h] = nhgetl(a+4*h);
+ ls[h] = nhgetl(s+4*h);
+ }
- q = 0;
- for(p=f->v6root[V6H(la)]; p;){
+ q = nil;
+ for(p = f->v6root[V6H(la)]; p != nil;){
for(h = 0; h < IPllen; h++){
x = la[h];
y = p->v6.address[h];
@@ -571,42 +694,202 @@
}
break;
}
+ if(p->type & Rsrc){
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.source[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.endsource[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ }
q = p;
p = p->mid;
next: ;
}
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- for(h = 0; h < IPllen; h++)
- hnputl(gate+4*h, q->v6.address[h]);
- ifc = findipifc(f, gate, q->type);
- } else
- ifc = findipifc(f, q->v6.gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v6routegeneration;
}
- if(c != nil){
- c->r = q;
- c->rgen = v6routegeneration;
- }
return q;
}
+/*
+ * v4source, v6source:
+ * lookup a route to destination address a and also find
+ * a suitable source address s on the outgoing interface.
+ * return the route on success or nil when no route
+ * was found.
+ *
+ * for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPv4addrlen];
+ int splen;
+ ulong x, la;
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ la = nhgetl(a);
+ rlock(&routelock);
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
+ p = p->left;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+ splen++;
+ hnputl(src, p->v4.source);
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+ p = p->mid;
+ continue;
+ }
+ memmove(s, src, IPv4addrlen);
+ q = p;
+ p = p->mid;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPaddrlen];
+ int splen, h;
+ ulong x, y, la[IPllen];
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+ rlock(&routelock);
+ for(p = f->v6root[V6H(la)]; p != nil;){
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(h = 0; h < IPllen; h++){
+ hnputl(src+4*h, p->v6.source[h]);
+ if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+ for(; x & 0x80000000UL; x <<= 1)
+ splen++;
+ break;
+ }
+ splen += 32;
+ }
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv6local(ifc, src, splen, a)){
+ p = p->mid;
+ continue;
+ }
+ ipmove(s, src);
+ q = p;
+ p = p->mid;
+next: ;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+ int type = 0;
+ switch(*p++){
+ default: return -1;
+ case '4': type |= Rv4;
+ case '6': break;
+ }
+ for(;;) switch(*p++){
+ default:
+ return -1;
+ case 'i':
+ if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+ break;
+ case 'u':
+ if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+ break;
+ case 'b':
+ if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+ break;
+ case 'm':
+ if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+ break;
+ case 'p':
+ if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+ break;
+ case '\0':
+ return type;
+ }
+}
+
void
-routetype(int type, char *p)
+routetype(int type, char p[8])
{
- memset(p, ' ', 4);
- p[4] = 0;
if(type & Rv4)
*p++ = '4';
else
*p++ = '6';
+
if(type & Rifc)
*p++ = 'i';
+
if(type & Runi)
*p++ = 'u';
else if(type & Rbcast)
@@ -613,14 +896,14 @@
*p++ = 'b';
else if(type & Rmulti)
*p++ = 'm';
+
if(type & Rptpt)
- *p = 'p';
+ *p++ = 'p';
+ *p = 0;
}
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
{
int i;
@@ -627,8 +910,16 @@
if(r->type & Rv4){
memmove(addr, v4prefix, IPv4off);
hnputl(addr+IPv4off, r->v4.address);
+
memset(mask, 0xff, IPv4off);
hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+ memmove(src, v4prefix, IPv4off);
+ hnputl(src+IPv4off, r->v4.source);
+
+ memset(smask, 0xff, IPv4off);
+ hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
memmove(gate, v4prefix, IPv4off);
memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
} else {
@@ -635,162 +926,186 @@
for(i = 0; i < IPllen; i++){
hnputl(addr + 4*i, r->v6.address[i]);
hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ hnputl(src + 4*i, r->v6.source[i]);
+ hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
}
memmove(gate, r->v6.gate, IPaddrlen);
}
+}
- routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+ uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+ char type[8], ifbuf[4], *iname;
- if(r->ifc)
- *nifc = r->ifc->conv->x;
+ convroute(r, addr, mask, src, smask, gate);
+ routetype(r->type, type);
+ if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+ snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
else
- *nifc = -1;
+ iname = "-";
+ return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+ addr, mask, gate, type, r->tag, iname, src, smask);
}
-/*
- * this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
{
- int nifc, n;
- char t[5], *iname, ifbuf[5];
- uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
- char *p;
+ int o;
+ int h;
+ char* p;
+ char* e;
+};
- convroute(r, addr, mask, gate, t, &nifc);
- iname = "-";
- if(nifc != -1) {
- iname = ifbuf;
- snprint(ifbuf, sizeof ifbuf, "%d", nifc);
- }
- p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+ int n = seprintroute(rw->p, rw->e, r) - rw->p;
if(rw->o < 0){
- n = p - rw->p;
if(n > -rw->o){
- memmove(rw->p, rw->p-rw->o, n+rw->o);
- rw->p = p + rw->o;
+ memmove(rw->p, rw->p - rw->o, n + rw->o);
+ rw->p += n + rw->o;
}
rw->o += n;
} else
- rw->p = p;
+ rw->p += n;
+ return rw->p < rw->e;
}
-/*
- * recurse descending tree, applying the function in Routewalk
- */
static int
rr(Route *r, Routewalk *rw)
{
int h;
- if(rw->e <= rw->p)
- return 0;
if(r == nil)
return 1;
-
if(rr(r->left, rw) == 0)
return 0;
-
if(r->type & Rv4)
h = V4H(r->v4.address);
else
h = V6H(r->v6.address);
-
- if(h == rw->h)
- rw->walk(r, rw);
-
+ if(h == rw->h){
+ if(rr1(rw, r) == 0)
+ return 0;
+ }
if(rr(r->mid, rw) == 0)
return 0;
-
return rr(r->right, rw);
}
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
{
+ Routewalk rw[1];
+
+ rw->p = p;
+ rw->e = p+n;
+ rw->o = -offset;
+ if(rw->o > 0)
+ return 0;
+
rlock(&routelock);
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
if(rr(f->v4root[rw->h], rw) == 0)
break;
}
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
if(rr(f->v6root[rw->h], rw) == 0)
break;
}
runlock(&routelock);
-}
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
- Routewalk rw;
-
- rw.p = p;
- rw.e = p+n;
- rw.o = -offset;
- rw.walk = sprintroute;
-
- ipwalkroutes(f, &rw);
-
- return rw.p - p;
+ return rw->p - p;
}
/*
- * this code is not in routeflush to reduce stack size
+ * 4 add addr mask gate
+ * 5 add addr mask gate ifc
+ * 6 add addr mask gate src smask
+ * 7 add addr mask gate ifc src smask
+ * 8 add addr mask gate tag ifc src smask
+ * 9 add addr mask gate type tag ifc src smask
+ * 3 remove addr mask
+ * 4 remove addr mask gate
+ * 5 remove addr mask src smask
+ * 6 remove addr mask gate src smask
+ * 7 remove addr mask gate ifc src smask
+ * 8 remove addr mask gate tag ifc src smask
+ * 9 remove addr mask gate type tag ifc src smask
*/
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
{
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
+ uchar addr[IPaddrlen], mask[IPaddrlen];
+ uchar src[IPaddrlen], smask[IPaddrlen];
uchar gate[IPaddrlen];
- char t[5];
- int nifc;
+ Ipifc *ifc;
+ char *tag;
+ int type;
- convroute(r, addr, mask, gate, t, &nifc);
- if(r->type & Rv4)
- v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
- else
- v6delroute(f, addr, mask, dolock);
-}
+ type = 0;
+ tag = nil;
+ ifc = nil;
+ ipmove(gate, IPnoaddr);
+ ipmove(src, IPnoaddr);
+ ipmove(smask, IPnoaddr);
-/*
- * recurse until one route is deleted
- * returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
- if(r == nil)
- return 0;
- if(routeflush(f, r->mid, tag))
- return 1;
- if(routeflush(f, r->left, tag))
- return 1;
- if(routeflush(f, r->right, tag))
- return 1;
- if((r->type & Rifc) == 0){
- if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
- delroute(f, r, 0);
- return 1;
- }
+ if(argc < 3)
+ error(Ebadctl);
+ if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+ error(Ebadip);
+
+ if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+ if(argc < 4)
+ error(Ebadctl);
+ if(parseip(gate, argv[3]) == -1)
+ error(Ebadip);
}
- return 0;
+ if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+ if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+ error(Ebadip);
+ }
+ if(argc == 5 && strcmp(argv[0], "add") == 0)
+ ifc = findipifcstr(f, argv[4]);
+ if(argc > 6)
+ ifc = findipifcstr(f, argv[argc-3]);
+ if(argc > 7)
+ tag = argv[argc-4];
+ if(argc > 8){
+ if((type = parseroutetype(argv[argc-5])) < 0)
+ error(Ebadctl);
+ } else {
+ if(isv4(addr))
+ type |= Rv4;
+ }
+ if(argc > 9)
+ error(Ebadctl);
+
+ if(type & Rv4){
+ if(!isv4(addr))
+ error(Ebadip);
+ if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+ error(Ebadip);
+ if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+ error(Ebadip);
+ } else {
+ if(isv4(addr))
+ error(Ebadip);
+ }
+
+ return mkroute(addr, mask, src, smask, gate, type, ifc, tag);
}
long
routewrite(Fs *f, Chan *c, char *p, int n)
{
- int h, changed;
- char *tag;
Cmdbuf *cb;
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar gate[IPaddrlen];
- IPaux *a, *na;
+ IPaux *a;
+ Route *x, r;
cb = parsecmd(p, n);
if(waserror()){
@@ -797,54 +1112,44 @@
free(cb);
nexterror();
}
-
+ if(cb->nf < 1)
+ error("short control request");
if(strcmp(cb->f[0], "flush") == 0){
- tag = cb->f[1];
+ char *tag = cb->nf < 2 ? nil : cb->f[1];
+ int h;
+
+ wlock(&routelock);
for(h = 0; h < nelem(f->v4root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v4root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v4root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+ routerem(f, &r);
}
for(h = 0; h < nelem(f->v6root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v6root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v6root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+ routerem(f, &r);
}
- } else if(strcmp(cb->f[0], "remove") == 0){
- if(cb->nf < 3)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
- else
- v6delroute(f, addr, mask, 1);
- } else if(strcmp(cb->f[0], "add") == 0){
- if(cb->nf < 4)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- parseip(gate, cb->f[3]);
- tag = "none";
- if(c != nil){
+ wunlock(&routelock);
+ } else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+ r = parseroute(f, cb->f, cb->nf);
+ if(*r.tag == 0){
a = c->aux;
- tag = a->tag;
+ strncpy(r.tag, a->tag, sizeof(r.tag));
}
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+ wlock(&routelock);
+ if(strcmp(cb->f[0], "add") == 0)
+ routeadd(f, &r);
else
- v6addroute(f, tag, addr, mask, gate, 0);
+ routerem(f, &r);
+ wunlock(&routelock);
} else if(strcmp(cb->f[0], "tag") == 0) {
if(cb->nf < 2)
error(Ebadarg);
-
a = c->aux;
- na = newipaux(a->owner, cb->f[1]);
- c->aux = na;
+ c->aux = newipaux(a->owner, cb->f[1]);
free(a);
- }
+ } else
+ error(Ebadctl);
poperror();
free(cb);
diff -u a/os/ip//ipv6.c b/os/ip//ipv6.c
--- a/os/ip//ipv6.c
+++ b/os/ip//ipv6.c
@@ -8,250 +8,127 @@
#include "ip.h"
#include "ipv6.h"
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = (32*1024), /* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
-
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void ipfragfree6(IP*, Fragment6*);
-Fragment6* ipfragallo6(IP*);
+static Block* ip6reassemble(IP*, int, Block*);
+static Fragment6* ipfragallo6(IP*);
+static void ipfragfree6(IP*, Fragment6*);
+static Block* procopts(Block *bp);
static Block* procxtns(IP *ip, Block *bp, int doreasm);
-int unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block* procopts(Block *bp);
+static int unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
+ v6params *v6p;
- Nstats,
-};
+ v6p = smalloc(sizeof(v6params));
-static char *statnames[] =
-{
-[Forwarding] "Forwarding",
-[DefaultTTL] "DefaultTTL",
-[InReceives] "InReceives",
-[InHdrErrors] "InHdrErrors",
-[InAddrErrors] "InAddrErrors",
-[ForwDatagrams] "ForwDatagrams",
-[InUnknownProtos] "InUnknownProtos",
-[InDiscards] "InDiscards",
-[InDelivers] "InDelivers",
-[OutRequests] "OutRequests",
-[OutDiscards] "OutDiscards",
-[OutNoRoutes] "OutNoRoutes",
-[ReasmTimeout] "ReasmTimeout",
-[ReasmReqds] "ReasmReqds",
-[ReasmOKs] "ReasmOKs",
-[ReasmFails] "ReasmFails",
-[FragOKs] "FragOKs",
-[FragFails] "FragFails",
-[FragCreates] "FragCreates",
-};
+ v6p->rp.mflag = 0; /* default not managed */
+ v6p->rp.oflag = 0;
+ v6p->rp.maxraint = 600000; /* millisecs */
+ v6p->rp.minraint = 200000;
+ v6p->rp.linkmtu = 0; /* no mtu sent */
+ v6p->rp.reachtime = 0;
+ v6p->rp.rxmitra = 0;
+ v6p->rp.ttl = MAXTTL;
+ v6p->rp.routerlt = (3 * v6p->rp.maxraint) / 1000;
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
+ v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
+ f->v6p = v6p;
+}
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
- int tentative;
- Ipifc *ifc;
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+ int morefrags, blklen, rv = 0;
uchar *gate, nexthdr;
- Ip6hdr *eh;
- int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
- Route *r, *sr;
- Fraghdr6 fraghdr;
Block *xp, *nb;
+ Fraghdr6 fraghdr;
IP *ip;
- int rv = 0;
+ Ip6hdr *eh;
+ Ipifc *ifc;
+ Route *r;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip6hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)bp->rp;
+ assert(BLEN(bp) >= IP6HDR);
len = blocklen(bp);
-
- tentative = iptentative(f, eh->src);
- if(tentative){
- netlog(f, Logip, "reject tx of packet with tentative src address\n");
- goto free;
- }
-
- if(gating){
- chunk = nhgets(eh->ploadlen);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk + IPV6HDR_LEN < len)
- len = chunk + IPV6HDR_LEN;
- }
-
if(len >= IP_MAX){
-// print("len > IP_MAX, free\n");
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v6lookup(f, eh->dst, c);
- if(r == nil){
-// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ r = v6lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v6lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v6.gate;
- if(!gating)
- eh->vcf[0] = IP_VER6;
- eh->ttl = ttl;
- if(!gating) {
- eh->vcf[0] |= (tos >> 4);
- eh->vcf[1] = (tos << 4);
- }
-
- if(!canrlock(ifc)) {
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
}
-
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil) {
+ if(ifc->m == nil)
goto raise;
+
+ if(!gating){
+ eh->vcf[0] = IP_VER6;
+ eh->vcf[0] |= tos >> 4;
+ eh->vcf[1] = tos << 4;
}
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- hnputs(eh->ploadlen, len-IPV6HDR_LEN);
- ifc->m->bwrite(ifc, bp, V6, gate);
+ hnputs(eh->ploadlen, len - IP6HDR);
+ ipifcoput(ifc, bp, V6, gate);
runlock(ifc);
poperror();
return 0;
}
- if(gating)
- if(ifc->reassemble <= 0) {
-
- /* v6 intermediate nodes are not supposed to fragment pkts;
- we fragment if ifc->reassemble is turned on; an exception
- needed for nat.
+ if(gating && !ifc->reassemble) {
+ /*
+ * v6 intermediate nodes are not supposed to fragment pkts;
+ * we fragment if ifc->reassemble is turned on; an exception
+ * needed for nat.
*/
-
ip->stats[OutDiscards]++;
icmppkttoobig6(f, ifc, bp);
- netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
goto raise;
}
-
+
/* start v6 fragmentation */
- uflen = unfraglen(bp, &nexthdr, 1);
+ uflen = unfraglen(bp, &nexthdr, 1, 0);
+ if(uflen < IP6HDR || nexthdr == FH) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+ goto raise;
+ }
if(uflen > medialen) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
goto raise;
}
@@ -260,7 +137,7 @@
if(seglen < 8) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
goto raise;
}
@@ -271,13 +148,13 @@
xp = bp;
offset = uflen;
- while (xp != nil && offset && offset >= BLEN(xp)) {
+ while (offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
xp->rp += offset;
- fragoff = 0;
+ fragoff = 0;
morefrags = 1;
for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
memmove(nb->wp, eh, uflen);
nb->wp += uflen;
- hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+ hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
fraghdr.offsetRM[1] |= morefrags;
memmove(nb->wp, &fraghdr, IP6FHDR);
nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
/* Copy data */
chunk = seglen;
while (chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -316,10 +193,9 @@
xp->rp += blklen;
chunk -= blklen;
if(xp->rp == xp->wp)
- xp = xp->next;
+ xp = xp->next;
}
-
- ifc->m->bwrite(ifc, nb, V6, gate);
+ ipifcoput(ifc, nb, V6, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
runlock(ifc);
poperror();
free:
- freeblist(bp);
+ freeblist(bp);
return rv;
}
@@ -335,16 +211,10 @@
void
ipiput6(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos;
- uchar proto;
+ int hl, len, hop, tos;
+ IP *ip;
Ip6hdr *h;
Proto *p;
- int notforme;
- int tentative;
- uchar v6dst[IPaddrlen];
- IP *ip;
- Route *r, *sr;
ip = f->ip;
ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
return;
}
- h = (Ip6hdr *)(bp->rp);
-
- memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
- notforme = ipforme(f, v6dst) == 0;
- tentative = iptentative(f, v6dst);
-
- if(tentative && (h->proto != ICMPv6)) {
- print("tentative addr, drop\n");
- freeblist(bp);
- return;
- }
-
/* Check header version */
- if(BLKIPVER(bp) != IP_VER6) {
+ h = (Ip6hdr*)bp->rp;
+ if((h->vcf[0] & 0xF0) != IP_VER6) {
ip->stats[InHdrErrors]++;
netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
- freeblist(bp);
+ goto drop;
+ }
+ len = IP6HDR + nhgets(h->ploadlen);
+ if((bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
return;
}
+ h = (Ip6hdr*)bp->rp;
/* route */
- if(notforme) {
- if(!ip->iprouting){
- freeb(bp);
- return;
+ if(!ipforme(f, h->dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
+
+ if(!ip->iprouting)
+ goto drop;
+
+ /* don't forward to link-local destinations */
+ if(islinklocal(h->dst) ||
+ (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+ ip->stats[OutDiscards]++;
+ goto drop;
}
+
/* don't forward to source's network */
- sr = v6lookup(f, h->src, nil);
- r = v6lookup(f, h->dst, nil);
-
- if(r == nil || sr == r){
+ rh.r = nil;
+ r = v6lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
if(hop < 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded6(f, ifc, bp);
- freeblist(bp);
- return;
+ goto drop;
}
/* process headers & reassemble if the interface expects it */
- bp = procxtns(ip, bp, r->ifc->reassemble);
-
+ bp = procxtns(ip, bp, nifc->reassemble);
if(bp == nil)
return;
ip->stats[ForwDatagrams]++;
- h = (Ip6hdr *) (bp->rp);
- tos = IPV6CLASS(h);
+ h = (Ip6hdr*)bp->rp;
+ tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
hop = h->ttl;
- ipoput6(f, bp, 1, hop-1, tos, nil);
+ ipoput6(f, bp, 1, hop-1, tos, &rh);
return;
}
/* reassemble & process headers if needed */
bp = procxtns(ip, bp, 1);
-
if(bp == nil)
return;
- h = (Ip6hdr *) (bp->rp);
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ h = (Ip6hdr*)bp->rp;
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -447,20 +318,20 @@
/*
* ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
*/
-void
+static void
ipfragfree6(IP *ip, Fragment6 *frag)
{
Fragment6 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- memset(frag->src, 0, IPaddrlen);
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ memset(frag->src, 0, IPaddrlen);
+ memset(frag->dst, 0, IPaddrlen);
l = &ip->flisthead6;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -470,13 +341,12 @@
frag->next = ip->fragfree6;
ip->fragfree6 = frag;
-
}
/*
* ipfragallo6 - copied from ipfragalloc4
*/
-Fragment6*
+static Fragment6*
ipfragallo6(IP *ip)
{
Fragment6 *f;
@@ -483,7 +353,7 @@
while(ip->fragfree6 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead6; f->next; f = f->next)
+ for(f = ip->flisthead6; f->next != nil; f = f->next)
;
ipfragfree6(ip, f);
}
@@ -497,108 +367,109 @@
}
static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
- int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
uchar proto;
- Ip6hdr *h;
+ int offset;
- h = (Ip6hdr *) (bp->rp);
- offset = unfraglen(bp, &proto, 0);
-
- if((proto == FH) && (doreasm != 0)) {
- bp = ip6reassemble(ip, offset, bp, h);
- if(bp == nil)
- return nil;
- offset = unfraglen(bp, &proto, 0);
+ offset = unfraglen(bp, &proto, 0, doreasm);
+ if(offset >= IP6HDR && proto == FH && doreasm) {
+ bp = ip6reassemble(ip, offset, bp);
+ if(bp == nil)
+ return nil;
+ offset = unfraglen(bp, &proto, 0, 0);
+ if(proto == FH)
+ offset = -1;
}
-
- if(proto == DOH || offset > IP6HDR)
+ if(offset < IP6HDR){
+ ip->stats[InHdrErrors]++;
+ ip->stats[InDiscards]++;
+ freeblist(bp);
+ return nil;
+ }
+ if(proto == DOH || offset > IP6HDR)
bp = procopts(bp);
-
return bp;
}
-
-/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- * field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
*/
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
{
- uchar *p, *q;
- int ufl, hs;
+ uchar *e, *p, *q;
+ e = bp->wp;
p = bp->rp;
- q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
*nexthdr = *q;
- ufl = IP6HDR;
- p += ufl;
-
- for(;;) {
- if(*nexthdr == HBH || *nexthdr == RH) {
- *nexthdr = *p;
- hs = ((int)*(p+1) + 1) * 8;
- ufl += hs;
- q = p;
- p += hs;
- }
- else
- break;
+ p += IP6HDR;
+ while(*nexthdr == HBH || *nexthdr == RH){
+ if(p+2 > e)
+ return -1;
+ q = p;
+ *nexthdr = *q;
+ p += ((int)p[1] + 1) * 8;
}
-
- if(*nexthdr == FH)
- *q = *p;
-
- if(setfh)
+ if(p > e)
+ return -1;
+ if(*nexthdr == FH){
+ if(p+IP6FHDR > e || *p == FH)
+ return -1;
+ if(popfh)
+ *q = *p;
+ } else if(setfh)
*q = FH;
-
- return ufl;
+ return p - bp->rp;
}
-Block*
+static Block*
procopts(Block *bp)
{
return bp;
}
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
{
-
- int fend, offset;
+ int offset, ovlap, fragsize, len;
+ uchar src[IPaddrlen], dst[IPaddrlen];
uint id;
- Fragment6 *f, *fnext;
+ Block *bl, **l, *prev;
Fraghdr6 *fraghdr;
- uchar src[IPaddrlen], dst[IPaddrlen];
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Fragment6 *f, *fnext;
+ Ipfrag *fp, *fq;
+ Ip6hdr* ih;
- fraghdr = (Fraghdr6 *) (bp->rp + uflen);
- memmove(src, ih->src, IPaddrlen);
- memmove(dst, ih->dst, IPaddrlen);
- id = nhgetl(fraghdr->id);
- offset = nhgets(fraghdr->offsetRM) & ~7;
-
/*
- * block lists are too hard, pullupblock into a single block
+ * block lists are too hard, concatblock into a single block
*/
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip6hdr *)(bp->rp);
- }
+ bp = concatblock(bp);
+ ih = (Ip6hdr*)bp->rp;
+ fraghdr = (Fraghdr6*)(bp->rp + uflen);
+ id = nhgetl(fraghdr->id);
+ offset = nhgets(fraghdr->offsetRM);
+ fragsize = BLEN(bp) - uflen - IP6FHDR;
+ memmove(src, ih->src, IPaddrlen);
+ memmove(dst, ih->dst, IPaddrlen);
+
qlock(&ip->fraglock6);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead6; f; f = fnext){
+ for(f = ip->flisthead6; f != nil; f = fnext){
fnext = f->next;
- if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+ if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
}
}
-
/*
* if this isn't a fragmented packet, accept it
* and get rid of any fragments that might go
* with it.
*/
- if(nhgets(fraghdr->offsetRM)==0) { // first frag is also the last
+ if((offset & ~6) == 0) { /* 1st frag is also last */
if(f != nil) {
- ipfragfree6(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree6(ip, f);
}
qunlock(&ip->fraglock6);
+
+ /* get rid of frag header */
+ memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+ bp->rp += IP6FHDR;
+ ih = (Ip6hdr*)bp->rp;
+ hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset;
- BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = offset & ~7;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -638,8 +516,9 @@
f->blist = bp;
- qunlock(&ip->fraglock6);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock6);
+
return nil;
}
@@ -649,7 +528,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -656,15 +535,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock6);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -673,29 +553,27 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
-
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
-
- if(ovlap <= 0)
- break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, uflen);
- (*l)->rp += ovlap;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
+ if(ovlap <= 0)
break;
+ if(ovlap < fq->flen) {
+ /* move up ip and frag header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
+ break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -703,45 +581,55 @@
* look for a complete packet. if we get to a fragment
* with the trailing bit of fraghdr->offsetRM[1] set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
-
- fraghdr = (Fraghdr6 *) (bl->rp + uflen);
- if((fraghdr->offsetRM[1] & 1) == 0) {
- bl = f->blist;
+ fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+ if(fraghdr->offsetRM[1] & 1)
+ continue;
- /* get rid of frag header in first fragment */
+ bl = f->blist;
+ fq = (Ipfrag*)bl->base;
- memmove(bl->rp + IP6FHDR, bl->rp, uflen);
- bl->rp += IP6FHDR;
- len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
- bl->wp = bl->rp + len + IP6HDR;
+ /* get rid of frag header in first fragment */
+ memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+ bl->rp += IP6FHDR;
+ len = BLEN(bl);
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += uflen + IP6FHDR;
- bl->wp = bl->rp + fragsize;
- }
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
- bl = f->blist;
- f->blist = nil;
+ if(len >= IP_MAX){
ipfragfree6(ip, f);
- ih = (Ip6hdr*)(bl->rp);
- hnputs(ih->ploadlen, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock6);
- ip->stats[ReasmOKs]++;
- return bl;
+
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree6(ip, f);
+
+ ih = (Ip6hdr*)bl->rp;
+ hnputs(ih->ploadlen, len-IP6HDR);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock6);
+
+ return bl;
}
qunlock(&ip->fraglock6);
return nil;
}
-
diff -u a/os/ip//ipv6.h b/os/ip//ipv6.h
--- a/os/ip//ipv6.h
+++ b/os/ip//ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
#define isv6mcast(addr) ((addr)[0] == 0xff)
#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
-typedef struct Ip6hdr Ip6hdr;
-typedef struct Opthdr Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6 Fraghdr6;
-
-struct Ip6hdr {
- uchar vcf[4]; // version:4, traffic class:8, flow label:20
- uchar ploadlen[2]; // payload length: packet length - 40
- uchar proto; // next header type
- uchar ttl; // hop limit
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
- uchar nexthdr;
- uchar len;
-};
-
-struct Routinghdr {
- uchar nexthdr;
- uchar len;
- uchar rtetype;
- uchar segrem;
-};
-
-struct Fraghdr6 {
- uchar nexthdr;
- uchar res;
- uchar offsetRM[2]; // Offset, Res, M flag
- uchar id[4];
-};
-
-
-enum { /* Header Types */
- HBH = 0, //?
+enum { /* Header Types */
+ HBH = 0, /* hop-by-hop multicast routing protocol */
ICMP = 1,
IGMP = 2,
GGP = 3,
@@ -72,89 +50,113 @@
Maxhdrtype = 256,
};
-
enum {
- // multicast flgs and scop
+ /* multicast flags and scopes */
- well_known_flg = 0,
- transient_flg = 1,
+// Well_known_flg = 0,
+// Transient_flg = 1,
- node_local_scop = 1,
- link_local_scop = 2,
- site_local_scop = 5,
- org_local_scop = 8,
- global_scop = 14,
+// Interface_local_scop = 1,
+ Link_local_scop = 2,
+// Site_local_scop = 5,
+// Org_local_scop = 8,
+ Global_scop = 14,
- // various prefix lengths
+ /* various prefix lengths */
+ SOLN_PREF_LEN = 13,
- SOLN_PREF_LEN = 13,
+ /* icmpv6 unreachability codes */
+ Icmp6_no_route = 0,
+ Icmp6_ad_prohib = 1,
+ Icmp6_out_src_scope = 2,
+ Icmp6_adr_unreach = 3,
+ Icmp6_port_unreach = 4,
+ Icmp6_gress_src_fail = 5,
+ Icmp6_rej_route = 6,
+ Icmp6_unknown = 7, /* our own invention for internal use */
- // icmpv6 unreach codes
- icmp6_no_route = 0,
- icmp6_ad_prohib = 1,
- icmp6_unassigned = 2,
- icmp6_adr_unreach = 3,
- icmp6_port_unreach = 4,
- icmp6_unkn_code = 5,
+ /* various flags & constants */
+ v6MINTU = 1280,
+ HOP_LIMIT = 255,
+ IP6HDR = 40, /* sizeof(Ip6hdr) = 8 + 2*16 */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
- // various flags & constants
+ /* option types */
- v6MINTU = 1280,
- HOP_LIMIT = 255,
- ETHERHDR_LEN = 14,
- IPV6HDR_LEN = 40,
- IPV4HDR_LEN = 20,
+ /* neighbour discovery */
+ SRC_LLADDR = 1,
+ TARGET_LLADDR = 2,
+ PREFIX_INFO = 3,
+ REDIR_HEADER = 4,
+ MTU_OPTION = 5,
+ /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+ V6nd_home = 8,
+ V6nd_srcaddrs = 9, /* rfc3122 */
+ V6nd_ip = 17,
+ /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+ V6nd_rdns = 25,
+ /* plan 9 extensions */
+ V6nd_9fs = 250,
+ V6nd_9auth = 251,
- // option types
+ SRC_UNSPEC = 0,
+ SRC_UNI = 1,
+ TARG_UNI = 2,
+ TARG_MULTI = 3,
- SRC_LLADDRESS = 1,
- TARGET_LLADDRESS = 2,
- PREFIX_INFO = 3,
- REDIR_HEADER = 4,
- MTU_OPTION = 5,
+ Tunitent = 1,
+ Tuniproxy = 2,
+ Tunirany = 3,
- SRC_UNSPEC = 0,
- SRC_UNI = 1,
- TARG_UNI = 2,
- TARG_MULTI = 3,
+ /* Node constants */
+ MAX_MULTICAST_SOLICIT = 3,
+ RETRANS_TIMER = 1000,
+};
- t_unitent = 1,
- t_uniproxy = 2,
- t_unirany = 3,
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
- // Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */\
+ uchar ploadlen[2]; /* payload length: packet length - 40 */ \
+ uchar proto; /* next header type */ \
+ uchar ttl; /* hop limit */ \
+ uchar src[IPaddrlen]; \
+ uchar dst[IPaddrlen]
- MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000,
- MAX_INITIAL_RTR_ADVERTISEMENTS = 3,
- MAX_FINAL_RTR_ADVERTISEMENTS = 3,
- MIN_DELAY_BETWEEN_RAS = 3000,
- MAX_RA_DELAY_TIME = 500,
+struct Ip6hdr {
+ IPV6HDR;
+ uchar payload[];
+};
- // Host constants
+struct Opthdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+};
- MAX_RTR_SOLICITATION_DELAY = 1000,
- RTR_SOLICITATION_INTERVAL = 4000,
- MAX_RTR_SOLICITATIONS = 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
- // Node constants
-
- MAX_MULTICAST_SOLICIT = 3,
- MAX_UNICAST_SOLICIT = 3,
- MAX_ANYCAST_DELAY_TIME = 1000,
- MAX_NEIGHBOR_ADVERTISEMENT = 3,
- REACHABLE_TIME = 30000,
- RETRANS_TIMER = 1000,
- DELAY_FIRST_PROBE_TIME = 5000,
-
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4];
};
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
extern uchar v6allnodesN[IPaddrlen];
extern uchar v6allnodesL[IPaddrlen];
extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
extern uchar v6allroutersL[IPaddrlen];
extern uchar v6allnodesNmask[IPaddrlen];
extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
extern uchar v6solicitednode[IPaddrlen];
extern uchar v6solicitednodemask[IPaddrlen];
extern uchar v6Unspecified[IPaddrlen];
extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
extern uchar v6linklocal[IPaddrlen];
extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
extern uchar v6multicast[IPaddrlen];
extern uchar v6multicastmask[IPaddrlen];
extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
extern int v6mcpreflen;
extern int v6snpreflen;
extern int v6aNpreflen;
@@ -184,3 +179,10 @@
extern int v6aLpreflen;
extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
diff -u a/os/ip//loopbackmedium.c b/os/ip//loopbackmedium.c
--- a/os/ip//loopbackmedium.c
+++ b/os/ip//loopbackmedium.c
@@ -28,13 +28,12 @@
LB *lb;
lb = smalloc(sizeof(*lb));
+ lb->readp = (void*)-1;
lb->f = ifc->conv->p->f;
- /* TO DO: make queue size a function of kernel memory */
- lb->q = qopen(128*1024, Qmsg, nil, nil);
+ lb->q = qopen(1024*1024, Qmsg, nil, nil);
ifc->arg = lb;
- ifc->mbps = 1000;
- kproc("loopbackread", loopbackread, ifc, 0);
+ kproc("loopbackread", loopbackread, ifc);
}
@@ -43,13 +42,29 @@
{
LB *lb = ifc->arg;
- if(lb->readp)
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(lb->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(lb->readp != nil)
postnote(lb->readp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for reader to die */
- while(lb->readp != 0)
+ while(lb->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
/* clean up */
qfree(lb->q);
free(lb);
@@ -76,23 +91,14 @@
ifc = a;
lb = ifc->arg;
lb->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- lb->readp = 0;
- pexit("hangup", 1);
- }
- for(;;){
- bp = qbread(lb->q, Maxtu);
- if(bp == nil)
- continue;
- ifc->in++;
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(!waserror())
+ while((bp = qbread(lb->q, Maxtu)) != nil){
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
+ ifc->in++;
if(ifc->lifc == nil)
freeb(bp);
else
@@ -100,6 +106,8 @@
runlock(ifc);
poperror();
}
+ lb->readp = nil;
+ pexit("hangup", 1);
}
Medium loopbackmedium =
diff -u a/os/ip//netdevmedium.c b/os/ip//netdevmedium.c
--- a/os/ip//netdevmedium.c
+++ b/os/ip//netdevmedium.c
@@ -49,12 +49,13 @@
mchan = namec(argv[2], Aopen, ORDWR, 0);
er = smalloc(sizeof(*er));
+ er->readp = (void*)-1;
er->mchan = mchan;
er->f = ifc->conv->p->f;
ifc->arg = er;
- kproc("netdevread", netdevread, ifc, 0);
+ kproc("netdevread", netdevread, ifc);
}
/*
@@ -65,13 +66,29 @@
{
Netdevrock *er = ifc->arg;
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(er->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
if(er->readp != nil)
postnote(er->readp, 1, "unbind", 0);
- /* wait for readers to die */
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
+ /* wait for reader to die */
while(er->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan != nil)
cclose(er->mchan);
@@ -86,8 +103,6 @@
{
Netdevrock *er = ifc->arg;
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
@@ -104,34 +119,22 @@
Ipifc *ifc;
Block *bp;
Netdevrock *er;
- char *argv[1];
ifc = a;
er = ifc->arg;
er->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->readp = nil;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
if(bp == nil){
- /*
- * get here if mchan is a pipe and other side hangs up
- * clean up this interface & get out
-ZZZ is this a good idea?
- */
poperror();
- er->readp = nil;
- argv[0] = "unbind";
- if(!waserror())
+ if(!waserror()){
+ static char *argv[] = { "unbind" };
ifc->conv->p->ctl(ifc->conv, argv, 1);
- pexit("hangup", 1);
+ }
+ break;
}
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
@@ -144,6 +147,8 @@
runlock(ifc);
poperror();
}
+ er->readp = nil;
+ pexit("hangup", 1);
}
void
diff -u a/os/ip//netlog.c b/os/ip//netlog.c
--- a/os/ip//netlog.c
+++ b/os/ip//netlog.c
@@ -7,7 +7,7 @@
#include "../ip/ip.h"
enum {
- Nlog = 4*1024,
+ Nlog = 16*1024,
};
/*
@@ -39,12 +39,12 @@
{ "ppp", Logppp, },
{ "ip", Logip, },
{ "fs", Logfs, },
- { "tcp", Logtcp, },
{ "il", Logil, },
+ { "tcp", Logtcp, },
{ "icmp", Logicmp, },
{ "udp", Logudp, },
{ "compress", Logcompress, },
- { "ilmsg", Logil|Logilmsg, },
+ { "logilmsg", Logilmsg, },
{ "gre", Loggre, },
{ "tcpwin", Logtcp|Logtcpwin, },
{ "tcprxmt", Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
nexterror();
}
if(f->alog->opens == 0){
- if(f->alog->buf == nil)
+ if(f->alog->buf == nil){
f->alog->buf = malloc(Nlog);
+ if(f->alog->buf == nil)
+ error(Enomem);
+ }
f->alog->rptr = f->alog->buf;
f->alog->end = f->alog->buf + Nlog;
}
@@ -202,6 +205,7 @@
else
f->alog->iponlyset = 1;
free(cb);
+ poperror();
return;
default:
@@ -227,7 +231,7 @@
void
netlog(Fs *f, int mask, char *fmt, ...)
{
- char buf[128], *t, *fp;
+ char buf[256], *t, *fp;
int i, n;
va_list arg;
diff -u a/os/ip//nullmedium.c b/os/ip//nullmedium.c
--- a/os/ip//nullmedium.c
+++ b/os/ip//nullmedium.c
@@ -19,8 +19,9 @@
}
static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
{
+ freeb(bp);
error("nullbwrite");
}
diff -u a/os/ip//pktmedium.c b/os/ip//pktmedium.c
--- a/os/ip//pktmedium.c
+++ b/os/ip//pktmedium.c
@@ -16,10 +16,10 @@
Medium pktmedium =
{
.name= "pkt",
-.hsize= 14,
-.mintu= 40,
+.hsize= 0,
+.mintu= 0,
.maxtu= 4*1024,
-.maclen= 6,
+.maclen= 0,
.bind= pktbind,
.unbind= pktunbind,
.bwrite= pktbwrite,
@@ -28,12 +28,13 @@
};
/*
- * called to bind an IP ifc to an ethernet device
+ * called to bind an IP ifc to a packet device
* called with ifc wlock'd
*/
static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
{
+ USED(argc, argv);
}
/*
@@ -51,7 +52,6 @@
pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
{
/* enqueue onto the conversation's rq */
- bp = concatblock(bp);
if(ifc->conv->snoopers.ref > 0)
qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
qpass(ifc->conv->rq, bp);
diff -u a/os/ip//rudp.c b/os/ip//rudp.c
--- a/os/ip//rudp.c
+++ b/os/ip//rudp.c
@@ -1,4 +1,5 @@
/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
* This protocol is compatible with UDP's packet format.
* It could be done over UDP if need be.
*/
@@ -25,20 +26,17 @@
enum
{
- UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
UDP_IPHDR = 8, /* ip header */
IP_UDPPROTO = 254,
- UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
- UDP_USEAD4 = 12,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
Rudprxms = 200,
Rudptickms = 50,
Rudpmaxxmit = 10,
Maxunacked = 100,
-
};
#define Hangupgen 0xffffffff /* used only in hangup messages */
@@ -205,7 +203,7 @@
qlock(&rpriv->apl);
if(rpriv->ackprocstarted == 0){
sprint(kpname, "#I%drudpack", rudp->f->dev);
- kproc(kpname, relackproc, rudp, 0);
+ kproc(kpname, relackproc, rudp);
rpriv->ackprocstarted = 1;
}
qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
qlock(ucb);
for(r = ucb->r; r; r = r->next)
m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+ m += snprint(state+m, n-m, "\n");
qunlock(ucb);
return m;
}
@@ -281,7 +280,7 @@
/* force out any delayed acks */
ucb = (Rudpcb*)c->ptcl;
qlock(ucb);
- for(r = ucb->r; r; r = r->next){
+ for(r = ucb->r; r != nil; r = r->next){
if(r->acksent != r->rcvseq)
relsendack(c, r, 0);
}
@@ -374,27 +373,10 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
-
- bp->rp += 4; /* Igonore local port */
- break;
default:
ipmove(raddr, c->raddr);
ipmove(laddr, c->laddr);
rport = c->rport;
-
break;
}
@@ -402,9 +384,6 @@
/* Make space to fit rudp & ip header */
bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
- if(bp == nil)
- return;
-
uh = (Udphdr *)(bp->rp);
uh->vihl = IP_VER4;
@@ -417,7 +396,6 @@
uh->frag[1] = 0;
hnputs(uh->udpplen, ptcllen);
switch(ucb->headers){
- case 6:
case 7:
v6tov4(uh->udpdst, raddr);
hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.rudpNoPorts++;
qunlock(rudp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
default:
/* connection oriented rudp */
if(ipcmp(c->raddr, IPnoaddr) == 0){
- /* save the src address in the conversation */
+ /* reply with the same ip address (if not broadcast) */
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
+ ipmove(c->laddr, laddr);
ipmove(c->raddr, raddr);
c->rport = rport;
-
- /* reply with the same ip address (if not broadcast) */
- if(ipforme(f, laddr) == Runi)
- ipmove(c->laddr, laddr);
- else
- v4tov6(c->laddr, ifc->lifc->local);
}
break;
}
- if(bp->next)
- bp = concatblock(bp);
if(qfull(c->rq)) {
- netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
+ } else {
+ qpass(c->rq, concatblock(bp));
}
- else
- qpass(c->rq, bp);
-
qunlock(ucb);
}
@@ -629,16 +594,14 @@
if(n < 1)
return rudpunknown;
- if(strcmp(f[0], "headers++4") == 0){
- ucb->headers = 7;
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 6;
- return nil;
} else if(strcmp(f[0], "hangup") == 0){
if(n < 3)
return "bad syntax";
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ return Ebadip;
x = atoi(f[2]);
qlock(ucb);
relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
qunlock(ucb);
return nil;
} else if(strcmp(f[0], "randdrop") == 0){
- x = 10; /* default is 10% */
+ x = 10; /* default is 10% */
if(n > 1)
x = atoi(f[1]);
if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
pdest = nhgets(h->udpdport);
/* Look for a connection */
- for(p = rudp->conv; *p; p++) {
- s = *p;
+ for(p = rudp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -701,12 +665,6 @@
upriv->orders);
}
-int
-rudpgc(Proto *rudp)
-{
- return natgc(rudp->ipproto);
-}
-
void
rudpinit(Fs *fs)
{
@@ -725,9 +683,8 @@
rudp->rcv = rudpiput;
rudp->advise = rudpadvise;
rudp->stats = rudpstats;
- rudp->gc = rudpgc;
rudp->ipproto = IP_UDPPROTO;
- rudp->nc = 16;
+ rudp->nc = 32;
rudp->ptclsize = sizeof(Rudpcb);
Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
rudp = (Proto *)a;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Rudptickms);
@@ -989,8 +948,6 @@
Fs *f;
bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
- if(bp == nil)
- return;
bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
f = c->p->f;
uh = (Udphdr *)(bp->rp);
diff -u a/os/ip//tcp.c b/os/ip//tcp.c
--- a/os/ip//tcp.c
+++ b/os/ip//tcp.c
@@ -41,13 +41,13 @@
EOLOPT = 0,
NOOPOPT = 1,
MSSOPT = 2,
- MSS_LENGTH = 4, /* Mean segment size */
+ MSS_LENGTH = 4, /* Maximum segment size */
WSOPT = 3,
WS_LENGTH = 3, /* Bits to scale window size by */
MSL2 = 10,
MSPTICK = 50, /* Milliseconds per timer tick */
- DEF_MSS = 1460, /* Default mean segment */
- DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+ DEF_MSS = 1460, /* Default maximum segment */
+ DEF_MSS6 = 1220, /* Default maximum segment (min) for v6 */
DEF_RTT = 500, /* Default round trip */
DEF_KAT = 120000, /* Default time (ms) between keep alives */
TCP_LISTEN = 0, /* Listen connection */
@@ -81,7 +81,13 @@
NLHT = 256, /* hash table size, must be a power of 2 */
LHTMASK = NLHT-1,
- HaveWS = 1<<8,
+ /*
+ * window is 64kb · 2ⁿ
+ * these factors determine the ultimate bandwidth-delay product.
+ * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+ */
+ Maxqscale = 4, /* maximum queuing scale */
+ Defadvscale = 4, /* default advertisement */
};
/* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
ulong seq;
ulong ack;
uchar flags;
- ushort ws; /* window scale option (if not zero) */
- ulong wnd;
+ uchar update;
+ ushort ws; /* window scale option */
+ ulong wnd; /* prescaled window*/
ushort urg;
ushort mss; /* max segment size option (if not zero) */
ushort len; /* size of data */
@@ -205,44 +212,53 @@
ulong wnd; /* Tcp send window */
ulong urg; /* Urgent data pointer */
ulong wl2;
- int scale; /* how much to right shift window in xmitted packets */
+ uint scale; /* how much to right shift window in xmitted packets */
/* to implement tahoe and reno TCP */
ulong dupacks; /* number of duplicate acks rcvd */
+ ulong partialack;
int recovery; /* loss recovery flag */
- ulong rxt; /* right window marker for recovery */
+ int retransmit; /* retransmit 1 packet @ una flag */
+ int rto;
+ ulong rxt; /* right window marker for recovery "recover" rfc3782 */
} snd;
struct {
ulong nxt; /* Receive pointer to next uchar slot */
ulong wnd; /* Receive window incoming */
+ ulong wsnt; /* Last wptr sent. important to track for large bdp */
+ ulong wptr;
ulong urg; /* Urgent pointer */
+ ulong ackptr; /* last acked sequence */
int blocked;
- int una; /* unacked data segs */
- int scale; /* how much to left shift window in rcved packets */
+ uint scale; /* how much to left shift window in rcv'd packets */
} rcv;
ulong iss; /* Initial sequence number */
- int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
ulong cwind; /* Congestion window */
- int scale; /* desired snd.scale */
- ushort ssthresh; /* Slow start threshold */
+ ulong abcbytes; /* appropriate byte counting rfc 3465 */
+ uint scale; /* desired snd.scale */
+ ulong ssthresh; /* Slow start threshold */
int resent; /* Bytes just resent */
int irs; /* Initial received squence */
- ushort mss; /* Mean segment size */
+ ushort mss; /* Maximum segment size */
int rerecv; /* Overlap of data rerecevived */
- ulong window; /* Recevive window */
+ ulong window; /* Our receive window (queue) */
+ uint qscale; /* Log2 of our receive window (queue) */
uchar backoff; /* Exponential backoff counter */
int backedoff; /* ms we've backed off for rexmits */
uchar flags; /* State flags */
Reseq *reseq; /* Resequencing queue */
+ int nreseq;
+ int reseqlen;
Tcptimer timer; /* Activity timer */
Tcptimer acktimer; /* Acknowledge timer */
Tcptimer rtt_timer; /* Round trip timer */
Tcptimer katimer; /* keep alive timer */
ulong rttseq; /* Round trip sequence */
- int srtt; /* Shortened round trip */
+ int srtt; /* Smoothed round trip */
int mdev; /* Mean deviation of round trip */
int kacounter; /* count down for keep alive */
uint sndsyntime; /* time syn sent */
ulong time; /* time Finwait2 or Syn_received was sent */
+ ulong timeuna; /* snd.una when time was set */
int nochecksum; /* non-zero means don't send checksums */
int flgcnt; /* number of flags in the sequence (FIN,SEQ) */
@@ -285,11 +301,11 @@
};
int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
-ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
enum {
/* MIB stats */
MaxConn,
+ Mss,
ActiveOpens,
PassiveOpens,
EstabResets,
@@ -297,6 +313,7 @@
InSegs,
OutSegs,
RetransSegs,
+ RetransSegsSent,
RetransTimeouts,
InErrs,
OutRsts,
@@ -305,14 +322,27 @@
CsumErrs,
HlenErrs,
LenErrs,
+ Resequenced,
OutOfOrder,
+ ReseqBytelim,
+ ReseqPktlim,
+ Delayack,
+ Wopenack,
+ Recovery,
+ RecoveryDone,
+ RecoveryRTO,
+ RecoveryNoSeq,
+ RecoveryCwind,
+ RecoveryPA,
+
Nstats
};
-static char *statnames[] =
+static char *statnames[Nstats] =
{
[MaxConn] "MaxConn",
+[Mss] "MaxSegment",
[ActiveOpens] "ActiveOpens",
[PassiveOpens] "PassiveOpens",
[EstabResets] "EstabResets",
@@ -320,6 +350,7 @@
[InSegs] "InSegs",
[OutSegs] "OutSegs",
[RetransSegs] "RetransSegs",
+[RetransSegsSent] "RetransSegsSent",
[RetransTimeouts] "RetransTimeouts",
[InErrs] "InErrs",
[OutRsts] "OutRsts",
@@ -327,6 +358,19 @@
[HlenErrs] "HlenErrs",
[LenErrs] "LenErrs",
[OutOfOrder] "OutOfOrder",
+[Resequenced] "Resequenced",
+[ReseqBytelim] "ReseqBytelim",
+[ReseqPktlim] "ReseqPktlim",
+[Delayack] "Delayack",
+[Wopenack] "Wopenack",
+
+[Recovery] "Recovery",
+[RecoveryDone] "RecoveryDone",
+[RecoveryRTO] "RecoveryRTO",
+
+[RecoveryNoSeq] "RecoveryNoSeq",
+[RecoveryCwind] "RecoveryCwind",
+[RecoveryPA] "RecoveryPA",
};
typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
QLock apl;
int ackprocstarted;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
};
/*
@@ -356,34 +400,34 @@
* of DoS attack.
*
* To avoid stateless Conv hogs, we pick a sequence number at random. If
- * it that number gets acked by the other end, we shut down the connection.
- * Look for tcpporthogedefense in the code.
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
*/
int tcpporthogdefense = 0;
-int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void localclose(Conv*, char*);
-void procsyn(Conv*, Tcp*);
-void tcpiput(Proto*, Ipifc*, Block*);
-void tcpoutput(Conv*);
-int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void tcpstart(Conv*, int);
-void tcptimeout(void*);
-void tcpsndsyn(Conv*, Tcpctl*);
-void tcprcvwin(Conv*);
-void tcpacktimer(void*);
-void tcpkeepalive(void*);
-void tcpsetkacounter(Tcpctl*);
-void tcprxmit(Conv*);
-void tcpsettimer(Tcpctl*);
-void tcpsynackrtt(Conv*);
-void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static int addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static int dumpreseq(Tcpctl*);
+static void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static void limborexmit(Proto*);
+static void localclose(Conv*, char*);
+static void procsyn(Conv*, Tcp*);
+static void tcpacktimer(void*);
+static void tcpiput(Proto*, Ipifc*, Block*);
+static void tcpkeepalive(void*);
+static void tcpoutput(Conv*);
+static void tcprcvwin(Conv*);
+static void tcprxmit(Conv*);
+static void tcpsetkacounter(Tcpctl*);
+static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static void tcpsettimer(Tcpctl*);
+static void tcpsndsyn(Conv*, Tcpctl*);
+static void tcpstart(Conv*, int);
+static void tcpsynackrtt(Conv*);
+static void tcptimeout(void*);
+static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
tcpsetstate(Conv *s, uchar newstate)
{
Tcpctl *tcb;
@@ -403,11 +447,6 @@
if(newstate == Established)
tpriv->stats[CurrEstab]++;
- /**
- print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
- tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
- **/
-
switch(newstate) {
case Closed:
qclose(s->rq);
@@ -430,7 +469,12 @@
tcpconnect(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
@@ -447,12 +491,14 @@
s = (Tcpctl*)(c->ptcl);
return snprint(state, n,
- "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
tcpstates[s->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
- s->srtt, s->mdev,
- s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+ s->nreseq, s->reseqlen,
+ s->srtt, s->mdev, s->ssthresh,
+ s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+ s->qscale,
s->timer.start, s->timer.count, s->rerecv,
s->katimer.start, s->katimer.count);
}
@@ -470,7 +516,12 @@
tcpannounce(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdannounce(c, argv, argc);
if(e != nil)
return e;
@@ -524,7 +575,7 @@
}
}
-void
+static void
tcpkick(void *x)
{
Conv *s = x;
@@ -546,7 +597,6 @@
/*
* Push data
*/
- tcprcvwin(s);
tcpoutput(s);
break;
default:
@@ -558,7 +608,9 @@
poperror();
}
-void
+static int seq_lt(ulong, ulong);
+
+static void
tcprcvwin(Conv *s) /* Call with tcb locked */
{
int w;
@@ -568,12 +620,20 @@
w = tcb->window - qlen(s->rq);
if(w < 0)
w = 0;
- tcb->rcv.wnd = w;
- if(w == 0)
+ /* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+ if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+ w = tcb->rcv.wptr - tcb->rcv.nxt;
+ if(w != tcb->rcv.wnd)
+ if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
tcb->rcv.blocked = 1;
+ netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+ tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+ }
+ tcb->rcv.wnd = w;
+ tcb->rcv.wptr = tcb->rcv.nxt + w;
}
-void
+static void
tcpacktimer(void *v)
{
Tcpctl *tcb;
@@ -589,7 +649,6 @@
qlock(s);
if(tcb->state != Closed){
tcb->flags |= FORCE;
- tcprcvwin(s);
tcpoutput(s);
}
qunlock(s);
@@ -597,10 +656,52 @@
}
static void
+tcpcongestion(Tcpctl *tcb)
+{
+ ulong inflight;
+
+ inflight = tcb->snd.nxt - tcb->snd.una;
+ if(inflight > tcb->cwind)
+ inflight = tcb->cwind;
+ tcb->ssthresh = inflight / 2;
+ if(tcb->ssthresh < 2*tcb->mss)
+ tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+ L = 2, /* aggressive slow start; legal values ∈ (1.0, 2.0) */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+ uint limit;
+
+ tcb->abcbytes += acked;
+ if(tcb->cwind < tcb->ssthresh){
+ /* slow start */
+ if(tcb->snd.rto)
+ limit = 1*tcb->mss;
+ else
+ limit = L*tcb->mss;
+ tcb->cwind += MIN(tcb->abcbytes, limit);
+ tcb->abcbytes = 0;
+ }
+ else{
+ tcb->snd.rto = 0;
+ /* avoidance */
+ if(tcb->abcbytes >= tcb->cwind){
+ tcb->abcbytes -= tcb->cwind;
+ tcb->cwind += tcb->mss;
+ }
+ }
+}
+
+static void
tcpcreate(Conv *c)
{
c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
- c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+ c->wq = qopen(QMAX, Qkick, tcpkick, c);
}
static void
@@ -608,7 +709,7 @@
{
if(newstate != TcptimerON){
if(t->state == TcptimerON){
- // unchain
+ /* unchain */
if(priv->timers == t){
priv->timers = t->next;
if(t->prev != nil)
@@ -622,7 +723,7 @@
}
} else {
if(t->state != TcptimerON){
- // chain
+ /* chain */
if(t->prev != nil || t->next != nil)
panic("timerstate2");
t->prev = nil;
@@ -635,7 +736,7 @@
t->state = newstate;
}
-void
+static void
tcpackproc(void *a)
{
Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
tcp = a;
priv = tcp->priv;
+ while(waserror())
+ ;
+
for(;;) {
tsleep(&up->sleep, return0, 0, MSPTICK);
@@ -681,7 +785,7 @@
}
}
-void
+static void
tcpgo(Tcppriv *priv, Tcptimer *t)
{
if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
qunlock(&priv->tl);
}
-void
+static void
tcphalt(Tcppriv *priv, Tcptimer *t)
{
if(t == nil)
@@ -704,17 +808,16 @@
qunlock(&priv->tl);
}
-int
+static int
backoff(int n)
{
return 1 << n;
}
-void
+static void
localclose(Conv *s, char *reason) /* called with tcb locked */
{
Tcpctl *tcb;
- Reseq *rp,*rp1;
Tcppriv *tpriv;
tpriv = s->p->priv;
@@ -728,12 +831,7 @@
tcphalt(tpriv, &tcb->katimer);
/* Flush reassembly queue; nothing more can arrive */
- for(rp = tcb->reseq; rp != nil; rp = rp1) {
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
+ dumpreseq(tcb);
if(tcb->state == Syn_sent)
Fsconnected(s, reason);
@@ -747,45 +845,46 @@
}
/* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
{
Ipifc *ifc;
int mtu;
- ifc = findipifc(tcp->f, addr, 0);
- switch(version){
- default:
- case V4:
- mtu = DEF_MSS;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
- break;
- case V6:
- mtu = DEF_MSS6;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
- break;
- }
- if(ifc != nil){
- if(ifc->mbps > 100)
- *scale = HaveWS | 3;
- else if(ifc->mbps > 10)
- *scale = HaveWS | 1;
- else
- *scale = HaveWS | 0;
- } else
- *scale = HaveWS | 0;
+ /*
+ * set the ws. it doesn't commit us to anything.
+ * ws is the ultimate limit to the bandwidth-delay product.
+ */
+ *scale = Defadvscale;
- return mtu;
+ /*
+ * currently we do not implement path MTU discovery
+ * so use interface MTU *only* if directly reachable
+ * or when we use V4 which allows routers to fragment.
+ * otherwise, we use the default MSS which assumes a
+ * safe minimum MTU of 1280 bytes for V6.
+ */
+ if(r != nil && (ifc = r->ifc) != nil){
+ mtu = ifc->maxtu - ifc->m->hsize;
+ if(version == V4)
+ return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+ mtu -= TCP6_PKT + TCP6_HDRSIZE;
+ if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+ return mtu;
+ }
+ if(version == V6)
+ return DEF_MSS6;
+ else
+ return DEF_MSS;
}
-void
+static void
inittcpctl(Conv *s, int mode)
{
Tcpctl *tcb;
Tcp4hdr* h4;
Tcp6hdr* h6;
+ Tcppriv *tpriv;
int mss;
tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
memset(tcb, 0, sizeof(Tcpctl));
- tcb->ssthresh = 65535;
+ tcb->ssthresh = QMAX; /* reset by tcpsetscale() */
tcb->srtt = tcp_irtt<<LOGAGAIN;
tcb->mdev = 0;
@@ -841,19 +940,18 @@
}
tcb->mss = tcb->cwind = mss;
+ tcb->abcbytes = 0;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
/* default is no window scaling */
- tcb->window = QMAX;
- tcb->rcv.wnd = QMAX;
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- qsetlimit(s->rq, QMAX);
+ tcpsetscale(s, tcb, 0, 0);
}
/*
* called with s qlocked
*/
-void
+static void
tcpstart(Conv *s, int mode)
{
Tcpctl *tcb;
@@ -865,8 +963,8 @@
if(tpriv->ackprocstarted == 0){
qlock(&tpriv->apl);
if(tpriv->ackprocstarted == 0){
- sprint(kpname, "#I%dtcpack", s->p->f->dev);
- kproc(kpname, tcpackproc, s->p, 0);
+ snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+ kproc(kpname, tcpackproc, s->p);
tpriv->ackprocstarted = 1;
}
qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
}
static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
{
- static char buf[128];
+ char *p;
- sprint(buf, "%d", flag>>10); /* Head len */
+ p = seprint(buf, e, "%d", flag>>10); /* Head len */
if(flag & URG)
- strcat(buf, " URG");
+ p = seprint(p, e, " URG");
if(flag & ACK)
- strcat(buf, " ACK");
+ p = seprint(p, e, " ACK");
if(flag & PSH)
- strcat(buf, " PSH");
+ p = seprint(p, e, " PSH");
if(flag & RST)
- strcat(buf, " RST");
+ p = seprint(p, e, " RST");
if(flag & SYN)
- strcat(buf, " SYN");
+ p = seprint(p, e, " SYN");
if(flag & FIN)
- strcat(buf, " FIN");
-
+ p = seprint(p, e, " FIN");
+ USED(p);
return buf;
}
-Block *
+static Block*
htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -940,14 +1038,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP6_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP6_PKT;
}
@@ -1000,7 +1094,7 @@
return data;
}
-Block *
+static Block*
htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -1013,7 +1107,7 @@
if(tcph->flags & SYN){
if(tcph->mss)
hdrlen += MSS_LENGTH;
- if(tcph->ws)
+ if(1)
hdrlen += WS_LENGTH;
optpad = hdrlen & 3;
if(optpad)
@@ -1024,14 +1118,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP4_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP4_PKT;
}
@@ -1055,7 +1145,8 @@
hnputs(opt, tcph->mss);
opt += 2;
}
- if(tcph->ws != 0){
+ /* always offer. rfc1323 §2.2 */
+ if(1){
*opt++ = WSOPT;
*opt++ = WS_LENGTH;
*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
return data;
}
-int
+static int
ntohtcp6(Tcp *tcph, Block **bpp)
{
Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->ploadlen) - hdrlen;
*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1136,7 +1228,7 @@
return hdrlen;
}
-int
+static int
ntohtcp4(Tcp *tcph, Block **bpp)
{
Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1200,16 +1293,19 @@
}
/*
- * For outgiing calls, generate an initial sequence
+ * For outgoing calls, generate an initial sequence
* number and put a SYN on the send queue
*/
-void
+static void
tcpsndsyn(Conv *s, Tcpctl *tcb)
{
+ Tcppriv *tpriv;
+
tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
tcb->rttseq = tcb->iss;
tcb->snd.wl2 = tcb->iss;
tcb->snd.una = tcb->iss;
+ tcb->snd.rxt = tcb->iss;
tcb->snd.ptr = tcb->rttseq;
tcb->snd.nxt = tcb->rttseq;
tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
tcb->sndsyntime = NOW;
/* set desired mss and scale */
- tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+ tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
}
void
@@ -1229,7 +1327,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
- netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
tpriv = tcp->priv;
@@ -1307,7 +1405,7 @@
* send a reset to the remote side and close the conversation
* called with s qlocked
*/
-char*
+static char*
tcphangup(Conv *s)
{
Tcp seg;
@@ -1322,7 +1420,7 @@
memset(&seg, 0, sizeof seg);
seg.flags = RST | ACK;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
seg.seq = tcb->snd.ptr;
seg.wnd = 0;
seg.urg = 0;
@@ -1353,7 +1451,7 @@
/*
* (re)send a SYN ACK
*/
-int
+static int
sndsynack(Proto *tcp, Limbo *lp)
{
Block *hbp;
@@ -1360,7 +1458,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
Tcp seg;
- int scale;
+ uint scale;
/* make pseudo header */
switch(lp->version) {
@@ -1388,11 +1486,12 @@
panic("sndrst: version %d", lp->version);
}
+ memset(&seg, 0, sizeof seg);
seg.seq = lp->iss;
seg.ack = lp->irs+1;
seg.flags = SYN|ACK;
seg.urg = 0;
- seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+ seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
seg.wnd = QMAX;
/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
}
}
+static void
+initialwindow(Tcpctl *tcb)
+{
+ /* RFC 3390 initial window */
+ if(tcb->mss < 1095)
+ tcb->cwind = 4*tcb->mss;
+ else if(tcb->mss < 2190)
+ tcb->cwind = 4380;
+ else
+ tcb->cwind = 2*tcb->mss;
+}
+
/*
* come here when we finally get an ACK to our SYN-ACK.
* lookup call in limbo. if found, create a new conversation
@@ -1596,7 +1707,7 @@
/* find a call in limbo */
h = hashipa(src, segp->source);
for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
- netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+ netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
src, segp->source, lp->raddr, lp->rport,
dst, segp->dest, lp->laddr, lp->lport,
version, lp->version
@@ -1611,7 +1722,7 @@
/* we're assuming no data with the initial SYN */
if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
- netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+ netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
segp->seq, lp->irs+1, segp->ack, lp->iss+1);
lp = nil;
} else {
@@ -1641,6 +1752,8 @@
tcb->irs = lp->irs;
tcb->rcv.nxt = tcb->irs+1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
tcb->snd.una = tcb->iss+1;
tcb->snd.ptr = tcb->iss+1;
tcb->snd.nxt = tcb->iss+1;
+ tcb->snd.rxt = tcb->iss+1;
tcb->flgcnt = 0;
tcb->flags |= SYNACK;
+ /* set desired mss and scale */
+ tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
/* our sending max segment size cannot be bigger than what he asked for */
if(lp->mss != 0 && lp->mss < tcb->mss)
tcb->mss = lp->mss;
+ tpriv->stats[Mss] = tcb->mss;
/* window scaling */
tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
- /* the congestion window always starts out as a single segment */
+ /* congestion window */
tcb->snd.wnd = segp->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
/* set initial round trip time */
tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
return new;
}
-int
+static int
seq_within(ulong x, ulong low, ulong high)
{
if(low <= high){
@@ -1714,25 +1832,25 @@
return 0;
}
-int
+static int
seq_lt(ulong x, ulong y)
{
return (int)(x-y) < 0;
}
-int
+static int
seq_le(ulong x, ulong y)
{
return (int)(x-y) <= 0;
}
-int
+static int
seq_gt(ulong x, ulong y)
{
return (int)(x-y) > 0;
}
-int
+static int
seq_ge(ulong x, ulong y)
{
return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
* use the time between the first SYN and it's ack as the
* initial round trip time
*/
-void
+static void
tcpsynackrtt(Conv *s)
{
Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
tcphalt(tpriv, &tcb->rtt_timer);
}
-void
+static void
update(Conv *s, Tcp *seg)
{
int rtt, delta;
Tcpctl *tcb;
ulong acked;
- ulong expand;
Tcppriv *tpriv;
+ if(seg->update)
+ return;
+ seg->update = 1;
+
tpriv = s->p->priv;
tcb = (Tcpctl*)s->ptcl;
- /* if everything has been acked, force output(?) */
- if(seq_gt(seg->ack, tcb->snd.nxt)) {
- tcb->flags |= FORCE;
- return;
+ /* catch zero-window updates, update window & recover */
+ if(tcb->snd.wnd == 0 && seg->wnd > 0)
+ if(seq_lt(seg->ack, tcb->snd.ptr)){
+ netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+ seg->ack, tcb->snd.una, tcb->snd.ptr, seg->wnd);
+ tcb->snd.wnd = seg->wnd;
+ goto recovery;
}
- /* added by Dong Lin for fast retransmission */
- if(seg->ack == tcb->snd.una
- && tcb->snd.una != tcb->snd.nxt
- && seg->len == 0
- && seg->wnd == tcb->snd.wnd) {
-
- /* this is a pure ack w/o window update */
- netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
- tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
- if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
- /*
- * tahoe tcp rxt the packet, half sshthresh,
- * and set cwnd to one packet
- */
+ /* newreno fast retransmit */
+ if(seg->ack == tcb->snd.una)
+ if(tcb->snd.una != tcb->snd.nxt)
+ if(++tcb->snd.dupacks == 3){
+recovery:
+ if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
+ }else if(seq_le(tcb->snd.rxt, seg->ack)){
+ tpriv->stats[Recovery]++;
+ tcb->abcbytes = 0;
tcb->snd.recovery = 1;
+ tcb->snd.partialack = 0;
tcb->snd.rxt = tcb->snd.nxt;
- netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcpcongestion(tcb);
+ tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+ netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+ tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
tcprxmit(s);
- } else {
- /* do reno tcp here. */
+ }else{
+ tpriv->stats[RecoveryNoSeq]++;
+ netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+ tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+ /* do not enter fast retransmit */
+ /* do not change ssthresh */
}
+ }else if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
}
/*
@@ -1807,6 +1938,9 @@
*/
if(seq_gt(seg->ack, tcb->snd.wl2)
|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ /* clear dupack if we advance wl2 */
+ if(tcb->snd.wl2 != seg->ack)
+ tcb->snd.dupacks = 0;
tcb->snd.wnd = seg->wnd;
tcb->snd.wl2 = seg->ack;
}
@@ -1816,22 +1950,11 @@
* don't let us hangup if sending into a closed window and
* we're still getting acks
*/
- if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
tcb->backedoff = MAXBACKMS/4;
- }
return;
}
- /*
- * any positive ack turns off fast rxt,
- * (should we do new-reno on partial acks?)
- */
- if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
- tcb->snd.dupacks = 0;
- tcb->snd.recovery = 0;
- } else
- netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
/* Compute the new send window size */
acked = seg->ack - tcb->snd.una;
@@ -1843,24 +1966,41 @@
goto done;
}
- /* slow start as long as we're not recovering from lost packets */
- if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
- if(tcb->cwind < tcb->ssthresh) {
- expand = tcb->mss;
- if(acked < expand)
- expand = acked;
+ /*
+ * congestion control
+ */
+ if(tcb->snd.recovery){
+ if(seq_ge(seg->ack, tcb->snd.rxt)){
+ /* recovery finished; deflate window */
+ tpriv->stats[RecoveryDone]++;
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+ if(tcb->ssthresh < tcb->cwind)
+ tcb->cwind = tcb->ssthresh;
+ netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+ tcb->cwind, tcb->ssthresh);
+ } else {
+ /* partial ack; we lost more than one segment */
+ tpriv->stats[RecoveryPA]++;
+ if(tcb->cwind > acked)
+ tcb->cwind -= acked;
+ else{
+ netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+ tcb->cwind = tcb->mss;
+ }
+ netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+ acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+ if(acked >= tcb->mss)
+ tcb->cwind += tcb->mss;
+ tcb->snd.partialack++;
}
- else
- expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+ } else
+ tcpabcincr(tcb, acked);
- if(tcb->cwind + expand < tcb->cwind)
- expand = tcb->snd.wnd - tcb->cwind;
- if(tcb->cwind + expand > tcb->snd.wnd)
- expand = tcb->snd.wnd - tcb->cwind;
- tcb->cwind += expand;
- }
-
/* Adjust the timers according to the round trip time */
+ /* todo: fix sloppy treatment of overflow cases here. */
if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
tcphalt(tpriv, &tcb->rtt_timer);
if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
done:
if(qdiscard(s->wq, acked) < acked)
tcb->flgcnt--;
-
tcb->snd.una = seg->ack;
+
+ /* newreno fast recovery */
+ if(tcb->snd.recovery)
+ tcprxmit(s);
+
if(seq_gt(seg->ack, tcb->snd.urg))
tcb->snd.urg = seg->ack;
- if(tcb->snd.una != tcb->snd.nxt)
- tcpgo(tpriv, &tcb->timer);
+ if(tcb->snd.una != tcb->snd.nxt){
+ /* “impatient” variant */
+ if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
+ tcpgo(tpriv, &tcb->timer);
+ }
+ }
else
tcphalt(tpriv, &tcb->timer);
@@ -1904,12 +2054,13 @@
if(seq_lt(tcb->snd.ptr, tcb->snd.una))
tcb->snd.ptr = tcb->snd.una;
- tcb->flags &= ~RETRAN;
+ if(!tcb->snd.recovery)
+ tcb->flags &= ~RETRAN;
tcb->backoff = 0;
tcb->backedoff = 0;
}
-void
+static void
tcpiput(Proto *tcp, Ipifc*, Block *bp)
{
Tcp seg;
@@ -1917,7 +2068,7 @@
Tcp6hdr *h6;
int hdrlen;
Tcpctl *tcb;
- ushort length;
+ ushort length, csum;
uchar source[IPaddrlen], dest[IPaddrlen];
Conv *s;
Fs *f;
@@ -1980,10 +2131,12 @@
h6->ttl = proto;
hnputl(h6->vcf, length);
if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
- ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp proto cksum\n");
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
freeblist(bp);
return;
}
@@ -1995,7 +2148,7 @@
if(hdrlen < 0){
tpriv->stats[HlenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp hdr len\n");
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
return;
}
@@ -2005,7 +2158,7 @@
if(bp == nil){
tpriv->stats[LenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
return;
}
}
@@ -2016,7 +2169,8 @@
/* Look for a matching conversation */
s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
if(s == nil){
- netlog(f, Logtcp, "iphtlook failed");
+ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+ source, seg.source, dest, seg.dest);
reset:
qunlock(tcp);
sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
}
/* Cut the data to fit the receive window */
+ tcprcvwin(s);
if(tcptrim(tcb, &seg, &bp, &length) == -1) {
- netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+ netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n",
+ seg.seq, seg.seq + length - 1,
+ tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
update(s, &seg);
if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
if(seg.seq != tcb->rcv.nxt)
if(length != 0 || (seg.flags & (SYN|FIN))) {
update(s, &seg);
- if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
- tcb->flags |= FORCE;
+ tcb->flags |= FORCE; /* force duplicate ack; RFC 5681 §3.2 */
goto output;
}
+ if(tcb->nreseq > 0)
+ tcb->flags |= FORCE; /* filled hole in sequence space; RFC 5681 §3.2 */
+
/*
* keep looping till we've processed this packet plus any
* adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
goto raise;
}
case Time_wait:
- tcb->flags |= FORCE;
+ if(seg.flags & FIN)
+ tcb->flags |= FORCE;
if(tcb->timer.state != TcptimerON)
tcpgo(tpriv, &tcb->timer);
}
@@ -2272,34 +2434,12 @@
* receive queue
*/
if(bp) {
- bp = packblock(bp);
- if(bp == nil)
- panic("tcp packblock");
- qpassnolim(s->rq, bp);
+ qpassnolim(s->rq, packblock(bp));
bp = nil;
-
- /*
- * Force an ack every 2 data messages. This is
- * a hack for rob to make his home system run
- * faster.
- *
- * this also keeps the standard TCP congestion
- * control working since it needs an ack every
- * 2 max segs worth. This is not quite that,
- * but under a real stream is equivalent since
- * every packet has a max seg in it.
- */
- if(++(tcb->rcv.una) >= 2)
- tcb->flags |= FORCE;
}
tcb->rcv.nxt += length;
/*
- * update our rcv window
- */
- tcprcvwin(s);
-
- /*
* turn on the acktimer if there's something
* to ack
*/
@@ -2373,8 +2513,11 @@
getreseq(tcb, &seg, &bp, &length);
- if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ tcprcvwin(s);
+ if(tcptrim(tcb, &seg, &bp, &length) == 0){
+ tcb->flags |= FORCE;
break;
+ }
}
}
output:
@@ -2394,15 +2537,15 @@
* the lock to ipoput the packet so some care has to be
* taken by callers.
*/
-void
+static void
tcpoutput(Conv *s)
{
Tcp seg;
- int msgs;
+ uint msgs;
Tcpctl *tcb;
Block *hbp, *bp;
- int sndcnt, n;
- ulong ssize, dsize, usable, sent;
+ int sndcnt;
+ ulong ssize, dsize, sent;
Fs *f;
Tcppriv *tpriv;
uchar version;
@@ -2411,9 +2554,26 @@
tpriv = s->p->priv;
version = s->ipversion;
- for(msgs = 0; msgs < 100; msgs++) {
- tcb = (Tcpctl*)s->ptcl;
+ tcb = (Tcpctl*)s->ptcl;
+ /* force ack every 2*mss */
+ if((tcb->flags & FORCE) == 0)
+ if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+ tpriv->stats[Delayack]++;
+ tcb->flags |= FORCE;
+ }
+
+ /* force ack if window opening */
+ if(0)
+ if((tcb->flags & FORCE) == 0){
+ tcprcvwin(s);
+ if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+ tpriv->stats[Wopenack]++;
+ tcb->flags |= FORCE;
+ }
+ }
+
+ for(msgs = 0; msgs < 100; msgs++) {
switch(tcb->state) {
case Listen:
case Closed:
@@ -2421,7 +2581,12 @@
return;
}
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
/* force an ack when a window has opened up */
+ tcprcvwin(s);
if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
tcb->rcv.blocked = 0;
tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
sndcnt = qlen(s->wq)+tcb->flgcnt;
sent = tcb->snd.ptr - tcb->snd.una;
-
- /* Don't send anything else until our SYN has been acked */
- if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
- break;
-
- /* Compute usable segment based on offered window and limit
- * window probes to one
- */
+ ssize = sndcnt;
if(tcb->snd.wnd == 0){
- if(sent != 0) {
- if((tcb->flags&FORCE) == 0)
- break;
-// tcb->snd.ptr = tcb->snd.una;
+ /* zero window probe */
+ if(sent > 0)
+ if(!(tcb->flags & FORCE))
+ break; /* already probing, rto re-probes */
+ if(ssize < sent)
+ ssize = 0;
+ else{
+ ssize -= sent;
+ if(ssize > 0)
+ ssize = 1;
}
- usable = 1;
+ } else {
+ /* calculate usable segment size */
+ if(ssize > tcb->cwind)
+ ssize = tcb->cwind;
+ if(ssize > tcb->snd.wnd)
+ ssize = tcb->snd.wnd;
+
+ if(ssize < sent)
+ ssize = 0;
+ else {
+ ssize -= sent;
+ if(ssize > tcb->mss)
+ ssize = tcb->mss;
+ }
}
- else {
- usable = tcb->cwind;
- if(tcb->snd.wnd < usable)
- usable = tcb->snd.wnd;
- usable -= sent;
- }
- ssize = sndcnt-sent;
- if(ssize && usable < 2)
- netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
- tcb->snd.wnd, tcb->cwind);
- if(usable < ssize)
- ssize = usable;
- if(tcb->mss < ssize)
- ssize = tcb->mss;
+
dsize = ssize;
seg.urg = 0;
- if(ssize == 0)
- if((tcb->flags&FORCE) == 0)
- break;
+ if(!(tcb->flags & FORCE)){
+ if(ssize == 0)
+ break;
+ if(ssize < tcb->mss)
+ if(tcb->snd.nxt == tcb->snd.ptr)
+ if(sent > TCPREXMTTHRESH*tcb->mss)
+ break;
+ }
tcb->flags &= ~FORCE;
- tcprcvwin(s);
/* By default we will generate an ack */
tcphalt(tpriv, &tcb->acktimer);
- tcb->rcv.una = 0;
seg.source = s->lport;
seg.dest = s->rport;
seg.flags = ACK;
seg.mss = 0;
seg.ws = 0;
+ seg.update = 0;
switch(tcb->state){
case Syn_sent:
seg.flags = 0;
@@ -2516,20 +2684,9 @@
}
}
- if(sent+dsize == sndcnt)
+ if(sent+dsize == sndcnt && dsize)
seg.flags |= PSH;
- /* keep track of balance of resent data */
- if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
- n = tcb->snd.nxt - tcb->snd.ptr;
- if(ssize < n)
- n = ssize;
- tcb->resent += n;
- netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
- s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
- tpriv->stats[RetransSegs]++;
- }
-
tcb->snd.ptr += ssize;
/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
* expect acknowledges
*/
if(ssize != 0){
- if(tcb->timer.state != TcptimerON)
+ if(tcb->timer.state != TcptimerON){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
tcpgo(tpriv, &tcb->timer);
+ }
/* If round trip timer isn't running, start it.
* measure the longest packet only in case the
* transmission time dominates RTT
*/
+ if(tcb->snd.retransmit == 0)
if(tcb->rtt_timer.state != TcptimerON)
if(ssize == tcb->mss) {
tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
}
tpriv->stats[OutSegs]++;
+ if(tcb->snd.retransmit)
+ tpriv->stats[RetransSegsSent]++;
+ tcb->rcv.ackptr = seg.ack;
+ tcb->rcv.wsnt = tcb->rcv.wptr;
/* put off the next keep alive */
tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
default:
panic("tcpoutput2: version %d", version);
}
- if((msgs%4) == 1){
+ if((msgs%4) == 3){
qunlock(s);
- sched();
qlock(s);
}
}
@@ -2611,7 +2775,7 @@
/*
* the BSD convention (hack?) for keep alives. resend last uchar acked.
*/
-void
+static void
tcpsendka(Conv *s)
{
Tcp seg;
@@ -2621,6 +2785,7 @@
tcb = (Tcpctl*)s->ptcl;
dbp = nil;
+ memset(&seg, 0, sizeof seg);
seg.urg = 0;
seg.source = s->lport;
seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
else
seg.seq = tcb->snd.una-1;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
+ tcprcvwin(s);
seg.wnd = tcb->rcv.wnd;
if(tcb->state == Finwait2){
seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
/*
* set connection to time out after 12 minutes
*/
-void
+static void
tcpsetkacounter(Tcpctl *tcb)
{
tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
* if we've timed out, close the connection
* otherwise, send a keepalive and restart the timer
*/
-void
+static void
tcpkeepalive(void *v)
{
Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
/*
* start keepalive timer
*/
-char*
+static char*
tcpstartka(Conv *s, char **f, int n)
{
Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
/*
* turn checksums on/off
*/
-char*
+static char*
tcpsetchecksum(Conv *s, char **f, int)
{
Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
return nil;
}
-void
+/*
+ * retransmit (at most) one segment at snd.una.
+ * preserve cwind & snd.ptr
+ */
+static void
tcprxmit(Conv *s)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
+ ulong tcwind, tptr;
tcb = (Tcpctl*)s->ptcl;
-
tcb->flags |= RETRAN|FORCE;
- tcb->snd.ptr = tcb->snd.una;
- /*
- * We should be halving the slow start threshhold (down to one
- * mss) but leaving it at mss seems to work well enough
- */
- tcb->ssthresh = tcb->mss;
-
- /*
- * pull window down to a single packet
- */
+ tptr = tcb->snd.ptr;
+ tcwind = tcb->cwind;
+ tcb->snd.ptr = tcb->snd.una;
tcb->cwind = tcb->mss;
+ tcb->snd.retransmit = 1;
tcpoutput(s);
+ tcb->snd.retransmit = 0;
+ tcb->cwind = tcwind;
+ tcb->snd.ptr = tptr;
+
+ tpriv = s->p->priv;
+ tpriv->stats[RetransSegs]++;
}
-void
+/*
+ * todo: RFC 4138 F-RTO
+ */
+static void
tcptimeout(void *arg)
{
Conv *s;
@@ -2792,11 +2966,29 @@
localclose(s, Etimedout);
break;
}
- netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+ netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+ tcb->srtt, tcb->mdev, NOW-tcb->time,
+ tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+ tcpstates[s->state]);
tcpsettimer(tcb);
+ if(tcb->snd.rto == 0)
+ tcpcongestion(tcb);
tcprxmit(s);
+ tcb->snd.ptr = tcb->snd.una;
+ tcb->cwind = tcb->mss;
+ tcb->snd.rto = 1;
tpriv->stats[RetransTimeouts]++;
- tcb->snd.dupacks = 0;
+
+ if(tcb->snd.recovery){
+ tcb->snd.dupacks = 0; /* reno rto */
+ tcb->snd.recovery = 0;
+ tpriv->stats[RecoveryRTO]++;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcpwin,
+ "rto recovery rxt @%lud\n", tcb->snd.nxt);
+ }
+
+ tcb->abcbytes = 0;
break;
case Time_wait:
localclose(s, nil);
@@ -2808,7 +3000,7 @@
poperror();
}
-int
+static int
inwindow(Tcpctl *tcb, int seq)
{
return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
/*
* set up state for a received SYN (or SYN ACK) packet
*/
-void
+static void
procsyn(Conv *s, Tcp *seg)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
tcb = (Tcpctl*)s->ptcl;
tcb->flags |= FORCE;
tcb->rcv.nxt = seg->seq + 1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->irs = seg->seq;
/* our sending max segment size cannot be bigger than what he asked for */
- if(seg->mss != 0 && seg->mss < tcb->mss)
+ if(seg->mss != 0 && seg->mss < tcb->mss) {
tcb->mss = seg->mss;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
+ }
- /* the congestion window always starts out as a single segment */
+ /* if the server does not support ws option, disable window scaling */
+ if(seg->ws == 0){
+ tcb->scale = 0;
+ tcb->snd.scale = 0;
+ }
+
tcb->snd.wnd = seg->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
}
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
{
- Reseq *rp, *rp1;
- int i, rqlen, qmax;
+ Reseq *r, *next;
+ for(r = tcb->reseq; r != nil; r = next){
+ next = r->next;
+ freeblist(r->bp);
+ free(r);
+ }
+ tcb->reseq = nil;
+ tcb->nreseq = 0;
+ tcb->reseqlen = 0;
+ return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+ char *s;
+
+ for(; r != nil; r = r->next){
+ s = nil;
+ if(r->next == nil && r->seg.seq != n)
+ s = "hole/end";
+ else if(r->next == nil)
+ s = "end";
+ else if(r->seg.seq != n)
+ s = "hole";
+ if(s != nil)
+ netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+ n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+ n = r->seg.seq + r->seg.len;
+ }
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+ Reseq *rp, **rr;
+ int qmax;
+
rp = malloc(sizeof(Reseq));
if(rp == nil){
- freeblist(bp); /* bp always consumed by add_reseq */
+ freeblist(bp); /* bp always consumed by addreseq */
return 0;
}
@@ -2854,56 +3093,39 @@
rp->bp = bp;
rp->length = length;
- /* Place on reassembly list sorting by starting seq number */
- rp1 = tcb->reseq;
- if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
- rp->next = rp1;
- tcb->reseq = rp;
- if(rp->next != nil)
- tpriv->stats[OutOfOrder]++;
- return 0;
- }
+ tcb->reseqlen += length;
+ tcb->nreseq++;
- rqlen = 0;
- for(i = 0;; i++) {
- rqlen += rp1->length;
- if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
- rp->next = rp1->next;
- rp1->next = rp;
+ /* Place on reassembly list sorting by starting seq number */
+ for(rr = &tcb->reseq;; rr = &(*rr)->next)
+ if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+ rp->next = *rr;
+ *rr = rp;
+ tpriv->stats[Resequenced]++;
if(rp->next != nil)
tpriv->stats[OutOfOrder]++;
break;
}
- rp1 = rp1->next;
- }
- qmax = QMAX<<tcb->rcv.scale;
- if(rqlen > qmax){
- print("resequence queue > window: %d > %d\n", rqlen, qmax);
- i = 0;
- for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
- print("%#lux %#lux %#ux\n", rp1->seg.seq,
- rp1->seg.ack, rp1->seg.flags);
- if(i++ > 10){
- print("...\n");
- break;
- }
- }
- // delete entire reassembly queue; wait for retransmit.
- // - should we be smarter and only delete the tail?
- for(rp = tcb->reseq; rp != nil; rp = rp1){
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
-
- return -1;
+ qmax = tcb->window;
+ if(tcb->reseqlen > qmax){
+ netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqBytelim]++;
+ return dumpreseq(tcb);
}
+ qmax = tcb->window / tcb->mss; /* ~190 for qscale==2, 390 for qscale=3 */
+ if(tcb->nreseq > qmax){
+ netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqPktlim]++;
+ return dumpreseq(tcb);
+ }
+
return 0;
}
-void
+static void
getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
Reseq *rp;
@@ -2918,10 +3140,13 @@
*bp = rp->bp;
*length = rp->length;
+ tcb->nreseq--;
+ tcb->reseqlen -= rp->length;
+
free(rp);
}
-int
+static int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
ushort len;
@@ -2992,7 +3217,7 @@
return 0;
}
-void
+static void
tcpadvise(Proto *tcp, Block *bp, char *msg)
{
Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
v4tov6(source, h4->tcpsrc);
psource = nhgets(h4->tcpsport);
pdest = nhgets(h4->tcpdport);
- }
- else {
+ } else {
ipmove(dest, h6->tcpdst);
ipmove(source, h6->tcpsrc);
psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
/* Look for a connection */
qlock(tcp);
- for(p = tcp->conv; *p; p++) {
- s = *p;
+ for(p = tcp->conv; (s = *p) != nil; p++) {
tcb = (Tcpctl*)s->ptcl;
if(s->rport == pdest)
if(s->lport == psource)
@@ -3029,6 +3252,8 @@
if(tcb->state != Closed)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qlock(s);
qunlock(tcp);
switch(tcb->state){
@@ -3058,9 +3283,11 @@
}
/* called with c qlocked */
-char*
+static char*
tcpctl(Conv* c, char** f, int n)
{
+ if(n == 1 && strcmp(f[0], "close") == 0)
+ return tcpclose(c), nil;
if(n == 1 && strcmp(f[0], "hangup") == 0)
return tcphangup(c);
if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
return "unknown control request";
}
-int
+static int
tcpstats(Proto *tcp, char *buf, int len)
{
Tcppriv *priv;
@@ -3083,7 +3310,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -3096,7 +3323,7 @@
* of questionable validity so we try to use them only when we're
* up against the wall.
*/
-int
+static int
tcpgc(Proto *tcp)
{
Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
Tcpctl *tcb;
- n = natgc(tcp->ipproto);
+ n = 0;
ep = &tcp->conv[tcp->nc];
for(pp = tcp->conv; pp < ep; pp++) {
c = *pp;
@@ -3116,13 +3343,13 @@
switch(tcb->state){
case Syn_received:
if(NOW - tcb->time > 5000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
case Finwait2:
if(NOW - tcb->time > 5*60*1000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
@@ -3132,7 +3359,7 @@
return n;
}
-void
+static void
tcpsettimer(Tcpctl *tcb)
{
int x;
@@ -3141,9 +3368,9 @@
x = backoff(tcb->backoff) *
(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
- /* bounded twixt 1/2 and 64 seconds */
- if(x < 500/MSPTICK)
- x = 500/MSPTICK;
+ /* bounded twixt 0.3 and 64 seconds */
+ if(x < 300/MSPTICK)
+ x = 300/MSPTICK;
else if(x > (64000/MSPTICK))
x = 64000/MSPTICK;
tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
Fsproto(fs, tcp);
}
-void
+static void
tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
{
- if(rcvscale){
- tcb->rcv.scale = rcvscale & 0xff;
- tcb->snd.scale = sndscale & 0xff;
- tcb->window = QMAX<<tcb->snd.scale;
- qsetlimit(s->rq, tcb->window);
- } else {
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- tcb->window = QMAX;
- qsetlimit(s->rq, tcb->window);
- }
+ /*
+ * guess at reasonable queue sizes. there's no current way
+ * to know how many nic receive buffers we can safely tie up in the
+ * tcp stack, and we don't adjust our queues to maximize throughput
+ * and minimize bufferbloat. n.b. the offer (rcvscale) needs to be
+ * respected, but we still control our own buffer commitment by
+ * keeping a separate qscale.
+ */
+ tcb->rcv.scale = rcvscale & 0xff;
+ tcb->snd.scale = sndscale & 0xff;
+ tcb->qscale = rcvscale & 0xff;
+ if(rcvscale > Maxqscale)
+ tcb->qscale = Maxqscale;
+
+ if(rcvscale != tcb->rcv.scale)
+ netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+ tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+ tcb->window = QMAX<<tcb->qscale;
+ tcb->ssthresh = tcb->window;
+
+ /*
+ * it's important to set wq large enough to cover the full
+ * bandwidth-delay product. it's possible to be in loss
+ * recovery with a big window, and we need to keep sending
+ * into the inflated window. the difference can be huge
+ * for even modest (70ms) ping times.
+ */
+ qsetlimit(s->rq, QMAX<<tcb->qscale);
+ qsetlimit(s->wq, QMAX<<tcb->qscale);
+ tcprcvwin(s);
}
diff -u a/os/ip//udp.c b/os/ip//udp.c
--- a/os/ip//udp.c
+++ b/os/ip//udp.c
@@ -24,7 +24,6 @@
IP_UDPPROTO = 17,
UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
Udprxms = 200,
Udptickms = 100,
@@ -40,7 +39,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar udpproto; /* Protocol */
uchar udpplen[2]; /* Header plus data length */
uchar udpsrc[IPv4addrlen]; /* Ip source */
@@ -73,10 +72,10 @@
typedef struct Udpstats Udpstats;
struct Udpstats
{
- ulong udpInDatagrams;
+ uvlong udpInDatagrams;
ulong udpNoPorts;
ulong udpInErrors;
- ulong udpOutDatagrams;
+ uvlong udpOutDatagrams;
};
typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
typedef struct Udpcb Udpcb;
struct Udpcb
{
- QLock;
uchar headers;
};
@@ -125,7 +123,7 @@
static int
udpstate(Conv *c, char *state, int n)
{
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
c->inuse ? "Open" : "Closed",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
static void
udpcreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->rq = qopen(512*1024, Qmsg, 0, 0);
c->wq = qbypass(udpkick, c);
}
@@ -175,8 +173,6 @@
ucb = (Udpcb*)c->ptcl;
ucb->headers = 0;
-
- qunlock(c);
}
void
@@ -192,12 +188,13 @@
Udppriv *upriv;
Fs *f;
int version;
- Conv *rc;
+ Routehint *rh;
+ ushort csum;
upriv = c->p->priv;
f = c->p->f;
- netlog(c->p->f, Logudp, "udp: kick\n");
+// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */
if(bp == nil)
return;
@@ -219,21 +216,6 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
- bp->rp += 2+2; /* Ignore local port */
- break;
default:
rport = 0;
break;
@@ -240,18 +222,12 @@
}
if(ucb->headers) {
- if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
- ipcmp(laddr, IPnoaddr) == 0)
+ if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
version = V4;
else
version = V6;
} else {
- if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- version = V4;
- else
- version = V6;
+ version = convipvers(c);
}
dlen = blocklen(bp);
@@ -260,9 +236,6 @@
switch(version){
case V4:
bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
uh4 = (Udp4hdr *)(bp->rp);
ptcllen = dlen + UDP_UDPHDR_SZ;
uh4->Unused = 0;
@@ -274,7 +247,7 @@
v6tov4(uh4->udpdst, raddr);
hnputs(uh4->udpdport, rport);
v6tov4(uh4->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
v6tov4(uh4->udpdst, c->raddr);
hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
v6tov4(uh4->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh4->udpsport, c->lport);
hnputs(uh4->udplen, ptcllen);
uh4->udpcksum[0] = 0;
uh4->udpcksum[1] = 0;
- hnputs(uh4->udpcksum,
- ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh4->udpcksum, csum);
uh4->vihl = IP_VER4;
- ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput4(f, bp, 0, c->ttl, c->tos, rh);
break;
case V6:
+ /*
+ * using the v6 ip header to create pseudo header
+ * first then reset it to the normal ip header
+ */
bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
- // using the v6 ip header to create pseudo header
- // first then reset it to the normal ip header
uh6 = (Udp6hdr *)(bp->rp);
memset(uh6, 0, 8);
ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
ipmove(uh6->udpdst, raddr);
hnputs(uh6->udpdport, rport);
ipmove(uh6->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
ipmove(uh6->udpdst, c->raddr);
hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
ipmove(uh6->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh6->udpsport, c->lport);
hnputs(uh6->udplen, ptcllen);
uh6->udpcksum[0] = 0;
uh6->udpcksum[1] = 0;
- hnputs(uh6->udpcksum,
- ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh6->udpcksum, csum);
memset(uh6, 0, 8);
uh6->viclfl[0] = IP_VER6;
hnputs(uh6->len, ptcllen);
uh6->nextheader = IP_UDPPROTO;
- ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput6(f, bp, 0, c->ttl, c->tos, rh);
break;
default:
@@ -360,10 +336,8 @@
uh4 = (Udp4hdr*)(bp->rp);
version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
- /*
- * Put back pseudo header for checksum
- * (remember old values for icmpnoconv())
- */
+ /* Put back pseudo header for checksum
+ * (remember old values for icmpnoconv()) */
switch(version) {
case V4:
ottl = uh4->Unused;
@@ -423,7 +397,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.udpNoPorts++;
qunlock(udp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
icmpnoconv(f, bp);
break;
case V6:
- icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+ icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
break;
default:
panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
if(c->state == Announced){
if(ucb->headers == 0){
/* create a new conversation */
- if(ipforme(f, laddr) != Runi) {
- switch(version){
- case V4:
- v4tov6(laddr, ifc->lifc->local);
- break;
- case V6:
- ipmove(laddr, ifc->lifc->local);
- break;
- default:
- panic("udpiput3: version %d", version);
- }
- }
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
c = Fsnewcall(c, raddr, rport, laddr, lport, version);
if(c == nil){
qunlock(udp);
@@ -507,33 +471,21 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
}
- if(bp->next)
- bp = concatblock(bp);
-
if(qfull(c->rq)){
- qunlock(c);
- netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
- return;
+ } else {
+ qpass(c->rq, concatblock(bp));
}
-
- qpass(c->rq, bp);
qunlock(c);
}
@@ -545,11 +497,13 @@
ucb = (Udpcb*)c->ptcl;
if(n == 1){
- if(strcmp(f[0], "oldheaders") == 0){
- ucb->headers = 6;
+ if(strcmp(f[0], "hangup") == 0){
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 7;
+ }
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
}
}
@@ -564,34 +518,25 @@
uchar source[IPaddrlen], dest[IPaddrlen];
ushort psource, pdest;
Conv *s, **p;
- int version;
h4 = (Udp4hdr*)(bp->rp);
- version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+ h6 = (Udp6hdr*)(bp->rp);
- switch(version) {
- case V4:
+ if((h4->vihl&0xF0)==IP_VER4) {
v4tov6(dest, h4->udpdst);
v4tov6(source, h4->udpsrc);
psource = nhgets(h4->udpsport);
pdest = nhgets(h4->udpdport);
- break;
- case V6:
- h6 = (Udp6hdr*)(bp->rp);
+ } else {
ipmove(dest, h6->udpdst);
ipmove(source, h6->udpsrc);
psource = nhgets(h6->udpsport);
pdest = nhgets(h6->udpdport);
- break;
- default:
- panic("udpadvise: version %d", version);
- return; /* to avoid a warning */
}
/* Look for a connection */
qlock(udp);
- for(p = udp->conv; *p; p++) {
- s = *p;
+ for(p = udp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
Udppriv *upriv;
upriv = udp->priv;
- return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+ return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+ "InErrors: %lud\nOutDatagrams: %llud\n",
upriv->ustats.udpInDatagrams,
upriv->ustats.udpNoPorts,
upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
upriv->ustats.udpOutDatagrams);
}
-int
-udpgc(Proto *udp)
-{
- return natgc(udp->ipproto);
-}
-
void
udpinit(Fs *fs)
{
@@ -647,7 +587,6 @@
udp->rcv = udpiput;
udp->advise = udpadvise;
udp->stats = udpstats;
- udp->gc = udpgc;
udp->ipproto = IP_UDPPROTO;
udp->nc = Nchans;
udp->ptclsize = sizeof(Udpcb);
--- /dev/null
+++ b/os/ip.original/arp.c
@@ -1,0 +1,681 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * address resolution tables
+ */
+
+enum
+{
+ NHASH = (1<<6),
+ NCACHE = 256,
+
+ AOK = 1,
+ AWAIT = 2,
+};
+
+char *arpstate[] =
+{
+ "UNUSED",
+ "OK",
+ "WAIT",
+};
+
+/*
+ * one per Fs
+ */
+struct Arp
+{
+ QLock;
+ Fs *f;
+ Arpent *hash[NHASH];
+ Arpent cache[NCACHE];
+ Arpent *rxmt;
+ Proc *rxmitp; /* neib sol re-transmit proc */
+ Rendez rxmtq;
+ Block *dropf, *dropl;
+};
+
+char *Ebadarp = "bad arp";
+
+#define haship(s) ((s)[IPaddrlen-1]%NHASH)
+
+extern int ReTransTimer = RETRANS_TIMER;
+static void rxmitproc(void *v);
+
+void
+arpinit(Fs *f)
+{
+ f->arp = smalloc(sizeof(Arp));
+ f->arp->f = f;
+ f->arp->rxmt = nil;
+ f->arp->dropf = f->arp->dropl = nil;
+ kproc("rxmitproc", rxmitproc, f->arp, 0);
+}
+
+/*
+ * create a new arp entry for an ip address.
+ */
+static Arpent*
+newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+{
+ uint t;
+ Block *next, *xp;
+ Arpent *a, *e, *f, **l;
+ Medium *m = ifc->m;
+ int empty;
+
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
+ }
+ }
+
+ /* dump waiting packets */
+ xp = a->hold;
+ a->hold = nil;
+
+ if(isv4(a->ip)){
+ while(xp){
+ next = xp->list;
+ freeblist(xp);
+ xp = next;
+ }
+ }
+ else { // queue icmp unreachable for rxmitproc later on, w/o arp lock
+ if(xp){
+ if(arp->dropl == nil)
+ arp->dropf = xp;
+ else
+ arp->dropl->list = xp;
+
+ for(next = xp->list; next; next = next->list)
+ xp = next;
+ arp->dropl = xp;
+ wakeup(&arp->rxmtq);
+ }
+ }
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ memmove(a->ip, ip, sizeof(a->ip));
+ a->utime = NOW;
+ a->ctime = 0;
+ a->type = m;
+
+ a->rtime = NOW + ReTransTimer;
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
+ if(!ipismulticast(a->ip) && addrxt){
+ l = &arp->rxmt;
+ empty = (*l==nil);
+
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ for(f = *l; f; f = f->nextrxt){
+ l = &f->nextrxt;
+ }
+ *l = a;
+ if(empty)
+ wakeup(&arp->rxmtq);
+ }
+
+ a->nextrxt = nil;
+
+ return a;
+}
+
+/* called with arp qlocked */
+
+void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent *f, **l;
+
+ a->utime = 0;
+ a->ctime = 0;
+ a->type = 0;
+ a->state = 0;
+
+ /* take out of current chain */
+ l = &arp->hash[haship(a->ip)];
+ for(f = *l; f; f = f->hash){
+ if(f == a){
+ *l = a->hash;
+ break;
+ }
+ l = &f->hash;
+ }
+
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ a->nextrxt = nil;
+ a->hash = nil;
+ a->hold = nil;
+ a->last = nil;
+ a->ifc = nil;
+}
+
+/*
+ * fill in the media address if we have it. Otherwise return an
+ * Arpent that represents the state of the address resolution FSM
+ * for ip. Add the packet to be sent onto the list of packets
+ * waiting for ip->mac to be resolved.
+ */
+Arpent*
+arpget(Arp *arp, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *mac)
+{
+ int hash;
+ Arpent *a;
+ Medium *type = ifc->m;
+ uchar v6ip[IPaddrlen];
+
+ if(version == V4){
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ }
+
+ qlock(arp);
+ hash = haship(ip);
+ for(a = arp->hash[hash]; a; a = a->hash){
+ if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
+ if(type == a->type)
+ break;
+ }
+
+ if(a == nil){
+ a = newarp6(arp, ip, ifc, (version != V4));
+ a->state = AWAIT;
+ }
+ a->utime = NOW;
+ if(a->state == AWAIT){
+ if(bp != nil){
+ if(a->hold)
+ a->last->list = bp;
+ else
+ a->hold = bp;
+ a->last = bp;
+ bp->list = nil;
+ }
+ return a; /* return with arp qlocked */
+ }
+
+ memmove(mac, a->mac, a->type->maclen);
+
+ /* remove old entries */
+ if(NOW - a->ctime > 15*60*1000)
+ cleanarpent(arp, a);
+
+ qunlock(arp);
+ return nil;
+}
+
+/*
+ * called with arp locked
+ */
+void
+arprelease(Arp *arp, Arpent*)
+{
+ qunlock(arp);
+}
+
+/*
+ * Copy out the mac address from the Arpent. Return the
+ * block waiting to get sent to this mac address.
+ *
+ * called with arp locked
+ */
+Block*
+arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
+{
+ Block *bp;
+ Arpent *f, **l;
+
+ if(!isv4(a->ip)){
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ memmove(a->mac, mac, type->maclen);
+ a->type = type;
+ a->state = AOK;
+ a->utime = NOW;
+ bp = a->hold;
+ a->hold = nil;
+ qunlock(arp);
+
+ return bp;
+}
+
+void
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+{
+ Arp *arp;
+ Route *r;
+ Arpent *a, *f, **l;
+ Ipifc *ifc;
+ Medium *type;
+ Block *bp, *next;
+ uchar v6ip[IPaddrlen];
+
+ arp = fs->arp;
+
+ if(n != 6){
+// print("arp: len = %d\n", n);
+ return;
+ }
+
+ switch(version){
+ case V4:
+ r = v4lookup(fs, ip, nil);
+ v4tov6(v6ip, ip);
+ ip = v6ip;
+ break;
+ case V6:
+ r = v6lookup(fs, ip, nil);
+ break;
+ default:
+ panic("arpenter: version %d", version);
+ return; /* to suppress warnings */
+ }
+
+ if(r == nil){
+// print("arp: no route for entry\n");
+ return;
+ }
+
+ ifc = r->ifc;
+ type = ifc->m;
+
+ qlock(arp);
+ for(a = arp->hash[haship(ip)]; a; a = a->hash){
+ if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ continue;
+
+ if(ipcmp(a->ip, ip) == 0){
+ a->state = AOK;
+ memmove(a->mac, mac, type->maclen);
+
+ if(version == V6){
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(f = *l; f; f = f->nextrxt){
+ if(f == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &f->nextrxt;
+ }
+ }
+
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+ bp = a->hold;
+ a->hold = nil;
+ if(version == V4)
+ ip += IPv4off;
+ a->utime = NOW;
+ a->ctime = a->utime;
+ qunlock(arp);
+
+ while(bp){
+ next = bp->list;
+ if(ifc != nil){
+ if(waserror()){
+ runlock(ifc);
+ nexterror();
+ }
+ rlock(ifc);
+ if(ifc->m != nil)
+ ifc->m->bwrite(ifc, bp, version, ip);
+ else
+ freeb(bp);
+ runlock(ifc);
+ poperror();
+ } else
+ freeb(bp);
+ bp = next;
+ }
+ return;
+ }
+ }
+
+ if(refresh == 0){
+ a = newarp6(arp, ip, ifc, 0);
+ a->state = AOK;
+ a->type = type;
+ a->ctime = NOW;
+ memmove(a->mac, mac, type->maclen);
+ }
+
+ qunlock(arp);
+}
+
+int
+arpwrite(Fs *fs, char *s, int len)
+{
+ int n;
+ Route *r;
+ Arp *arp;
+ Block *bp;
+ Arpent *a, *fl, **l;
+ Medium *m;
+ char *f[4], buf[256];
+ uchar ip[IPaddrlen], mac[MAClen];
+
+ arp = fs->arp;
+
+ if(len == 0)
+ error(Ebadarp);
+ if(len >= sizeof(buf))
+ len = sizeof(buf)-1;
+ strncpy(buf, s, len);
+ buf[len] = 0;
+ if(len > 0 && buf[len-1] == '\n')
+ buf[len-1] = 0;
+
+ n = getfields(buf, f, 4, 1, " ");
+ if(strcmp(f[0], "flush") == 0){
+ qlock(arp);
+ for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+ a->hash = nil;
+ a->state = 0;
+ a->utime = 0;
+ while(a->hold != nil){
+ bp = a->hold->list;
+ freeblist(a->hold);
+ a->hold = bp;
+ }
+ }
+ memset(arp->hash, 0, sizeof(arp->hash));
+// clear all pkts on these lists (rxmt, dropf/l)
+ arp->rxmt = nil;
+ arp->dropf = nil;
+ arp->dropl = nil;
+ qunlock(arp);
+ } else if(strcmp(f[0], "add") == 0){
+ switch(n){
+ default:
+ error(Ebadarg);
+ case 3:
+ parseip(ip, f[1]);
+ if(isv4(ip))
+ r = v4lookup(fs, ip+IPv4off, nil);
+ else
+ r = v6lookup(fs, ip, nil);
+ if(r == nil)
+ error("Destination unreachable");
+ m = r->ifc->m;
+ n = parsemac(mac, f[2], m->maclen);
+ break;
+ case 4:
+ m = ipfindmedium(f[1]);
+ if(m == nil)
+ error(Ebadarp);
+ parseip(ip, f[2]);
+ n = parsemac(mac, f[3], m->maclen);
+ break;
+ }
+
+ if(m->ares == nil)
+ error(Ebadarp);
+
+ m->ares(fs, V6, ip, mac, n, 0);
+ } else if(strcmp(f[0], "del") == 0){
+ if(n != 2)
+ error(Ebadarg);
+
+ parseip(ip, f[1]);
+ qlock(arp);
+
+ l = &arp->hash[haship(ip)];
+ for(a = *l; a; a = a->hash){
+ if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
+ *l = a->hash;
+ break;
+ }
+ l = &a->hash;
+ }
+
+ if(a){
+ /* take out of re-transmit chain */
+ l = &arp->rxmt;
+ for(fl = *l; fl; fl = fl->nextrxt){
+ if(fl == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &fl->nextrxt;
+ }
+
+ a->nextrxt = nil;
+ a->hash = nil;
+ a->hold = nil;
+ a->last = nil;
+ a->ifc = nil;
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+ }
+ qunlock(arp);
+ } else
+ error(Ebadarp);
+
+ return len;
+}
+
+enum
+{
+ Alinelen= 90,
+};
+
+char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
+
+static void
+convmac(char *p, uchar *mac, int n)
+{
+ while(n-- > 0)
+ p += sprint(p, "%2.2ux", *mac++);
+}
+
+int
+arpread(Arp *arp, char *p, ulong offset, int len)
+{
+ Arpent *a;
+ int n;
+ char mac[2*MAClen+1];
+
+ if(offset % Alinelen)
+ return 0;
+
+ offset = offset/Alinelen;
+ len = len/Alinelen;
+
+ n = 0;
+ for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
+ if(a->state == 0)
+ continue;
+ if(offset > 0){
+ offset--;
+ continue;
+ }
+ len--;
+ qlock(arp);
+ convmac(mac, a->mac, a->type->maclen);
+ n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ qunlock(arp);
+ }
+
+ return n;
+}
+
+extern int
+rxmitsols(Arp *arp)
+{
+ uint sflag;
+ Block *next, *xp;
+ Arpent *a, *b, **l;
+ Fs *f;
+ uchar ipsrc[IPaddrlen];
+ Ipifc *ifc = nil;
+ long nrxt;
+
+ qlock(arp);
+ f = arp->f;
+
+ a = arp->rxmt;
+ if(a==nil){
+ nrxt = 0;
+ goto dodrops; //return nrxt;
+ }
+ nrxt = a->rtime - NOW;
+ if(nrxt > 3*ReTransTimer/4)
+ goto dodrops; //return nrxt;
+
+ for(; a; a = a->nextrxt){
+ ifc = a->ifc;
+ assert(ifc != nil);
+ if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
+ xp = a->hold;
+ a->hold = nil;
+
+ if(xp){
+ if(arp->dropl == nil)
+ arp->dropf = xp;
+ else
+ arp->dropl->list = xp;
+ }
+
+ cleanarpent(arp, a);
+ }
+ else
+ break;
+ }
+ if(a == nil)
+ goto dodrops;
+
+
+ qunlock(arp); /* for icmpns */
+ if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
+ icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+
+ runlock(ifc);
+ qlock(arp);
+
+ /* put to the end of re-transmit chain */
+ l = &arp->rxmt;
+ for(b = *l; b; b = b->nextrxt){
+ if(b == a){
+ *l = a->nextrxt;
+ break;
+ }
+ l = &b->nextrxt;
+ }
+ for(b = *l; b; b = b->nextrxt){
+ l = &b->nextrxt;
+ }
+ *l = a;
+ a->rxtsrem--;
+ a->nextrxt = nil;
+ a->rtime = NOW + ReTransTimer;
+
+ a = arp->rxmt;
+ if(a==nil)
+ nrxt = 0;
+ else
+ nrxt = a->rtime - NOW;
+
+dodrops:
+ xp = arp->dropf;
+ arp->dropf = nil;
+ arp->dropl = nil;
+ qunlock(arp);
+
+ for(; xp; xp = next){
+ next = xp->list;
+ icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+ }
+
+ return nrxt;
+
+}
+
+static int
+rxready(void *v)
+{
+ Arp *arp = (Arp *) v;
+ int x;
+
+ x = ((arp->rxmt != nil) || (arp->dropf != nil));
+
+ return x;
+}
+
+static void
+rxmitproc(void *v)
+{
+ Arp *arp = v;
+ long wakeupat;
+
+ arp->rxmitp = up;
+ //print("arp rxmitproc started\n");
+ if(waserror()){
+ arp->rxmitp = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ wakeupat = rxmitsols(arp);
+ if(wakeupat == 0)
+ sleep(&arp->rxmtq, rxready, v);
+ else if(wakeupat > ReTransTimer/4)
+ tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ }
+}
+
--- /dev/null
+++ b/os/ip.original/bootp.c
@@ -1,0 +1,231 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+static ulong fsip;
+static ulong auip;
+static ulong gwip;
+static ulong ipmask;
+static ulong ipaddr;
+
+enum
+{
+ Bootrequest = 1,
+ Bootreply = 2,
+};
+
+typedef struct Bootp
+{
+ /* udp.c oldheader */
+ uchar raddr[IPaddrlen];
+ uchar laddr[IPaddrlen];
+ uchar rport[2];
+ uchar lport[2];
+ /* bootp itself */
+ uchar op; /* opcode */
+ uchar htype; /* hardware type */
+ uchar hlen; /* hardware address len */
+ uchar hops; /* hops */
+ uchar xid[4]; /* a random number */
+ uchar secs[2]; /* elapsed since client started booting */
+ uchar pad[2];
+ uchar ciaddr[4]; /* client IP address (client tells server) */
+ uchar yiaddr[4]; /* client IP address (server tells client) */
+ uchar siaddr[4]; /* server IP address */
+ uchar giaddr[4]; /* gateway IP address */
+ uchar chaddr[16]; /* client hardware address */
+ uchar sname[64]; /* server host name (optional) */
+ uchar file[128]; /* boot file name */
+ uchar vend[128]; /* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+static Bootp req;
+static Proc* rcvprocp;
+static int recv;
+static int done;
+static Rendez bootpr;
+static char rcvbuf[512+2*IPaddrlen+2*2];
+
+static void
+rcvbootp(void *a)
+{
+ int n, fd;
+ Bootp *rp;
+ char *field[4];
+ uchar ip[IPaddrlen];
+
+ if(waserror())
+ pexit("", 0);
+ rcvprocp = up; /* store for postnote below */
+ fd = (int)a; /* at compilation: warning: ../ip/bootp.c:78 conversion of pointer to shorter integer */
+ while(done == 0) {
+ n = kread(fd, rcvbuf, sizeof(rcvbuf));
+ if(n <= 0)
+ break;
+ rp = (Bootp*)rcvbuf;
+ /* currently ignore udp's header */
+ if(memcmp(req.chaddr, rp->chaddr, 6) == 0
+ && rp->htype == 1 && rp->hlen == 6
+ && getfields((char*)rp->vend+4, field, 4, 1, " ") == 4
+ && strncmp((char*)rp->vend, "p9 ", 4) == 0){
+ if(ipaddr == 0)
+ ipaddr = nhgetl(rp->yiaddr);
+ if(ipmask == 0)
+ ipmask = parseip(ip, field[0]);
+ if(fsip == 0)
+ fsip = parseip(ip, field[1]);
+ if(auip == 0)
+ auip = parseip(ip, field[2]);
+ if(gwip == 0)
+ gwip = parseip(ip, field[3]);
+ break;
+ }
+ }
+ poperror();
+ rcvprocp = nil;
+
+ recv = 1;
+ wakeup(&bootpr);
+ pexit("", 0);
+}
+
+static char*
+rbootp(Ipifc *ifc)
+{
+ int cfd, dfd, tries, n;
+ char ia[5+3*24], im[16], *av[3];
+ uchar nipaddr[4], ngwip[4], nipmask[4];
+ char dir[Maxpath];
+
+ av[1] = "0.0.0.0";
+ av[2] = "0.0.0.0";
+ ipifcadd(ifc, av, 3, 0, nil);
+
+ cfd = kannounce("udp!*!68", dir);
+ if(cfd < 0)
+ return "bootp announce failed";
+ strcat(dir, "/data");
+ if(kwrite(cfd, "headers", 7) < 0){
+ kclose(cfd);
+ return "bootp ctl headers failed";
+ }
+ kwrite(cfd, "oldheaders", 10);
+ dfd = kopen(dir, ORDWR);
+ if(dfd < 0){
+ kclose(cfd);
+ return "bootp open data failed";
+ }
+ kclose(cfd);
+
+
+ /* create request */
+ memset(&req, 0, sizeof(req));
+ ipmove(req.raddr, IPv4bcast);
+ hnputs(req.rport, 67);
+ req.op = Bootrequest;
+ req.htype = 1; /* ethernet (all we know) */
+ req.hlen = 6; /* ethernet (all we know) */
+
+ /* Hardware MAC address */
+ memmove(req.chaddr, ifc->mac, 6);
+ /* Fill in the local IP address if we know it */
+ ipv4local(ifc, req.ciaddr);
+ memset(req.file, 0, sizeof(req.file));
+ strcpy((char*)req.vend, "p9 ");
+
+ done = 0;
+ recv = 0;
+
+ kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+
+ /*
+ * broadcast bootp's till we get a reply,
+ * or fixed number of tries
+ */
+ tries = 0;
+ while(recv == 0) {
+ if(kwrite(dfd, &req, sizeof(req)) < 0)
+ print("bootp: write: %s\n", commonerror());
+
+ tsleep(&bootpr, return0, 0, 1000);
+ if(++tries > 10) {
+ print("bootp: timed out\n");
+ break;
+ }
+ }
+ kclose(dfd);
+ done = 1;
+ if(rcvprocp != nil){
+ postnote(rcvprocp, 1, "timeout", 0);
+ rcvprocp = nil;
+ }
+
+ av[1] = "0.0.0.0";
+ av[2] = "0.0.0.0";
+ ipifcrem(ifc, av, 3);
+
+ hnputl(nipaddr, ipaddr);
+ sprint(ia, "%V", nipaddr);
+ hnputl(nipmask, ipmask);
+ sprint(im, "%V", nipmask);
+ av[1] = ia;
+ av[2] = im;
+ ipifcadd(ifc, av, 3, 0, nil);
+
+ if(gwip != 0) {
+ hnputl(ngwip, gwip);
+ n = snprint(ia, sizeof(ia), "add 0.0.0.0 0.0.0.0 %V", ngwip);
+ routewrite(ifc->conv->p->f, nil, ia, n);
+ }
+ return nil;
+}
+
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+ int n;
+ char *buf;
+ uchar a[4];
+
+ buf = smalloc(READSTR);
+ if(waserror()){
+ free(buf);
+ nexterror();
+ }
+ hnputl(a, fsip);
+ n = snprint(buf, READSTR, "fsip %15V\n", a);
+ hnputl(a, auip);
+ n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
+ hnputl(a, gwip);
+ n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
+ hnputl(a, ipmask);
+ n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
+ hnputl(a, ipaddr);
+ snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
+
+ len = readstr(offset, bp, len, buf);
+ poperror();
+ free(buf);
+ return len;
+}
+
+char* (*bootp)(Ipifc*) = rbootp;
+int (*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/compress.c
@@ -1,0 +1,520 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ppp.h"
+
+typedef struct Iphdr Iphdr;
+typedef struct Tcphdr Tcphdr;
+typedef struct Ilhdr Ilhdr;
+typedef struct Hdr Hdr;
+typedef struct Tcpc Tcpc;
+
+struct Iphdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ ulong src; /* Ip source (byte ordering unimportant) */
+ ulong dst; /* Ip destination (byte ordering unimportant) */
+};
+
+struct Tcphdr
+{
+ ulong ports; /* defined as a ulong to make comparisons easier */
+ uchar seq[4];
+ uchar ack[4];
+ uchar flag[2];
+ uchar win[2];
+ uchar cksum[2];
+ uchar urg[2];
+};
+
+struct Ilhdr
+{
+ uchar sum[2]; /* Checksum including header */
+ uchar len[2]; /* Packet length */
+ uchar type; /* Packet type */
+ uchar spec; /* Special */
+ uchar src[2]; /* Src port */
+ uchar dst[2]; /* Dst port */
+ uchar id[4]; /* Sequence id */
+ uchar ack[4]; /* Acked sequence */
+};
+
+enum
+{
+	URG		= 0x20,		/* Data marked urgent */
+	ACK		= 0x10,		/* Acknowledge is valid */
+	PSH		= 0x08,		/* Whole data pipe is pushed */
+	RST		= 0x04,		/* Reset connection */
+	SYN		= 0x02,		/* Pkt. is synchronise */
+	FIN		= 0x01,		/* Start close down */
+
+	IP_DF		= 0x4000,	/* Don't fragment */
+
+	IP_TCPPROTO	= 6,
+	IP_ILPROTO	= 40,
+	IL_IPHDR	= 20,
+};
+
+struct Hdr
+{
+	uchar	buf[128];	/* copy of the last IP+TCP header seen on this slot */
+	Iphdr	*ip;		/* set to buf by compress_init */
+	Tcphdr	*tcp;		/* buf + IP header length of the saved header */
+	int	len;		/* number of header bytes held in buf */
+};
+
+struct Tcpc
+{
+	uchar	lastrecv;	/* receive slot named by the last packet that carried one */
+	uchar	lastxmit;	/* transmit slot used by the last packet sent */
+	uchar	basexmit;	/* where the transmit slot search starts; moved when a slot is reused */
+	uchar	err;		/* set on a bad packet; cleared when a valid slot index arrives */
+	uchar	compressid;	/* from compress_negotiate; 0 makes every compressed packet carry its slot */
+	Hdr	t[MAX_STATES];	/* transmit side header state */
+	Hdr	r[MAX_STATES];	/* receive side header state */
+};
+
+enum
+{ /* flag bits for what changed in a packet */
+ NEW_U=(1<<0), /* tcp only */
+ NEW_W=(1<<1), /* tcp only */
+ NEW_A=(1<<2), /* il tcp */
+ NEW_S=(1<<3), /* tcp only */
+ NEW_P=(1<<4), /* tcp only */
+ NEW_I=(1<<5), /* il tcp */
+ NEW_C=(1<<6), /* il tcp */
+ NEW_T=(1<<7), /* il only */
+ TCP_PUSH_BIT = 0x10,
+};
+
+/* reserved, special-case values of above for tcp */
+#define SPECIAL_I (NEW_S|NEW_W|NEW_U) /* echoed interactive traffic */
+#define SPECIAL_D (NEW_S|NEW_A|NEW_W|NEW_U) /* unidirectional data */
+#define SPECIALS_MASK (NEW_S|NEW_A|NEW_W|NEW_U)
+
+int
+encode(void *p, ulong n)
+{
+	uchar *out = p;
+
+	if(n != 0 && n < 256){	/* 1..255 fit in a single byte */
+		out[0] = n;
+		return 1;
+	}
+	/* otherwise: a zero escape byte, then the low 16 bits, high byte first */
+	out[0] = 0;
+	out[1] = n >> 8;
+	out[2] = n;
+	return 3;
+}
+
+#define DECODEL(f) { /* add the delta encoded at cp to the 4-byte field f; advances cp */ \
+	if (*cp == 0) {\
+		hnputl(f, nhgetl(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		hnputl(f, nhgetl(f) + (ulong)*cp++); \
+	} \
+}
+#define DECODES(f) { /* same for a 2-byte field; both expand to a bare block, so no ';' at call sites */ \
+	if (*cp == 0) {\
+		hnputs(f, nhgets(f) + ((cp[1] << 8) | cp[2])); \
+		cp += 3; \
+	} else { \
+		hnputs(f, nhgets(f) + (ulong)*cp++); \
+	} \
+}
+
+ushort
+tcpcompress(Tcpc *comp, Block *b, Fs *)
+{
+	Iphdr	*ip;		/* current packet */
+	Tcphdr	*tcp;		/* current pkt */
+	ulong 	iplen, tcplen, hlen;	/* header length in bytes */
+	ulong 	deltaS, deltaA;	/* general purpose temporaries */
+	ulong 	changes;	/* change mask */
+	uchar	new_seq[16];	/* changes from last to current */
+	uchar	*cp;
+	Hdr	*h;		/* last packet */
+	int 	i, j;
+
+	/* Header compression of the TCP/IP packet at b->rp, done in place.
+	 * Returns Pip (send unchanged), Pvjutcp (uncompressed, slot refreshed)
+	 * or Pvjctcp (compressed).  Bail if this is not a compressible packet. */
+	ip = (Iphdr*)b->rp;
+	iplen = (ip->vihl & 0xf) << 2;
+	tcp = (Tcphdr*)(b->rp + iplen);
+	tcplen = (tcp->flag[0] & 0xf0) >> 2;
+	hlen = iplen + tcplen;
+	if((tcp->flag[1] & (SYN|FIN|RST|ACK)) != ACK)
+		return Pip;	/* connection control */
+
+	/*
+	 * Packet is compressible, look for a connection
+	 */
+	changes = 0;
+	cp = new_seq;
+	j = comp->lastxmit;
+	h = &comp->t[j];
+	if(ip->src != h->ip->src || ip->dst != h->ip->dst
+	|| tcp->ports != h->tcp->ports) {
+		for(i = 0; i < MAX_STATES; ++i) {
+			j = (comp->basexmit + i) % MAX_STATES;
+			h = &comp->t[j];
+			if(ip->src == h->ip->src && ip->dst == h->ip->dst
+			&& tcp->ports == h->tcp->ports)
+				goto found;
+		}
+
+		/* no connection, reuse the oldest */
+		if(i == MAX_STATES) {
+			j = comp->basexmit;
+			j = (j + MAX_STATES - 1) % MAX_STATES;
+			comp->basexmit = j;
+			h = &comp->t[j];
+			goto raise;
+		}
+	}
+found:
+
+	/*
+	 * Make sure that only what we expect to change changed.
+	 */
+	if(ip->vihl  != h->ip->vihl || ip->tos != h->ip->tos ||
+	   ip->ttl   != h->ip->ttl  || ip->proto != h->ip->proto)
+		goto raise;	/* headers changed */
+	if(iplen != sizeof(Iphdr) && memcmp(ip+1, h->ip+1, iplen - sizeof(Iphdr)))
+		goto raise;	/* ip options changed */
+	if(tcplen != sizeof(Tcphdr) && memcmp(tcp+1, h->tcp+1, tcplen - sizeof(Tcphdr)))
+		goto raise;	/* tcp options changed */
+
+	if(tcp->flag[1] & URG) {
+		cp += encode(cp, nhgets(tcp->urg));
+		changes |= NEW_U;
+	} else if(memcmp(tcp->urg, h->tcp->urg, sizeof(tcp->urg)) != 0)
+		goto raise;
+	if(deltaS = nhgets(tcp->win) - nhgets(h->tcp->win)) {
+		cp += encode(cp, deltaS);
+		changes |= NEW_W;
+	}
+	if(deltaA = nhgetl(tcp->ack) - nhgetl(h->tcp->ack)) {
+		if(deltaA > 0xffff)
+			goto raise;	/* delta does not fit in 16 bits */
+		cp += encode(cp, deltaA);
+		changes |= NEW_A;
+	}
+	if(deltaS = nhgetl(tcp->seq) - nhgetl(h->tcp->seq)) {
+		if (deltaS > 0xffff)
+			goto raise;	/* delta does not fit in 16 bits */
+		cp += encode(cp, deltaS);
+		changes |= NEW_S;
+	}
+
+	/*
+	 * Look for the special-case encodings.
+	 */
+	switch(changes) {
+	case 0:
+		/*
+		 * Nothing changed. If this packet contains data and the last
+		 * one didn't, this is probably a data packet following an
+		 * ack (normal on an interactive connection) and we send it
+		 * compressed. Otherwise it's probably a retransmit,
+		 * retransmitted ack or window probe.  Send it uncompressed
+		 * in case the other side missed the compressed version.
+		 */
+		if(nhgets(ip->length) == nhgets(h->ip->length) ||
+		   nhgets(h->ip->length) != hlen)
+			goto raise;
+		break;
+	case SPECIAL_I:
+	case SPECIAL_D:
+		/*
+		 * Actual changes match one of our special case encodings --
+		 * send packet uncompressed.
+		 */
+		goto raise;
+	case NEW_S | NEW_A:
+		if (deltaS == deltaA &&
+			deltaS == nhgets(h->ip->length) - hlen) {
+			/* special case for echoed terminal traffic */
+			changes = SPECIAL_I;
+			cp = new_seq;
+		}
+		break;
+	case NEW_S:
+		if (deltaS == nhgets(h->ip->length) - hlen) {
+			/* special case for data xfer */
+			changes = SPECIAL_D;
+			cp = new_seq;
+		}
+		break;
+	}
+	deltaS = nhgets(ip->id) - nhgets(h->ip->id);
+	if(deltaS != 1) {
+		cp += encode(cp, deltaS);
+		changes |= NEW_I;
+	}
+	if (tcp->flag[1] & PSH)
+		changes |= TCP_PUSH_BIT;
+	/*
+	 * Grab the cksum before we overwrite it below. Then update our
+	 * state with this packet's header.
+	 */
+	deltaA = nhgets(tcp->cksum);
+	memmove(h->buf, b->rp, hlen);
+	h->len = hlen;
+	h->tcp = (Tcphdr*)(h->buf + iplen);
+
+	/*
+	 * We want to use the original packet as our compressed packet. (cp -
+	 * new_seq) is the number of bytes we need for compressed sequence
+	 * numbers. In addition we need one byte for the change mask, one
+	 * for the connection id and two for the tcp checksum. So, (cp -
+	 * new_seq) + 4 bytes of header are needed. hlen is how many bytes
+	 * of the original packet to toss so subtract the two to get the new
+	 * packet size. The temporaries are gross -egs.
+	 */
+	deltaS = cp - new_seq;
+	cp = b->rp;
+	if(comp->lastxmit != j || comp->compressid == 0) {
+		comp->lastxmit = j;
+		hlen -= deltaS + 4;
+		cp += hlen;
+		*cp++ = (changes | NEW_C);
+		*cp++ = j;
+	} else {
+		hlen -= deltaS + 3;
+		cp += hlen;
+		*cp++ = changes;
+	}
+	b->rp += hlen;
+	hnputs(cp, deltaA);
+	cp += 2;
+	memmove(cp, new_seq, deltaS);
+	return Pvjctcp;
+
+raise:
+	/* Update connection state & send uncompressed packet.  The slot number
+	 * goes in the protocol field of the packet itself (after the copy is
+	 * saved): tcpuncompress reads it there and restores IP_TCPPROTO. */
+	memmove(h->buf, b->rp, hlen);
+	h->tcp = (Tcphdr*)(h->buf + iplen);
+	h->len = hlen;
+	ip->proto = j;
+	comp->lastxmit = j;
+	return Pvjutcp;
+}
+
+Block*
+tcpuncompress(Tcpc *comp, Block *b, ushort type, Fs *f)	/* rebuild a full header; returns the block, or nil after freeing it */
+{
+	uchar	*cp, changes;
+	int	i;
+	int	iplen, len;
+	Iphdr	*ip;
+	Tcphdr	*tcp;
+	Hdr	*h;
+
+	if(type == Pvjutcp) {
+		/*
+		 *  Locate the saved state for this connection. If the state
+		 *  index is legal, clear the 'discard' flag.
+		 */
+		ip = (Iphdr*)b->rp;
+		if(ip->proto >= MAX_STATES)
+			goto raise;
+		iplen = (ip->vihl & 0xf) << 2;
+		tcp = (Tcphdr*)(b->rp + iplen);
+		comp->lastrecv = ip->proto;	/* the protocol field carries the slot number */
+		len = iplen + ((tcp->flag[0] & 0xf0) >> 2);
+		comp->err = 0;
+netlog(f, Logcompress, "uncompressed %d\n", comp->lastrecv);
+		/*
+		 * Restore the IP protocol field then save a copy of this
+		 * packet header. The checksum is zeroed in the copy so we
+		 * don't have to zero it each time we process a compressed
+		 * packet.
+		 */
+		ip->proto = IP_TCPPROTO;
+		h = &comp->r[comp->lastrecv];
+		memmove(h->buf, b->rp, len);
+		h->tcp = (Tcphdr*)(h->buf + iplen);
+		h->len = len;
+		h->ip->cksum[0] = h->ip->cksum[1] = 0;
+		return b;
+	}
+
+	cp = b->rp;
+	changes = *cp++;	/* first byte of a compressed packet is the change mask */
+	if(changes & NEW_C) {
+		/*
+		 * Make sure the state index is in range, then grab the
+		 * state. If we have a good state index, clear the 'discard'
+		 * flag.
+		 */
+		if(*cp >= MAX_STATES)
+			goto raise;
+		comp->err = 0;
+		comp->lastrecv = *cp++;
+netlog(f, Logcompress, "newc %d\n", comp->lastrecv);
+	} else {
+		/*
+		 * This packet has no state index. If we've had a
+		 * line error since the last time we got an explicit state
+		 * index, we have to toss the packet.
+		 */
+		if(comp->err != 0){
+			freeblist(b);
+			return nil;
+		}
+netlog(f, Logcompress, "oldc %d\n", comp->lastrecv);
+	}
+
+	/*
+	 * Find the state then fill in the TCP checksum and PUSH bit.
+	 */
+	h = &comp->r[comp->lastrecv];
+	ip = h->ip;
+	tcp = h->tcp;
+	len = h->len;
+	memmove(tcp->cksum, cp, sizeof tcp->cksum);
+	cp += 2;
+	if(changes & TCP_PUSH_BIT)
+		tcp->flag[1] |= PSH;
+	else
+		tcp->flag[1] &= ~PSH;
+	/*
+	 * Fix up the state's ack, seq, urg and win fields based on the
+	 * changemask.
+	 */
+	switch (changes & SPECIALS_MASK) {
+	case SPECIAL_I:
+		i = nhgets(ip->length) - len;	/* data bytes in the previous packet */
+		hnputl(tcp->ack, nhgetl(tcp->ack) + i);
+		hnputl(tcp->seq, nhgetl(tcp->seq) + i);
+		break;
+
+	case SPECIAL_D:
+		hnputl(tcp->seq, nhgetl(tcp->seq) + nhgets(ip->length) - len);
+		break;
+
+	default:
+		if(changes & NEW_U) {
+			tcp->flag[1] |= URG;
+			if(*cp == 0){
+				hnputs(tcp->urg, nhgets(cp+1));	/* urg is sent as a value, not a delta */
+				cp += 3;
+			}else
+				hnputs(tcp->urg, *cp++);
+		} else
+			tcp->flag[1] &= ~URG;
+		if(changes & NEW_W)
+			DECODES(tcp->win)
+		if(changes & NEW_A)
+			DECODEL(tcp->ack)
+		if(changes & NEW_S)
+			DECODEL(tcp->seq)
+		break;
+	}
+
+	/* Update the IP ID */
+	if(changes & NEW_I)
+		DECODES(ip->id)
+	else
+		hnputs(ip->id, nhgets(ip->id) + 1);
+
+	/*
+	 *  At this point, cp points to the first byte of data in the packet.
+	 *  Back up cp by the TCP/IP header length to make room for the
+	 *  reconstructed header.
+	 *  We assume the packet we were handed has enough space to prepend
+	 *  up to 128 bytes of header.
+	 */
+	b->rp = cp;
+	if(b->rp - b->base < len){
+		b = padblock(b, len);	/* not enough headroom in front of the data */
+		b = pullupblock(b, blocklen(b));
+	} else
+		b->rp -= len;
+	hnputs(ip->length, BLEN(b));
+	memmove(b->rp, ip, len);
+
+	/* recompute the ip header checksum */
+	ip = (Iphdr*)b->rp;
+	hnputs(ip->cksum, ipcsum(b->rp));
+	return b;
+
+raise:
+	netlog(f, Logcompress, "Bad Packet!\n");
+	comp->err = 1;	/* later packets without a slot index are dropped until this is cleared */
+	freeblist(b);
+	return nil;
+}
+
+Tcpc*
+compress_init(Tcpc *c)	/* reset (or allocate, when c is nil) the compression state; nil if out of memory */
+{
+	int i;
+	Hdr *h;
+
+	if(c == nil){
+		c = malloc(sizeof(Tcpc));
+		if(c == nil)
+			return nil;
+	}
+	memset(c, 0, sizeof(*c));
+	for(i = 0; i < MAX_STATES; i++){
+		h = &c->t[i];
+		h->ip = (Iphdr*)h->buf;
+		h->tcp = (Tcphdr*)(h->buf + 20);	/* TCP header follows a minimal 20 byte IP header */
+		h->len = 40;	/* 20 bytes IP + 20 bytes TCP */
+		h = &c->r[i];
+		h->ip = (Iphdr*)h->buf;
+		h->tcp = (Tcphdr*)(h->buf + 20);
+		h->len = 40;
+	}
+
+	return c;
+}
+
+ushort
+compress(Tcpc *tcp, Block *b, Fs *f)
+{
+	Iphdr *hdr;
+
+	/*
+	 * Only TCP packets whose frag field is zero below the
+	 * top two bits are candidates; the rest go out as plain IP.
+	 */
+	hdr = (Iphdr*)b->rp;
+	if((nhgets(hdr->frag) & 0x3fff) != 0
+	|| hdr->proto != IP_TCPPROTO)
+		return Pip;
+
+	/*
+	 * let the TCP header compressor decide how to send it
+	 */
+	return tcpcompress(tcp, b, f);
+}
+
+int
+compress_negotiate(Tcpc *tcp, uchar *data)
+{
+	if(data[0] == MAX_STATES - 1){	/* first byte must be exactly MAX_STATES-1 */
+		tcp->compressid = data[1];
+		return 0;
+	}
+	return -1;
+}
--- /dev/null
+++ b/os/ip.original/devip.c
@@ -1,0 +1,1417 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../ip/ip.h"
+
+enum
+{
+	Qtopdir=	1,		/* top level directory */
+	Qtopbase,
+	Qarp=		Qtopbase,
+	Qbootp,
+	Qndb,
+	Qiproute,
+	Qiprouter,
+	Qipselftab,
+	Qlog,
+
+	Qprotodir,			/* directory for a protocol */
+	Qprotobase,
+	Qclone=		Qprotobase,
+	Qstats,
+
+	Qconvdir,			/* directory for a conversation */
+	Qconvbase,
+	Qctl=		Qconvbase,
+	Qdata,
+	Qerr,
+	Qlisten,
+	Qlocal,
+	Qremote,
+	Qstatus,
+	Qsnoop,
+
+	Logtype=	5,		/* qid path: low 5 bits are the file type */
+	Masktype=	(1<<Logtype)-1,
+	Logconv=	12,		/* next 12 bits are the conversation index */
+	Maskconv=	(1<<Logconv)-1,
+	Shiftconv=	Logtype,
+	Logproto=	8,		/* next 8 bits are the protocol index */
+	Maskproto=	(1<<Logproto)-1,
+	Shiftproto=	Logtype + Logconv,
+
+	Nfs=		32,		/* size of ipfs[], the table of attachable stacks */
+};
+#define TYPE(x) ( ((u32)(x).path) & Masktype )
+#define CONV(x) ( (((u32)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
+
+static char network[] = "network";
+
+QLock fslock;
+Fs *ipfs[Nfs]; /* attached fs's */
+Queue *qlog;
+
+extern void nullmediumlink(void);
+extern void pktmediumlink(void);
+static long ndbwrite(Fs*, char*, ulong, int);
+extern void closeconv(Conv*);
+
+static int
+ip3gen(Chan *c, int i, Dir *dp)	/* fill dp for file type i inside a conversation directory */
+{
+	Qid q;
+	Conv *cv;
+	char *p;
+
+	cv = ipfs[c->dev]->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+	if(cv->owner == nil)
+		kstrdup(&cv->owner, eve);	/* an unowned conversation is given to eve */
+	mkqid(&q, QID(PROTO(c->qid), CONV(c->qid), i), 0, QTFILE);
+
+	switch(i) {
+	default:
+		return -1;	/* no such file */
+	case Qctl:
+		devdir(c, q, "ctl", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qdata:
+		devdir(c, q, "data", qlen(cv->rq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qerr:
+		devdir(c, q, "err", qlen(cv->eq), cv->owner, cv->perm, dp);
+		return 1;
+	case Qlisten:
+		devdir(c, q, "listen", 0, cv->owner, cv->perm, dp);
+		return 1;
+	case Qlocal:
+		p = "local";	/* local, remote and status share the 0444 devdir below */
+		break;
+	case Qremote:
+		p = "remote";
+		break;
+	case Qsnoop:
+		if(strcmp(cv->p->name, "ipifc") != 0)
+			return -1;	/* snoop exists only under the ipifc protocol */
+		devdir(c, q, "snoop", qlen(cv->sq), cv->owner, 0400, dp);
+		return 1;
+	case Qstatus:
+		p = "status";
+		break;
+	}
+	devdir(c, q, p, 0, cv->owner, 0444, dp);
+	return 1;
+}
+
+static int
+ip2gen(Chan *c, int i, Dir *dp)
+{
+	Qid q;
+	int isclone;
+
+	/* a protocol directory holds just two plain files */
+	isclone = i == Qclone;
+	if(!isclone && i != Qstats)
+		return -1;
+	mkqid(&q, QID(PROTO(c->qid), 0, i), 0, QTFILE);
+	if(isclone)
+		devdir(c, q, "clone", 0, network, 0666, dp);
+	else
+		devdir(c, q, "stats", 0, network, 0444, dp);
+	return 1;
+}
+
+static int
+ip1gen(Chan *c, int i, Dir *dp)	/* fill dp for top level file type i */
+{
+	Qid q;
+	char *p;
+	int prot;
+	int len = 0;
+	Fs *f;
+	extern ulong	kerndate;
+
+	f = ipfs[c->dev];
+
+	prot = 0666;	/* default mode; ipselftab overrides it below */
+	mkqid(&q, QID(0, 0, i), 0, QTFILE);
+	switch(i) {
+	default:
+		return -1;
+	case Qarp:
+		p = "arp";
+		break;
+	case Qbootp:
+		p = "bootp";
+		if(bootp == nil)
+			return 0;	/* no bootp hook installed: no entry is produced */
+		break;
+	case Qndb:
+		p = "ndb";
+		len = strlen(f->ndb);
+		q.vers = f->ndbvers;
+		break;
+	case Qiproute:
+		p = "iproute";
+		break;
+	case Qipselftab:
+		p = "ipselftab";
+		prot = 0444;
+		break;
+	case Qiprouter:
+		p = "iprouter";
+		break;
+	case Qlog:
+		p = "log";
+		break;
+	}
+	devdir(c, q, p, len, network, prot, dp);
+	if(i == Qndb && f->ndbmtime > kerndate)
+		dp->mtime = f->ndbmtime;	/* report ndb's own mtime when it is later than kerndate */
+	return 1;
+}
+
+static int
+ipgen(Chan *c, char*, Dirtab*, int, int s, Dir *dp)	/* directory generator handed to devwalk/devstat/devdirread */
+{
+	Qid q;
+	Conv *cv;
+	Fs *f;
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	case Qtopdir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%ud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->np) {	/* the first f->np entries are the protocol directories */
+			if(f->p[s]->connect == nil)
+				return 0;	/* protocol with no user interface */
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		s -= f->np;	/* the rest are the top level files */
+		return ip1gen(c, s+Qtopbase, dp);
+	case Qarp:
+	case Qbootp:
+	case Qndb:
+	case Qlog:
+	case Qiproute:
+	case Qiprouter:
+	case Qipselftab:
+		return ip1gen(c, TYPE(c->qid), dp);
+	case Qprotodir:
+		if(s == DEVDOTDOT){
+			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
+			sprint(up->genbuf, "#I%ud", c->dev);
+			devdir(c, q, up->genbuf, 0, network, 0555, dp);
+			return 1;
+		}
+		if(s < f->p[PROTO(c->qid)]->ac) {	/* one numbered directory per conversation */
+			cv = f->p[PROTO(c->qid)]->conv[s];
+			sprint(up->genbuf, "%d", s);
+			mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
+			devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
+			return 1;
+		}
+		s -= f->p[PROTO(c->qid)]->ac;	/* then clone and stats */
+		return ip2gen(c, s+Qprotobase, dp);
+	case Qclone:
+	case Qstats:
+		return ip2gen(c, TYPE(c->qid), dp);
+	case Qconvdir:
+		if(s == DEVDOTDOT){
+			s = PROTO(c->qid);
+			mkqid(&q, QID(s, 0, Qprotodir), 0, QTDIR);
+			devdir(c, q, f->p[s]->name, 0, network, 0555, dp);
+			return 1;
+		}
+		return ip3gen(c, s+Qconvbase, dp);
+	case Qctl:
+	case Qdata:
+	case Qerr:
+	case Qlisten:
+	case Qlocal:
+	case Qremote:
+	case Qstatus:
+	case Qsnoop:
+		return ip3gen(c, TYPE(c->qid), dp);
+	}
+	return -1;
+}
+
+static void
+ipreset(void)	/* device reset: link the null and pkt media, install the address print verbs */
+{
+	nullmediumlink();
+	pktmediumlink();
+
+	fmtinstall('i', eipfmt);	/* all five verbs are handled by eipfmt */
+	fmtinstall('I', eipfmt);
+	fmtinstall('E', eipfmt);
+	fmtinstall('V', eipfmt);
+	fmtinstall('M', eipfmt);
+}
+
+static Fs*
+ipgetfs(int dev)	/* return stack number dev, creating it on first use; nil if dev is out of range */
+{
+	extern void (*ipprotoinit[])(Fs*);
+	Fs *f;
+	int i;
+
+	if(dev < 0 || dev >= Nfs)	/* dev indexes ipfs[]; reject both ends */
+		return nil;
+
+	qlock(&fslock);
+	if(ipfs[dev] == nil){
+		f = smalloc(sizeof(Fs));
+		ip_init(f);
+		arpinit(f);
+		netloginit(f);
+		for(i = 0; ipprotoinit[i]; i++)
+			ipprotoinit[i](f);
+		f->dev = dev;
+		ipfs[dev] = f;	/* published only once fully initialised */
+	}
+	qunlock(&fslock);
+
+	return ipfs[dev];
+}
+
+IPaux*
+newipaux(char *owner, char *tag)	/* allocate per-channel state holding a copy of owner and tag */
+{
+	IPaux *a;
+	int n;
+
+	a = smalloc(sizeof(*a));
+	kstrdup(&a->owner, owner);
+	memset(a->tag, ' ', sizeof(a->tag));	/* tag is blank padded to its full width */
+	n = strlen(tag);
+	if(n > sizeof(a->tag))
+		n = sizeof(a->tag);	/* an over-long tag is truncated */
+	memmove(a->tag, tag, n);
+	return a;
+}
+
+#define ATTACHER(c) (((IPaux*)((c)->aux))->owner)
+
+static Chan*
+ipattach(char* spec)	/* attach to stack number spec; raises "bad specification" if out of range */
+{
+	Chan *c;
+	int dev;
+
+	dev = atoi(spec);
+	if(dev < 0 || dev >= Nfs)	/* atoi can yield a negative number; dev indexes ipfs[] */
+		error("bad specification");
+
+	ipgetfs(dev);
+	c = devattach('I', spec);
+	mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
+	c->dev = dev;
+
+	c->aux = newipaux(commonuser(), "none");
+
+	return c;
+}
+
+static Walkqid*
+ipwalk(Chan* c, Chan *nc, char **name, s32 nname)	/* walk using ipgen as the generator */
+{
+	IPaux *a = c->aux;
+	Walkqid* w;
+
+	w = devwalk(c, nc, name, nname, nil, 0, ipgen);
+	if(w != nil && w->clone != nil)
+		w->clone->aux = newipaux(a->owner, a->tag);	/* the clone gets its own aux; ipclose frees it per channel */
+	return w;
+}
+
+static s32
+ipstat(Chan* c, uchar* db, s32 n)	/* stat via the generic devstat, with ipgen supplying the entries */
+{
+	return devstat(c, db, n, nil, 0, ipgen);
+}
+
+static int
+incoming(void* arg)
+{
+	/* sleep condition for listen: true once a call is queued */
+	if(((Conv*)arg)->incall == nil)
+		return 0;
+	return 1;
+}
+
+static int m2p[] = {	/* open mode (omode&3) -> rwx permission bits needed */
+	[OREAD]		4,
+	[OWRITE]	2,
+	[ORDWR]		6,
+	[OEXEC]		1,	/* omode&3 can be 3: without this entry the lookup read past the table */
+};
+
+static Chan*
+ipopen(Chan* c, u32 omode)	/* open c; clone and listen re-point c->qid at a conversation's ctl file */
+{
+	Conv *cv, *nc;
+	Proto *p;
+	int perm;
+	Fs *f;
+
+	perm = m2p[omode&3];
+
+	f = ipfs[c->dev];
+
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qndb:
+		if(((omode&3) != OREAD || (omode & OTRUNC)) && !iseve())	/* OWRITE/ORDWR are values, not flag bits */
+			error(Eperm);
+		if((omode & OTRUNC) && (omode&3) != OREAD)	/* truncate for ORDWR as well as OWRITE */
+			f->ndb[0] = 0;
+		break;
+	case Qlog:
+		netlogopen(f);
+		break;
+	case Qiprouter:
+		iprouteropen(f);
+		break;
+	case Qiproute:
+		break;
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+	case Qstatus:
+	case Qremote:
+	case Qlocal:
+	case Qstats:
+	case Qbootp:
+	case Qipselftab:
+		if(omode != OREAD)
+			error(Eperm);
+		break;
+	case Qsnoop:
+		if(omode != OREAD)
+			error(Eperm);
+		p = f->p[PROTO(c->qid)];
+		cv = p->conv[CONV(c->qid)];
+		if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
+			error(Eperm);
+		incref(&cv->snoopers);	/* undone by ipclose */
+		break;
+	case Qclone:
+		p = f->p[PROTO(c->qid)];
+		qlock(p);
+		if(waserror()){
+			qunlock(p);
+			nexterror();
+		}
+		cv = Fsprotoclone(p, ATTACHER(c));
+		qunlock(p);
+		poperror();
+		if(cv == nil) {
+			error(Enodev);
+			break;
+		}
+		mkqid(&c->qid, QID(p->x, cv->x, Qctl), 0, QTFILE);	/* the channel now names the new conversation's ctl */
+		break;
+	case Qdata:
+	case Qctl:
+	case Qerr:
+		p = f->p[PROTO(c->qid)];
+		qlock(p);
+		cv = p->conv[CONV(c->qid)];
+		qlock(cv);
+		if(waserror()) {
+			qunlock(cv);
+			qunlock(p);
+			nexterror();
+		}
+		if((perm & (cv->perm>>6)) != perm) {	/* owner bits insufficient: must be owner AND pass the low bits */
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm);
+
+		}
+		cv->inuse++;
+		if(cv->inuse == 1){	/* first opener takes ownership */
+			kstrdup(&cv->owner, ATTACHER(c));
+			cv->perm = 0660;
+		}
+		qunlock(cv);
+		qunlock(p);
+		poperror();
+		break;
+	case Qlisten:
+		cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+		if((perm & (cv->perm>>6)) != perm) {
+			if(strcmp(ATTACHER(c), cv->owner) != 0)
+				error(Eperm);
+		 	if((perm & cv->perm) != perm)
+				error(Eperm);
+
+		}
+
+		if(cv->state != Announced)
+			error("not announced");
+
+		if(waserror()){
+			closeconv(cv);
+			nexterror();
+		}
+		qlock(cv);
+		cv->inuse++;	/* hold the announced conversation while waiting */
+		qunlock(cv);
+
+		nc = nil;
+		while(nc == nil) {
+			/* give up if we got a hangup */
+			if(qisclosed(cv->rq))
+				error("listen hungup");
+
+			qlock(&cv->listenq);
+			if(waserror()) {
+				qunlock(&cv->listenq);
+				nexterror();
+			}
+
+			/* wait for a connect */
+			sleep(&cv->listenr, incoming, cv);
+
+			qlock(cv);
+			nc = cv->incall;
+			if(nc != nil){
+				cv->incall = nc->next;
+				mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
+				kstrdup(&cv->owner, ATTACHER(c));
+			}
+			qunlock(cv);
+
+			qunlock(&cv->listenq);
+			poperror();
+		}
+		closeconv(cv);	/* drop the hold taken above */
+		poperror();
+		break;
+	}
+	c->mode = openmode(omode);
+	c->flag |= COPEN;
+	c->offset = 0;
+	return c;
+}
+
+static s32
+ipwstat(Chan *c, uchar *dp, s32 n)	/* change owner and/or mode of a conversation via its ctl or data file */
+{
+	Dir *d;
+	Conv *cv;
+	Fs *f;
+	Proto *p;
+
+	f = ipfs[c->dev];
+	switch(TYPE(c->qid)) {
+	default:
+		error(Eperm);
+		break;
+	case Qctl:
+	case Qdata:
+		break;
+	}
+
+	d = smalloc(sizeof(*d)+n);	/* Dir followed by room for its strings */
+	if(waserror()){
+		free(d);
+		nexterror();
+	}
+	n = convM2D(dp, n, d, (char*)&d[1]);
+	if(n == 0)
+		error(Eshortstat);
+	p = f->p[PROTO(c->qid)];
+	cv = p->conv[CONV(c->qid)];
+	if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+		error(Eperm);	/* only eve or the current owner may change it */
+	if(!emptystr(d->uid))
+		kstrdup(&cv->owner, d->uid);
+	if(d->mode != ~0UL)	/* all ones means "leave the mode alone" */
+		cv->perm = d->mode & 0777;
+	poperror();
+	free(d);
+	return n;
+}
+
+extern void
+closeconv(Conv *cv)	/* drop one use of cv; the last one returns it to the Idle state */
+{
+	Conv *nc;
+	Ipmulti *mp;
+
+	qlock(cv);
+
+	if(--cv->inuse > 0) {
+		qunlock(cv);	/* still in use elsewhere */
+		return;
+	}
+
+	/* close all incoming calls since no listen will ever happen */
+	for(nc = cv->incall; nc; nc = cv->incall){
+		cv->incall = nc->next;
+		closeconv(nc);
+	}
+	cv->incall = nil;
+
+	kstrdup(&cv->owner, network);	/* hand ownership back to "network" */
+	cv->perm = 0660;
+
+	while((mp = cv->multi) != nil)
+		ipifcremmulti(cv, mp->ma, mp->ia);	/* loop ends when cv->multi is empty */
+
+	cv->r = nil;
+	cv->rgen = 0;
+	cv->p->close(cv);	/* protocol specific teardown */
+	cv->state = Idle;
+	qunlock(cv);
+}
+
+static void
+ipclose(Chan* c)	/* undo what ipopen did for this file type, then free the channel's aux */
+{
+	Fs *f;
+
+	f = ipfs[c->dev];
+	switch(TYPE(c->qid)) {
+	default:
+		break;
+	case Qlog:
+		if(c->flag & COPEN)	/* only channels that were actually opened */
+			netlogclose(f);
+		break;
+	case Qiprouter:
+		if(c->flag & COPEN)
+			iprouterclose(f);
+		break;
+	case Qdata:
+	case Qctl:
+	case Qerr:
+		if(c->flag & COPEN)
+			closeconv(f->p[PROTO(c->qid)]->conv[CONV(c->qid)]);
+		break;
+	case Qsnoop:
+		if(c->flag & COPEN)
+			decref(&f->p[PROTO(c->qid)]->conv[CONV(c->qid)]->snoopers);
+		break;
+	}
+	free(((IPaux*)c->aux)->owner);	/* aux and its owner string were allocated by newipaux */
+	free(c->aux);
+}
+
+enum
+{
+ Statelen= 32*1024,
+};
+
+static s32
+ipread(Chan *ch, void *a, s32 n, s64 off)	/* read dispatcher, keyed on the file type in the qid */
+{
+	Conv *c;
+	Proto *x;
+	char *buf, *p;
+	s32 rv;
+	Fs *f;
+	ulong offset = off;
+
+	f = ipfs[ch->dev];
+
+	p = a;
+	switch(TYPE(ch->qid)) {
+	default:
+		error(Eperm);
+	case Qtopdir:
+	case Qprotodir:
+	case Qconvdir:
+		return devdirread(ch, a, n, 0, 0, ipgen);
+	case Qarp:
+		return arpread(f->arp, a, offset, n);
+ 	case Qbootp:
+ 		return bootpread(a, offset, n);
+ 	case Qndb:
+		return readstr(offset, a, n, f->ndb);
+	case Qiproute:
+		return routeread(f, a, offset, n);
+	case Qiprouter:
+		return iprouterread(f, a, n);
+	case Qipselftab:
+		return ipselftabread(f, a, offset, n);
+	case Qlog:
+		return netlogread(f, a, offset, n);
+	case Qctl:
+		sprint(up->genbuf, "%ud", CONV(ch->qid));	/* ctl reads back the conversation number */
+		return readstr(offset, p, n, up->genbuf);
+	case Qremote:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->remote == nil) {
+			sprint(buf, "%I!%d\n", c->raddr, c->rport);	/* default format when the protocol has none */
+		} else {
+			(*x->remote)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qlocal:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		if(x->local == nil) {
+			sprint(buf, "%I!%d\n", c->laddr, c->lport);
+		} else {
+			(*x->local)(c, buf, Statelen-2);
+		}
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qstatus:
+		buf = smalloc(Statelen);
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		(*x->state)(c, buf, Statelen-2);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	case Qdata:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->rq, a, n);	/* data, err and snoop each read from their own queue */
+	case Qerr:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->eq, a, n);
+	case Qsnoop:
+		c = f->p[PROTO(ch->qid)]->conv[CONV(ch->qid)];
+		return qread(c->sq, a, n);
+	case Qstats:
+		x = f->p[PROTO(ch->qid)];
+		if(x->stats == nil)
+			error("stats not implemented");
+		buf = smalloc(Statelen);
+		(*x->stats)(x, buf, Statelen);
+		rv = readstr(offset, p, n, buf);
+		free(buf);
+		return rv;
+	}
+}
+
+static Block*
+ipbread(Chan* ch, s32 n, u32 offset)	/* block read: data files come straight off the read queue */
+{
+	Conv *c;
+	Proto *x;
+	Fs *f;
+
+	switch(TYPE(ch->qid)){
+	case Qdata:
+		f = ipfs[ch->dev];
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		return qbread(c->rq, n);
+	default:
+		return devbread(ch, n, offset);	/* everything else uses the generic path */
+	}
+}
+
+/*
+ * set local address to be that of the ifc closest to remote address
+ */
+static void
+setladdr(Conv* c)
+{
+	findlocalip(c->p->f, c->laddr, c->raddr);	/* result goes into c->laddr, chosen from c->raddr */
+}
+
+/*
+ * set a local port making sure the quad of raddr,rport,laddr,lport is unique
+ */
+static char*
+setluniqueport(Conv* c, int lport)	/* returns nil on success, else an error string */
+{
+	Proto *p;
+	Conv *xp;
+	int x;
+
+	p = c->p;
+
+	qlock(p);
+	for(x = 0; x < p->nc; x++){
+		xp = p->conv[x];
+		if(xp == nil)
+			break;	/* first nil slot ends the scan */
+		if(xp == c)
+			continue;
+		if((xp->state == Connected || xp->state == Announced)
+		&& xp->lport == lport
+		&& xp->rport == c->rport
+		&& ipcmp(xp->raddr, c->raddr) == 0
+		&& ipcmp(xp->laddr, c->laddr) == 0){
+			qunlock(p);
+			return "address in use";
+		}
+	}
+	c->lport = lport;	/* no clash found; assigned while still holding the protocol lock */
+	qunlock(p);
+	return nil;
+}
+
+/*
+ * pick a local port and set it
+ */
+extern void
+setlport(Conv* c)
+{
+	Proto *p;
+	ushort *pp;
+	int x, found;
+
+	p = c->p;
+	if(c->restricted)
+		pp = &p->nextrport;	/* restricted and unrestricted ports use separate counters */
+	else
+		pp = &p->nextport;
+	qlock(p);
+	for(;;(*pp)++){
+		/*
+		 * Fsproto initialises p->nextport to 0 and the restricted
+		 * ports (p->nextrport) to 600.
+		 * Restricted ports must lie between 600 and 1024.
+		 * For the initial condition or if the unrestricted port number
+		 * has wrapped round, select a random port between 5000 and 1<<15
+		 * to start at.
+		 */
+		if(c->restricted){
+			if(*pp >= 1024)
+				*pp = 600;
+		}
+		else while(*pp < 5000)
+			*pp = nrand(1<<15);
+
+		found = 0;
+		for(x = 0; x < p->nc; x++){
+			if(p->conv[x] == nil)
+				break;
+			if(p->conv[x]->lport == *pp){
+				found = 1;	/* candidate already taken: try the next one */
+				break;
+			}
+		}
+		if(found == 0)
+			break;
+	}
+	c->lport = (*pp)++;	/* take the port and advance the counter for the next caller */
+	qunlock(p);
+}
+
+/*
+ * set a local address and port from a string of the form
+ * [address!]port[!r]
+ */
+static char*
+setladdrport(Conv* c, char* str, int announcing)	/* returns nil or an error string; str is modified in place */
+{
+	char *p;
+	char *rv;
+	ushort lport;
+	uchar addr[IPaddrlen];
+
+	rv = nil;
+
+	/*
+	 *  ignore restricted part if it exists.  it's
+	 *  meaningless on local ports.
+	 */
+	p = strchr(str, '!');
+	if(p != nil){
+		*p++ = 0;	/* split str at the '!' */
+		if(strcmp(p, "r") == 0)
+			p = nil;
+	}
+
+	c->lport = 0;
+	if(p == nil){
+		if(announcing)
+			ipmove(c->laddr, IPnoaddr);
+		else
+			setladdr(c);
+		p = str;	/* no address part: the whole string is the port */
+	} else {
+		if(strcmp(str, "*") == 0)
+			ipmove(c->laddr, IPnoaddr);
+		else {
+			parseip(addr, str);
+			if(ipforme(c->p->f, addr))
+				ipmove(c->laddr, addr);
+			else
+				return "not a local IP address";
+		}
+	}
+
+	/* one process can get all connections */
+	if(announcing && strcmp(p, "*") == 0){
+		if(!iseve())
+			error(Eperm);
+		return setluniqueport(c, 0);
+	}
+
+	lport = atoi(p);
+	if(lport <= 0)
+		setlport(c);	/* port 0: pick one automatically */
+	else
+		rv = setluniqueport(c, lport);
+	return rv;
+}
+
+static char*
+setraddrport(Conv* c, char* str)	/* parse "address!port[!r]" into c; nil or an error string */
+{
+	char *p;
+
+	p = strchr(str, '!');
+	if(p == nil)
+		return "malformed address";
+	*p++ = 0;	/* split str into address and port */
+	parseip(c->raddr, str);
+	c->rport = atoi(p);
+	p = strchr(p, '!');
+	if(p){
+		if(strstr(p, "!r") != nil)
+			c->restricted = 1;	/* a trailing "!r" marks the conversation restricted */
+	}
+	return nil;
+}
+
+/*
+ * called by protocol connect routine to set addresses
+ */
+char*
+Fsstdconnect(Conv *c, char *argv[], int argc)	/* returns nil or an error string */
+{
+	char *p;
+
+	switch(argc) {
+	default:
+		return "bad args to connect";
+	case 2:	/* remote address only: choose the local address and port */
+		p = setraddrport(c, argv[1]);
+		if(p != nil)
+			return p;
+		setladdr(c);
+		setlport(c);
+		break;
+	case 3:	/* remote and local address given */
+		p = setraddrport(c, argv[1]);
+		if(p != nil)
+			return p;
+		p = setladdrport(c, argv[2], 0);
+		if(p != nil)
+			return p;
+	}
+
+	if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+		memcmp(c->laddr, v4prefix, IPv4off) == 0)
+		|| ipcmp(c->raddr, IPnoaddr) == 0)
+		c->ipversion = V4;	/* both ends carry the v4 prefix, or the remote is unset */
+	else
+		c->ipversion = V6;
+
+	return nil;
+}
+/*
+ *  initiate connection and sleep till it is set up
+ */
+static int
+connected(void* a)	/* sleep condition used by connectctlmsg */
+{
+	return ((Conv*)a)->state == Connected;
+}
+static void
+connectctlmsg(Proto *x, Conv *c, Cmdbuf *cb)	/* "connect" ctl request; entered and left with c locked */
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Connecting;
+	c->cerr[0] = '\0';
+	if(x->connect == nil)
+		error("connect not supported");
+	p = x->connect(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	qunlock(c);	/* release c while sleeping */
+	if(waserror()){
+		qlock(c);	/* re-take the lock so the caller's error path can unlock it */
+		nexterror();
+	}
+	sleep(&c->cr, connected, c);
+	qlock(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);	/* report an error recorded in c->cerr */
+}
+
+/*
+ * called by protocol announce routine to set addresses
+ */
+char*
+Fsstdannounce(Conv* c, char* argv[], int argc)	/* returns nil or an error string */
+{
+	memset(c->raddr, 0, sizeof(c->raddr));	/* an announced conversation has no remote end */
+	c->rport = 0;
+	switch(argc){
+	default:
+		return "bad args to announce";
+	case 2:
+		return setladdrport(c, argv[1], 1);
+	}
+}
+
+/*
+ *  initiate announcement and sleep till it is set up
+ */
+static int
+announced(void* a)	/* sleep condition used by announcectlmsg */
+{
+	return ((Conv*)a)->state == Announced;
+}
+static void
+announcectlmsg(Proto *x, Conv *c, Cmdbuf *cb)	/* "announce" ctl request; entered and left with c locked */
+{
+	char *p;
+
+	if(c->state != 0)
+		error(Econinuse);
+	c->state = Announcing;
+	c->cerr[0] = '\0';
+	if(x->announce == nil)
+		error("announce not supported");
+	p = x->announce(c, cb->f, cb->nf);
+	if(p != nil)
+		error(p);
+
+	qunlock(c);	/* release c while sleeping */
+	if(waserror()){
+		qlock(c);	/* re-take the lock so the caller's error path can unlock it */
+		nexterror();
+	}
+	sleep(&c->cr, announced, c);
+	qlock(c);
+	poperror();
+
+	if(c->cerr[0] != '\0')
+		error(c->cerr);	/* report an error recorded in c->cerr */
+}
+
+/*
+ * called by protocol bind routine to set addresses
+ */
+char*
+Fsstdbind(Conv* c, char* argv[], int argc)	/* returns nil or an error string */
+{
+	switch(argc){
+	default:
+		return "bad args to bind";
+	case 2:
+		return setladdrport(c, argv[1], 0);	/* 0: not announcing */
+	}
+}
+
+static void
+bindctlmsg(Proto *x, Conv *c, Cmdbuf *cb)
+{
+	char *err;
+
+	/*
+	 * a protocol without its own bind gets the standard one
+	 */
+	err = x->bind != nil ? x->bind(c, cb->f, cb->nf) : Fsstdbind(c, cb->f, cb->nf);
+	if(err != nil)
+		error(err);
+}
+
+static void
+tosctlmsg(Conv *c, Cmdbuf *cb)
+{
+	int tos;
+
+	tos = cb->nf >= 2 ? atoi(cb->f[1]) : 0;	/* no argument resets it to 0 */
+	c->tos = tos;
+}
+
+static void
+ttlctlmsg(Conv *c, Cmdbuf *cb)
+{
+	int ttl;
+
+	ttl = cb->nf >= 2 ? atoi(cb->f[1]) : MAXTTL;	/* no argument restores MAXTTL */
+	c->ttl = ttl;
+}
+
+static s32
+ipwrite(Chan* ch, void *v, s32 n, s64 off)	/* write dispatcher, keyed on the file type in the qid */
+{
+	Conv *c;
+	Proto *x;
+	char *p;
+	Cmdbuf *cb;
+	uchar ia[IPaddrlen], ma[IPaddrlen];
+	Fs *f;
+	char *a;
+
+	a = v;
+	f = ipfs[ch->dev];
+
+	switch(TYPE(ch->qid)){
+	default:
+		error(Eperm);
+	case Qdata:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+
+		if(c->wq == nil)
+			error(Eperm);	/* conversation has no write queue */
+
+		qwrite(c->wq, a, n);
+		break;
+	case Qarp:
+		return arpwrite(f, a, n);
+	case Qiproute:
+		return routewrite(f, ch, a, n);
+	case Qlog:
+		netlogctl(f, a, n);
+		return n;
+	case Qndb:
+		return ndbwrite(f, a, off, n);
+	case Qctl:
+		x = f->p[PROTO(ch->qid)];
+		c = x->conv[CONV(ch->qid)];
+		cb = parsecmd(a, n);
+
+		qlock(c);
+		if(waserror()) {
+			qunlock(c);	/* handlers below raise with c locked */
+			free(cb);
+			nexterror();
+		}
+		if(cb->nf < 1)
+			error("short control request");
+		if(strcmp(cb->f[0], "connect") == 0)
+			connectctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "announce") == 0)
+			announcectlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "bind") == 0)
+			bindctlmsg(x, c, cb);
+		else if(strcmp(cb->f[0], "ttl") == 0)
+			ttlctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "tos") == 0)
+			tosctlmsg(c, cb);
+		else if(strcmp(cb->f[0], "ignoreadvice") == 0)
+			c->ignoreadvice = 1;
+		else if(strcmp(cb->f[0], "addmulti") == 0){
+			if(cb->nf < 2)
+				error("addmulti needs interface address");
+			if(cb->nf == 2){	/* one argument: the group is the conversation's remote address */
+				if(!ipismulticast(c->raddr))
+					error("addmulti for a non multicast address");
+				parseip(ia, cb->f[1]);
+				ipifcaddmulti(c, c->raddr, ia);
+			} else {	/* two arguments: interface address, then multicast address */
+				parseip(ma, cb->f[2]);
+				if(!ipismulticast(ma))
+					error("addmulti for a non multicast address");
+				parseip(ia, cb->f[1]);
+				ipifcaddmulti(c, ma, ia);
+			}
+		} else if(strcmp(cb->f[0], "remmulti") == 0){
+			if(cb->nf < 2)
+				error("remmulti needs interface address");
+			if(!ipismulticast(c->raddr))
+				error("remmulti for a non multicast address");
+			parseip(ia, cb->f[1]);
+			ipifcremmulti(c, c->raddr, ia);
+		} else if(x->ctl != nil) {	/* anything else goes to the protocol's own ctl handler */
+			p = x->ctl(c, cb->f, cb->nf);
+			if(p != nil)
+				error(p);
+		} else
+			error("unknown control request");
+		qunlock(c);
+		free(cb);
+		poperror();
+	}
+	return n;
+}
+
+/*
+ * block write entry point.  Only data files are handled here:
+ * the block (flattened first if it is a chain) is queued on the
+ * conversation's write queue and its length returned.  Every other
+ * file type goes through devbwrite.
+ */
+static s32
+ipbwrite(Chan* ch, Block* bp, u32 offset)
+{
+	Conv *cv;
+	Proto *pr;
+	Fs *fs;
+	int len;
+
+	if(TYPE(ch->qid) != Qdata)
+		return devbwrite(ch, bp, offset);
+
+	fs = ipfs[ch->dev];
+	pr = fs->p[PROTO(ch->qid)];
+	cv = pr->conv[CONV(ch->qid)];
+	if(cv->wq == nil)
+		error(Eperm);
+
+	if(bp->next != nil)
+		bp = concatblock(bp);
+	/* take the length before queueing the block */
+	len = BLEN(bp);
+	qbwrite(cv->wq, bp);
+	return len;
+}
+
+/*
+ * device table entry for the ip device, character 'I', name "ip".
+ * init, shutdown, create and remove use the generic dev* routines;
+ * the remaining slots are the ip* routines of this file.
+ */
+Dev ipdevtab = {
+ 'I',
+ "ip",
+
+ ipreset,
+ devinit,
+ devshutdown,
+ ipattach,
+ ipwalk,
+ ipstat,
+ ipopen,
+ devcreate,
+ ipclose,
+ ipread,
+ ipbread,
+ ipwrite,
+ ipbwrite,
+ devremove,
+ ipwstat,
+};
+
+/*
+ * register protocol p with stack f.
+ * returns -1 when the protocol table is full or when p's ip
+ * protocol number is already claimed, 0 on success.
+ * panics if the conversation table cannot be allocated.
+ */
+int
+Fsproto(Fs *f, Proto *p)
+{
+	int slot;
+
+	slot = f->np;
+	if(slot >= Maxproto)
+		return -1;
+
+	p->f = f;
+
+	if(p->ipproto > 0){
+		if(f->t2p[p->ipproto] != nil)
+			return -1;
+		f->t2p[p->ipproto] = p;
+	}
+
+	p->qid.type = QTDIR;
+	p->qid.path = QID(slot, 0, Qprotodir);
+
+	/* one slot more than nc is allocated */
+	p->conv = malloc((p->nc+1) * sizeof(Conv*));
+	if(p->conv == nil)
+		panic("Fsproto");
+
+	p->x = slot;
+	p->nextport = 0;
+	p->nextrport = 600;
+	f->p[slot] = p;
+	f->np = slot + 1;
+
+	return 0;
+}
+
+/*
+ * return true if a protocol is registered in f
+ * for ip protocol number proto (i.e. it is built in)
+ */
+int
+Fsbuiltinproto(Fs* f, uchar proto)
+{
+	if(f->t2p[proto] == nil)
+		return 0;
+	return 1;
+}
+
+/*
+ * called with protocol locked
+ *
+ * find or create a free conversation of protocol p, reset it to
+ * its initial state and mark it in use, owned by user.
+ * returns nil when every slot is busy and the protocol's gc
+ * routine (if any) could not free one.
+ */
+Conv*
+Fsprotoclone(Proto *p, char *user)
+{
+ Conv *c, **pp, **ep;
+
+retry:
+ c = nil;
+ ep = &p->conv[p->nc];
+ for(pp = p->conv; pp < ep; pp++) {
+ c = *pp;
+ if(c == nil){
+ /* empty slot: allocate a new Conv; it stays qlocked until the end of the function */
+ c = malloc(sizeof(Conv));
+ if(c == nil)
+ error(Enomem);
+ qlock(c);
+ c->p = p;
+ c->x = pp - p->conv;
+ if(p->ptclsize != 0){
+ c->ptcl = malloc(p->ptclsize);
+ if(c->ptcl == nil) {
+ free(c);
+ error(Enomem);
+ }
+ }
+ *pp = c;
+ p->ac++;
+ /* NOTE(review): the qopen result is not checked */
+ c->eq = qopen(1024, Qmsg, 0, 0);
+ (*p->create)(c);
+ break;
+ }
+ if(canqlock(c)){
+ /*
+ * make sure both processes and protocol
+ * are done with this Conv
+ */
+ if(c->inuse == 0 && (p->inuse == nil || (*p->inuse)(c) == 0))
+ break;
+
+ qunlock(c);
+ }
+ }
+ if(pp >= ep) {
+ /* no slot found: let the protocol reclaim conversations and rescan */
+ if(p->gc != nil && (*p->gc)(p))
+ goto retry;
+ return nil;
+ }
+
+ /* both ways out of the loop leave c qlocked */
+ c->inuse = 1;
+ kstrdup(&c->owner, user);
+ c->perm = 0660;
+ c->state = Idle;
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->r = nil;
+ c->rgen = 0;
+ c->lport = 0;
+ c->rport = 0;
+ c->restricted = 0;
+ c->ttl = MAXTTL;
+ c->tos = DFLTTOS;
+ qreopen(c->rq);
+ qreopen(c->wq);
+ qreopen(c->eq);
+
+ qunlock(c);
+ return c;
+}
+
+/*
+ * called when a connect or announce has finished.
+ * a non-empty msg is recorded in c->cerr; the state moves
+ * Announcing -> Announced or Connecting -> Connected, and whoever
+ * sleeps on c->cr is woken.  always returns 0.
+ */
+int
+Fsconnected(Conv* c, char* msg)
+{
+	if(msg != nil && *msg != '\0')
+		kstrcpy(c->cerr, msg, sizeof(c->cerr));
+
+	if(c->state == Announcing)
+		c->state = Announced;
+	else if(c->state == Connecting)
+		c->state = Connected;
+
+	wakeup(&c->cr);
+	return 0;
+}
+
+/*
+ * protocol that should receive packets of ip protocol number proto:
+ * the ipmux protocol when one is set in f, otherwise the one
+ * registered for proto (possibly nil).
+ */
+Proto*
+Fsrcvpcol(Fs* f, uchar proto)
+{
+	if(f->ipmux == nil)
+		return f->t2p[proto];
+	return f->ipmux;
+}
+
+/*
+ * like Fsrcvpcol but ignores ipmux: just the protocol
+ * registered for proto, or nil.
+ */
+Proto*
+Fsrcvpcolx(Fs *f, uchar proto)
+{
+	Proto *p;
+
+	p = f->t2p[proto];
+	return p;
+}
+
+/*
+ * called with protocol locked
+ *
+ * create a conversation for an incoming call on listening
+ * conversation c and append it to c's incall list.
+ * returns nil if Maxincall calls are already pending or no
+ * conversation is free; otherwise returns the new conversation,
+ * already in state Connected, after waking c->listenr.
+ */
+Conv*
+Fsnewcall(Conv *c, uchar *raddr, ushort rport, uchar *laddr, ushort lport, uchar version)
+{
+ Conv *nc;
+ Conv **l;
+ int i;
+
+ qlock(c);
+ /* count pending calls; l ends up pointing at the tail link */
+ i = 0;
+ for(l = &c->incall; *l; l = &(*l)->next)
+ i++;
+ if(i >= Maxincall) {
+ qunlock(c);
+ return nil;
+ }
+
+ /* find a free conversation */
+ nc = Fsprotoclone(c->p, network);
+ if(nc == nil) {
+ qunlock(c);
+ return nil;
+ }
+ ipmove(nc->raddr, raddr);
+ nc->rport = rport;
+ ipmove(nc->laddr, laddr);
+ nc->lport = lport;
+ nc->next = nil;
+ *l = nc;
+ nc->state = Connected;
+ nc->ipversion = version;
+
+ qunlock(c);
+
+ wakeup(&c->listenr);
+
+ return nc;
+}
+
+/*
+ * write n bytes of a at offset off into the stack's ndb text buffer.
+ * off may not lie beyond the current end of the string, and the
+ * result must fit in f->ndb; otherwise Eio is raised.
+ * the buffer is re-terminated right after the written bytes, so a
+ * write truncates whatever followed.  bumps ndbvers and ndbmtime.
+ */
+static long
+ndbwrite(Fs *f, char *a, ulong off, int n)
+{
+ if(off > strlen(f->ndb))
+ error(Eio);
+ /* NOTE(review): off+n is computed in unsigned arithmetic; assumes n is never negative */
+ if(off+n >= sizeof(f->ndb)-1)
+ error(Eio);
+ memmove(f->ndb+off, a, n);
+ f->ndb[off+n] = 0;
+ f->ndbvers++;
+ f->ndbmtime = seconds();
+ return n;
+}
+
+/*
+ * number of conversations to configure: Nchans, or four times
+ * that on machines with at least 128MB of memory.
+ */
+ulong
+scalednconv(void)
+{
+	return (conf.npage*BY2PG >= 128*MB) ? Nchans*4 : Nchans;
+}
--- /dev/null
+++ b/os/ip.original/dhcp.c
@@ -1,0 +1,447 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+#include "ppp.h"
+
+Ipaddr pppdns[2];
+
+static ulong fsip;
+static ulong auip;
+static ulong gwip;
+static ulong ipmask;
+static ulong ipaddr;
+static ulong dns1ip;
+static ulong dns2ip;
+
+int dhcpmsgtype;
+int debug=0;
+enum
+{
+ Bootrequest = 1,
+ Bootreply = 2,
+};
+
+/*
+ * bootp/dhcp message as read from and written to the udp data file:
+ * the udp "oldheaders" address prefix followed by the bootp packet.
+ */
+typedef struct Bootp
+{
+ /* udp.c oldheader */
+ uchar raddr[IPaddrlen];
+ uchar laddr[IPaddrlen];
+ uchar rport[2];
+ uchar lport[2];
+ /* bootp itself */
+ uchar op; /* opcode */
+ uchar htype; /* hardware type */
+ uchar hlen; /* hardware address len */
+ uchar hops; /* hops */
+ uchar xid[4]; /* a random number */
+ uchar secs[2]; /* elapsed since client started booting */
+ uchar flags[2]; /* flags */
+ uchar ciaddr[4]; /* client IP address (client tells server) */
+ uchar yiaddr[4]; /* client IP address (server tells client) */
+ uchar siaddr[4]; /* server IP address */
+ uchar giaddr[4]; /* gateway IP address */
+ uchar chaddr[16]; /* client hardware address */
+ uchar sname[64]; /* server host name (optional) */
+ uchar file[128]; /* boot file name */
+ uchar vend[128]; /* vendor-specific goo 340 */
+} Bootp;
+
+static Bootp req;
+static Proc* rcvprocp;
+static int recv;
+static int done;
+static Rendez bootpr;
+static char rcvbuf[512+2*IPaddrlen+2*2]; /* 576 */
+static uchar sid[4];
+static ulong iplease;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dns1ip d.d.d.d
+ * dns2ip d.d.d.d
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ Last change: SUN 13 Sep 2001 4:36 pm
+ */
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.
+ *
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ */
+/*
+ * walk the option list in a reply's vendor area, storing the
+ * options of interest in the file-level variables (ipmask, gwip,
+ * dns1ip, dns2ip, auip, fsip, iplease, sid).
+ * returns the DHCP message type found (0 if there was none, i.e.
+ * plain bootp) or -1 if the magic cookie or an option length is bad.
+ *
+ * NOTE(review): the walk is bounded only by the 0 / 0xFF terminator
+ * bytes; option lengths are not checked against the end of the buffer.
+ */
+static int
+parsevend(uchar* pvend)
+{
+ uchar *vend=pvend;
+ int dhcpmsg=0;
+ /* The field must start with 99.130.83.99 to be compliant */
+ if ((vend[0] != 99) || (vend[1] != 130) || (vend[2] != 83) || (vend[3] != 99)){
+ print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+ return -1;
+ }
+
+ /* Skip over the magic cookie */
+ vend += 4;
+
+ /* each option is: code byte, length byte, then length bytes of data */
+ while ((vend[0] != 0) && (vend[0] != 0xFF)) {
+ int i;
+//
+ if(debug){
+ print(">>>Opt[%d] [%d]", vend[0], vend[1]);
+ for(i=0; i<vend[1]; i++)
+ print(" %2.2x", vend[i+2]);
+ print("\n");
+ }
+//
+ switch (vend[0]) {
+ case 1: /* Subnet mask field */
+ /* There must be only one subnet mask */
+ if (vend[1] == 4)
+ ipmask = (vend[2]<<24)|(vend[3]<<16)| (vend[4]<<8)| vend[5];
+ else{
+ return -1;
+ }
+ break;
+
+ case 3: /* Gateway/router field */
+ /* We are only concerned with first address */
+ if (vend[1] >0 && vend[1]%4==0)
+ gwip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+ else
+ return -1;
+ break;
+ case 6: /* domain name server */
+ /* keep the first two addresses */
+ if(vend[1]>0 && vend[1] %4==0){
+ dns1ip=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+ if(vend[1]>4)
+ dns2ip=(vend[6]<<24)|(vend[7]<<16)|(vend[8]<<8)|vend[9];
+ }else
+ return -1;
+ break;
+
+ case 8: /* "Cookie server" (auth server) field */
+ /* We are only concerned with first address */
+ if (vend[1] > 0 && vend[1]%4==0)
+ auip = (vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+ else
+ return -1;
+ break;
+
+ case 11: /* "Resource loc server" (file server) field */
+ /* We are only concerned with first address */
+ if (vend[1] > 0 && vend[1]%4==0)
+ fsip = (vend[2]<<24)| (vend[3]<<16)| (vend[4]<<8)| vend[5];
+ else
+ return -1;
+ break;
+ case 51: /* ip lease time */
+ if(vend[1]==4){
+ iplease=(vend[2]<<24)|(vend[3]<<16)|(vend[4]<<8)|vend[5];
+ }else
+ return -1;
+ break;
+ case 53: /* DHCP message type */
+ if(vend[1]==1)
+ dhcpmsg=vend[2];
+ else
+ return -1;
+ break;
+ case 54: /* server identifier */
+ if(vend[1]==4){
+ memmove(sid, vend+2, 4);
+ }else
+ return -1;
+ break;
+
+ default: /* Everything else is skipped */
+ break;
+ }
+
+ /* Skip over the field */
+ vend += vend[1] + 2;
+ }
+ if(debug)
+ print(">>>Opt[%d] [%d]\n", vend[0], vend[1]);
+ return dhcpmsg;
+}
+
+/*
+ * debugging aid: walk the option list of an outgoing vendor area.
+ * all the print calls are commented out, so at present this only
+ * steps through the options and has no visible effect.
+ */
+static void
+dispvend(uchar* pvend)
+{
+ uchar *vend=pvend;
+
+ //print("<<<Magic : %2.2x%2.2x%2.2x%2.2x\n", vend[0], vend[1], vend[2], vend[3]);
+
+ vend += 4; /* Skip over the magic cookie */
+ while ((vend[0] != 0) && (vend[0] != 0xFF)) {
+ // int i;
+ // print("<<<Opt[%d] [%d]", vend[0], vend[1]);
+ //for(i=0; i<vend[1]; i++)
+ // print(" %2.2x", vend[i+2]);
+ //print("\n");
+
+ vend += vend[1] + 2;
+ }
+ //print("<<<Opt[ %2.2x] [%2.2x]\n", vend[0], vend[1]);
+}
+
+/*
+ * receiver process started by rbootp; a carries the data file descriptor.
+ * reads replies until done is set or a read fails.  for a reply
+ * addressed to our hardware address it records the offered address
+ * in ipaddr, parses the options, and when the message type is higher
+ * than any seen so far publishes it in dhcpmsgtype and wakes rbootp.
+ * exits after a plain bootp reply (0), a DHCPACK (5) or a DHCPNAK (6).
+ */
+static void
+rcvbootp(void *a)
+{
+ int n, fd, dhcp;
+ Bootp *rp;
+
+ if(waserror())
+ pexit("", 0);
+ rcvprocp = up; /* store for postnote below */
+ fd = (int)a;
+ while(done == 0) {
+ if(debug)
+ print("rcvbootp:looping\n");
+
+ n = kread(fd, rcvbuf, sizeof(rcvbuf));
+ if(n <= 0)
+ break;
+ rp = (Bootp*)rcvbuf;
+ /* only ethernet replies carrying the hardware address we sent */
+ if (memcmp(req.chaddr, rp->chaddr, 6) == 0 && rp->htype == 1 && rp->hlen == 6) {
+ ipaddr = (rp->yiaddr[0]<<24)| (rp->yiaddr[1]<<16)| (rp->yiaddr[2]<<8)| rp->yiaddr[3];
+ if(debug)
+ print("ipaddr = %2.2x %2.2x %2.2x %2.2x \n", rp->yiaddr[0], rp->yiaddr[1], rp->yiaddr[2], rp->yiaddr[3]);
+ //memmove(req.siaddr, rp->siaddr, 4); /* siaddr */
+ dhcp = parsevend(rp->vend);
+
+ /* rbootp starts dhcpmsgtype at -2, so a parse failure (-1) also gets through */
+ if(dhcpmsgtype < dhcp){
+ dhcpmsgtype=dhcp;
+ recv = 1;
+ wakeup(&bootpr);
+ if(dhcp==0 || dhcp ==5 || dhcp == 6 )
+ break;
+ }
+ }
+ }
+ poperror();
+ rcvprocp = nil;
+
+ if(debug)
+ print("rcvbootp exit\n");
+ pexit("", 0);
+}
+
+/*
+ * configure interface ifc by DHCP.
+ * adds a temporary 0.0.0.0 address, announces udp!*!68, starts
+ * rcvbootp to collect replies, broadcasts DHCPDISCOVER (up to four
+ * times, one second apart) and, once an offer has arrived, sends
+ * DHCPREQUEST and waits two seconds for the DHCPACK.
+ * afterwards the temporary address is replaced by ipaddr/ipmask and,
+ * if a gateway was learned, a default route is added.
+ * returns nil, or an error string if the udp setup failed.
+ *
+ * NOTE(review): the code after the loop runs whether or not an ACK
+ * was received; the early "return nil" for a plain bootp reply skips
+ * that code and leaves dfd open.
+ */
+static char*
+rbootp(Ipifc *ifc)
+{
+ int cfd, dfd, tries, n;
+ char ia[5+3*16], im[16], *av[3];
+ uchar nipaddr[4], ngwip[4], nipmask[4];
+ char dir[Maxpath];
+ static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+ uchar *vend;
+
+ /*
+ * broadcast bootp's till we get a reply,
+ * or fixed number of tries
+ */
+ if(debug)
+ print("dhcp: bootp() called\n");
+ tries = 0;
+ av[1] = "0.0.0.0";
+ av[2] = "0.0.0.0";
+ ipifcadd(ifc, av, 3, 0, nil);
+
+ cfd = kannounce("udp!*!68", dir);
+ if(cfd < 0)
+ return "dhcp announce failed";
+ strcat(dir, "/data");
+ if(kwrite(cfd, "headers", 7) < 0){
+ kclose(cfd);
+ return "dhcp ctl headers failed";
+ }
+ kwrite(cfd, "oldheaders", 10);
+ dfd = kopen(dir, ORDWR);
+ if(dfd < 0){
+ kclose(cfd);
+ return "dhcp open data failed";
+ }
+ kclose(cfd);
+
+ /* the loop body runs at most once: tries is incremented on entry */
+ while(tries<1){
+ tries++;
+ memset(sid, 0, 4);
+ iplease=0;
+ dhcpmsgtype=-2;
+/* DHCPDISCOVER*/
+ done = 0;
+ recv = 0;
+ kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG);
+ /* Prepare DHCPDISCOVER */
+ memset(&req, 0, sizeof(req));
+ ipmove(req.raddr, IPv4bcast);
+ hnputs(req.rport, 67);
+ req.op = Bootrequest;
+ req.htype = 1; /* ethernet (all we know) */
+ req.hlen = 6; /* ethernet (all we know) */
+
+ memmove(req.chaddr, ifc->mac, 6); /* Hardware MAC address */
+ //ipv4local(ifc, req.ciaddr); /* Fill in the local IP address if we know it */
+ memset(req.file, 0, sizeof(req.file));
+ vend=req.vend;
+ memmove(vend, vend_rfc1048, 4); vend+=4;
+ *vend++=53; *vend++=1;*vend++=1; /* dhcp msg type==1, dhcpdiscover */
+
+ *vend++=61;*vend++=7;*vend++=1; /* client identifier */
+ memmove(vend, ifc->mac, 6);vend+=6;
+ *vend=0xff;
+
+ if(debug)
+ dispvend(req.vend);
+ for(n=0;n<4;n++){
+ if(kwrite(dfd, &req, sizeof(req))<0) /* SEND DHCPDISCOVER */
+ print("DHCPDISCOVER: %r");
+
+ tsleep(&bootpr, return0, 0, 1000); /* wait DHCPOFFER */
+ if(debug)
+ print("[DHCP] DISCOVER: msgtype = %d\n", dhcpmsgtype);
+
+ if(dhcpmsgtype==2) /* DHCPOFFER */
+ break;
+ else if(dhcpmsgtype==0) /* bootp */
+ return nil;
+ else if(dhcpmsgtype== -2) /* time out */
+ continue;
+ else
+ break;
+
+ }
+ if(dhcpmsgtype!=2)
+ continue;
+
+/* DHCPREQUEST */
+ memset(req.vend, 0, sizeof(req.vend));
+ vend=req.vend;
+ memmove(vend, vend_rfc1048, 4);vend+=4;
+
+ *vend++=53; *vend++=1;*vend++=3; /* dhcp msg type==3, dhcprequest */
+
+ *vend++=50; *vend++=4; /* requested ip address */
+ *vend++=(ipaddr >> 24)&0xff;
+ *vend++=(ipaddr >> 16)&0xff;
+ *vend++=(ipaddr >> 8) & 0xff;
+ *vend++=ipaddr & 0xff;
+
+ *vend++=51;*vend++=4; /* lease time */
+ *vend++=(iplease>>24)&0xff; *vend++=(iplease>>16)&0xff; *vend++=(iplease>>8)&0xff; *vend++=iplease&0xff;
+
+ *vend++=54; *vend++=4; /* server identifier */
+ memmove(vend, sid, 4); vend+=4;
+
+ *vend++=61;*vend++=07;*vend++=01; /* client identifier */
+ memmove(vend, ifc->mac, 6);vend+=6;
+ *vend=0xff;
+ if(debug)
+ dispvend(req.vend);
+ if(kwrite(dfd, &req, sizeof(req))<0){
+ print("DHCPREQUEST: %r");
+ continue;
+ }
+ tsleep(&bootpr, return0, 0, 2000);
+ if(dhcpmsgtype==5) /* wait for DHCPACK */
+ break;
+ else
+ continue;
+ /* CHECK ARP */
+ /* DHCPDECLINE */
+ }
+ /* stop the receiver: close its descriptor, set done, and interrupt it if still running */
+ kclose(dfd);
+ done = 1;
+ if(rcvprocp != nil){
+ postnote(rcvprocp, 1, "timeout", 0);
+ rcvprocp = nil;
+ }
+
+ /* swap the temporary 0.0.0.0 address for the one we were given */
+ av[1] = "0.0.0.0";
+ av[2] = "0.0.0.0";
+ ipifcrem(ifc, av, 3);
+
+ hnputl(nipaddr, ipaddr);
+ sprint(ia, "%V", nipaddr);
+ hnputl(nipmask, ipmask);
+ sprint(im, "%V", nipmask);
+ av[1] = ia;
+ av[2] = im;
+ ipifcadd(ifc, av, 3, 0, nil);
+
+ /* default route through the gateway, if one was supplied */
+ if(gwip != 0) {
+ hnputl(ngwip, gwip);
+ n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip);
+ routewrite(ifc->conv->p->f, nil, ia, n);
+ }
+ return nil;
+}
+
+/*
+ * format the values learned by DHCP as text and copy the part
+ * selected by offset/len into bp via readstr; returns readstr's result.
+ * the text has one line each for fsip, auip, gwip, ipmask, ipaddr and
+ * expired (the lease time), then a "dns" line listing dns2ip, dns1ip
+ * and any addresses set in pppdns.
+ */
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+ int n, i;
+ char *buf;
+ uchar a[4];
+
+ if(debug)
+ print("dhcp: bootpread() \n");
+ buf = smalloc(READSTR);
+ if(waserror()){
+ free(buf);
+ nexterror();
+ }
+
+ hnputl(a, fsip);
+ n = snprint(buf, READSTR, "fsip %15V\n", a);
+ hnputl(a, auip);
+ n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
+ hnputl(a, gwip);
+ n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
+ hnputl(a, ipmask);
+ n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
+ hnputl(a, ipaddr);
+ n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
+ n += snprint(buf+n, READSTR-n, "expired %lud\n", iplease);
+
+ /* name servers: the second one from DHCP is listed before the first */
+ n += snprint(buf + n, READSTR-n, "dns");
+ if(dns2ip){
+ hnputl(a, dns2ip);
+ n+=snprint(buf + n, READSTR-n, " %15V", a);
+ }
+ if(dns1ip){
+ hnputl(a, dns1ip);
+ n += snprint(buf + n, READSTR-n, " %15V", a);
+ }
+
+ /* pppdns entries that are neither IPnoaddr nor the bare v4 prefix */
+ for(i=0; i<2; i++)
+ if(ipcmp(pppdns[i], IPnoaddr) != 0 && ipcmp(pppdns[i], v4prefix) != 0)
+ n += snprint(buf + n, READSTR-n, " %15I", pppdns[i]);
+
+ snprint(buf + n, READSTR-n, "\n");
+ len = readstr(offset, bp, len, buf);
+ poperror();
+ free(buf);
+ return len;
+}
+
+/* hooks through which other code reaches the routines of this file */
+char* (*bootp)(Ipifc*) = rbootp;
+int (*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/eipconvtest.c
@@ -1,0 +1,152 @@
+#include <u.h>
+#include <libc.h>
+
+enum
+{
+ Isprefix= 16,
+};
+
+/*
+ * indexed by a mask byte.  entries exist only for bytes made of
+ * leading one bits followed by zero bits; each holds that number of
+ * one bits with the Isprefix flag or'ed in.  all other entries are 0.
+ */
+uchar prefixvals[256] =
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+uchar v4prefix[16] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0, 0, 0, 0
+};
+
+/*
+ * store the low 32 bits of v at p, most significant byte first
+ */
+void
+hnputl(void *p, ulong v)
+{
+	uchar *out;
+	int k;
+
+	out = p;
+	for(k = 0; k < 4; k++)
+		out[k] = v >> (24 - 8*k);
+}
+
+/*
+ * print format routine for network addresses, selected by f->chr:
+ *  E	6-byte ethernet address as 12 hex digits
+ *  I	16-byte ip address; dotted quad if it carries the v4 prefix,
+ *	otherwise colon-separated hex with the longest run of zero
+ *	groups written as "::"
+ *  i	v6 address passed as four ulongs, printed like I
+ *  V	4-byte v4 address as dotted quad
+ *  M	16-byte mask; "/n" if it is a prefix mask, otherwise like I
+ * the text is handed to strconv; returns sizeof(uchar*).
+ */
+int
+eipconv(va_list *arg, Fconv *f)
+{
+ char buf[8*5];
+ static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
+ static char *ifmt = "%d.%d.%d.%d";
+ uchar *p, ip[16];
+ ulong *lp;
+ ushort s;
+ int i, j, n, eln, eli;
+
+ switch(f->chr) {
+ case 'E': /* Ethernet address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+ break;
+ case 'I': /* Ip address */
+ p = va_arg(*arg, uchar*);
+common:
+ if(memcmp(p, v4prefix, 12) == 0)
+ sprint(buf, ifmt, p[12], p[13], p[14], p[15]);
+ else {
+ /* find longest elision */
+ /* eli is the byte offset where the longest zero run starts, eln its length in bytes */
+ eln = eli = -1;
+ for(i = 0; i < 16; i += 2){
+ for(j = i; j < 16; j += 2)
+ if(p[j] != 0 || p[j+1] != 0)
+ break;
+ if(j > i && j - i > eln){
+ eli = i;
+ eln = j - i;
+ }
+ }
+
+ /* print with possible elision */
+ n = 0;
+ for(i = 0; i < 16; i += 2){
+ if(i == eli){
+ n += sprint(buf+n, "::");
+ i += eln;
+ if(i >= 16)
+ break;
+ } else if(i != 0)
+ n += sprint(buf+n, ":");
+ s = (p[i]<<8) + p[i+1];
+ n += sprint(buf+n, "%ux", s);
+ }
+ }
+ break;
+ case 'i': /* v6 address as 4 longs */
+ lp = va_arg(*arg, ulong*);
+ for(i = 0; i < 4; i++)
+ hnputl(ip+4*i, *lp++);
+ p = ip;
+ goto common;
+ case 'V': /* v4 ip address */
+ p = va_arg(*arg, uchar*);
+ sprint(buf, ifmt, p[0], p[1], p[2], p[3]);
+ break;
+ case 'M': /* ip mask */
+ p = va_arg(*arg, uchar*);
+
+ /* look for a prefix mask */
+ for(i = 0; i < 16; i++)
+ if(p[i] != 0xff)
+ break;
+ if(i < 16){
+ /* first non-0xff byte must be a valid partial mask and the rest must be zero */
+ if((prefixvals[p[i]] & Isprefix) == 0)
+ goto common;
+ for(j = i+1; j < 16; j++)
+ if(p[j] != 0)
+ goto common;
+ n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+ } else
+ n = 8*16;
+
+ /* got one, use /xx format */
+ sprint(buf, "/%d", n);
+ break;
+ default:
+ strcpy(buf, "(eipconv)");
+ }
+ strconv(buf, f);
+ return sizeof(uchar*);
+}
+
+uchar testvec[11][16] =
+{
+ { 0,0,0,0, 0,0,0,0, 0,0,0xff,0xff, 1,3,4,5, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0xff,0xff,0x80,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xc0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xe0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xf0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xf8,0, 0,0,0,0, 0,0,0,0, },
+ { 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, },
+ { 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0, 0,0x11,0,0, 0,0,0,0, 0,0,0,0, },
+ { 0,0,0,0x11, 0,0,0,0, 0,0,0,0, 0,0,0,0x12, },
+};
+
+/*
+ * print every test vector first as an address (%I) and then as a mask (%M)
+ */
+void
+main(void)
+{
+	uchar (*v)[16];
+
+	fmtinstall('I', eipconv);
+	fmtinstall('M', eipconv);
+	for(v = testvec; v < testvec + 11; v++)
+		print("%I\n%M\n", *v, *v);
+	exits(0);
+}
--- /dev/null
+++ b/os/ip.original/esp.c
@@ -1,0 +1,866 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+#include "libsec.h"
+
+typedef struct Esphdr Esphdr;
+typedef struct Esptail Esptail;
+typedef struct Userhdr Userhdr;
+typedef struct Esppriv Esppriv;
+typedef struct Espcb Espcb;
+typedef struct Algorithm Algorithm;
+typedef struct Esprc4 Esprc4;
+
+#define DPRINT if(0)print
+
+enum
+{
+ IP_ESPPROTO = 50,
+ EsphdrSize = 28, // includes IP header
+ IphdrSize = 20, // options have been stripped
+ EsptailSize = 2, // does not include pad or auth data
+ UserhdrSize = 4, // user visible header size - if enabled
+};
+
+struct Esphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar espproto; /* Protocol */
+ uchar espplen[2]; /* Header plus data length */
+ uchar espsrc[4]; /* Ip source */
+ uchar espdst[4]; /* Ip destination */
+
+ /* esp header */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+};
+
+struct Esptail
+{
+ uchar pad;
+ uchar nexthdr;
+};
+
+/* header as seen by the user */
+struct Userhdr
+{
+ uchar nexthdr; // next protocol
+ uchar unused[3];
+};
+
+struct Esppriv
+{
+ ulong in;
+ ulong inerrors;
+};
+
+/*
+ * protocol specific part of Conv
+ */
+struct Espcb
+{
+ int incoming;
+ int header; // use user level header (Userhdr) on data read and written
+ ulong spi;
+ ulong seq; // last seq sent
+ ulong window; // for replay attacks
+ char *espalg;
+ void *espstate; // other state for esp
+ int espivlen; // in bytes
+ int espblklen;
+ int (*cipher)(Espcb*, uchar *buf, int len);
+ char *ahalg;
+ void *ahstate; // other state for ah
+ int ahlen; // auth data length in bytes
+ int ahblklen;
+ int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+};
+
+struct Algorithm
+{
+ char *name;
+ int keylen; // in bits
+ void (*init)(Espcb*, char* name, uchar *key, int keylen);
+};
+
+
+enum {
+ RC4forward = 10*1024*1024, // maximum skip forward
+ RC4back = 100*1024, // maximum look back
+};
+
+struct Esprc4
+{
+ ulong cseq; // current byte sequence number
+ RC4state current;
+
+ int ovalid; // old is valid
+ ulong lgseq; // last good sequence
+ ulong oseq; // old byte sequence number
+ RC4state old;
+};
+
+static Conv* convlookup(Proto *esp, ulong spi);
+static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
+static void nullespinit(Espcb*, char*, uchar *key, int keylen);
+static void nullahinit(Espcb*, char*, uchar *key, int keylen);
+static void shaahinit(Espcb*, char*, uchar *key, int keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
+static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
+static void espkick(void *x);
+
+static Algorithm espalg[] =
+{
+ "null", 0, nullespinit,
+ "des_56_cbc", 64, desespinit,
+ "rc4_128", 128, rc4espinit,
+ nil, 0, nil,
+};
+
+static Algorithm ahalg[] =
+{
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit,
+ "hmac_md5_96", 128, md5ahinit,
+ nil, 0, nil,
+};
+
+/*
+ * connect routine.  the single argument has the form addr!spi.
+ * with spi "*" a random unused spi is chosen and the conversation
+ * becomes an incoming one (its write queue is hung up); with a
+ * decimal spi it becomes an outgoing one (its read queue is hung up).
+ * both algorithms are reset to "null".
+ * the result, nil or an error string, is also passed to Fsconnected.
+ */
+static char*
+espconnect(Conv *c, char **argv, int argc)
+{
+ char *p, *pp;
+ char *e = nil;
+ ulong spi;
+ Espcb *ecb = (Espcb*)c->ptcl;
+
+ switch(argc) {
+ default:
+ e = "bad args to connect";
+ break;
+ case 2:
+ p = strchr(argv[1], '!');
+ if(p == nil){
+ e = "malformed address";
+ break;
+ }
+ /* split addr!spi in place */
+ *p++ = 0;
+ parseip(c->raddr, argv[1]);
+ findlocalip(c->p->f, c->laddr, c->raddr);
+ ecb->incoming = 0;
+ ecb->seq = 0;
+ if(strcmp(p, "*") == 0) {
+ /* pick an spi that no incoming conversation uses yet */
+ qlock(c->p);
+ for(;;) {
+ spi = nrand(1<<16) + 256;
+ if(convlookup(c->p, spi) == nil)
+ break;
+ }
+ qunlock(c->p);
+ ecb->spi = spi;
+ ecb->incoming = 1;
+ qhangup(c->wq, nil);
+ } else {
+ spi = strtoul(p, &pp, 10);
+ if(pp == p) {
+ e = "malformed address";
+ break;
+ }
+ ecb->spi = spi;
+ qhangup(c->rq, nil);
+ }
+ nullespinit(ecb, "null", nil, 0);
+ nullahinit(ecb, "null", nil, 0);
+ }
+ Fsconnected(c, e);
+
+ return e;
+}
+
+
+/*
+ * status text of a conversation: "Open" while in use, else "Closed"
+ */
+static int
+espstate(Conv *c, char *state, int n)
+{
+	char *s;
+
+	s = "Closed\n";
+	if(c->inuse)
+		s = "Open\n";
+	return snprint(state, n, "%s", s);
+}
+
+/*
+ * create the queues of a new conversation; writes to wq kick espkick
+ */
+static void
+espcreate(Conv *c)
+{
+	enum { Qlimit = 64*1024 };
+
+	c->rq = qopen(Qlimit, Qmsg, 0, 0);
+	c->wq = qopen(Qlimit, Qkick, espkick, c);
+}
+
+/*
+ * close a conversation: close its queues, forget its addresses,
+ * free the algorithm state and zero the protocol control block
+ */
+static void
+espclose(Conv *c)
+{
+	Espcb *cb;
+
+	cb = (Espcb*)c->ptcl;
+
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+	ipmove(c->laddr, IPnoaddr);
+	ipmove(c->raddr, IPnoaddr);
+
+	free(cb->espstate);
+	free(cb->ahstate);
+	memset(cb, 0, sizeof *cb);
+}
+
+/*
+ * kick routine of the write queue: take one block off c->wq, wrap it
+ * in an ESP packet and pass it to ipoput4.
+ * resulting layout: ip+esp header (EsphdrSize), iv (espivlen),
+ * payload, pad, Esptail, authentication data (ahlen).
+ */
+static void
+espkick(void *x)
+{
+ Conv *c = x;
+ Esphdr *eh;
+ Esptail *et;
+ Userhdr *uh;
+ Espcb *ecb;
+ Block *bp;
+ int nexthdr;
+ int payload;
+ int pad;
+ int align;
+ uchar *auth;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ qlock(c);
+ ecb = c->ptcl;
+
+ if(ecb->header) {
+ /* make sure the message has a User header */
+ bp = pullupblock(bp, UserhdrSize);
+ if(bp == nil) {
+ qunlock(c);
+ return;
+ }
+ uh = (Userhdr*)bp->rp;
+ nexthdr = uh->nexthdr;
+ bp->rp += UserhdrSize;
+ } else {
+ nexthdr = 0; // what should this be?
+ }
+
+ /* from here on payload counts the iv as well as the user data */
+ payload = BLEN(bp) + ecb->espivlen;
+
+ /* Make space to fit ip header */
+ bp = padblock(bp, EsphdrSize + ecb->espivlen);
+
+ /* pad so that payload plus tail is a multiple of align (at least 4) */
+ align = 4;
+ if(ecb->espblklen > align)
+ align = ecb->espblklen;
+ if(align % ecb->ahblklen != 0)
+ panic("espkick: ahblklen is important after all");
+ pad = (align-1) - (payload + EsptailSize-1)%align;
+
+ /*
+ * Make space for tail
+ * this is done by calling padblock with a negative size
+ * Padblock does not change bp->wp!
+ */
+ bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
+ bp->wp += pad+EsptailSize+ecb->ahlen;
+
+ eh = (Esphdr *)(bp->rp);
+ et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+
+ // fill in tail
+ et->pad = pad;
+ et->nexthdr = nexthdr;
+
+ /* the cipher covers iv, data, pad and tail */
+ ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
+ auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+
+ // fill in head
+ eh->vihl = IP_VER4;
+ hnputl(eh->espspi, ecb->spi);
+ hnputl(eh->espseq, ++ecb->seq);
+ v6tov4(eh->espsrc, c->laddr);
+ v6tov4(eh->espdst, c->raddr);
+ eh->espproto = IP_ESPPROTO;
+ eh->frag[0] = 0;
+ eh->frag[1] = 0;
+
+ /* authentication covers everything after the ip header up to the auth data */
+ ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+
+ qunlock(c);
+ //print("esp: pass down: %uld\n", BLEN(bp));
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+}
+
+/*
+ * input routine: find the incoming conversation for the packet's
+ * spi, check the authentication data, decipher the body, strip
+ * header, iv, pad and tail, and pass what is left up the
+ * conversation's read queue (preceded by a Userhdr when enabled).
+ * packets that fail any check are logged and freed.
+ */
+void
+espiput(Proto *esp, Ipifc*, Block *bp)
+{
+ Esphdr *eh;
+ Esptail *et;
+ Userhdr *uh;
+ Conv *c;
+ Espcb *ecb;
+ uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Fs *f;
+ uchar *auth;
+ ulong spi;
+ int payload, nexthdr;
+
+ f = esp->f;
+
+ bp = pullupblock(bp, EsphdrSize+EsptailSize);
+ if(bp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return;
+ }
+
+ eh = (Esphdr*)(bp->rp);
+ spi = nhgetl(eh->espspi);
+ v4tov6(raddr, eh->espsrc);
+ v4tov6(laddr, eh->espdst);
+
+ qlock(esp);
+ /* Look for a conversation structure for this port */
+ c = convlookup(esp, spi);
+ if(c == nil) {
+ qunlock(esp);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
+ laddr, spi);
+ icmpnoconv(f, bp);
+ freeblist(bp);
+ return;
+ }
+
+ /* take the conversation lock before letting go of the protocol lock */
+ qlock(c);
+ qunlock(esp);
+
+ ecb = c->ptcl;
+ // too hard to do decryption/authentication on block lists
+ if(bp->next)
+ bp = concatblock(bp);
+
+ if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+ qunlock(c);
+ netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* authentication covers the bytes from the spi up to the trailing auth data */
+ eh = (Esphdr*)(bp->rp);
+ auth = bp->wp - ecb->ahlen;
+ if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+ qunlock(c);
+print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* payload here is iv + data + pad + tail */
+ payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
+ if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+ qunlock(c);
+ netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
+ laddr, spi, payload, BLEN(bp));
+ freeb(bp);
+ return;
+ }
+ if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+ qunlock(c);
+print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ /* the tail sits at the end of the deciphered area; it gives the pad length */
+ payload -= EsptailSize;
+ et = (Esptail*)(bp->rp + EsphdrSize + payload);
+ payload -= et->pad + ecb->espivlen;
+ nexthdr = et->nexthdr;
+ if(payload <= 0) {
+ qunlock(c);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
+ laddr, spi);
+ freeb(bp);
+ return;
+ }
+
+ // trim packet
+ bp->rp += EsphdrSize + ecb->espivlen;
+ bp->wp = bp->rp + payload;
+ if(ecb->header) {
+ // assume UserhdrSize < EsphdrSize
+ bp->rp -= UserhdrSize;
+ uh = (Userhdr*)bp->rp;
+ memset(uh, 0, UserhdrSize);
+ uh->nexthdr = nexthdr;
+ }
+
+ if(qfull(c->rq)){
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
+ laddr, spi);
+ freeblist(bp);
+ }else {
+//print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp);
+ }
+
+ qunlock(c);
+}
+
+/*
+ * protocol specific control requests:
+ *  esp alg key	select the cipher
+ *  ah alg key	select the authenticator
+ *  header	enable the user level header
+ *  noheader	disable the user level header
+ * returns nil or an error string
+ */
+char*
+espctl(Conv *c, char **f, int n)
+{
+	Espcb *ecb;
+	char *verb;
+
+	ecb = c->ptcl;
+	verb = f[0];
+	if(strcmp(verb, "esp") == 0)
+		return setalg(ecb, f, n, espalg);
+	if(strcmp(verb, "ah") == 0)
+		return setalg(ecb, f, n, ahalg);
+	if(strcmp(verb, "header") == 0){
+		ecb->header = 1;
+		return nil;
+	}
+	if(strcmp(verb, "noheader") == 0){
+		ecb->header = 0;
+		return nil;
+	}
+	return "unknown control request";
+}
+
+/*
+ * advice about a packet we sent (bp holds the offending packet):
+ * hang up both queues of the matching conversation with msg,
+ * then free bp.
+ */
+void
+espadvise(Proto *esp, Block *bp, char *msg)
+{
+	Esphdr *h;
+	Conv *c;
+	ulong spi;
+
+	h = (Esphdr*)(bp->rp);
+
+	/*
+	 * the spi is a 4-byte field and espiput reads it with nhgetl;
+	 * reading only 2 bytes here would never match a stored spi.
+	 */
+	spi = nhgetl(h->espspi);
+	qlock(esp);
+	c = convlookup(esp, spi);
+	if(c != nil) {
+		qhangup(c->rq, msg);
+		qhangup(c->wq, msg);
+	}
+	qunlock(esp);
+	freeblist(bp);
+}
+
+/*
+ * format the protocol's counters (in, inerrors) into buf
+ */
+int
+espstats(Proto *esp, char *buf, int len)
+{
+	Esppriv *priv;
+
+	priv = esp->priv;
+	return snprint(buf, len, "%lud %lud\n", priv->in, priv->inerrors);
+}
+
+/*
+ * format the local address; an incoming conversation
+ * also shows its spi
+ */
+static int
+esplocal(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int n;
+
+	cb = c->ptcl;
+	qlock(c);
+	if(!cb->incoming)
+		n = snprint(buf, len, "%I\n", c->laddr);
+	else
+		n = snprint(buf, len, "%I!%uld\n", c->laddr, cb->spi);
+	qunlock(c);
+	return n;
+}
+
+/*
+ * format the remote address; an outgoing conversation
+ * also shows its spi
+ */
+static int
+espremote(Conv *c, char *buf, int len)
+{
+	Espcb *cb;
+	int n;
+
+	cb = c->ptcl;
+	qlock(c);
+	if(!cb->incoming)
+		n = snprint(buf, len, "%I!%uld\n", c->raddr, cb->spi);
+	else
+		n = snprint(buf, len, "%I\n", c->raddr);
+	qunlock(c);
+	return n;
+}
+
+/*
+ * find the incoming conversation with the given spi, or nil.
+ * the scan stops at the first nil entry of the conversation table.
+ */
+static Conv*
+convlookup(Proto *esp, ulong spi)
+{
+	Conv **pp;
+	Espcb *cb;
+
+	pp = esp->conv;
+	while(*pp != nil){
+		cb = (*pp)->ptcl;
+		if(cb->incoming && cb->spi == spi)
+			return *pp;
+		pp++;
+	}
+	return nil;
+}
+
+/*
+ * handle an "esp alg key" or "ah alg key" control request.
+ * f[1] names an entry of the table alg; f[2] is the key in hex.
+ * the key is read from its last digit backwards, so the final two
+ * digits form key[0]; a short key leaves the remaining bytes zero
+ * and surplus leading digits are ignored.
+ * f[2] is overwritten while the digits are converted.
+ * returns nil, or an error string for a malformed request.
+ */
+static char *
+setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
+{
+	uchar *key;
+	int i, nbyte, nchar;
+	int c;
+
+	if(n < 2)
+		return "bad format";
+	for(; alg->name; alg++)
+		if(strcmp(f[1], alg->name) == 0)
+			break;
+	if(alg->name == nil)
+		return "unknown algorithm";
+
+	if(n != 3)
+		return "bad format";
+	nbyte = (alg->keylen + 7) >> 3;
+	nchar = strlen(f[2]);
+
+	/* convert the hex digits to their values, in place */
+	for(i=0; i<nchar; i++) {
+		c = f[2][i];
+		if(c >= '0' && c <= '9')
+			f[2][i] -= '0';
+		else if(c >= 'a' && c <= 'f')
+			f[2][i] -= 'a'-10;
+		else if(c >= 'A' && c <= 'F')
+			f[2][i] -= 'A'-10;
+		else
+			return "bad character in key";
+	}
+
+	/* the |= below needs a zeroed buffer */
+	key = smalloc(nbyte);
+	memset(key, 0, nbyte);
+
+	/*
+	 * pack two digits per byte.  the bound is on the byte index
+	 * (i>>1): bounding i*2 instead stops after nbyte/2 digits and
+	 * leaves three quarters of the key zero.
+	 */
+	for(i=0; i<nchar && (i>>1)<nbyte; i++) {
+		c = f[2][nchar-i-1];
+		if(i&1)
+			c <<= 4;
+		key[i>>1] |= c;
+	}
+
+	alg->init(ecb, alg->name, key, alg->keylen);
+	free(key);
+	return nil;
+}
+
+/* cipher of the "null" algorithm: leaves the data alone and reports success */
+static int
+nullcipher(Espcb*, uchar*, int)
+{
+ return 1;
+}
+
+/*
+ * select the "null" cipher: no iv, block length 1; the key is ignored
+ */
+static void
+nullespinit(Espcb *ecb, char *name, uchar*, int)
+{
+	ecb->cipher = nullcipher;
+	ecb->espivlen = 0;
+	ecb->espblklen = 1;
+	ecb->espalg = name;
+}
+
+/* authenticator of the "null" algorithm: writes nothing and accepts everything */
+static int
+nullauth(Espcb*, uchar*, int, uchar*)
+{
+ return 1;
+}
+
+/*
+ * select the "null" authenticator: no auth data, block length 1;
+ * the key is ignored
+ */
+static void
+nullahinit(Espcb *ecb, char *name, uchar*, int)
+{
+	ecb->auth = nullauth;
+	ecb->ahlen = 0;
+	ecb->ahblklen = 1;
+	ecb->ahalg = name;
+}
+
+/*
+ * hmac-sha1 of the tlen bytes at t under key; the digest goes to hash.
+ * the key is xor'ed into 64-byte inner (0x36) and outer (0x5c) pads,
+ * so klen must not exceed the pad buffers.
+ */
+void
+seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar ipad[65], opad[65];
+	uchar inner[SHA1dlen];
+	DigestState *ds;
+	long k;
+
+	memset(ipad, 0x36, 64);
+	memset(opad, 0x5c, 64);
+	ipad[64] = 0;
+	opad[64] = 0;
+	for(k = 0; k < klen; k++){
+		ipad[k] ^= key[k];
+		opad[k] ^= key[k];
+	}
+
+	/* inner hash over ipad and text, outer hash over opad and inner hash */
+	ds = sha1(ipad, 64, nil, nil);
+	sha1(t, tlen, inner, ds);
+	ds = sha1(opad, 64, nil, nil);
+	sha1(inner, SHA1dlen, hash, ds);
+}
+
+/*
+ * hmac-sha1 authenticator.  computes the digest of t with the
+ * 16-byte key in ecb->ahstate, compares its first ahlen bytes with
+ * auth and then overwrites auth with them.
+ * returns 1 if they matched, 0 otherwise; so the same routine both
+ * generates (outgoing) and verifies (incoming).
+ */
+static int
+shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
+{
+	uchar digest[SHA1dlen];
+	int ok;
+
+	memset(digest, 0, sizeof digest);
+	seanq_hmac_sha1(digest, t, tlen, (uchar*)ecb->ahstate, 16);
+	ok = 1;
+	if(memcmp(auth, digest, ecb->ahlen) != 0)
+		ok = 0;
+	memmove(auth, digest, ecb->ahlen);
+	return ok;
+}
+
+/*
+ * select hmac_sha1_96 as authenticator.  klen is the key length in
+ * bits and must be 128; a copy of the key is kept in ecb->ahstate.
+ */
+static void
+shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+{
+	if(klen != 128)
+		panic("shaahinit: bad keylen");
+	/*
+	 * convert to bytes: divide by 8, i.e. shift by 3.
+	 * shifting by 8 gave 0, so no key was stored although
+	 * shaauth reads 16 bytes of it.
+	 */
+	klen >>= 3;
+
+	ecb->ahalg = name;
+	ecb->ahblklen = 1;
+	ecb->ahlen = 12;
+	ecb->auth = shaauth;
+	ecb->ahstate = smalloc(klen);
+	memmove(ecb->ahstate, key, klen);
+}
+
+void
+seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
+{
+	uchar inner[MD5dlen];	/* md5(key^ipad || t) */
+	uchar kipad[65], kopad[65];
+	DigestState *ds;
+	int k;
+
+	/* one 64-byte block of each HMAC pad constant */
+	memset(kipad, 0x36, 64);
+	memset(kopad, 0x5c, 64);
+	kipad[64] = kopad[64] = 0;
+	/* fold the key into both pads; the pads only have room for klen <= 64 */
+	for(k = 0; k < klen; k++){
+		kipad[k] ^= key[k];
+		kopad[k] ^= key[k];
+	}
+	ds = md5(kipad, 64, nil, nil);
+	md5(t, tlen, inner, ds);	/* inner digest */
+	ds = md5(kopad, 64, nil, nil);
+	md5(inner, MD5dlen, hash, ds);	/* outer digest is the result */
+}
+
+static int
+md5auth(Espcb *ecb, uchar *t, int tlen, uchar *auth)	/* HMAC-MD5 over t; returns 1 if auth already held the right value */
+{
+	uchar digest[MD5dlen];
+	int match;
+
+	memset(digest, 0, sizeof(digest));
+	seanq_hmac_md5(digest, t, tlen, (uchar*)ecb->ahstate, 16);	/* ahstate holds the 16-byte key */
+	match = (memcmp(auth, digest, ecb->ahlen) == 0);
+	memmove(auth, digest, ecb->ahlen);	/* auth is overwritten with the computed value either way */
+	return match;
+}
+
+static void
+md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)	/* install HMAC-MD5 authentication; klen is in bits */
+{
+	int nbytes;
+
+	if(klen != 128)
+		panic("md5ahinit: bad keylen");
+	nbytes = klen >> 3;	/* convert to bytes */
+	ecb->ahlen = 12;	/* md5auth compares and copies this many digest bytes */
+	ecb->ahblklen = 1;
+	ecb->ahalg = name;
+	ecb->auth = md5auth;
+	/* keep a private copy of the key for md5auth */
+	ecb->ahstate = smalloc(nbytes);
+	memmove(ecb->ahstate, key, nbytes);
+}
+
+static int
+descipher(Espcb *ecb, uchar *p, int n) /* chained DES over p[0..n); the first 8 bytes of p carry the iv */
+{
+ uchar tmp[8];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ DESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, 8); /* take the iv from the front of the packet */
+ p += 8;
+ while(p < ep){
+ memmove(tmp, p, 8); /* save this block as received: it becomes the next iv */
+ block_cipher(ds->expanded, p, 1);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; ){
+ *p++ ^= *ip; /* xor the processed block with the current iv */
+ *ip++ = *tp++; /* and roll the saved block into ivec */
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, 8); /* emit the current iv ahead of the data */
+ for(p += 8; p < ep; p += 8){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip+8; ip < eip; )
+ *pp++ ^= *ip++; /* xor the block with the iv before the cipher runs */
+ block_cipher(ds->expanded, p, 0);
+ memmove(ds->ivec, p, 8); /* the output block is the iv for the next one */
+ }
+ }
+ return 1; /* no failure path: n is assumed to cover whole 8-byte blocks */
+}
+
+static void
+desespinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	uchar deskey[8];
+	uchar iv[8];
+	int nbytes, j;
+
+	/* bits to bytes, rounded up, using at most 8 key bytes */
+	nbytes = (n+7)>>3;
+	if(nbytes > 8)
+		nbytes = 8;
+	memset(deskey, 0, sizeof(deskey));	/* short keys are zero padded */
+	memmove(deskey, k, nbytes);
+	for(j = 0; j < 8; j++)
+		iv[j] = nrand(256);	/* random starting iv, one byte at a time */
+	ecb->espalg = name;
+	ecb->espblklen = 8;
+	ecb->espivlen = 8;
+	ecb->cipher = descipher;
+	ecb->espstate = smalloc(sizeof(DESstate));
+	setupDESstate(ecb->espstate, deskey, iv);
+}
+
+static int
+rc4cipher(Espcb *ecb, uchar *p, int n) /* rc4 with a 4-byte stream offset in front of the data; returns 0 on failure */
+{
+ Esprc4 *esprc4;
+ RC4state tmpstate;
+ ulong seq;
+ long d, dd;
+
+ if(n < 4) /* no room for the 4-byte offset */
+ return 0;
+
+ esprc4 = ecb->espstate;
+ if(ecb->incoming) {
+ seq = nhgetl(p); /* stream offset written by the sender (see the outgoing branch) */
+ p += 4;
+ n -= 4;
+ d = seq-esprc4->cseq; /* distance between the packet's offset and ours */
+ if(d == 0) {
+ rc4(&esprc4->current, p, n); /* in order: just continue the stream */
+ esprc4->cseq += n;
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - esprc4->lgseq;
+ if(dd > RC4back) /* far enough past the last gap: drop the saved state */
+ esprc4->ovalid = 0;
+ }
+ } else if(d > 0) {
+print("missing packet: %uld %ld\n", seq, d);
+ // this link is hosed
+ if(d > RC4forward) {
+ strcpy(up->errstr, "rc4cipher: skipped too much");
+ return 0;
+ }
+ esprc4->lgseq = seq; /* remember where the latest gap ended */
+ if(!esprc4->ovalid) {
+ esprc4->ovalid = 1; /* snapshot the pre-gap state for late packets */
+ esprc4->oseq = esprc4->cseq;
+ memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
+ }
+ rc4skip(&esprc4->current, d); /* jump the stream over the missing bytes */
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq = seq+n;
+ } else {
+print("reordered packet: %uld %ld\n", seq, d);
+ dd = seq - esprc4->oseq; /* offset of this packet from the saved state */
+ if(!esprc4->ovalid || -d > RC4back || dd < 0) {
+ strcpy(up->errstr, "rc4cipher: too far back");
+ return 0;
+ }
+ memmove(&tmpstate, &esprc4->old, sizeof(RC4state)); /* work on a copy so old and current stay put */
+ rc4skip(&tmpstate, dd);
+ rc4(&tmpstate, p, n);
+ return 1;
+ }
+
+ // move old state up
+ if(esprc4->ovalid) {
+ dd = esprc4->cseq - RC4back - esprc4->oseq;
+ if(dd > 0) { /* keep the saved state no more than RC4back behind cseq */
+ rc4skip(&esprc4->old, dd);
+ esprc4->oseq += dd;
+ }
+ }
+ } else {
+ hnputl(p, esprc4->cseq); /* outgoing: prefix the current stream offset */
+ p += 4;
+ n -= 4;
+ rc4(&esprc4->current, p, n);
+ esprc4->cseq += n;
+ }
+ return 1;
+}
+
+static void
+rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
+{
+	Esprc4 *st;
+
+	/* key length arrives in bits; round up to bytes */
+	n = (n+7)>>3;
+	st = smalloc(sizeof(*st));
+	memset(st, 0, sizeof(*st));	/* offsets and the saved-state flag start at zero */
+	setupRC4state(&st->current, k, n);
+	ecb->espalg = name;
+	ecb->espblklen = 4;
+	ecb->espivlen = 4;	/* rc4cipher puts a 4-byte offset in front of the data */
+	ecb->cipher = rc4cipher;
+	ecb->espstate = st;
+}
+
+void
+espinit(Fs *fs)	/* build the "esp" protocol descriptor and register it with fs */
+{
+	Proto *p;
+
+	p = smalloc(sizeof(*p));
+	p->priv = smalloc(sizeof(Esppriv));
+	p->name = "esp";
+	p->ipproto = IP_ESPPROTO;
+	p->nc = Nchans;
+	p->ptclsize = sizeof(Espcb);
+	/* conversation life cycle */
+	p->create = espcreate;
+	p->connect = espconnect;
+	p->announce = nil;
+	p->close = espclose;
+	p->ctl = espctl; p->state = espstate;
+	p->rcv = espiput;
+	p->advise = espadvise;
+	p->stats = espstats;
+	p->local = esplocal;
+	p->remote = espremote;
+
+	Fsproto(fs, p);
+}
--- /dev/null
+++ b/os/ip.original/ethermedium.c
@@ -1,0 +1,792 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+#include "kernel.h"
+
+typedef struct Etherhdr Etherhdr;
+struct Etherhdr /* 14-byte frame header that etherbwrite fills in */
+{
+ uchar d[6]; /* destination mac */
+ uchar s[6]; /* source mac (ifc->mac on output) */
+ uchar t[2]; /* ether type, high byte first: 0x0800 for v4, 0x86DD for v6 */
+};
+
+static uchar ipbroadcast[IPaddrlen] = { /* all-ones ip address; recvarp refuses to cache it */
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+};
+
+static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* all-ones mac; recvarp refuses to cache it */
+
+static void etherread4(void *a);
+static void etherread6(void *a);
+static void etherbind(Ipifc *ifc, int argc, char **argv);
+static void etherunbind(Ipifc *ifc);
+static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
+static void sendarp(Ipifc *ifc, Arpent *a);
+static void sendgarp(Ipifc *ifc, uchar*);
+static int multicastea(uchar *ea, uchar *ip);
+static void recvarpproc(void*);
+static void resolveaddr6(Ipifc *ifc, Arpent *a);
+static void etherpref2addr(uchar *pref, uchar *ea);
+
+Medium ethermedium = /* ethernet medium, registered by ethermediumlink */
+{
+.name= "ether",
+.hsize= 14, /* matches the 6+6+2 bytes of Etherhdr */
+.mintu= 60,
+.maxtu= 1514,
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp, /* gratuitous arp sender */
+.pref2addr= etherpref2addr,
+};
+
+Medium gbemedium = /* same hooks as ethermedium; only name and maxtu differ */
+{
+.name= "gbe",
+.hsize= 14, /* matches the 6+6+2 bytes of Etherhdr */
+.mintu= 60,
+.maxtu= 9014, /* larger than ethermedium's 1514 */
+.maclen= 6,
+.bind= etherbind,
+.unbind= etherunbind,
+.bwrite= etherbwrite,
+.addmulti= etheraddmulti,
+.remmulti= etherremmulti,
+.ares= arpenter,
+.areg= sendgarp, /* gratuitous arp sender */
+.pref2addr= etherpref2addr,
+};
+
+typedef struct Etherrock Etherrock;
+struct Etherrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *arpp; /* arp process */
+ Proc *read4p; /* reading process (v4)*/
+ Proc *read6p; /* reading process (v6)*/
+ Chan *mchan4; /* Data channel for v4 */
+ Chan *achan; /* Arp channel */
+ Chan *cchan4; /* Control channel for v4 */
+ Chan *mchan6; /* Data channel for v6 */
+ Chan *cchan6; /* Control channel for v6 */
+};
+
+/*
+ * ethernet arp request
+ */
+enum
+{
+ ETARP = 0x0806, /* ether type written into arp frames */
+ ETIP4 = 0x0800, /* protocol type placed in Etherarp.pro */
+ ETIP6 = 0x86DD,
+ ARPREQUEST = 1, /* values of Etherarp.op */
+ ARPREPLY = 2,
+};
+
+typedef struct Etherarp Etherarp;
+struct Etherarp /* ethernet header followed by an ipv4 arp packet */
+{
+ uchar d[6]; /* frame destination mac */
+ uchar s[6]; /* frame source mac */
+ uchar type[2]; /* ether type, set to ETARP */
+ uchar hrd[2]; /* hardware type, set to 1 by the senders here */
+ uchar pro[2]; /* protocol type, set to ETIP4 */
+ uchar hln; /* hardware address length, sizeof(sha) */
+ uchar pln; /* protocol address length, sizeof(spa) */
+ uchar op[2]; /* ARPREQUEST or ARPREPLY */
+ uchar sha[6]; /* sender mac */
+ uchar spa[4]; /* sender ipv4 address */
+ uchar tha[6]; /* target mac */
+ uchar tpa[4]; /* target ipv4 address */
+};
+
+static char *nbmsg = "nonblocking"; /* control message etherbind writes to both control channels */
+
+/*
+ *  called to bind an IP ifc to an ethernet device
+ *  called with ifc wlock'd
+ */
+static void
+etherbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
+	char addr[Maxpath];	//char addr[2*KNAMELEN];
+	char dir[Maxpath];	//char dir[2*KNAMELEN];
+	char *buf;
+	int fd, cfd, n;
+	char *ptr;
+	Etherrock *er;
+
+	if(argc < 3)	/* argv[2], the device name, is used below (was argc < 2) */
+		error(Ebadarg);
+
+	mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
+	buf = nil;
+	if(waserror()){	/* undo whatever was opened before the failure */
+		if(mchan4 != nil)
+			cclose(mchan4);
+		if(cchan4 != nil)
+			cclose(cchan4);
+		if(achan != nil)
+			cclose(achan);
+		if(mchan6 != nil)
+			cclose(mchan6);
+		if(cchan6 != nil)
+			cclose(cchan6);
+		if(buf != nil)
+			free(buf);
+		nexterror();
+	}
+
+	/*
+	 *  open ip conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
+	fd = kdial(addr, nil, dir, &cfd);
+	if(fd < 0)
+		errorf("dial 0x800 failed: %s", up->env->errstr);
+	mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
+	cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
+	kclose(fd);
+	kclose(cfd);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+
+	/*
+	 *  get mac address and speed
+	 */
+	snprint(addr, sizeof(addr), "%s/stats", dir);
+	fd = kopen(addr, OREAD);
+	if(fd < 0)
+		errorf("can't open ether stats: %s", up->env->errstr);
+
+	buf = smalloc(512);
+	n = kread(fd, buf, 511);	/* leave room for the terminator */
+	kclose(fd);
+	if(n <= 0)
+		error(Eio);
+	buf[n] = 0;
+
+	ptr = strstr(buf, "addr: ");
+	if(!ptr)
+		error(Eio);
+	ptr += 6;
+	parsemac(ifc->mac, ptr, 6);
+
+	ptr = strstr(buf, "mbps: ");
+	if(ptr){
+		ptr += 6;
+		ifc->mbps = atoi(ptr);
+	} else
+		ifc->mbps = 100;	/* default when the stats file has no mbps line */
+
+	/*
+ 	 *  open arp conversation
+	 */
+	snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
+	fd = kdial(addr, nil, nil, nil);
+	if(fd < 0)
+		errorf("dial 0x806 failed: %s", up->env->errstr);
+	achan = commonfdtochan(fd, ORDWR, 0, 1);
+	kclose(fd);
+
+	/*
+	 *  open ip conversation
+	 *
+	 *  the dial will fail if the type is already open on
+	 *  this device.
+	 */
+	snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
+	fd = kdial(addr, nil, dir, &cfd);
+	if(fd < 0)
+		errorf("dial 0x86DD failed: %s", up->env->errstr);
+	mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
+	cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
+	kclose(fd);
+	kclose(cfd);
+
+	/*
+	 *  make it non-blocking
+	 */
+	devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan4 = mchan4;
+	er->cchan4 = cchan4;
+	er->achan = achan;
+	er->mchan6 = mchan6;
+	er->cchan6 = cchan6;
+	er->f = ifc->conv->p->f;
+	ifc->arg = er;	/* the channels now belong to the rock; etherunbind closes them */
+
+	free(buf);
+	poperror();
+
+	kproc("etherread4", etherread4, ifc, 0);
+	kproc("recvarpproc", recvarpproc, ifc, 0);
+	kproc("etherread6", etherread6, ifc, 0);
+}
+
+/*
+ * called with ifc wlock'd
+ */
+static void
+etherunbind(Ipifc *ifc)
+{
+ Etherrock *er = ifc->arg;
+
+ if(er->read4p) /* each helper process records itself in the rock when it starts */
+ postnote(er->read4p, 1, "unbind", 0);
+ if(er->read6p)
+ postnote(er->read6p, 1, "unbind", 0);
+ if(er->arpp)
+ postnote(er->arpp, 1, "unbind", 0);
+
+ /* wait for readers to die */
+ while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0) /* each process clears its slot on the way out */
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->mchan4 != nil)
+ cclose(er->mchan4);
+ if(er->achan != nil)
+ cclose(er->achan);
+ if(er->cchan4 != nil)
+ cclose(er->cchan4);
+ if(er->mchan6 != nil)
+ cclose(er->mchan6);
+ if(er->cchan6 != nil)
+ cclose(er->cchan6);
+
+ free(er);
+}
+
+/*
+ * called by ipoput with a single block to write with ifc rlock'd
+ */
+static void
+etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ Etherhdr *eh;
+ Arpent *a;
+ uchar mac[6];
+ Etherrock *er = ifc->arg;
+
+ /* get mac address of destination */
+ a = arpget(er->f->arp, bp, version, ifc, ip, mac);
+ if(a){ /* an entry came back, so mac is not resolved yet */
+ /* check for broadcast or multicast */
+ bp = multicastarp(er->f, a, ifc->m, mac);
+ if(bp==nil){ /* neither: start address resolution and send nothing now */
+ switch(version){
+ case V4:
+ sendarp(ifc, a);
+ break;
+ case V6:
+ resolveaddr6(ifc, a);
+ break;
+ default:
+ panic("etherbwrite: version %d", version);
+ }
+ return;
+ }
+ }
+
+ /* make it a single block with space for the ether header */
+ bp = padblock(bp, ifc->m->hsize);
+ if(bp->next)
+ bp = concatblock(bp);
+ if(BLEN(bp) < ifc->mintu)
+ bp = adjustblock(bp, ifc->mintu);
+ eh = (Etherhdr*)bp->rp;
+
+ /* copy in mac addresses and ether type */
+ memmove(eh->s, ifc->mac, sizeof(eh->s));
+ memmove(eh->d, mac, sizeof(eh->d));
+
+ switch(version){
+ case V4:
+ eh->t[0] = 0x08;
+ eh->t[1] = 0x00;
+ devtab[er->mchan4->type]->bwrite(er->mchan4, bp, 0);
+ break;
+ case V6:
+ eh->t[0] = 0x86;
+ eh->t[1] = 0xDD;
+ devtab[er->mchan6->type]->bwrite(er->mchan6, bp, 0);
+ break;
+ default:
+ panic("etherbwrite2: version %d", version);
+ }
+ ifc->out++; /* count of frames handed to the device */
+}
+
+
+/*
+ * process to read from the ethernet
+ */
+static void
+etherread4(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read4p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read4p = 0; /* tell etherunbind this process is gone */
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
+ if(!canrlock(ifc)){ /* interface is busy (locked elsewhere): drop the frame */
+ freeb(bp);
+ continue;
+ }
+ if(waserror()){
+ runlock(ifc); /* release the lock before passing the error up */
+ nexterror();
+ }
+ ifc->in++;
+ bp->rp += ifc->m->hsize; /* step over the ether header */
+ if(ifc->lifc == nil) /* no addresses configured yet */
+ freeb(bp);
+ else
+ ipiput4(er->f, ifc, bp);
+ runlock(ifc);
+ poperror();
+ }
+}
+
+
+/*
+ * process to read from the ethernet, IPv6
+ */
+static void
+etherread6(void *a)
+{
+ Ipifc *ifc;
+ Block *bp;
+ Etherrock *er;
+
+ ifc = a;
+ er = ifc->arg;
+ er->read6p = up; /* hide identity under a rock for unbind */
+ if(waserror()){
+ er->read6p = 0; /* tell etherunbind this process is gone */
+ pexit("hangup", 1);
+ }
+ for(;;){
+ bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
+ if(!canrlock(ifc)){ /* interface is busy (locked elsewhere): drop the frame */
+ freeb(bp);
+ continue;
+ }
+ if(waserror()){
+ runlock(ifc); /* release the lock before passing the error up */
+ nexterror();
+ }
+ ifc->in++;
+ bp->rp += ifc->m->hsize; /* step over the ether header */
+ if(ifc->lifc == nil) /* no addresses configured yet */
+ freeb(bp);
+ else
+ ipiput6(er->f, ifc, bp);
+ runlock(ifc);
+ poperror();
+ }
+}
+
+static void
+etheraddmulti(Ipifc *ifc, uchar *a, uchar *) /* ask the device to accept the multicast mac for ip address a */
+{
+ uchar mac[6];
+ char buf[64];
+ Etherrock *er = ifc->arg;
+ int version;
+
+ version = multicastea(mac, a); /* fills mac and reports V4 or V6 */
+ sprint(buf, "addmulti %E", mac);
+ switch(version){
+ case V4:
+ devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0);
+ break;
+ case V6:
+ devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0);
+ break;
+ default: /* a was not a multicast address */
+ panic("etheraddmulti: version %d", version);
+ }
+}
+
+static void
+etherremmulti(Ipifc *ifc, uchar *a, uchar *) /* ask the device to stop accepting the multicast mac for ip address a */
+{
+ uchar mac[6];
+ char buf[64];
+ Etherrock *er = ifc->arg;
+ int version;
+
+ version = multicastea(mac, a); /* fills mac and reports V4 or V6 */
+ sprint(buf, "remmulti %E", mac);
+ switch(version){
+ case V4:
+ devtab[er->cchan4->type]->write(er->cchan4, buf, strlen(buf), 0);
+ break;
+ case V6:
+ devtab[er->cchan6->type]->write(er->cchan6, buf, strlen(buf), 0);
+ break;
+ default: /* a was not a multicast address */
+ panic("etherremmulti: version %d", version);
+ }
+}
+
+/*
+ * send an ethernet arp
+ * (only v4, v6 uses the neighbor discovery, rfc1970)
+ */
+static void
+sendarp(Ipifc *ifc, Arpent *a)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't do anything if it's been less than a second since the last */
+ if(NOW - a->ctime < 1000){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+ arprelease(er->f->arp, a); /* a is not touched after this point except a->type and a->ip */
+
+ n = sizeof(Etherarp);
+ if(n < a->type->mintu) /* pad the request up to the medium's minimum frame size */
+ n = a->type->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa)); /* who we are asking about */
+ ipv4local(ifc, e->spa);
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+ if(n < 0)
+ print("arp: send: %r\n");
+}
+
+static void
+resolveaddr6(Ipifc *ifc, Arpent *a) /* v6 counterpart of sendarp: sends a neighbor solicitation via icmpns */
+{
+ int sflag;
+ Block *bp;
+ Etherrock *er = ifc->arg;
+ uchar ipsrc[IPaddrlen];
+
+ /* don't do anything if it's been less than ReTransTimer since the last */
+ if(NOW - a->ctime < ReTransTimer){
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ /* remove all but the last message */
+ while((bp = a->hold) != nil){
+ if(bp == a->last)
+ break;
+ a->hold = bp->list;
+ freeblist(bp);
+ }
+
+ /* stamp this attempt and schedule the next retransmit time */
+ a->ctime = NOW;
+ a->rtime = NOW + ReTransTimer;
+ if(a->rxtsrem <= 0) { /* retransmit budget used up: send nothing */
+ arprelease(er->f->arp, a);
+ return;
+ }
+
+ a->rxtsrem--;
+ arprelease(er->f->arp, a);
+
+ if(sflag = ipv6anylocal(ifc, ipsrc)) /* only solicit if a local v6 source address was found */
+ icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+}
+
+/*
+ * send a gratuitous arp to refresh arp caches
+ */
+static void
+sendgarp(Ipifc *ifc, uchar *ip)
+{
+ int n;
+ Block *bp;
+ Etherarp *e;
+ Etherrock *er = ifc->arg;
+
+ /* don't arp for our initial non address */
+ if(ipcmp(ip, IPnoaddr) == 0)
+ return;
+
+ n = sizeof(Etherarp);
+ if(n < ifc->m->mintu) /* pad up to the medium's minimum frame size */
+ n = ifc->m->mintu;
+ bp = allocb(n);
+ memset(bp->rp, 0, n);
+ e = (Etherarp*)bp->rp;
+ memmove(e->tpa, ip+IPv4off, sizeof(e->tpa)); /* target and sender are both our own address */
+ memmove(e->spa, ip+IPv4off, sizeof(e->spa));
+ memmove(e->sha, ifc->mac, sizeof(e->sha));
+ memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
+ memmove(e->s, ifc->mac, sizeof(e->s));
+
+ hnputs(e->type, ETARP);
+ hnputs(e->hrd, 1);
+ hnputs(e->pro, ETIP4);
+ e->hln = sizeof(e->sha);
+ e->pln = sizeof(e->spa);
+ hnputs(e->op, ARPREQUEST);
+ bp->wp += n;
+
+ n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
+ if(n < 0)
+ print("garp: send: %r\n");
+}
+
+static void
+recvarp(Ipifc *ifc)	/* read one arp frame from the arp channel, update the cache, answer requests for us */
+{
+	int n;
+	Block *ebp, *rbp;
+	Etherarp *e, *r;
+	uchar ip[IPaddrlen];
+	static uchar eprinted[4];	/* last conflicting address reported, to print it only once */
+	Etherrock *er = ifc->arg;
+
+	ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
+	if(ebp == nil) {
+		print("arp: rcv: %r\n");
+		return;
+	}
+
+	e = (Etherarp*)ebp->rp;
+	switch(nhgets(e->op)) {
+	default:
+		break;
+
+	case ARPREPLY:
+		/* check for machine using my ip address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				print("arprep: 0x%E/0x%E also has ip addr %V\n",
+					e->s, e->sha, e->spa);
+				break;
+			}
+		}
+
+		/* make sure we're not entering broadcast addresses */
+		if(ipcmp(ip, ipbroadcast) == 0 ||
+			!memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
+			print("arprep: 0x%E/0x%E cannot register broadcast address %V\n",
+				e->s, e->sha, e->spa);	/* spa is 4 bytes, so %V; %I would read 16 */
+			break;
+		}
+
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+		break;
+
+	case ARPREQUEST:
+		/* don't answer arps till we know who we are */
+		if(ifc->lifc == 0)
+			break;
+
+		/* check for machine using my ip or ether address */
+		v4tov6(ip, e->spa);
+		if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
+				if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+					/* print only once */
+					print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+					memmove(eprinted, e->spa, sizeof(e->spa));
+				}
+			}
+		} else {
+			if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
+				print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+				break;
+			}
+		}
+
+		/* refresh what we know about sender */
+		arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
+
+		/* answer only requests for our address or systems we're proxying for */
+		v4tov6(ip, e->tpa);
+		if(!iplocalonifc(ifc, ip))
+		if(!ipproxyifc(er->f, ifc, ip))
+			break;
+
+		n = sizeof(Etherarp);
+		if(n < ifc->mintu)	/* pad the reply up to the minimum frame size */
+			n = ifc->mintu;
+		rbp = allocb(n);
+		r = (Etherarp*)rbp->rp;
+		memset(r, 0, sizeof(Etherarp));
+		hnputs(r->type, ETARP);
+		hnputs(r->hrd, 1);
+		hnputs(r->pro, ETIP4);
+		r->hln = sizeof(r->sha);
+		r->pln = sizeof(r->spa);
+		hnputs(r->op, ARPREPLY);
+		memmove(r->tha, e->sha, sizeof(r->tha));	/* reply goes back to the asker */
+		memmove(r->tpa, e->spa, sizeof(r->tpa));
+		memmove(r->sha, ifc->mac, sizeof(r->sha));
+		memmove(r->spa, e->tpa, sizeof(r->spa));	/* we speak for the address asked about */
+		memmove(r->d, e->sha, sizeof(r->d));
+		memmove(r->s, ifc->mac, sizeof(r->s));
+		rbp->wp += n;
+
+		n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+		if(n < 0)
+			print("arp: write: %r\n");
+	}
+	freeb(ebp);
+}
+
+static void
+recvarpproc(void *v) /* kproc body started by etherbind: handles arp frames until an error */
+{
+ Ipifc *ifc = v;
+ Etherrock *er = ifc->arg;
+
+ er->arpp = up; /* record ourselves so etherunbind can post a note */
+ if(waserror()){
+ er->arpp = 0; /* tell etherunbind this process is gone */
+ pexit("hangup", 1);
+ }
+ for(;;)
+ recvarp(ifc);
+}
+
+static int
+multicastea(uchar *ea, uchar *ip)
+{
+	int version;
+
+	/* ea is written only for multicast ip; the ipismulticast result is returned either way */
+	version = ipismulticast(ip);
+	if(version == V4){
+		ea[0] = 0x01;
+		ea[1] = 0x00;
+		ea[2] = 0x5e;
+		ea[3] = ip[13] & 0x7f;
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}else if(version == V6){
+		ea[0] = 0x33;
+		ea[1] = 0x33;
+		ea[2] = ip[12];
+		ea[3] = ip[13];
+		ea[4] = ip[14];
+		ea[5] = ip[15];
+	}
+	/* V4 keeps the low 23 address bits, V6 the low 32 */
+	return version;
+}
+
+/*
+ *  resolve an arp entry locally when its address is a
+ *  broadcast or multicast one.  Returns what arpresolve
+ *  returns, or nil when a real arp exchange is needed.
+ */
+static Block*
+multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
+{
+	int kind;
+
+	/* plain unicast: nothing to do here */
+	kind = ipforme(f, a->ip);
+	if(kind == Runi)
+		return nil;
+
+	/* broadcast maps to the all-ones ethernet address */
+	if(kind == Rbcast){
+		memset(mac, 0xff, 6);
+		return arpresolve(f->arp, a, medium, mac);
+	}
+
+	/* if multicast, the mac is derived from the address */
+	kind = multicastea(mac, a->ip);
+	if(kind == V4 || kind == V6)
+		return arpresolve(f->arp, a, medium, mac);
+
+	/* let arp take care of it */
+	return nil;
+}
+
+void
+ethermediumlink(void)	/* register both ethernet media with the ip stack */
+{
+	addipmedium(&ethermedium);	/* restored: "&eth" had been decoded into the character U+00F0 */
+	addipmedium(&gbemedium);
+}
+
+
+static void
+etherpref2addr(uchar *pref, uchar *ea) /* fill bytes 8..15 of pref from the 6-byte mac ea */
+{
+ pref[8] = ea[0] | 0x2; /* first mac byte with bit 0x2 forced on */
+ pref[9] = ea[1];
+ pref[10] = ea[2];
+ pref[11] = 0xFF; /* fixed FF FE filler between the two mac halves */
+ pref[12] = 0xFE;
+ pref[13] = ea[3];
+ pref[14] = ea[4];
+ pref[15] = ea[5];
+}
--- /dev/null
+++ b/os/ip.original/gre.c
@@ -1,0 +1,282 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+#define DPRINT if(0)print
+
+enum
+{
+ GRE_IPONLY = 12, /* size of ip header */
+ GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
+ IP_GREPROTO = 47, /* ip protocol number written into outgoing headers */
+
+ GRErxms = 200, /* NOTE(review): the three values below are not referenced in the visible code */
+ GREtickms = 100,
+ GREmaxxmit = 10,
+};
+
+typedef struct GREhdr /* ipv4 header immediately followed by the first 4 bytes of the gre header */
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar len[2]; /* packet length (including headers) */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused; /* same offset as the ttl field of struct Icmp; never written in this file */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+
+ /* gre header */
+ uchar flags[2];
+ uchar eproto[2]; /* encapsulation protocol */
+} GREhdr;
+
+typedef struct GREpriv GREpriv;
+struct GREpriv
+{
+ int raw; /* Raw GRE mode */
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+};
+
+static void grekick(void *x, Block *bp);
+
+static char*
+greconnect(Conv *c, char **argv, int argc) /* standard connect, refused if another conversation has the same peer */
+{
+ Proto *p;
+ char *err;
+ Conv *tc, **cp, **ecp;
+
+ err = Fsstdconnect(c, argv, argc);
+ if(err != nil)
+ return err;
+
+ /* make sure noone's already connected to this other sys */
+ p = c->p;
+ qlock(p);
+ ecp = &p->conv[p->nc];
+ for(cp = p->conv; cp < ecp; cp++){
+ tc = *cp;
+ if(tc == nil) /* first empty slot ends the scan */
+ break;
+ if(tc == c)
+ continue;
+ if(tc->rport == c->rport && ipcmp(tc->raddr, c->raddr) == 0){
+ err = "already connected to that addr/proto";
+ ipmove(c->laddr, IPnoaddr); /* undo the addresses Fsstdconnect set */
+ ipmove(c->raddr, IPnoaddr);
+ break;
+ }
+ }
+ qunlock(p);
+
+ if(err != nil)
+ return err;
+ Fsconnected(c, nil);
+
+ return nil;
+}
+
+static void
+grecreate(Conv *c) /* set up the queues of a new gre conversation */
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c); /* 64K read queue; greiput enforces the same limit */
+ c->wq = qbypass(grekick, c); /* writes go straight to grekick */
+}
+
+static int
+grestate(Conv *c, char *state, int n) /* format the conversation state, which is always "Datagram" */
+{
+ USED(c);
+ return snprint(state, n, "%s", "Datagram");
+}
+
+static char*
+greannounce(Conv*, char**, int) /* announce is never allowed; always returns an error string */
+{
+ return "pktifc does not support announce";
+}
+
+static void
+greclose(Conv *c) /* close the queues and reset the addresses and ports of c */
+{
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+}
+
+int drop;
+
+static void
+grekick(void *x, Block *bp) /* write-queue bypass: add the ip header to bp and hand it to ipoput4 */
+{
+ Conv *c = x;
+ GREhdr *ghp;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+
+ if(bp == nil)
+ return;
+
+ /* Make space to fit ip header (gre header already there) */
+ bp = padblock(bp, GRE_IPONLY);
+ if(bp == nil)
+ return;
+
+ /* make sure the message has a GRE header */
+ bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
+ if(bp == nil)
+ return;
+
+ ghp = (GREhdr *)(bp->rp);
+ ghp->vihl = IP_VER4;
+
+ if(!((GREpriv*)c->p->priv)->raw){ /* cooked mode: fill in addresses and protocol for the writer */
+ v4tov6(raddr, ghp->dst);
+ if(ipcmp(raddr, v4prefix) == 0) /* destination left as zero: use the connected peer */
+ memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, ghp->src);
+ if(ipcmp(laddr, v4prefix) == 0){ /* source left as zero */
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
+ memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ }
+ hnputs(ghp->eproto, c->rport); /* the remote "port" is the encapsulated protocol */
+ }
+
+ ghp->proto = IP_GREPROTO;
+ ghp->frag[0] = 0;
+ ghp->frag[1] = 0;
+
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+static void
+greiput(Proto *gre, Ipifc*, Block *bp) /* deliver an incoming gre packet to the matching conversation, else drop it */
+{
+ int len;
+ GREhdr *ghp;
+ Conv *c, **p;
+ ushort eproto;
+ uchar raddr[IPaddrlen];
+ GREpriv *gpriv;
+
+ gpriv = gre->priv;
+ ghp = (GREhdr*)(bp->rp);
+
+ v4tov6(raddr, ghp->src);
+ eproto = nhgets(ghp->eproto);
+ qlock(gre);
+
+ /* Look for a conversation structure for this port and address */
+ c = nil;
+ for(p = gre->conv; *p; p++) {
+ c = *p;
+ if(c->inuse == 0)
+ continue;
+ if(c->rport == eproto &&
+ (gpriv->raw || ipcmp(c->raddr, raddr) == 0)) /* raw mode matches on protocol alone */
+ break;
+ }
+
+ if(*p == nil) { /* ran off the end of the list: nobody wants it */
+ qunlock(gre);
+ freeblist(bp);
+ return;
+ }
+
+ qunlock(gre);
+
+ /*
+ * Trim the packet down to data size
+ */
+ len = nhgets(ghp->len) - GRE_IPONLY;
+ if(len < GRE_IPPLUSGRE){ /* too short to hold a gre header; not counted in lenerr */
+ freeblist(bp);
+ return;
+ }
+ bp = trimblock(bp, GRE_IPONLY, len);
+ if(bp == nil){
+ gpriv->lenerr++;
+ return;
+ }
+
+ /*
+ * Can't delimit packet so pull it all into one block.
+ */
+ if(qlen(c->rq) > 64*1024) /* reader is not keeping up: drop */
+ freeblist(bp);
+ else{
+ bp = concatblock(bp);
+ if(bp == 0)
+ panic("greiput");
+ qpass(c->rq, bp);
+ }
+}
+
+int
+grestats(Proto *gre, char *buf, int len)
+{
+	GREpriv *priv = gre->priv;
+	ulong shortpkts = priv->lenerr;
+
+	/* the only statistic reported is the count greiput keeps when trimblock fails */
+	return snprint(buf, len, "gre: len %lud\n", shortpkts);
+}
+
+char*
+grectl(Conv *c, char **f, int n)
+{
+	GREpriv *priv = c->p->priv;
+	int raw;
+
+	/* exactly one word is accepted: "raw" or "cooked" */
+	if(n != 1)
+		return "unknown control request";
+	if(strcmp(f[0], "raw") == 0)
+		raw = 1;
+	else if(strcmp(f[0], "cooked") == 0)
+		raw = 0;
+	else
+		return "unknown control request";
+	priv->raw = raw;	/* the mode is kept per protocol, not per conversation */
+	return nil;
+}
+
+void
+greinit(Fs *fs)	/* build the "gre" protocol descriptor and register it with fs */
+{
+	Proto *p;
+
+	p = smalloc(sizeof(*p));
+	p->priv = smalloc(sizeof(GREpriv));
+	p->name = "gre";
+	p->ipproto = IP_GREPROTO;
+	p->nc = 64;
+	p->ptclsize = 0;
+	p->create = grecreate;
+	p->connect = greconnect;
+	p->announce = greannounce;
+	p->close = greclose;
+	p->state = grestate;
+	p->ctl = grectl;
+	p->rcv = greiput;
+	p->advise = nil;
+	p->stats = grestats;
+
+	Fsproto(fs, p);
+}
--- /dev/null
+++ b/os/ip.original/icmp.c
@@ -1,0 +1,496 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+typedef struct Icmp { /* ipv4 header (ICMP_IPSIZE bytes) followed by the icmp header and data */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar ipcksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar type; /* one of the Packet Types below */
+ uchar code;
+ uchar cksum[2]; /* checksum over everything after the ip header */
+ uchar icmpid[2]; /* matched against the conversation's lport on input */
+ uchar seq[2];
+ uchar data[1]; /* start of the payload */
+} Icmp;
+
+enum { /* Packet Types */
+ EchoReply = 0,
+ Unreachable = 3,
+ SrcQuench = 4,
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+
+ Maxtype = 18, /* highest type counted in Icmppriv.in[] and out[] */
+};
+
+enum
+{
+ MinAdvise = 24, /* minimum needed for us to advise another protocol */
+};
+
+char *icmpnames[Maxtype+1] = /* type number to name; entries without an initializer stay nil */
+{
+[EchoReply] "EchoReply",
+[Unreachable] "Unreachable",
+[SrcQuench] "SrcQuench",
+[Redirect] "Redirect",
+[EchoRequest] "EchoRequest",
+[TimeExceed] "TimeExceed",
+[InParmProblem] "InParmProblem",
+[Timestamp] "Timestamp",
+[TimestampReply] "TimestampReply",
+[InfoRequest] "InfoRequest",
+[InfoReply] "InfoReply",
+[AddrMaskRequest] "AddrMaskRequest",
+[AddrMaskReply ] "AddrMaskReply ", /* NOTE(review): this name carries a trailing space */
+};
+
+enum {
+ IP_ICMPPROTO = 1,
+ ICMP_IPSIZE = 20,
+ ICMP_HDRSIZE = 8,
+};
+
+enum
+{
+ InMsgs,
+ InErrors,
+ OutMsgs,
+ CsumErrs,
+ LenErrs,
+ HlenErrs,
+
+ Nstats,
+};
+
+static char *statnames[Nstats] =
+{
+[InMsgs] "InMsgs",
+[InErrors] "InErrors",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[LenErrs] "LenErrs",
+[HlenErrs] "HlenErrs",
+};
+
+typedef struct Icmppriv Icmppriv;
+struct Icmppriv /* per-protocol counters, reached through Proto.priv */
+{
+ ulong stats[Nstats]; /* indexed by InMsgs .. HlenErrs */
+
+ /* message counts */
+ ulong in[Maxtype+1]; /* indexed by icmp type */
+ ulong out[Maxtype+1]; /* indexed by icmp type */
+};
+
+static void icmpkick(void *x, Block*);
+
+static void
+icmpcreate(Conv *c) /* set up the queues of a new icmp conversation */
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c); /* 64K read queue */
+ c->wq = qbypass(icmpkick, c); /* writes go straight to icmpkick */
+}
+
+extern char*
+icmpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);	/* mark connected only when the standard connect worked */
+
+	/* nil on success, otherwise the message from Fsstdconnect */
+	return err;
+}
+
+extern int
+icmpstate(Conv *c, char *state, int n)	/* format "Datagram" plus the current read and write queue lengths */
+{
+	int inq = 0, outq = 0;
+
+	USED(c);
+	if(c->rq) inq = qlen(c->rq);
+	if(c->wq) outq = qlen(c->wq);
+	return snprint(state, n, "%s qin %d qout %d", "Datagram", inq, outq);
+}
+
+extern char*
+icmpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		Fsconnected(c, nil);	/* mark connected only when the standard announce worked */
+
+	/* nil on success, otherwise the message from Fsstdannounce */
+	return err;
+}
+
+extern void
+icmpclose(Conv *c) /* close both queues and reset the addresses and local port of c */
+{
+ qclose(c->rq);
+ qclose(c->wq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+}
+
+static void
+icmpkick(void *x, Block *bp) /* write-queue bypass: complete the headers of a user-written packet and send it */
+{
+ Conv *c = x;
+ Icmp *p;
+ Icmppriv *ipriv;
+
+ if(bp == nil)
+ return;
+
+ if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){ /* too short to hold both headers */
+ freeblist(bp);
+ return;
+ }
+ p = (Icmp *)(bp->rp);
+ p->vihl = IP_VER4;
+ ipriv = c->p->priv;
+ if(p->type <= Maxtype) /* only types that fit the counter array are counted */
+ ipriv->out[p->type]++;
+
+ v6tov4(p->dst, c->raddr);
+ v6tov4(p->src, c->laddr);
+ p->proto = IP_ICMPPROTO;
+ hnputs(p->icmpid, c->lport); /* the local port doubles as the icmp id */
+ memset(p->cksum, 0, sizeof(p->cksum)); /* checksum field must be zero while summing */
+ hnputs(p->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+ ipriv->stats[OutMsgs]++;
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+extern void
+icmpttlexceeded(Fs *f, uchar *ia, Block *bp) /* send a TimeExceed message from ia to the source of bp */
+{
+ Block *nbp;
+ Icmp *p, *np;
+
+ p = (Icmp *)bp->rp;
+
+ netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); /* our headers + offender's ip header + 8 bytes */
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ memmove(np->dst, p->src, sizeof(np->dst));
+ v6tov4(np->src, ia);
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8); /* quote the start of the offending packet */
+ np->type = TimeExceed;
+ np->code = 0;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, 0);
+ memset(np->cksum, 0, sizeof(np->cksum));
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+
+}
+
+static void
+icmpunreachable(Fs *f, Block *bp, int code, int seq) /* answer bp with an Unreachable message carrying code and seq */
+{
+ Block *nbp;
+ Icmp *p, *np;
+ int i;
+ uchar addr[IPaddrlen];
+
+ p = (Icmp *)bp->rp;
+
+ /* only do this for unicast sources and destinations */
+ v4tov6(addr, p->dst);
+ i = ipforme(f, addr);
+ if((i&Runi) == 0)
+ return;
+ v4tov6(addr, p->src);
+ i = ipforme(f, addr);
+ if(i != 0 && (i&Runi) == 0) /* a source ipforme does not know (0) is allowed */
+ return;
+
+ netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
+ nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8); /* our headers + offender's ip header + 8 bytes */
+ nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
+ np = (Icmp *)nbp->rp;
+ np->vihl = IP_VER4;
+ memmove(np->dst, p->src, sizeof(np->dst));
+ memmove(np->src, p->dst, sizeof(np->src)); /* reply from the address the packet was sent to */
+ memmove(np->data, bp->rp, ICMP_IPSIZE + 8); /* quote the start of the offending packet */
+ np->type = Unreachable;
+ np->code = code;
+ np->proto = IP_ICMPPROTO;
+ hnputs(np->icmpid, 0);
+ hnputs(np->seq, seq);
+ memset(np->cksum, 0, sizeof(np->cksum));
+ hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
+ ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmpnoconv(Fs *f, Block *bp) /* report that no conversation accepted bp */
+{
+ icmpunreachable(f, bp, 3, 0); /* code 3 is "port unreachable" in unreachcode[] */
+}
+
+extern void
+icmpcantfrag(Fs *f, Block *bp, int mtu) /* report that bp needed fragmenting but could not be */
+{
+ icmpunreachable(f, bp, 4, mtu); /* code 4 is "fragmentation needed and DF set"; mtu goes in the seq field */
+}
+
+static void
+goticmpkt(Proto *icmp, Block *bp)
+{
+ Conv **c, *s;
+ Icmp *p;
+ uchar dst[IPaddrlen];
+ ushort recid;
+ /* Deliver bp to the first conversation whose lport equals the packet's icmpid and whose raddr equals its source; free bp if none matches. */
+ p = (Icmp *) bp->rp;
+ v4tov6(dst, p->src);
+ recid = nhgets(p->icmpid);
+
+ for(c = icmp->conv; *c; c++) { /* conv[] is walked up to its first nil entry */
+ s = *c;
+ if(s->lport == recid)
+ if(ipcmp(s->raddr, dst) == 0){
+ bp = concatblock(bp);
+ if(bp != nil)
+ qpass(s->rq, bp); /* bp now belongs to the conversation's read queue */
+ return;
+ }
+ }
+ freeblist(bp);
+}
+
+static Block *
+mkechoreply(Block *bp)
+{
+ Icmp *q;
+ uchar ip[4];
+ /* Turn the echo request in bp into an echo reply in place: swap src/dst, set the type, redo the ICMP checksum. Returns the same bp. */
+ q = (Icmp *)bp->rp;
+ q->vihl = IP_VER4;
+ memmove(ip, q->src, sizeof(q->dst)); /* ip[] is scratch for the address swap */
+ memmove(q->src, q->dst, sizeof(q->src));
+ memmove(q->dst, ip, sizeof(q->dst));
+ q->type = EchoReply;
+ memset(q->cksum, 0, sizeof(q->cksum)); /* checksum field is zero while the sum is computed */
+ hnputs(q->cksum, ptclcsum(bp, ICMP_IPSIZE, blocklen(bp) - ICMP_IPSIZE));
+
+ return bp;
+}
+
+static char *unreachcode[] = /* advise text indexed by ICMPv4 Unreachable code; icmpiput maps codes above 5 to entry 1 */
+{
+[0] "net unreachable",
+[1] "host unreachable",
+[2] "protocol unreachable",
+[3] "port unreachable",
+[4] "fragmentation needed and DF set",
+[5] "source route failed",
+};
+
+static void
+icmpiput(Proto *icmp, Ipifc*, Block *bp)
+{
+	int n, iplen;
+	Icmp *p;
+	Block *r;
+	Proto *pr;
+	char *msg;
+	char m2[128];
+	Icmppriv *ipriv;
+	/* Receive one ICMPv4 packet: validate lengths and checksum, then answer echo
+	 * requests, pass Unreachable/TimeExceed to the quoted protocol's advise routine,
+	 * or queue the packet for a matching conversation.  bp is handed on or freed on every path. */
+	ipriv = icmp->priv;
+	ipriv->stats[InMsgs]++;
+	p = (Icmp *)bp->rp;
+	netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
+	n = blocklen(bp);
+	if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[HlenErrs]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto raise;
+	}
+	iplen = nhgets(p->length);
+	if(iplen > n || iplen < ICMP_IPSIZE+ICMP_HDRSIZE){	/* too long for the data we hold, or too short for both headers */
+		ipriv->stats[LenErrs]++;
+		ipriv->stats[InErrors]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto raise;
+	}
+	if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
+		ipriv->stats[InErrors]++;
+		ipriv->stats[CsumErrs]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto raise;
+	}
+	if(p->type <= Maxtype)
+		ipriv->in[p->type]++;
+
+	switch(p->type) {
+	case EchoRequest:
+		if (iplen < n)
+			bp = trimblock(bp, 0, iplen);
+		r = mkechoreply(bp);	/* reply reuses the request block */
+		ipriv->out[EchoReply]++;
+		ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case Unreachable:
+		if(p->code > 5)
+			msg = unreachcode[1];
+		else
+			msg = unreachcode[p->code];
+		/* step over our headers so bp starts at the quoted original packet */
+		bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+		if(blocklen(bp) < MinAdvise){
+			ipriv->stats[LenErrs]++;
+			goto raise;
+		}
+		p = (Icmp *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);	/* advise routine takes over bp */
+			return;
+		}
+		/* nobody to advise: restore rp and treat it as an ordinary ICMP packet */
+		bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		goticmpkt(icmp, bp);
+		break;
+	case TimeExceed:
+		if(p->code == 0){
+			snprint(m2, sizeof m2, "ttl exceeded at %V", p->src);
+
+			bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
+			if(blocklen(bp) < MinAdvise){
+				ipriv->stats[LenErrs]++;
+				goto raise;
+			}
+			p = (Icmp *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+		}
+
+		goticmpkt(icmp, bp);
+		break;
+	default:
+		goticmpkt(icmp, bp);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+}
+
+void
+icmpadvise(Proto *icmp, Block *bp, char *msg)
+{
+ Conv **c, *s;
+ Icmp *p;
+ uchar dst[IPaddrlen];
+ ushort recid;
+ /* Advise callback: bp starts at a quoted packet (icmpiput advances rp before calling advise); hang up the conversation that sent it, then free bp. */
+ p = (Icmp *) bp->rp;
+ v4tov6(dst, p->dst); /* match on the quoted packet's destination, not its source */
+ recid = nhgets(p->icmpid);
+
+ for(c = icmp->conv; *c; c++) {
+ s = *c;
+ if(s->lport == recid)
+ if(ipcmp(s->raddr, dst) == 0){
+ qhangup(s->rq, msg);
+ qhangup(s->wq, msg);
+ break;
+ }
+ }
+ freeblist(bp);
+}
+
+int
+icmpstats(Proto *icmp, char *buf, int len)
+{
+	Icmppriv *ipriv = icmp->priv;
+	char *cur = buf, *end = buf + len;
+	int t;
+
+	/* general counters first, one "name: value" line each */
+	for(t = 0; t < Nstats; t++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames[t], ipriv->stats[t]);
+	/* then per-type in/out counts; types without a name are labelled by number */
+	for(t = 0; t <= Maxtype; t++){
+		if(icmpnames[t] == nil){
+			cur = seprint(cur, end, "%d: %lud %lud\n", t, ipriv->in[t], ipriv->out[t]);
+			continue;
+		}
+		cur = seprint(cur, end, "%s: %lud %lud\n", icmpnames[t], ipriv->in[t], ipriv->out[t]);
+	}
+	return cur - buf;
+}
+
+int
+icmpgc(Proto *icmp)
+{
+ return natgc(icmp->ipproto); /* gc hook: delegates entirely to natgc for this protocol number */
+}
+
+void
+icmpinit(Fs *fs)
+{
+ Proto *icmp;
+ /* Allocate the ICMPv4 Proto, fill in its operations table and register it with fs. */
+ icmp = smalloc(sizeof(Proto));
+ icmp->priv = smalloc(sizeof(Icmppriv)); /* per-protocol statistics */
+ icmp->name = "icmp";
+ icmp->connect = icmpconnect;
+ icmp->announce = icmpannounce;
+ icmp->state = icmpstate;
+ icmp->create = icmpcreate;
+ icmp->close = icmpclose;
+ icmp->rcv = icmpiput;
+ icmp->stats = icmpstats;
+ icmp->ctl = nil; /* no protocol-specific ctl requests */
+ icmp->advise = icmpadvise;
+ icmp->gc = icmpgc;
+ icmp->ipproto = IP_ICMPPROTO;
+ icmp->nc = 128;
+ icmp->ptclsize = 0; /* no per-conversation control block */
+
+ Fsproto(fs, icmp);
+}
--- /dev/null
+++ b/os/ip.original/icmp6.c
@@ -1,0 +1,917 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+typedef struct ICMPpkt ICMPpkt;
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+struct ICMPpkt { /* the 8-byte ICMP header as laid out on the wire */
+ uchar type;
+ uchar code;
+ uchar cksum[2];
+ uchar icmpid[2]; /* echo id; icmpna stores its flags byte in icmpid[0] */
+ uchar seq[2]; /* icmppkttoobig6 writes a 4-byte value across icmpid and seq */
+};
+
+struct IPICMP { /* IPv6 header immediately followed by the ICMP header (anonymous members) */
+ Ip6hdr;
+ ICMPpkt;
+};
+
+struct NdiscC /* neighbour discovery message without options: IPICMP plus the target address */
+{
+ IPICMP;
+ uchar target[IPaddrlen];
+};
+
+struct Ndpkt /* NdiscC followed by one link-layer address option */
+{
+ NdiscC;
+ uchar otype; /* option type; icmpns uses SRC_LLADDRESS, icmpna TARGET_LLADDRESS */
+ uchar olen; // length in units of 8 octets(incl type, code),
+ // 1 for IEEE 802 addresses
+ uchar lnaddr[6]; // link-layer address
+};
+
+enum { /* message type numbers; ICMPv4 and ICMPv6 values share this one enum */
+ // ICMPv6 types
+ EchoReply = 0,
+ UnreachableV6 = 1,
+ PacketTooBigV6 = 2,
+ TimeExceedV6 = 3,
+ SrcQuench = 4, /* same value as ParamProblemV6 below */
+ ParamProblemV6 = 4,
+ Redirect = 5,
+ EchoRequest = 8,
+ TimeExceed = 11,
+ InParmProblem = 12,
+ Timestamp = 13,
+ TimestampReply = 14,
+ InfoRequest = 15,
+ InfoReply = 16,
+ AddrMaskRequest = 17,
+ AddrMaskReply = 18,
+ EchoRequestV6 = 128,
+ EchoReplyV6 = 129,
+ RouterSolicit = 133,
+ RouterAdvert = 134,
+ NbrSolicit = 135,
+ NbrAdvert = 136,
+ RedirectV6 = 137,
+
+ Maxtype6 = 137, /* highest type counted in Icmppriv6.in[] and .out[] */
+};
+
+char *icmpnames6[Maxtype6+1] = /* type number -> name for icmpstats6; entries not listed stay nil and are not printed */
+{
+[EchoReply] "EchoReply",
+[UnreachableV6] "UnreachableV6",
+[PacketTooBigV6] "PacketTooBigV6",
+[TimeExceedV6] "TimeExceedV6",
+[SrcQuench] "SrcQuench", /* slot 4 is also ParamProblemV6 */
+[Redirect] "Redirect",
+[EchoRequest] "EchoRequest",
+[TimeExceed] "TimeExceed",
+[InParmProblem] "InParmProblem",
+[Timestamp] "Timestamp",
+[TimestampReply] "TimestampReply",
+[InfoRequest] "InfoRequest",
+[InfoReply] "InfoReply",
+[AddrMaskRequest] "AddrMaskRequest",
+[AddrMaskReply] "AddrMaskReply",
+[EchoRequestV6] "EchoRequestV6",
+[EchoReplyV6] "EchoReplyV6",
+[RouterSolicit] "RouterSolicit",
+[RouterAdvert] "RouterAdvert",
+[NbrSolicit] "NbrSolicit",
+[NbrAdvert] "NbrAdvert",
+[RedirectV6] "RedirectV6",
+};
+
+enum /* indices into Icmppriv6.stats[]; names are in statnames6[] */
+{
+ InMsgs6,
+ InErrors6, /* bumped once at valid()'s err label for every rejected packet */
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6, /* neighbour discovery packet whose ttl is not HOP_LIMIT */
+ IcmpCodeErrs6, /* neighbour discovery packet with non-zero code */
+ TargetErrs6, /* multicast target in a neighbour solicit/advert */
+ OptlenErrs6, /* option with zero length (and one RouterSolicit case in valid()) */
+ AddrmxpErrs6,
+ RouterAddrErrs6, /* router advert whose source is not link-local */
+
+ Nstats6,
+};
+
+static char *statnames6[Nstats6] = /* labels printed by icmpstats6, one per stats[] index */
+{
+[InMsgs6] "InMsgs",
+[InErrors6] "InErrors",
+[OutMsgs6] "OutMsgs",
+[CsumErrs6] "CsumErrs",
+[LenErrs6] "LenErrs",
+[HlenErrs6] "HlenErrs",
+[HoplimErrs6] "HoplimErrs",
+[IcmpCodeErrs6] "IcmpCodeErrs",
+[TargetErrs6] "TargetErrs",
+[OptlenErrs6] "OptlenErrs",
+[AddrmxpErrs6] "AddrmxpErrs",
+[RouterAddrErrs6] "RouterAddrErrs",
+};
+
+typedef struct Icmppriv6 /* per-protocol statistics, allocated in icmp6init */
+{
+ ulong stats[Nstats6];
+
+ /* message counts */
+ ulong in[Maxtype6+1]; /* indexed by message type */
+ ulong out[Maxtype6+1]; /* indexed by message type */
+} Icmppriv6;
+
+typedef struct Icmpcb6 /* per-conversation control block (Proto.ptclsize is sizeof this) */
+{
+ QLock;
+ uchar headers; /* set to 6 by the "headers" ctl; icmpkick6 then takes addresses from the written data */
+} Icmpcb6;
+
+static char *unreachcode[] = /* advise text for UnreachableV6, indexed by the icmp6_* code constants (declared outside this file) */
+{
+[icmp6_no_route] "no route to destination",
+[icmp6_ad_prohib] "comm with destination administratively prohibited",
+[icmp6_unassigned] "icmp unreachable: unassigned error code (2)",
+[icmp6_adr_unreach] "address unreachable",
+[icmp6_port_unreach] "port unreachable",
+[icmp6_unkn_code] "icmp unreachable: unknown code", /* icmpiput6 uses this entry for any code above 4 */
+};
+
+enum {
+ ICMP_USEAD6 = 40, /* bytes icmpkick6 pulls up in "headers" mode: 8 skipped + two IPaddrlen addresses */
+};
+
+enum { /* flag bits of the byte icmpna stores in icmpid[0] */
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
+};
+
+enum { /* neighbour discovery option type codes; only slladd is referenced in this file (valid(), RouterSolicit) */
+ slladd = 1,
+ tlladd = 2,
+ prfinfo = 3,
+ redhdr = 4,
+ mtuopt = 5,
+};
+
+static void icmpkick6(void *x, Block *bp);
+
+static void
+icmpcreate6(Conv *c)
+{
+ c->rq = qopen(64*1024, Qmsg, 0, c); /* read queue: 64K limit, Qmsg mode */
+ c->wq = qbypass(icmpkick6, c); /* writes go straight to icmpkick6 */
+}
+
+static void
+set_cksum(Block *bp)
+{
+ IPICMP *p = (IPICMP *)(bp->rp);
+ /* Compute the ICMPv6 checksum of bp. Leaves vcf zeroed and ttl == ICMPv6, so callers set vcf[0] and the hop limit afterwards. */
+ hnputl(p->vcf, 0); // borrow IP header as pseudoheader
+ hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+ p->proto = 0;
+ p->ttl = ICMPv6; // ttl gets set later
+ hnputs(p->cksum, 0);
+ hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ p->proto = ICMPv6;
+}
+
+static Block *
+newIPICMP(int packetlen)
+{
+	Block *b = allocb(packetlen);
+	/* hand back a zero-filled block whose wp already covers packetlen bytes */
+	memset(b->rp, 0, packetlen);
+	b->wp += packetlen;
+	return b;
+}
+
+void
+icmpadvise6(Proto *icmp, Block *bp, char *msg)
+{
+ Conv **c, *s;
+ IPICMP *p;
+ ushort recid;
+ /* Advise callback: bp starts at a quoted packet (icmpiput6 advances rp before calling advise); hang up the matching conversation, then free bp. */
+ p = (IPICMP *) bp->rp;
+ recid = nhgets(p->icmpid);
+
+ for(c = icmp->conv; *c; c++) {
+ s = *c;
+ if(s->lport == recid)
+ if(ipcmp(s->raddr, p->dst) == 0){ /* match on the quoted packet's destination */
+ qhangup(s->rq, msg);
+ qhangup(s->wq, msg);
+ break;
+ }
+ }
+ freeblist(bp);
+}
+
+static void
+icmpkick6(void *x, Block *bp)
+{
+ Conv *c = x;
+ IPICMP *p;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+ Icmppriv6 *ipriv = c->p->priv;
+ Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+ /* Write-side kick (installed by icmpcreate6 through qbypass): fill in the addresses and checksum of bp and send it with ipoput6. */
+ if(bp == nil)
+ return;
+
+ if(icb->headers==6) {
+ /* get user specified addresses */
+ bp = pullupblock(bp, ICMP_USEAD6);
+ if(bp == nil)
+ return;
+ bp->rp += 8; /* the first 8 bytes of the user header are skipped unread */
+ ipmove(laddr, bp->rp);
+ bp->rp += IPaddrlen;
+ ipmove(raddr, bp->rp);
+ bp->rp += IPaddrlen;
+ bp = padblock(bp, sizeof(Ip6hdr)); /* presumably prepends room for the IPv6 header - confirm against padblock */
+ }
+
+ if(blocklen(bp) < sizeof(IPICMP)){ /* too short to hold both headers: drop */
+ freeblist(bp);
+ return;
+ }
+ p = (IPICMP *)(bp->rp);
+ if(icb->headers == 6) {
+ ipmove(p->dst, raddr);
+ ipmove(p->src, laddr);
+ } else {
+ ipmove(p->dst, c->raddr);
+ ipmove(p->src, c->laddr);
+ hnputs(p->icmpid, c->lport); /* the conversation's local port is used as the ICMP id */
+ }
+
+ set_cksum(bp);
+ p->vcf[0] = 0x06 << 4; /* version nibble; set_cksum zeroed vcf */
+ if(p->type <= Maxtype6)
+ ipriv->out[p->type]++;
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
+}
+
+char*
+icmpctl6(Conv *c, char **argv, int argc)
+{
+	Icmpcb6 *cb = (Icmpcb6*)c->ptcl;
+
+	/*
+	 * the only request understood is the single word "headers",
+	 * which sets the conversation's headers field to 6
+	 */
+	if(argc != 1 || strcmp(argv[0], "headers") != 0)
+		return "unknown control request";
+
+	cb->headers = 6;
+	return nil;
+}
+
+static void
+goticmpkt6(Proto *icmp, Block *bp, int muxkey)
+{
+ Conv **c, *s;
+ IPICMP *p = (IPICMP *)bp->rp;
+ ushort recid;
+ uchar *addr;
+ /* Queue bp on the first matching conversation, else free it. muxkey 0: match icmpid and source; otherwise match lport == muxkey and destination. */
+ if(muxkey == 0) {
+ recid = nhgets(p->icmpid);
+ addr = p->src;
+ }
+ else {
+ recid = muxkey; /* icmpiput6 passes the message type here for router solicit/advert */
+ addr = p->dst;
+ }
+
+ for(c = icmp->conv; *c; c++){
+ s = *c;
+ if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
+ bp = concatblock(bp);
+ if(bp != nil)
+ qpass(s->rq, bp); /* bp now belongs to the conversation's read queue */
+ return;
+ }
+ }
+
+ freeblist(bp);
+}
+
+static Block *
+mkechoreply6(Block *bp)
+{
+ IPICMP *p = (IPICMP *)(bp->rp);
+ uchar addr[IPaddrlen];
+ /* Turn the echo request in bp into an echo reply in place: swap src/dst, set the type, redo the checksum. Returns the same bp. */
+ ipmove(addr, p->src);
+ ipmove(p->src, p->dst);
+ ipmove(p->dst, addr);
+ p->type = EchoReplyV6;
+ set_cksum(bp); /* leaves vcf zeroed and ttl == ICMPv6; this function does not reset them */
+ return bp;
+}
+
+/*
+ * sends out an ICMPv6 neighbor solicitation for targ.
+ * suni == SRC_UNSPEC (source is v6Unspecified, no link-layer option) or SRC_UNI (source is src, option carries mac),
+ * tuni == TARG_MULTI => multicast for address resolution,
+ * and tuni == TARG_UNI => neighbor reachability.
+ */
+
+extern void
+icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
+{
+ Block *nbp;
+ Ndpkt *np;
+ Proto *icmp = f->t2p[ICMPv6];
+ Icmppriv6 *ipriv = icmp->priv;
+
+
+ nbp = newIPICMP(sizeof(Ndpkt)); /* zero-filled, sized for one link-layer option */
+ np = (Ndpkt*) nbp->rp;
+
+
+ if(suni == SRC_UNSPEC)
+ memmove(np->src, v6Unspecified, IPaddrlen);
+ else
+ memmove(np->src, src, IPaddrlen);
+
+ if(tuni == TARG_UNI)
+ memmove(np->dst, targ, IPaddrlen);
+ else
+ ipv62smcast(np->dst, targ); /* destination derived from targ by ipv62smcast */
+
+ np->type = NbrSolicit;
+ np->code = 0;
+ memmove(np->target, targ, IPaddrlen);
+ if(suni != SRC_UNSPEC) {
+ np->otype = SRC_LLADDRESS;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+ }
+ else {
+ int r = sizeof(Ndpkt)-sizeof(NdiscC); /* size of the option we are not sending */
+ nbp->wp -= r;
+ }
+
+ set_cksum(nbp);
+ np = (Ndpkt*) nbp->rp;
+ np->ttl = HOP_LIMIT; /* set after set_cksum, which used ttl as pseudo-header scratch */
+ np->vcf[0] = 0x06 << 4;
+ ipriv->out[NbrSolicit]++;
+ netlog(f, Logicmp, "sending neighbor solicitation %I\n", targ);
+ ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * sends out an ICMPv6 neighbor advertisement for targ from src to dst. flags == RSO flags, stored in icmpid[0]; mac goes in the target link-layer option.
+ */
+extern void
+icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
+{
+ Block *nbp;
+ Ndpkt *np;
+ Proto *icmp = f->t2p[ICMPv6];
+ Icmppriv6 *ipriv = icmp->priv;
+
+ nbp = newIPICMP(sizeof(Ndpkt)); /* zero-filled */
+ np = (Ndpkt*) nbp->rp;
+
+ memmove(np->src, src, IPaddrlen); /* all address arguments are copied, so callers may free their packet afterwards */
+ memmove(np->dst, dst, IPaddrlen);
+
+ np->type = NbrAdvert;
+ np->code = 0;
+ np->icmpid[0] = flags; /* Rflag/Sflag/Oflag live in this byte */
+ memmove(np->target, targ, IPaddrlen);
+
+ np->otype = TARGET_LLADDRESS;
+ np->olen = 1; /* one 8-octet unit, as in icmpns */
+ memmove(np->lnaddr, mac, sizeof(np->lnaddr));
+
+ set_cksum(nbp);
+ np = (Ndpkt*) nbp->rp;
+ np->ttl = HOP_LIMIT; /* set after set_cksum, which used ttl as pseudo-header scratch */
+ np->vcf[0] = 0x06 << 4;
+ ipriv->out[NbrAdvert]++;
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+	/* Answer the packet in bp with an ICMPv6 Unreachable carrying code.  free != 0: loop the reply back through ipiput6 and free bp; free == 0: send it with ipoput6 and leave bp to the caller. */
+	p = (Ip6hdr *) bp->rp;
+	/* ifc's read lock is taken below and released exactly once on every path after that */
+	if(isv6mcast(p->src)){
+		/* nothing is locked yet.  NOTE(review): as before, bp is freed here even when free == 0 - confirm callers expect that */
+		freeblist(bp);
+		return;
+	}
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	rlock(ifc);
+	if(!ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
+		runlock(ifc);
+		freeblist(nbp);
+		if(free)
+			freeblist(bp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = UnreachableV6;
+	np->code = code;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));	/* quote as much of the original as fits */
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[UnreachableV6]++;
+
+	if(!free){
+		/* the reply is complete and ifc is not used again, so drop the lock before sending */
+		runlock(ifc);
+		ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+		return;
+	}
+	ipiput6(f, ifc, nbp);
+	runlock(ifc);
+	freeblist(bp);
+}
+
+extern void
+icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+	/* Send an ICMPv6 TimeExceed (code 0) to the source of the packet in bp, quoting as much of it as fits in v6MINTU.  bp is only read. */
+	p = (Ip6hdr *) bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	/* without a local source address there is nothing to send: release the reply block */
+	if(!ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
+		freeblist(nbp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = TimeExceedV6;
+	np->code = 0;
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[TimeExceedV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+extern void
+icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	Block *nbp;
+	IPICMP *np;
+	Ip6hdr *p;
+	int osz = BLEN(bp);
+	int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
+	Proto *icmp = f->t2p[ICMPv6];
+	Icmppriv6 *ipriv = icmp->priv;
+	/* Send an ICMPv6 PacketTooBig to the source of the packet in bp, reporting ifc->maxtu - ifc->m->hsize.  bp is only read. */
+	p = (Ip6hdr *) bp->rp;
+
+	if(isv6mcast(p->src))
+		return;
+
+	nbp = newIPICMP(sz);
+	np = (IPICMP *) nbp->rp;
+
+	/* without a local source address there is nothing to send: release the reply block */
+	if(!ipv6anylocal(ifc, np->src)) {
+		netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
+		freeblist(nbp);
+		return;
+	}
+	netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
+
+	memmove(np->dst, p->src, IPaddrlen);
+	np->type = PacketTooBigV6;
+	np->code = 0;
+	hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);	/* 4-byte value spans icmpid and seq */
+	memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+	set_cksum(nbp);
+	np->ttl = HOP_LIMIT;
+	np->vcf[0] = 0x06 << 4;
+	ipriv->out[PacketTooBigV6]++;
+	ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/*
+ * RFC 2461, pages 39-40, pages 57-58.  Returns 1 if the ICMPv6 packet in bp passes the length, checksum and neighbour-discovery checks, else counts the error in ipriv and returns 0.  bp is never freed here.
+ */
+static int
+valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
+	int sz, osz, unsp, n, ttl, iplen;
+	int pktsz = BLEN(bp);
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *) packet;
+	Ndpkt *np;
+
+	USED(ifc);
+	n = blocklen(bp);
+	if(n < sizeof(IPICMP)) {
+		ipriv->stats[HlenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+		goto err;
+	}
+
+	iplen = nhgets(p->ploadlen);
+	if(iplen > n-IPV6HDR_LEN) {
+		ipriv->stats[LenErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
+		goto err;
+	}
+
+	// Rather than construct explicit pseudoheader, overwrite IPv6 header
+	if(p->proto != ICMPv6) {
+		// This code assumes no extension headers!!!
+		netlog(icmp->f, Logicmp, "icmp error: extension header\n");
+		goto err;
+	}
+	memset(packet, 0, 4);	/* first 4 header bytes are zeroed for the sum and not restored */
+	ttl = p->ttl;
+	p->ttl = p->proto;
+	p->proto = 0;
+	if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+		ipriv->stats[CsumErrs6]++;
+		netlog(icmp->f, Logicmp, "icmp checksum error\n");
+		goto err;
+	}
+	p->proto = p->ttl;
+	p->ttl = ttl;
+
+	/* additional tests for some pkt types */
+	if( (p->type == NbrSolicit) ||
+	    (p->type == NbrAdvert) ||
+	    (p->type == RouterAdvert) ||
+	    (p->type == RouterSolicit) ||
+	    (p->type == RedirectV6) ) {
+
+		if(p->ttl != HOP_LIMIT) {
+			ipriv->stats[HoplimErrs6]++;
+			goto err;
+		}
+		if(p->code != 0) {
+			ipriv->stats[IcmpCodeErrs6]++;
+			goto err;
+		}
+
+		switch (p->type) {
+		case NbrSolicit:
+		case NbrAdvert:
+			np = (Ndpkt*) p;	/* NOTE(review): only sizeof(IPICMP) bytes were length-checked above */
+			if(isv6mcast(np->target)) {
+				ipriv->stats[TargetErrs6]++;
+				goto err;
+			}
+			if(optexsts(np) && (np->olen == 0)) {
+				ipriv->stats[OptlenErrs6]++;
+				goto err;
+			}
+
+			if(p->type == NbrSolicit) {
+				if(ipcmp(np->src, v6Unspecified) == 0) {
+					if(!issmcast(np->dst) || optexsts(np)) {
+						ipriv->stats[AddrmxpErrs6]++;
+						goto err;
+					}
+				}
+			}
+			/* the flags are in icmpid[0] (see icmpna), so test that byte rather than the 16-bit id */
+			if(p->type == NbrAdvert) {
+				if(isv6mcast(np->dst) && (np->icmpid[0] & Sflag)){
+					ipriv->stats[AddrmxpErrs6]++;
+					goto err;
+				}
+			}
+			break;
+
+		case RouterAdvert:
+			if(pktsz - sizeof(Ip6hdr) < 16) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			if(!islinklocal(p->src)) {
+				ipriv->stats[RouterAddrErrs6]++;
+				goto err;
+			}
+			sz = sizeof(IPICMP) + 8;
+			while ((sz+1) < pktsz) {	/* walk the options; each length is in 8-octet units */
+				osz = *(packet+sz+1);
+				if(osz <= 0) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RouterSolicit:
+			if(pktsz - sizeof(Ip6hdr) < 8) {
+				ipriv->stats[HlenErrs6]++;
+				goto err;
+			}
+			unsp = (ipcmp(p->src, v6Unspecified) == 0);
+			sz = sizeof(IPICMP) + 8;	/* NOTE(review): same start offset as RouterAdvert - confirm it is right for solicitations */
+			while ((sz+1) < pktsz) {
+				osz = *(packet+sz+1);
+				if((osz <= 0) ||
+				    (unsp && (*(packet+sz) == slladd)) ) {
+					ipriv->stats[OptlenErrs6]++;
+					goto err;
+				}
+				sz += 8*osz;
+			}
+			break;
+
+		case RedirectV6:
+			//to be filled in
+			break;
+
+		default:
+			goto err;
+		}
+	}
+
+	return 1;
+
+err:
+	ipriv->stats[InErrors6]++;
+	return 0;
+}
+
+static int
+targettype(Fs *f, Ipifc *ifc, uchar *target)
+{
+	Iplifc *l;
+	int kind;
+
+	/* classify target against ifc while holding its read lock; 0 means no match */
+	rlock(ifc);
+	kind = 0;
+	if(ipproxyifc(f, ifc, target))
+		kind = t_uniproxy;
+	else
+		for(l = ifc->lifc; l != nil; l = l->next){
+			if(ipcmp(l->local, target) != 0)
+				continue;
+			/* one of the interface's own addresses: tentative or not */
+			kind = l->tentative ? t_unitent : t_unirany;
+			break;
+		}
+	runlock(ifc);
+
+	return kind;
+}
+
+static void
+icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+{
+	uchar *packet = bp->rp;
+	IPICMP *p = (IPICMP *)packet;
+	Icmppriv6 *ipriv = icmp->priv;
+	Block *r;
+	Proto *pr;
+	char *msg, m2[128];
+	Ndpkt* np;
+	uchar pktflags;
+	uchar lsrc[IPaddrlen];
+	int refresh = 1;
+	Iplifc *lifc;
+	/* Receive one ICMPv6 packet: validate it, then dispatch on type.  bp is handed on or freed on every path. */
+	if(!valid(icmp, ipifc, bp, ipriv))
+		goto raise;
+
+	if(p->type <= Maxtype6)
+		ipriv->in[p->type]++;
+	else
+		goto raise;
+
+	switch(p->type) {
+	case EchoRequestV6:
+		r = mkechoreply6(bp);	/* reply reuses the request block */
+		ipriv->out[EchoReplyV6]++;	/* count under the type actually sent, as icmpkick6 does */
+		ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
+		break;
+
+	case UnreachableV6:
+		if(p->code > 4)
+			msg = unreachcode[icmp6_unkn_code];
+		else
+			msg = unreachcode[p->code];
+		/* step over our headers so bp starts at the quoted original packet */
+		bp->rp += sizeof(IPICMP);
+		if(blocklen(bp) < 8){
+			ipriv->stats[LenErrs6]++;
+			goto raise;
+		}
+		p = (IPICMP *)bp->rp;
+		pr = Fsrcvpcolx(icmp->f, p->proto);
+		if(pr != nil && pr->advise != nil) {
+			(*pr->advise)(pr, bp, msg);	/* advise routine takes over bp */
+			return;
+		}
+		/* nobody to advise: restore rp and treat it as an ordinary ICMP packet */
+		bp->rp -= sizeof(IPICMP);
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case TimeExceedV6:
+		if(p->code == 0){
+			snprint(m2, sizeof m2, "ttl exceeded at %I", p->src);
+
+			bp->rp += sizeof(IPICMP);
+			if(blocklen(bp) < 8){
+				ipriv->stats[LenErrs6]++;
+				goto raise;
+			}
+			p = (IPICMP *)bp->rp;
+			pr = Fsrcvpcolx(icmp->f, p->proto);
+			if(pr != nil && pr->advise != nil) {
+				(*pr->advise)(pr, bp, m2);
+				return;
+			}
+			bp->rp -= sizeof(IPICMP);
+		}
+
+		goticmpkt6(icmp, bp, 0);
+		break;
+
+	case RouterAdvert:
+	case RouterSolicit:
+		/*
+		 * router messages are demultiplexed by message type rather than
+		 * by icmpid: goticmpkt6 then matches lport against p->type and
+		 * raddr against the packet's destination
+		 */
+		goticmpkt6(icmp, bp, p->type);
+		break;
+
+	case NbrSolicit:
+		np = (Ndpkt*) p;
+		pktflags = 0;
+		switch (targettype(icmp->f, ipifc, np->target)) {
+		case t_unirany:
+			pktflags |= Oflag;
+			/* fall through */
+
+		case t_uniproxy:
+			if(ipcmp(np->src, v6Unspecified) != 0) {
+				arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+				pktflags |= Sflag;
+			}
+			if(ipv6local(ipifc, lsrc)) {
+				icmpna(icmp->f, lsrc,
+					(ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
+					np->target, ipifc->mac, pktflags);
+			}
+			/* icmpna copies what it needs into its own block, so the request is ours to free either way */
+			freeblist(bp);
+			break;
+
+		case t_unitent:
+			/* not clear what needs to be done. send up
+			 * an icmp mesg saying don't use this address? */
+
+		default:
+			freeblist(bp);
+		}
+
+		break;
+
+	case NbrAdvert:
+		np = (Ndpkt*) p;
+
+		/* if the target address matches one of the local interface
+		 * address and the local interface address has tentative bit set,
+		 * then insert into ARP table. this is so the duplication address
+		 * detection part of ipconfig can discover duplication through
+		 * the arp table
+		 */
+		lifc = iplocalonifc(ipifc, np->target);
+		if(lifc && lifc->tentative)
+			refresh = 0;
+		arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+		freeblist(bp);
+		break;
+
+	case PacketTooBigV6:
+		/* no special handling: delivered like any other type */
+	default:
+		goticmpkt6(icmp, bp, 0);
+		break;
+	}
+	return;
+
+raise:
+	freeblist(bp);
+
+}
+
+int
+icmpstats6(Proto *icmp6, char *buf, int len)
+{
+	Icmppriv6 *ipriv = icmp6->priv;
+	char *cur = buf, *end = buf + len;
+	int t;
+
+	/* general counters, one "name: value" line each */
+	for(t = 0; t < Nstats6; t++)
+		cur = seprint(cur, end, "%s: %lud\n", statnames6[t], ipriv->stats[t]);
+	/*
+	 * per-type in/out counts; types without a name in icmpnames6
+	 * are skipped rather than printed by number
+	 */
+	for(t = 0; t <= Maxtype6; t++){
+		if(icmpnames6[t] == nil)
+			continue;
+		cur = seprint(cur, end, "%s: %lud %lud\n", icmpnames6[t], ipriv->in[t], ipriv->out[t]);
+	}
+	return cur - buf;
+}
+
+
+// need to import from icmp.c
+extern int icmpstate(Conv *c, char *state, int n);
+extern char* icmpannounce(Conv *c, char **argv, int argc);
+extern char* icmpconnect(Conv *c, char **argv, int argc);
+extern void icmpclose(Conv *c);
+
+void
+icmp6init(Fs *fs)
+{
+ Proto *icmp6 = smalloc(sizeof(Proto));
+ /* Fill in the ICMPv6 operations table and register it with fs; connect/announce/state/close are the ICMPv4 routines declared extern just above. */
+ icmp6->priv = smalloc(sizeof(Icmppriv6)); /* per-protocol statistics */
+ icmp6->name = "icmpv6";
+ icmp6->connect = icmpconnect;
+ icmp6->announce = icmpannounce;
+ icmp6->state = icmpstate;
+ icmp6->create = icmpcreate6;
+ icmp6->close = icmpclose;
+ icmp6->rcv = icmpiput6;
+ icmp6->stats = icmpstats6;
+ icmp6->ctl = icmpctl6;
+ icmp6->advise = icmpadvise6;
+ icmp6->gc = nil;
+ icmp6->ipproto = ICMPv6;
+ icmp6->nc = 16;
+ icmp6->ptclsize = sizeof(Icmpcb6); /* per-conversation control block read by icmpkick6 and icmpctl6 */
+
+ Fsproto(fs, icmp6);
+}
+
--- /dev/null
+++ b/os/ip.original/igmp.c
@@ -1,0 +1,291 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum
+{
+ IGMP_IPHDRSIZE = 20, /* size of ip header */
+ IGMP_HDRSIZE = 8, /* size of IGMP header */
+ IP_IGMPPROTO = 2,
+
+ IGMPquery = 1, /* low nibble of IGMPpkt.vertype */
+ IGMPreport = 2, /* low nibble of IGMPpkt.vertype */
+
+ MSPTICK = 100, /* igmpproc's tsleep interval between passes */
+ MAXTIMEOUT = 10000/MSPTICK, /* at most 10 secs for a response */
+};
+
+typedef struct IGMPpkt IGMPpkt;
+struct IGMPpkt
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar len[2]; /* packet length (including headers) */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* checksum of ip portion */
+ uchar src[IPaddrlen]; /* Ip source */ /* NOTE(review): checksum code uses IGMP_IPHDRSIZE (20); that only matches this layout if IPaddrlen is 4 - confirm */
+ uchar dst[IPaddrlen]; /* Ip destination */
+
+ /* igmp header */
+ uchar vertype; /* version and type */ /* version in the high nibble, type in the low nibble */
+ uchar unused;
+ uchar igmpcksum[2]; /* checksum of igmp portion */
+ uchar group[IPaddrlen]; /* multicast group */
+};
+
+/*
+ * lists for group reports
+ */
+typedef struct IGMPrep IGMPrep;
+struct IGMPrep
+{
+ IGMPrep *next; /* next entry on igmpalloc.reports */
+ Media *m; /* medium the reports are for */
+ int ticks; /* incremented by igmpproc on each pass; compared with each group's timeout */
+ Multicast *multi; /* groups still to be reported on this medium */
+};
+
+typedef struct IGMP IGMP;
+struct IGMP
+{
+ Lock; /* taken around every access to reports in igmpproc and igmpiput */
+ Rendez r; /* igmpproc sleeps here until reports is non-empty */
+ IGMPrep *reports; /* pending report lists, one per medium */
+};
+
+IGMP igmpalloc; /* the single global report state */
+
+ Proto igmp; /* filled in and registered by igmpinit */
+extern Fs fs; /* not referenced in the visible part of this file */
+
+static struct Stats /* message counters printed by igmpstats */
+{
+ ulong inqueries;
+ ulong outqueries; /* printed but never incremented in the visible code */
+ ulong inreports;
+ ulong outreports;
+} stats;
+
+void
+igmpsendreport(Media *m, uchar *addr)
+{
+	IGMPpkt *p;
+	Block *bp;
+	/* Send one IGMP report for group addr on medium m, addressed to Ipallsys with a TTL of 1. */
+	bp = allocb(sizeof(IGMPpkt));
+	if(bp == nil)
+		return;
+	p = (IGMPpkt*)bp->wp;
+	bp->wp += sizeof(IGMPpkt);
+	memset(bp->rp, 0, sizeof(IGMPpkt));
+	p->vihl = IP_VER4;	/* must follow the memset, which would otherwise wipe it */
+	hnputl(p->src, Mediagetaddr(m));
+	hnputl(p->dst, Ipallsys);
+	p->vertype = (1<<4) | IGMPreport;
+	p->proto = IP_IGMPPROTO;
+	memmove(p->group, addr, IPaddrlen);
+	hnputs(p->igmpcksum, ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE));
+	netlog(Logigmp, "igmpreport %I\n", p->group);
+	stats.outreports++;
+	ipoput4(bp, 0, 1, DFLTTOS, nil);	/* TTL of 1 */
+}
+
+static int
+isreport(void *a)
+{
+ USED(a);
+ return igmpalloc.reports != 0; /* sleep condition for igmpproc: true once any report list is queued */
+}
+
+
+void
+igmpproc(void *a)
+{
+ IGMPrep *rp, **lrp;
+ Multicast *mp, **lmp;
+ uchar ip[IPaddrlen];
+ /* Kernel process body: whenever report lists are queued, send each group's report once its timeout has been reached, one report per pass. */
+ USED(a);
+
+ for(;;){
+ sleep(&igmpalloc.r, isreport, 0);
+ for(;;){
+ lock(&igmpalloc);
+
+ if(igmpalloc.reports == nil)
+ break; /* leaves with the lock held; it is released after the inner loop */
+
+ /* look for a single report */
+ lrp = &igmpalloc.reports;
+ mp = nil;
+ for(rp = *lrp; rp; rp = *lrp){
+ rp->ticks++;
+ lmp = &rp->multi;
+ for(mp = *lmp; mp; mp = *lmp){
+ if(rp->ticks >= mp->timeout){
+ *lmp = mp->next; /* unlink the group that is due */
+ break;
+ }
+ lmp = &mp->next;
+ }
+ if(mp != nil)
+ break;
+
+ if(rp->multi != nil){
+ lrp = &rp->next;
+ continue;
+ } else {
+ *lrp = rp->next; /* nothing left to report for this medium */
+ free(rp);
+ }
+ }
+ unlock(&igmpalloc);
+
+ if(mp){
+ /* do a single report and try again */
+ hnputl(ip, mp->addr);
+ igmpsendreport(rp->m, ip); /* NOTE(review): rp is read here after the lock was dropped */
+ free(mp);
+ continue;
+ }
+
+ tsleep(&up->sleep, return0, 0, MSPTICK);
+ }
+ unlock(&igmpalloc);
+ }
+
+}
+
+void
+igmpiput(Media *m, Ipifc *, Block *bp)
+{
+ int n;
+ IGMPpkt *ghp;
+ Ipaddr group;
+ IGMPrep *rp, **lrp;
+ Multicast *mp, **lmp;
+ /* Receive one IGMP packet on medium m: a query starts a report list for m, a report cancels our pending report for that group. bp is always freed. */
+ ghp = (IGMPpkt*)(bp->rp);
+ netlog(Logigmp, "igmpiput: %d %I\n", ghp->vertype, ghp->group);
+
+ n = blocklen(bp);
+ if(n < IGMP_IPHDRSIZE+IGMP_HDRSIZE){
+ netlog(Logigmp, "igmpiput: bad len\n");
+ goto error;
+ }
+ if((ghp->vertype>>4) != 1){ /* only version 1 is accepted */
+ netlog(Logigmp, "igmpiput: bad igmp type\n");
+ goto error;
+ }
+ if(ptclcsum(bp, IGMP_IPHDRSIZE, IGMP_HDRSIZE)){
+ netlog(Logigmp, "igmpiput: checksum error %I\n", ghp->src);
+ goto error;
+ }
+
+ group = nhgetl(ghp->group);
+
+ lock(&igmpalloc); /* every break out of the switch below reaches the unlock after it */
+ switch(ghp->vertype & 0xf){
+ case IGMPquery:
+ /*
+ * start reporting groups that we're a member of.
+ */
+ stats.inqueries++;
+ for(rp = igmpalloc.reports; rp; rp = rp->next)
+ if(rp->m == m)
+ break;
+ if(rp != nil)
+ break; /* already reporting */
+
+ mp = Mediacopymulti(m);
+ if(mp == nil)
+ break;
+
+ rp = malloc(sizeof(*rp));
+ if(rp == nil)
+ break; /* NOTE(review): the list returned by Mediacopymulti is not released on this path */
+
+ rp->m = m;
+ rp->multi = mp;
+ rp->ticks = 0;
+ for(; mp; mp = mp->next)
+ mp->timeout = nrand(MAXTIMEOUT); /* random delay per group, in igmpproc passes */
+ rp->next = igmpalloc.reports;
+ igmpalloc.reports = rp;
+
+ wakeup(&igmpalloc.r);
+
+ break;
+ case IGMPreport:
+ /*
+ * find report list for this medium
+ */
+ stats.inreports++;
+ lrp = &igmpalloc.reports;
+ for(rp = *lrp; rp; rp = *lrp){
+ if(rp->m == m)
+ break;
+ lrp = &rp->next;
+ }
+ if(rp == nil)
+ break;
+
+ /*
+ * if someone else has reported a group,
+ * we don't have to.
+ */
+ lmp = &rp->multi;
+ for(mp = *lmp; mp; mp = *lmp){
+ if(mp->addr == group){
+ *lmp = mp->next;
+ free(mp);
+ break;
+ }
+ lmp = &mp->next;
+ }
+
+ break;
+ }
+ unlock(&igmpalloc);
+ /* the normal path falls through into the label: bp is freed in every case */
+error:
+ freeb(bp);
+}
+
+int
+igmpstats(char *buf, int len)
+{
+ return snprint(buf, len, "\trcvd %d %d\n\tsent %d %d\n", /* queries then reports, received then sent */
+ stats.inqueries, stats.inreports,
+ stats.outqueries, stats.outreports);
+}
+
+void
+igmpinit(Fs *fs)
+{
+ igmp.name = "igmp"; /* fills in the global Proto; only rcv and stats have handlers */
+ igmp.connect = nil;
+ igmp.announce = nil;
+ igmp.ctl = nil;
+ igmp.state = nil;
+ igmp.close = nil;
+ igmp.rcv = igmpiput;
+ igmp.stats = igmpstats;
+ igmp.ipproto = IP_IGMPPROTO;
+ igmp.nc = 0;
+ igmp.ptclsize = 0;
+
+ igmpreportfn = igmpsendreport; /* igmpreportfn is declared outside this file */
+ kproc("igmpproc", igmpproc, 0, 0); /* start the reporting process */
+
+ Fsproto(fs, &igmp);
+}
--- /dev/null
+++ b/os/ip.original/ihbootp.c
@@ -1,0 +1,323 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "kernel.h"
+#include "ip.h"
+
+static ulong fsip; /* file server, from vendor option 11 (parsevend) */
+static ulong auip; /* authentication server, from vendor option 8 */
+static ulong gwip; /* gateway, from vendor option 3 */
+static ulong ipmask; /* subnet mask, from vendor option 1 */
+static ulong ipaddr; /* our address, from the reply's yiaddr (rcvbootp) */
+static ulong dnsip; /* DNS server, from vendor option 6 */
+
+enum /* values for Bootp.op; only Bootrequest is used in this file */
+{
+ Bootrequest = 1,
+ Bootreply = 2,
+};
+
+typedef struct Bootp
+{
+ /* udp.c oldheader */
+ uchar raddr[IPaddrlen]; /* rbootp sets this to IPv4bcast */
+ uchar laddr[IPaddrlen];
+ uchar rport[2]; /* rbootp sets this to 67 */
+ uchar lport[2];
+ /* bootp itself */
+ uchar op; /* opcode */
+ uchar htype; /* hardware type */
+ uchar hlen; /* hardware address len */
+ uchar hops; /* hops */
+ uchar xid[4]; /* a random number */
+ uchar secs[2]; /* elapsed snce client started booting */
+ uchar pad[2];
+ uchar ciaddr[4]; /* client IP address (client tells server) */
+ uchar yiaddr[4]; /* client IP address (server tells client) */
+ uchar siaddr[4]; /* server IP address */
+ uchar giaddr[4]; /* gateway IP address */
+ uchar chaddr[16]; /* client hardware address */
+ uchar sname[64]; /* server host name (optional) */
+ uchar file[128]; /* boot file name */
+ uchar vend[128]; /* vendor-specific goo */
+} Bootp;
+
+/*
+ * bootp returns:
+ *
+ * "fsip d.d.d.d
+ * auip d.d.d.d
+ * gwip d.d.d.d
+ * ipmask d.d.d.d
+ * ipaddr d.d.d.d
+ * dnsip d.d.d.d"
+ *
+ * where d.d.d.d is the IP address in dotted decimal notation, and each
+ * address is followed by a newline.
+ */
+
+static Bootp req; /* the request rbootp broadcasts; rcvbootp matches replies against req.chaddr */
+static Proc* rcvprocp; /* the rcvbootp process while it runs, so rbootp can postnote it */
+static int recv; /* set by rcvbootp when it finishes; ends rbootp's send loop */
+static int done; /* set by rbootp to stop rcvbootp's read loop */
+static Rendez bootpr; /* rbootp sleeps here between sends; rcvbootp wakes it */
+static char rcvbuf[512]; /* receive buffer for replies */
+static int bootpdebug; /* non-zero enables the debug prints in parsevend */
+
+/*
+ * Parse the vendor specific fields according to RFC 1084.
+ * We are overloading the "cookie server" to be the Inferno
+ * authentication server and the "resource location server"
+ * to be the Inferno file server.  Results go to the file-static
+ * ipmask, gwip, dnsip, auip and fsip; parsing never reads past the vend[] field.
+ * If the vendor specific field is formatted properly, it
+ * will begin with the four bytes 99.130.83.99 and end with
+ * an 0xFF byte.
+ */
+static void
+parsevend(uchar* vend)
+{
+	uchar *end = vend + sizeof(((Bootp*)0)->vend);	/* one past the vendor area */
+	if ((vend[0] != 99) || (vend[1] != 130) ||	/* must start with 99.130.83.99 to be compliant */
+	    (vend[2] != 83) || (vend[3] != 99)){
+		if(bootpdebug)
+			print("bad bootp vendor field: %.2x%.2x%.2x%.2x", vend[0], vend[1], vend[2], vend[3]);
+		return;
+	}
+
+	/* Skip over the magic cookie */
+	vend += 4;
+	/* stop at a terminator, or as soon as a field's header or payload would not fit inside vend[] */
+	while (vend+2 <= end && vend[0] != 0 && vend[0] != 0xFF && vend+2+vend[1] <= end) {
+		if(bootpdebug){
+			int i;
+			print("vend %d [%d]", vend[0], vend[1]);
+			for(i=0; i<vend[1]; i++)
+				print(" %2.2x", vend[i+2]);	/* payload starts after the tag and length bytes */
+			print("\n");
+		}
+		switch (vend[0]) {
+		case 1:	/* Subnet mask field */
+			/* There must be only one subnet mask */
+			if (vend[1] != 4)
+				return;
+
+			ipmask = (vend[2]<<24)|
+				 (vend[3]<<16)|
+				 (vend[4]<<8)|
+				  vend[5];
+			break;
+
+		case 3:	/* Gateway/router field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			gwip = (vend[2]<<24)|
+			       (vend[3]<<16)|
+			       (vend[4]<<8)|
+				vend[5];
+			break;
+
+		case 6:	/* DNS server */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			dnsip = (vend[2]<<24)|
+				(vend[3]<<16)|
+				(vend[4]<<8)|
+				 vend[5];
+			break;
+
+		case 8:	/* "Cookie server" (auth server) field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			auip = (vend[2]<<24)|
+			       (vend[3]<<16)|
+			       (vend[4]<<8)|
+				vend[5];
+			break;
+
+		case 11:	/* "Resource loc server" (file server) field */
+			/* We are only concerned with first address */
+			if (vend[1] < 4)
+				break;
+
+			fsip = (vend[2]<<24)|
+			       (vend[3]<<16)|
+			       (vend[4]<<8)|
+				vend[5];
+			break;
+
+		default:	/* Ignore everything else */
+			break;
+		}
+
+		/* Skip over the field */
+		vend += vend[1] + 2;
+	}
+}
+
+static void
+rcvbootp(void *a)
+{
+ int n, fd;
+ Bootp *rp;
+ /* Kernel process body: read replies from the descriptor passed in a until one matches our request, record the results, then wake rbootp and exit. */
+ if(waserror())
+ pexit("", 0);
+ rcvprocp = up; /* store for postnote below */
+ fd = (int)a;
+ while(done == 0) {
+ n = kread(fd, rcvbuf, sizeof(rcvbuf));
+ if(n <= 0)
+ break;
+ rp = (Bootp*)rcvbuf; /* n is not compared with sizeof(Bootp) before the fields are read */
+ if (memcmp(req.chaddr, rp->chaddr, 6) == 0 &&
+ rp->htype == 1 && rp->hlen == 6) {
+ ipaddr = (rp->yiaddr[0]<<24)|
+ (rp->yiaddr[1]<<16)|
+ (rp->yiaddr[2]<<8)|
+ rp->yiaddr[3];
+ parsevend(rp->vend);
+ break;
+ }
+ }
+ poperror();
+ rcvprocp = nil;
+
+ recv = 1; /* set whether or not a matching reply was seen */
+ wakeup(&bootpr);
+ pexit("", 0);
+}
+
+static char*
+rbootp(Ipifc *ifc)
+{
+ int cfd, dfd, tries, n;
+ char ia[5+3*16], im[16], *av[3];
+ uchar nipaddr[4], ngwip[4], nipmask[4];
+ char dir[Maxpath];
+ static uchar vend_rfc1048[] = { 99, 130, 83, 99 };
+ /* Run a BOOTP exchange on ifc and configure it from the reply. Returns nil, or an error string if the UDP setup fails. */
+ av[1] = "0.0.0.0"; /* av[0] is never set in this function */
+ av[2] = "0.0.0.0";
+ ipifcadd(ifc, av, 3, 0, nil); /* temporary 0.0.0.0 address for the exchange; removed again below */
+
+ cfd = kannounce("udp!*!68", dir);
+ if(cfd < 0)
+ return "bootp announce failed";
+ strcat(dir, "/data");
+ if(kwrite(cfd, "headers", 7) < 0){
+ kclose(cfd);
+ return "bootp ctl headers failed";
+ }
+ kwrite(cfd, "oldheaders", 10); /* result ignored */
+ dfd = kopen(dir, ORDWR);
+ if(dfd < 0){
+ kclose(cfd);
+ return "bootp open data failed";
+ }
+ kclose(cfd);
+
+ /* create request */
+ memset(&req, 0, sizeof(req));
+ ipmove(req.raddr, IPv4bcast);
+ hnputs(req.rport, 67);
+ req.op = Bootrequest;
+ req.htype = 1; /* ethernet (all we know) */
+ req.hlen = 6; /* ethernet (all we know) */
+
+ /* Hardware MAC address */
+ memmove(req.chaddr, ifc->mac, 6);
+ /* Fill in the local IP address if we know it */
+ ipv4local(ifc, req.ciaddr);
+ memset(req.file, 0, sizeof(req.file));
+ memmove(req.vend, vend_rfc1048, 4);
+
+ done = 0;
+ recv = 0;
+
+ kproc("rcvbootp", rcvbootp, (void*)dfd, KPDUPFDG); /* receiver runs as its own process and reads dfd */
+
+ /*
+ * broadcast bootp's till we get a reply,
+ * or fixed number of tries
+ */
+ tries = 0;
+ while(recv == 0) {
+ if(kwrite(dfd, &req, sizeof(req)) < 0)
+ print("bootp: write: %r");
+
+ tsleep(&bootpr, return0, 0, 1000); /* up to 1000 between sends; rcvbootp's wakeup can end it early */
+ if(++tries > 10) {
+ print("bootp: timed out\n");
+ break;
+ }
+ }
+ kclose(dfd);
+ done = 1;
+ if(rcvprocp != nil){
+ postnote(rcvprocp, 1, "timeout", 0); /* receiver is still running: interrupt it */
+ rcvprocp = nil;
+ }
+
+ av[1] = "0.0.0.0";
+ av[2] = "0.0.0.0";
+ ipifcrem(ifc, av, 3);
+ /* the steps below also run after a timeout, with whatever ipaddr/ipmask currently hold */
+ hnputl(nipaddr, ipaddr);
+ sprint(ia, "%V", nipaddr);
+ hnputl(nipmask, ipmask);
+ sprint(im, "%V", nipmask);
+ av[1] = ia;
+ av[2] = im;
+ ipifcadd(ifc, av, 3, 0, nil);
+
+ if(gwip != 0) {
+ hnputl(ngwip, gwip);
+ n = sprint(ia, "add 0.0.0.0 0.0.0.0 %V", ngwip); /* ia is reused as the route request buffer */
+ routewrite(ifc->conv->p->f, nil, ia, n);
+ }
+ return nil;
+}
+
+static int
+rbootpread(char *bp, ulong offset, int len)
+{
+ int n;
+ char *buf;
+ uchar a[4];
+ /* Format the six recorded addresses, one "name address" line each, and copy the part selected by offset/len into bp with readstr. */
+ buf = smalloc(READSTR);
+ if(waserror()){
+ free(buf); /* release the scratch buffer before passing the error on */
+ nexterror();
+ }
+ hnputl(a, fsip);
+ n = snprint(buf, READSTR, "fsip %15V\n", a);
+ hnputl(a, auip);
+ n += snprint(buf + n, READSTR-n, "auip %15V\n", a);
+ hnputl(a, gwip);
+ n += snprint(buf + n, READSTR-n, "gwip %15V\n", a);
+ hnputl(a, ipmask);
+ n += snprint(buf + n, READSTR-n, "ipmask %15V\n", a);
+ hnputl(a, ipaddr);
+ n += snprint(buf + n, READSTR-n, "ipaddr %15V\n", a);
+ hnputl(a, dnsip);
+ snprint(buf + n, READSTR-n, "dnsip %15V\n", a);
+
+ len = readstr(offset, bp, len, buf);
+ poperror();
+ free(buf);
+ return len;
+}
+
+char* (*bootp)(Ipifc*) = rbootp; /* exported entry points: the static functions above are reached only through these pointers */
+int (*bootpread)(char*, ulong, int) = rbootpread;
--- /dev/null
+++ b/os/ip.original/il.c
@@ -1,0 +1,1414 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum /* Connection state */
+{
+ Ilclosed, /* idle; ilstart only initialises a conversation in this state */
+ Ilsyncer, /* we sent the first sync (ilstart with IL_CONNECT) */
+ Ilsyncee, /* created by iliput from a sync that reached a listener */
+ Ilestablished, /* handshake finished; data is accepted */
+ Illistening, /* announced (ilstart with IL_LISTEN) */
+ Ilclosing, /* close sent; ilackproc repeats it until answered or retries run out */
+ Ilopening, /* only for file server */
+};
+
+char *ilstates[] = /* printable names, indexed by the connection state enum */
+{
+ "Closed",
+ "Syncer",
+ "Syncee",
+ "Established",
+ "Listen",
+ "Closing",
+ "Opening", /* only for file server */
+};
+
+enum /* Packet types */
+{
+ Ilsync, /* connection setup */
+ Ildata, /* data message */
+ Ildataquery, /* data resent by ilrexmit; the receiver also answers with Ilstate */
+ Ilack, /* acknowledgement only */
+ Ilquery, /* asks the peer for an Ilstate reply */
+ Ilstate, /* reply to Ilquery/Ildataquery, echoing its ilspec */
+ Ilclose, /* close; also what ilreject sends */
+};
+
+char *iltype[] = /* printable names, indexed by packet type; valid indices are 0..Ilclose */
+{
+ "sync",
+ "data",
+ "dataquery",
+ "ack",
+ "query",
+ "state",
+ "close"
+};
+
+enum
+{
+ Seconds = 1000, /* ms per second; the timer constants below are multiples of ms */
+ Iltickms = 50, /* time base */
+ AckDelay = 2*Iltickms, /* max time twixt message rcvd & ack sent */
+ MaxTimeout = 30*Seconds, /* max time between rexmit */
+ QueryTime = 10*Seconds, /* time between subsequent queries */
+ DeathTime = 30*QueryTime, /* ilackproc hangs up once nothing was received for this long */
+
+ MaxRexmit = 16, /* max retransmissions before hangup */
+ Defaultwin = 20, /* initial ic->window; iloutoforder accepts ids up to recvd+window */
+
+ LogAGain = 3, /* ic->delay and ic->rate are stored shifted left by this */
+ AGain = 1<<LogAGain,
+ LogDGain = 2, /* ic->mdev is stored shifted left by this */
+ DGain = 1<<LogDGain,
+
+ DefByteRate = 100, /* assume a megabit link */
+ DefRtt = 50, /* cross country on a great day */
+
+ Maxrq = 64*1024, /* size given to qopen for the read queue; _ilprocess drops data once qlen reaches it */
+};
+
+enum
+{
+ Nqt= 8, /* query slots 1..Nqt are handed out by ilnextqt; qt[] has Nqt+1 entries */
+};
+
+typedef struct Ilcb Ilcb;
+struct Ilcb /* Control block */
+{
+ int state; /* Connection state */
+ Conv *conv; /* owning conversation; set by ilstart and iliput */
+ QLock ackq; /* Unacknowledged queue */
+ Block *unacked; /* head: oldest message not yet acked */
+ Block *unackedtail; /* tail; only consulted while unacked != nil */
+ ulong unackedbytes; /* running total kept by ilackq and ilackto */
+ QLock outo; /* Out of order packet queue */
+ Block *outoforder; /* kept sorted by id (see iloutoforder) */
+ ulong next; /* Id of next to send */
+ ulong recvd; /* Last packet received */
+ ulong acksent; /* Last packet acked */
+ ulong start; /* Local start id */
+ ulong rstart; /* Remote start id */
+ int window; /* Maximum receive window */
+ int rxquery; /* number of queries on this connection */
+ int rxtot; /* number of retransmits on this connection */
+ int rexmit; /* number of retransmits of *unacked */
+ ulong qt[Nqt+1]; /* state table for query messages */
+ int qtx; /* ... index into qt */
+
+ /* if set, fasttimeout causes a connection request to terminate after 4*Iltickms */
+ int fasttimeout;
+
+ /* timers */
+ ulong lastxmit; /* time of last xmit */
+ ulong lastrecv; /* time of last recv */
+ ulong timeout; /* retransmission time for *unacked */
+ ulong acktime; /* time to send next ack */
+ ulong querytime; /* time to send next query */
+
+ /* adaptive measurements */
+ int delay; /* Average of the fixed rtt delay */
+ int rate; /* Average byte rate */
+ int mdev; /* Mean deviation of rtt */
+ int maxrtt; /* largest rtt seen */
+ ulong rttack; /* The ack we are waiting for */
+ int rttlen; /* Length of rttack packet */
+ uvlong rttstart; /* Time we issued rttack packet */
+};
+
+enum
+{
+ IL_IPSIZE = 20, /* bytes of IP header at the front of Ilhdr (vihl..dst) */
+ IL_HDRSIZE = 18, /* bytes of IL header that follow (ilsum..ilack) */
+ IL_LISTEN = 0, /* type argument to ilstart: announce */
+ IL_CONNECT = 1, /* type argument to ilstart: call out */
+ IP_ILPROTO = 40, /* value stored in the IP header's proto field */
+};
+
+typedef struct Ilhdr Ilhdr;
+struct Ilhdr /* IPv4 header immediately followed by the IL header, as laid out in a packet */
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* Ip source */
+ uchar dst[4]; /* Ip destination */
+ uchar ilsum[2]; /* Checksum including header */
+ uchar illen[2]; /* Packet length */
+ uchar iltype; /* Packet type */
+ uchar ilspec; /* Special */
+ uchar ilsrc[2]; /* Src port */
+ uchar ildst[2]; /* Dst port */
+ uchar ilid[4]; /* Sequence id */
+ uchar ilack[4]; /* Acked sequence */
+};
+
+enum /* indices into Ilpriv.stats[] and statnames[] */
+{
+ InMsgs,
+ OutMsgs,
+ CsumErrs, /* checksum errors */
+ HlenErrs, /* header length error */
+ LenErrs, /* short packet */
+ OutOfOrder, /* out of order */
+ Retrans, /* retransmissions */
+ DupMsg,
+ DupBytes,
+ DroppedMsgs,
+
+ Nstats, /* number of counters; keep last */
+};
+
+static char *statnames[] = /* labels printed by ilxstats, one per counter index */
+{
+[InMsgs] "InMsgs",
+[OutMsgs] "OutMsgs",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErr",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+[Retrans] "Retrans",
+[DupMsg] "DupMsg",
+[DupBytes] "DupBytes",
+[DroppedMsgs] "DroppedMsgs",
+};
+
+typedef struct Ilpriv Ilpriv;
+struct Ilpriv /* per-protocol private state, hung off Proto.priv by ilinit */
+{
+ Ipht ht; /* conversation lookup table used by iliput */
+
+ ulong stats[Nstats]; /* counters reported by ilxstats */
+
+ ulong csumerr; /* checksum errors */
+ ulong hlenerr; /* header length error */
+ ulong lenerr; /* short packet */
+ ulong order; /* out of order */
+ ulong rexmit; /* retransmissions */
+ ulong dup;
+ ulong dupb;
+
+ /* keeping track of the ack kproc */
+ int ackprocstarted; /* set once ilstart has launched ilackproc */
+ QLock apl; /* serialises that launch */
+};
+
+/* state for query/dataquery messages */
+
+
+void ilrcvmsg(Conv*, Block*); /* declared only; no definition in this file */
+void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+void ilackq(Ilcb*, Block*);
+void ilprocess(Conv*, Ilhdr*, Block*);
+void ilpullup(Conv*);
+void ilhangup(Conv*, char*);
+void ilfreeq(Ilcb*);
+void ilrexmit(Ilcb*);
+void ilbackoff(Ilcb*);
+void ilsettimeout(Ilcb*);
+char* ilstart(Conv*, int, int);
+void ilackproc(void*);
+void iloutoforder(Conv*, Ilhdr*, Block*);
+void iliput(Proto*, Ipifc*, Block*);
+void iladvise(Proto*, Block*, char*);
+int ilnextqt(Ilcb*);
+void ilcbinit(Ilcb*);
+int later(ulong, ulong, char*);
+void ilreject(Fs*, Ilhdr*);
+void illocalclose(Conv *c);
+ int ilcksum = 1; /* nonzero: ilkick, ilsendctl and ilreject fill in the IL checksum */
+static int initseq = 25001; /* not referenced anywhere else in this file */
+static ulong scalediv, scalemul; /* set by inittimescale; scale fastticks to ms in ilrttcalc */
+static char *etime = "connection timed out"; /* hangup reason used by ilackproc */
+
+/* connect handler: strip an optional "!fasttimeout" suffix, run the standard connect, start the handshake */
+static char*
+ilconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+	char *suffix;
+	int quick;
+
+	/* huge hack to quickly try an il connection */
+	suffix = nil;
+	if(argc > 1)
+		suffix = strstr(argv[1], "!fasttimeout");
+	quick = suffix != nil;
+	if(quick)
+		*suffix = 0;	/* cut the option off the address string in place */
+
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_CONNECT, quick);
+	return err;
+}
+/* state handler: format the connection state, queue lengths and adaptive-timer figures into state (at most n bytes) */
+static int
+ilstate(Conv *c, char *state, int n)
+{
+ Ilcb *ic;
+
+ ic = (Ilcb*)(c->ptcl);
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+ ilstates[ic->state],
+ c->rq ? qlen(c->rq) : 0,
+ c->wq ? qlen(c->wq) : 0,
+ ic->delay>>LogAGain, ic->rate>>LogAGain, ic->mdev>>LogDGain, /* undo the fixed-point scaling */
+ ic->unackedbytes, ic->rxtot, ic->rxquery, ic->maxrtt);
+}
+
+/* inuse handler: a conversation counts as in use in every state except Ilclosed */
+static int
+ilinuse(Conv *c)
+{
+	Ilcb *cb;
+
+	cb = (Ilcb*)(c->ptcl);
+	return cb->state == Ilclosed ? 0 : 1;
+}
+
+/* announce handler, called with c locked: run the standard announce, then start listening */
+static char*
+ilannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil)
+		err = ilstart(c, IL_LISTEN, 0);
+	if(err != nil)
+		return err;
+
+	/* both steps succeeded: report the conversation as connected */
+	Fsconnected(c, nil);
+	return nil;
+}
+
+/* drop the local end: mark closed, remove from the protocol's ht table, clear local address and port */
+void
+illocalclose(Conv *c)
+{
+	Ilpriv *priv = c->p->priv;
+	Ilcb *cb = (Ilcb*)c->ptcl;
+
+	cb->state = Ilclosed;
+	iphtrem(&priv->ht, c);
+
+	ipmove(c->laddr, IPnoaddr);
+	c->lport = 0;
+}
+/* close handler: close the conversation's queues, send a close if a peer may exist, free queued blocks */
+static void
+ilclose(Conv *c)
+{
+ Ilcb *ic;
+
+ ic = (Ilcb*)c->ptcl;
+
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+
+ switch(ic->state) {
+ case Ilclosing:
+ case Ilclosed:
+ break;
+ case Ilsyncer:
+ case Ilsyncee:
+ case Ilestablished:
+ ic->state = Ilclosing; /* ilackproc resends the close while in this state */
+ ilsettimeout(ic);
+ ilsendctl(c, nil, Ilclose, ic->next, ic->recvd, 0);
+ break;
+ case Illistening:
+ illocalclose(c); /* no peer to notify; just drop the local end */
+ break;
+ }
+ ilfreeq(ic);
+}
+/* bypass routine for c->wq (see ilcreate): wrap bp in IP and IL headers, keep a copy for retransmission, send it */
+void
+ilkick(void *x, Block *bp)
+{
+ Conv *c = x;
+ Ilhdr *ih;
+ Ilcb *ic;
+ int dlen;
+ ulong id, ack;
+ Fs *f;
+ Ilpriv *priv;
+
+ f = c->p->f;
+ priv = c->p->priv;
+ ic = (Ilcb*)c->ptcl;
+
+ if(bp == nil)
+ return;
+
+ switch(ic->state) {
+ case Ilclosed:
+ case Illistening:
+ case Ilclosing:
+ freeblist(bp); /* nothing can be sent in these states: drop the data */
+ qhangup(c->rq, nil);
+ return;
+ }
+
+ dlen = blocklen(bp);
+
+ /* Make space to fit il & ip */
+ bp = padblock(bp, IL_IPSIZE+IL_HDRSIZE);
+ ih = (Ilhdr *)(bp->rp);
+ ih->vihl = IP_VER4;
+
+ /* Ip fields */
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ v6tov4(ih->dst, c->raddr);
+ v6tov4(ih->src, c->laddr);
+ ih->proto = IP_ILPROTO;
+
+ /* Il fields */
+ hnputs(ih->illen, dlen+IL_HDRSIZE);
+ hnputs(ih->ilsrc, c->lport);
+ hnputs(ih->ildst, c->rport);
+
+ qlock(&ic->ackq); /* id allocation and the unacked queue are updated together */
+ id = ic->next++;
+ hnputl(ih->ilid, id);
+ ack = ic->recvd; /* each data message also acks what has been received */
+ hnputl(ih->ilack, ack);
+ ic->acksent = ack;
+ ic->acktime = NOW + AckDelay;
+ ih->iltype = Ildata;
+ ih->ilspec = 0;
+ ih->ilsum[0] = 0; /* checksum field must be zero while the sum is computed */
+ ih->ilsum[1] = 0;
+
+ /* Checksum of ilheader plus data (not ip & no pseudo header) */
+ if(ilcksum)
+ hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, dlen+IL_HDRSIZE));
+
+ ilackq(ic, bp); /* queues a copy; bp itself is sent below */
+ qunlock(&ic->ackq);
+
+ /* Start the round trip timer for this packet if the timer is free */
+ if(ic->rttack == 0) {
+ ic->rttack = id;
+ ic->rttstart = fastticks(nil);
+ ic->rttlen = dlen + IL_IPSIZE + IL_HDRSIZE;
+ }
+
+ if(later(NOW, ic->timeout, nil)) /* retransmit timer already expired: re-arm it */
+ ilsettimeout(ic);
+ ipoput4(f, bp, 0, c->ttl, c->tos, c);
+ priv->stats[OutMsgs]++;
+}
+/* create handler: give the conversation a read queue and a write side that bypasses straight into ilkick */
+static void
+ilcreate(Conv *c)
+{
+ c->rq = qopen(Maxrq, 0, 0, c);
+ c->wq = qbypass(ilkick, c);
+}
+
+/* stats handler: print every counter as a "name: value" line into buf; returns how far the output advanced */
+int
+ilxstats(Proto *il, char *buf, int len)
+{
+	Ilpriv *priv = il->priv;
+	char *out, *end;
+	int k;
+
+	end = buf+len;
+	out = buf;
+	for(k = 0; k < Nstats; k++)
+		out = seprint(out, end, "%s: %lud\n", statnames[k], priv->stats[k]);
+	return out - buf;
+}
+
+/* append a copy of an outgoing message to the unacked queue (ilkick holds ic->ackq around the call) */
+void
+ilackq(Ilcb *ic, Block *bp)
+{
+	Block *copy;
+	int len;
+
+	/* Enqueue a copy on the unacked queue in case this one gets lost */
+	len = blocklen(bp);
+	copy = copyblock(bp, len);
+	copy->list = nil;
+	if(ic->unacked == nil)
+		ic->unacked = copy;
+	else
+		ic->unackedtail->list = copy;
+	ic->unackedtail = copy;
+	ic->unackedbytes += len;
+}
+
+/* fold one round-trip sample (now - ic->rttstart) into the delay, rate and mdev estimates; bp is the acking packet */
+static void
+ilrttcalc(Ilcb *ic, Block *bp)
+{
+	int rtt, tt, pt, delay, rate;
+	vlong ms;
+
+	/* scale in signed 64 bits; narrowing to int first let wrapped samples slip past the guard */
+	ms = (vlong)(fastticks(nil) - ic->rttstart);
+	ms = (ms*(vlong)scalemul)/(vlong)scalediv;
+	/* Guard against zero wrap */
+	if(ms > 120000 || ms < 0)
+		return;
+	rtt = ms;
+	delay = ic->delay;
+	rate = ic->rate;
+
+	/* this block had to be transmitted after the one acked so count its size */
+	ic->rttlen += blocklen(bp) + IL_IPSIZE + IL_HDRSIZE;
+	if(ic->rttlen < 256){
+		/* guess fixed delay as rtt of small packets */
+		delay += rtt - (delay>>LogAGain);
+		if(delay < AGain)
+			delay = AGain;
+		ic->delay = delay;
+	} else {
+		/* if packet took longer than avg rtt delay, recalc rate */
+		tt = rtt - (delay>>LogAGain);
+		if(tt > 0){
+			rate += ic->rttlen/tt - (rate>>LogAGain);
+			if(rate < AGain)
+				rate = AGain;
+			ic->rate = rate;
+		}
+	}
+
+	/* mdev */
+	pt = ic->rttlen/(rate>>LogAGain) + (delay>>LogAGain);	/* predicted rtt for a packet of this size */
+	ic->mdev += abs(rtt-pt) - (ic->mdev>>LogDGain);
+	if(rtt > ic->maxrtt)
+		ic->maxrtt = rtt;
+}
+/* the peer acked everything up to ackto: take an rtt sample if one was pending and free the acked blocks */
+void
+ilackto(Ilcb *ic, ulong ackto, Block *bp)
+{
+ Ilhdr *h;
+ ulong id;
+
+ if(ic->rttack == ackto)
+ ilrttcalc(ic, bp); /* bp is the incoming packet; only its length is used there */
+
+ /* Cancel if we've passed the packet we were interested in */
+ if(ic->rttack <= ackto)
+ ic->rttack = 0;
+
+ qlock(&ic->ackq);
+ while(ic->unacked) {
+ h = (Ilhdr *)ic->unacked->rp;
+ id = nhgetl(h->ilid);
+ if(ackto < id) /* queue is in send order: the rest is still unacked */
+ break;
+
+ bp = ic->unacked; /* parameter reused as the block being released */
+ ic->unacked = bp->list;
+ bp->list = nil;
+ ic->unackedbytes -= blocklen(bp);
+ freeblist(bp);
+ ic->rexmit = 0; /* progress was made: restart the retry count and timer */
+ ilsettimeout(ic);
+ }
+ qunlock(&ic->ackq);
+}
+
+/* rcv handler: validate an incoming IL packet, find (or create from a listener) its conversation, and process it */
+void
+iliput(Proto *il, Ipifc*, Block *bp)
+{
+	char *st;
+	Ilcb *ic;
+	Ilhdr *ih;
+	uchar raddr[IPaddrlen];
+	uchar laddr[IPaddrlen];
+	ushort sp, dp, csum;
+	int plen, illen;
+	Conv *new, *s;
+	Ilpriv *ipriv;
+
+	ipriv = il->priv;
+	ih = (Ilhdr *)bp->rp;
+	plen = blocklen(bp);
+	if(plen < IL_IPSIZE+IL_HDRSIZE){
+		netlog(il->f, Logil, "il: hlenerr\n");
+		ipriv->stats[HlenErrs]++;
+		goto raise;
+	}
+
+	illen = nhgets(ih->illen);	/* must cover the IL header: ilpullup later computes illen-IL_HDRSIZE */
+	if(illen < IL_HDRSIZE || illen+IL_IPSIZE > plen){
+		netlog(il->f, Logil, "il: lenerr\n");
+		ipriv->stats[LenErrs]++;
+		goto raise;
+	}
+
+	sp = nhgets(ih->ildst);	/* sp is our (local) port, dp the sender's */
+	dp = nhgets(ih->ilsrc);
+	v4tov6(raddr, ih->src);
+	v4tov6(laddr, ih->dst);
+
+	if((csum = ptclcsum(bp, IL_IPSIZE, illen)) != 0) {
+		if(ih->iltype > Ilclose)
+			st = "?";
+		else
+			st = iltype[ih->iltype];
+		ipriv->stats[CsumErrs]++;
+		netlog(il->f, Logil, "il: cksum %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",	/* one verb per argument */
+			csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+		goto raise;
+	}
+
+	qlock(il);
+	s = iphtlook(&ipriv->ht, raddr, dp, laddr, sp);
+	if(s == nil){
+		if(ih->iltype == Ilsync)
+			ilreject(il->f, ih);		/* no listener */
+		qunlock(il);
+		goto raise;
+	}
+
+	ic = (Ilcb*)s->ptcl;
+	if(ic->state == Illistening){
+		if(ih->iltype != Ilsync){
+			qunlock(il);
+			if(ih->iltype > Ilclose)
+				st = "?";
+			else
+				st = iltype[ih->iltype];
+			ilreject(il->f, ih);		/* no channel and not sync */
+			netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+				st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
+			goto raise;
+		}
+
+		new = Fsnewcall(s, raddr, dp, laddr, sp, V4);
+		if(new == nil){
+			qunlock(il);
+			netlog(il->f, Logil, "il: bad newcall %I/%ud->%ud\n", raddr, sp, dp);
+			ilsendctl(s, ih, Ilclose, 0, nhgetl(ih->ilid), 0);
+			goto raise;
+		}
+		s = new;	/* from here on the packet belongs to the new call, not the listener */
+
+		ic = (Ilcb*)s->ptcl;
+
+		ic->conv = s;
+		ic->state = Ilsyncee;
+		ilcbinit(ic);
+		ic->rstart = nhgetl(ih->ilid);
+		iphtadd(&ipriv->ht, s);
+	}
+
+	qlock(s);	/* take the conversation lock before releasing the protocol lock */
+	qunlock(il);
+	if(waserror()){
+		qunlock(s);
+		nexterror();
+	}
+	ilprocess(s, ih, bp);	/* consumes bp */
+	qunlock(s);
+	poperror();
+	return;
+raise:
+	freeblist(bp);
+}
+/* protocol state machine: act on packet h/bp according to the connection state; bp is freed or queued on every path */
+void
+_ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+ Ilcb *ic;
+ ulong id, ack;
+ Ilpriv *priv;
+
+ id = nhgetl(h->ilid);
+ ack = nhgetl(h->ilack);
+
+ ic = (Ilcb*)s->ptcl;
+
+ ic->lastrecv = NOW; /* any packet counts as a sign of life */
+ ic->querytime = NOW + QueryTime;
+ priv = s->p->priv;
+ priv->stats[InMsgs]++;
+
+ switch(ic->state) {
+ default:
+ netlog(s->p->f, Logil, "il: unknown state %d\n", ic->state); /* falls through to drop the packet */
+ case Ilclosed:
+ freeblist(bp);
+ break;
+ case Ilsyncer:
+ switch(h->iltype) {
+ default:
+ break;
+ case Ilsync:
+ if(ack != ic->start) /* reply does not match the sync we sent */
+ ilhangup(s, "connection rejected");
+ else {
+ ic->recvd = id;
+ ic->rstart = id;
+ ilsendctl(s, nil, Ilack, ic->next, ic->recvd, 0);
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ Fsconnected(s, nil);
+ ilpullup(s);
+ }
+ break;
+ case Ilclose:
+ if(ack == ic->start)
+ ilhangup(s, "connection rejected");
+ break;
+ }
+ freeblist(bp);
+ break;
+ case Ilsyncee:
+ switch(h->iltype) {
+ default:
+ break;
+ case Ilsync:
+ if(id != ic->rstart || ack != 0){
+ illocalclose(s);
+ } else {
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilsync, ic->start, ic->recvd, 0);
+ }
+ break;
+ case Ilack:
+ if(ack == ic->start) {
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ ilpullup(s);
+ }
+ break;
+ case Ildata:
+ if(ack == ic->start) { /* data carrying the right ack completes the handshake too */
+ ic->state = Ilestablished;
+ ic->fasttimeout = 0;
+ ic->rexmit = 0;
+ goto established;
+ }
+ break;
+ case Ilclose:
+ if(ack == ic->start)
+ ilhangup(s, "remote close");
+ break;
+ }
+ freeblist(bp);
+ break;
+ case Ilestablished:
+ established:
+ switch(h->iltype) {
+ case Ilsync:
+ if(id != ic->rstart)
+ ilhangup(s, "remote close");
+ else
+ ilsendctl(s, nil, Ilack, ic->next, ic->rstart, 0);
+ freeblist(bp);
+ break;
+ case Ildata:
+ /*
+ * avoid consuming all the mount rpc buffers in the
+ * system. if the input queue is too long, drop this
+ * packet.
+ */
+ if (s->rq && qlen(s->rq) >= Maxrq) {
+ priv->stats[DroppedMsgs]++;
+ freeblist(bp);
+ break;
+ }
+
+ ilackto(ic, ack, bp);
+ iloutoforder(s, h, bp); /* takes ownership of bp */
+ ilpullup(s);
+ break;
+ case Ildataquery:
+ ilackto(ic, ack, bp);
+ iloutoforder(s, h, bp); /* takes ownership of bp */
+ ilpullup(s);
+ ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+ break;
+ case Ilack:
+ ilackto(ic, ack, bp);
+ freeblist(bp);
+ break;
+ case Ilquery:
+ ilackto(ic, ack, bp);
+ ilsendctl(s, nil, Ilstate, ic->next, ic->recvd, h->ilspec);
+ freeblist(bp);
+ break;
+ case Ilstate:
+ if(ack >= ic->rttack)
+ ic->rttack = 0;
+ ilackto(ic, ack, bp);
+ if(h->ilspec > Nqt) /* keep the index inside qt[] */
+ h->ilspec = 0;
+ if(ic->qt[h->ilspec] > ack){ /* peer has less than we had sent when we asked: resend */
+ ilrexmit(ic);
+ ilsettimeout(ic);
+ }
+ freeblist(bp);
+ break;
+ case Ilclose:
+ freeblist(bp);
+ if(ack < ic->start || ack > ic->next) /* ack outside what we have sent: ignore */
+ break;
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+ ic->state = Ilclosing;
+ ilsettimeout(ic);
+ ilfreeq(ic);
+ break;
+ }
+ break;
+ case Illistening:
+ freeblist(bp);
+ break;
+ case Ilclosing:
+ switch(h->iltype) {
+ case Ilclose:
+ ic->recvd = id;
+ ilsendctl(s, nil, Ilclose, ic->next, ic->recvd, 0);
+ if(ack == ic->next) /* peer has seen our close */
+ ilhangup(s, nil);
+ break;
+ default:
+ break;
+ }
+ freeblist(bp);
+ break;
+ }
+}
+
+/* resend the oldest unacked message as an Ildataquery so the peer also reports its state */
+void
+ilrexmit(Ilcb *ic)
+{
+	Ilhdr *h;
+	Block *nb;
+	Conv *c;
+	ulong id;
+	Ilpriv *priv;
+
+	nb = nil;
+	qlock(&ic->ackq);
+	if(ic->unacked)
+		nb = copyblock(ic->unacked, blocklen(ic->unacked));	/* the original stays queued */
+	qunlock(&ic->ackq);
+
+	if(nb == nil)
+		return;
+
+	h = (Ilhdr*)nb->rp;
+	h->vihl = IP_VER4;
+	h->iltype = Ildataquery;
+	hnputl(h->ilack, ic->recvd);
+	h->ilspec = ilnextqt(ic);
+	h->ilsum[0] = 0;
+	h->ilsum[1] = 0;
+	hnputs(h->ilsum, ptclcsum(nb, IL_IPSIZE, nhgets(h->illen)));
+
+	c = ic->conv;
+	id = nhgetl(h->ilid);
+	netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+		ic->rexmit, ic->timeout,
+		c->raddr, c->lport, c->rport);
+
+	ilbackoff(ic);
+	ipoput4(c->p->f, nb, 0, c->ttl, c->tos, c);
+
+	/* statistics: stats[] is what ilxstats reports; the older field is kept as well */
+	ic->rxtot++;
+	priv = c->p->priv;
+	priv->rexmit++;
+	priv->stats[Retrans]++;
+}
+
+/* DEBUG wrapper: log the connection and packet, let _ilprocess handle the packet, then log the resulting state */
+void
+ilprocess(Conv *s, Ilhdr *h, Block *bp)
+{
+	Ilcb *ic;
+	char *tn;
+
+	ic = (Ilcb*)s->ptcl;
+	tn = h->iltype > Ilclose ? "?" : iltype[h->iltype];	/* type byte is off the wire: don't index past iltype[] */
+	USED(ic);
+	netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+		ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
+		ic->next, tn, nhgetl(h->ilid),
+		nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
+	_ilprocess(s, h, bp);
+
+	netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+}
+
+/* drop the local end, hang up both queues and, if a connect was still pending, complete it with msg */
+void
+ilhangup(Conv *s, char *msg)
+{
+	Ilcb *cb = (Ilcb*)s->ptcl;
+	char *why = msg != nil ? msg : "no reason";
+	int wascalling;
+
+	netlog(s->p->f, Logil, "il: hangup! %I %d/%d: %s\n",
+		s->raddr, s->lport, s->rport, why);
+
+	/* remember whether we were calling out before the state is reset */
+	wascalling = (cb->state == Ilsyncer);
+	illocalclose(s);
+	qhangup(s->rq, msg);
+	qhangup(s->wq, msg);
+	if(wascalling)
+		Fsconnected(s, msg);
+}
+/* pass up, in sequence, every queued message that directly follows ic->recvd; stop at the first gap */
+void
+ilpullup(Conv *s)
+{
+ Ilcb *ic;
+ Ilhdr *oh;
+ Block *bp;
+ ulong oid, dlen;
+ Ilpriv *ipriv;
+
+ ic = (Ilcb*)s->ptcl;
+ if(ic->state != Ilestablished)
+ return;
+
+ qlock(&ic->outo);
+ while(ic->outoforder) {
+ bp = ic->outoforder;
+ oh = (Ilhdr*)bp->rp;
+ oid = nhgetl(oh->ilid);
+ if(oid <= ic->recvd) { /* already delivered: discard */
+ ic->outoforder = bp->list;
+ freeblist(bp);
+ continue;
+ }
+ if(oid != ic->recvd+1){ /* gap: wait for the missing message */
+ ipriv = s->p->priv;
+ ipriv->stats[OutOfOrder]++;
+ break;
+ }
+
+ ic->recvd = oid;
+ ic->outoforder = bp->list;
+
+ bp->list = nil;
+ dlen = nhgets(oh->illen)-IL_HDRSIZE; /* payload length without the IL header */
+ bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+ /*
+ * Upper levels don't know about multiple-block
+ * messages so copy all into one (yick).
+ */
+ bp = concatblock(bp);
+ if(bp == 0)
+ panic("ilpullup");
+ bp = packblock(bp);
+ if(bp == 0)
+ panic("ilpullup2");
+ qpass(s->rq, bp);
+ }
+ qunlock(&ic->outo);
+}
+/* file bp on the id-sorted outoforder list; packets outside the window and duplicates are freed instead */
+void
+iloutoforder(Conv *s, Ilhdr *h, Block *bp)
+{
+ Ilcb *ic;
+ uchar *lid;
+ Block *f, **l;
+ ulong id, newid;
+ Ilpriv *ipriv;
+
+ ipriv = s->p->priv;
+ ic = (Ilcb*)s->ptcl;
+ bp->list = nil;
+
+ id = nhgetl(h->ilid);
+ /* Window checks */
+ if(id <= ic->recvd || id > ic->recvd+ic->window) {
+ netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
+ freeblist(bp);
+ return;
+ }
+
+ /* Packet is acceptable so sort onto receive queue for pullup */
+ qlock(&ic->outo);
+ if(ic->outoforder == nil)
+ ic->outoforder = bp;
+ else {
+ l = &ic->outoforder; /* l always points at the link that leads to f */
+ for(f = *l; f; f = f->list) {
+ lid = ((Ilhdr*)(f->rp))->ilid;
+ newid = nhgetl(lid);
+ if(id <= newid) {
+ if(id == newid) { /* already queued: count and drop the duplicate */
+ ipriv->stats[DupMsg]++;
+ ipriv->stats[DupBytes] += blocklen(bp);
+ qunlock(&ic->outo);
+ freeblist(bp);
+ return;
+ }
+ bp->list = f; /* insert before the first larger id */
+ *l = bp;
+ qunlock(&ic->outo);
+ return;
+ }
+ l = &f->list;
+ }
+ *l = bp; /* largest id so far: append */
+ }
+ qunlock(&ic->outo);
+}
+
+/* send a header-only control packet: a reply built from inih when given, else from ipc's addresses with id/ack */
+void
+ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
+{
+	Ilhdr *ih;
+	Ilcb *ic;
+	Block *bp;
+	int ttl, tos;
+
+	if(ipc==nil)	/* checked before the first use of ipc, not after it */
+		panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+	if(ipc->p==nil)
+		panic("ipc->p is nil");
+
+	bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+	bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+	ih = (Ilhdr *)(bp->rp);
+	ih->vihl = IP_VER4;
+	/* Ip fields */
+	ih->proto = IP_ILPROTO;
+	hnputs(ih->illen, IL_HDRSIZE);
+	ih->frag[0] = 0;
+	ih->frag[1] = 0;
+	if(inih) {
+		/* mirror the incoming header: swap addresses, ports and id/ack */
+		hnputl(ih->dst, nhgetl(inih->src));
+		hnputl(ih->src, nhgetl(inih->dst));
+		hnputs(ih->ilsrc, nhgets(inih->ildst));
+		hnputs(ih->ildst, nhgets(inih->ilsrc));
+		hnputl(ih->ilid, nhgetl(inih->ilack));
+		hnputl(ih->ilack, nhgetl(inih->ilid));
+		ttl = MAXTTL;
+		tos = DFLTTOS;
+	}
+	else {
+		v6tov4(ih->dst, ipc->raddr);
+		v6tov4(ih->src, ipc->laddr);
+		hnputs(ih->ilsrc, ipc->lport);
+		hnputs(ih->ildst, ipc->rport);
+		hnputl(ih->ilid, id);
+		hnputl(ih->ilack, ack);
+		ic = (Ilcb*)ipc->ptcl;
+		ic->acksent = ack;
+		ic->acktime = NOW;
+		ttl = ipc->ttl;
+		tos = ipc->tos;
+	}
+	ih->iltype = type;
+	ih->ilspec = ilspec;
+	ih->ilsum[0] = 0;
+	ih->ilsum[1] = 0;
+
+	if(ilcksum)
+		hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+	netlog(ipc->p->f, Logilmsg, "ctl(%s id %d ack %d %d->%d)\n",
+		iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
+		nhgets(ih->ilsrc), nhgets(ih->ildst));
+	ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+}
+/* answer packet inih with an Ilclose built purely from its header; needs no conversation */
+void
+ilreject(Fs *f, Ilhdr *inih)
+{
+ Ilhdr *ih;
+ Block *bp;
+
+ bp = allocb(IL_IPSIZE+IL_HDRSIZE);
+ bp->wp += IL_IPSIZE+IL_HDRSIZE;
+
+ ih = (Ilhdr *)(bp->rp);
+ ih->vihl = IP_VER4;
+
+ /* Ip fields */
+ ih->proto = IP_ILPROTO;
+ hnputs(ih->illen, IL_HDRSIZE);
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputl(ih->dst, nhgetl(inih->src)); /* swap source and destination ... */
+ hnputl(ih->src, nhgetl(inih->dst));
+ hnputs(ih->ilsrc, nhgets(inih->ildst));
+ hnputs(ih->ildst, nhgets(inih->ilsrc));
+ hnputl(ih->ilid, nhgetl(inih->ilack)); /* ... and id and ack */
+ hnputl(ih->ilack, nhgetl(inih->ilid));
+ ih->iltype = Ilclose;
+ ih->ilspec = 0;
+ ih->ilsum[0] = 0;
+ ih->ilsum[1] = 0;
+
+ if(ilcksum)
+ hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
+
+ ipoput4(f, bp, 0, MAXTTL, DFLTTOS, nil);
+}
+
+/* arm ic->timeout: fixed delay + time to move the unacked bytes + twice the mean deviation + AckDelay, capped */
+void
+ilsettimeout(Ilcb *ic)
+{
+	ulong wait;
+
+	wait = ic->delay>>LogAGain;
+	wait += ic->unackedbytes/(ic->rate>>LogAGain);
+	wait += ic->mdev>>(LogDGain-1);
+	wait += AckDelay;
+
+	ic->timeout = NOW + (wait > MaxTimeout ? MaxTimeout : wait);
+}
+
+/* schedule the next retry: the base timeout grown by half for every retry so far, capped; then count this retry */
+void
+ilbackoff(Ilcb *ic)
+{
+	ulong wait;
+	int n;
+
+	wait = ic->delay>>LogAGain;
+	wait += ic->unackedbytes/(ic->rate>>LogAGain);
+	wait += ic->mdev>>(LogDGain-1);
+	wait += AckDelay;
+	for(n = ic->rexmit; n > 0; n--)
+		wait += wait>>1;
+	ic->timeout = NOW + (wait > MaxTimeout ? MaxTimeout : wait);
+
+	/* fast-timeout connects ignore the backoff and retry on the next tick */
+	if(ic->fasttimeout)
+		ic->timeout = NOW+Iltickms;
+
+	ic->rexmit++;
+}
+
+// is t1 later than t2?  complain (when x names the timer) if the two are more than an hour apart
+#define Tfuture (1000*60*60)
+int
+later(ulong t1, ulong t2, char *x)
+{
+	int delta;
+
+	delta = t1 - t2;
+	if(delta <= 0 && delta >= -Tfuture)
+		return 0;
+
+	/* t1 is ahead of t2, or so far behind that it is treated as later too */
+	if(x != nil){
+		if(delta > Tfuture)
+			print("%s: way future %d\n", x, delta);
+		else if(delta < 0)
+			print("%s: way past %d\n", x, -delta);
+	}
+	return 1;
+}
+/* kproc body started by ilstart: every Iltickms walk all conversations and service their timers; never returns */
+void
+ilackproc(void *x)
+{
+ Ilcb *ic;
+ Conv **s, *p;
+ Proto *il;
+
+ il = x;
+
+loop:
+ tsleep(&up->sleep, return0, 0, Iltickms);
+ for(s = il->conv; s && *s; s++) {
+ p = *s;
+ ic = (Ilcb*)p->ptcl;
+
+ switch(ic->state) {
+ case Ilclosed:
+ case Illistening:
+ break;
+ case Ilclosing:
+ if(later(NOW, ic->timeout, "timeout0")) {
+ if(ic->rexmit > MaxRexmit){
+ ilhangup(p, nil);
+ break;
+ }
+ ilsendctl(p, nil, Ilclose, ic->next, ic->recvd, 0); /* repeat the close */
+ ilbackoff(ic);
+ }
+ break;
+
+ case Ilsyncee:
+ case Ilsyncer:
+ if(later(NOW, ic->timeout, "timeout1")) {
+ if(ic->rexmit > MaxRexmit){
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilsync, ic->start, ic->recvd, 0); /* repeat the sync */
+ ilbackoff(ic);
+ }
+ break;
+
+ case Ilestablished:
+ if(ic->recvd != ic->acksent) /* something received has not been acked yet */
+ if(later(NOW, ic->acktime, "acktime"))
+ ilsendctl(p, nil, Ilack, ic->next, ic->recvd, 0);
+
+ if(later(NOW, ic->querytime, "querytime")){
+ if(later(NOW, ic->lastrecv+DeathTime, "deathtime")){
+ netlog(il->f, Logil, "il: hangup: deathtime\n");
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic));
+ ic->querytime = NOW + QueryTime;
+ }
+
+ if(ic->unacked != nil)
+ if(later(NOW, ic->timeout, "timeout2")) {
+ if(ic->rexmit > MaxRexmit){
+ netlog(il->f, Logil, "il: hangup: too many rexmits\n");
+ ilhangup(p, etime);
+ break;
+ }
+ ilsendctl(p, nil, Ilquery, ic->next, ic->recvd, ilnextqt(ic)); /* ask first; the Ilstate reply triggers ilrexmit */
+ ic->rxquery++;
+ ilbackoff(ic);
+ }
+ break;
+ }
+ }
+ goto loop;
+}
+
+/* reset a control block for a new connection: fresh ids, empty queues, default timer estimates */
+void
+ilcbinit(Ilcb *ic)
+{
+	ic->start = nrand(0x1000000);
+	ic->next = ic->start+1;
+	ic->recvd = 0;
+	ic->window = Defaultwin;
+	ic->unackedbytes = 0;
+	ic->unacked = nil;
+	ic->outoforder = nil;
+	ic->rexmit = ic->rxtot = ic->rxquery = 0;
+	ic->qtx = 1;
+	ic->fasttimeout = 0;
+	ic->rttack = 0;	/* ilstart re-runs this on a closed block: a stale value would keep ilkick from timing a packet */
+	ic->maxrtt = 0;
+
+	/* timers */
+	ic->delay = DefRtt<<LogAGain;
+	ic->mdev = DefRtt<<LogDGain;
+	ic->rate = DefByteRate<<LogAGain;
+	ic->querytime = NOW + QueryTime;
+	ic->lastrecv = NOW;	/* or we'll timeout right away */
+	ilsettimeout(ic);
+}
+/* put a closed conversation into listen or connect state, starting the ack kproc on first use; always returns nil */
+char*
+ilstart(Conv *c, int type, int fasttimeout)
+{
+ Ilcb *ic;
+ Ilpriv *ipriv;
+ char kpname[KNAMELEN];
+
+ ipriv = c->p->priv;
+
+ if(ipriv->ackprocstarted == 0){
+ qlock(&ipriv->apl);
+ if(ipriv->ackprocstarted == 0){ /* re-check under the lock so only one kproc is started */
+ sprint(kpname, "#I%dilack", c->p->f->dev);
+ kproc(kpname, ilackproc, c->p, 0);
+ ipriv->ackprocstarted = 1;
+ }
+ qunlock(&ipriv->apl);
+ }
+
+ ic = (Ilcb*)c->ptcl;
+ ic->conv = c;
+
+ if(ic->state != Ilclosed) /* already started: leave it alone */
+ return nil;
+
+ ilcbinit(ic);
+
+ if(fasttimeout){
+ /* timeout if we can't connect quickly */
+ ic->fasttimeout = 1;
+ ic->timeout = NOW+Iltickms;
+ ic->rexmit = MaxRexmit - 4; /* leaves only a few retries before ilackproc gives up */
+ };
+
+ switch(type) {
+ default:
+ netlog(c->p->f, Logil, "il: start: type %d\n", type);
+ break;
+ case IL_LISTEN:
+ ic->state = Illistening;
+ iphtadd(&ipriv->ht, c);
+ break;
+ case IL_CONNECT:
+ ic->state = Ilsyncer;
+ iphtadd(&ipriv->ht, c);
+ ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+ break;
+ }
+
+ return nil;
+}
+
+/* discard everything still queued on the unacked and out-of-order lists, each under its own lock */
+void
+ilfreeq(Ilcb *ic)
+{
+	Block *bp;
+
+	/* sent but not yet acknowledged */
+	qlock(&ic->ackq);
+	while((bp = ic->unacked) != nil){
+		ic->unacked = bp->list;
+		freeblist(bp);
+	}
+	qunlock(&ic->ackq);
+	/* received but not yet passed up */
+	qlock(&ic->outo);
+	while((bp = ic->outoforder) != nil){
+		ic->outoforder = bp->list;
+		freeblist(bp);
+	}
+	qunlock(&ic->outo);
+}
+/* advise handler: bp holds the header of a packet we sent; hang up the matching conversation if it is still calling out */
+void
+iladvise(Proto *il, Block *bp, char *msg)
+{
+ Ilhdr *h;
+ Ilcb *ic;
+ uchar source[IPaddrlen], dest[IPaddrlen];
+ ushort psource;
+ Conv *s, **p;
+
+ h = (Ilhdr*)(bp->rp);
+
+ v4tov6(dest, h->dst);
+ v4tov6(source, h->src);
+ psource = nhgets(h->ilsrc);
+
+
+ /* Look for a connection, unfortunately the destination port is missing */
+ qlock(il);
+ for(p = il->conv; *p; p++) {
+ s = *p;
+ if(s->lport == psource)
+ if(ipcmp(s->laddr, source) == 0)
+ if(ipcmp(s->raddr, dest) == 0){
+ qunlock(il); /* released before the hangup */
+ ic = (Ilcb*)s->ptcl;
+ switch(ic->state){
+ case Ilsyncer: /* only a pending connect is failed; other states ignore the advice */
+ ilhangup(s, msg);
+ break;
+ }
+ freeblist(bp);
+ return;
+ }
+ }
+ qunlock(il);
+ freeblist(bp);
+}
+
+/* take the next query slot (1..Nqt, wrapping), record the highest id sent so far in it, return the slot */
+int
+ilnextqt(Ilcb *ic)
+{
+	int slot;
+
+	qlock(&ic->ackq);
+	slot = ic->qtx + 1;
+	if(slot > Nqt)
+		slot = 1;
+	ic->qtx = slot;
+	ic->qt[0] = ic->qt[slot] = ic->next-1;	/* highest xmitted packet; slot 0 is for old implementations */
+	qunlock(&ic->ackq);
+
+	return slot;
+}
+
+/* set scalemul/scalediv so that ticks*scalemul/scalediv turns fast ticks into ms (more or less) */
+static void
+inittimescale(void)
+{
+	uvlong freq;
+
+	fastticks(&freq);
+	if(freq <= 1000){
+		scalemul = 1000/freq;
+		scalediv = 1;
+		return;
+	}
+	scalemul = 1;
+	scalediv = freq/1000;
+}
+/* gc handler: hands the protocol number to natgc and returns its result */
+int
+ilgc(Proto *il)
+{
+ return natgc(il->ipproto);
+}
+/* build the IL Proto (handlers, private state, protocol number) and register it with f */
+void
+ilinit(Fs *f)
+{
+ Proto *il;
+
+ inittimescale(); /* needed by ilrttcalc */
+
+ il = smalloc(sizeof(Proto));
+ il->priv = smalloc(sizeof(Ilpriv));
+ il->name = "il";
+ il->connect = ilconnect;
+ il->announce = ilannounce;
+ il->state = ilstate;
+ il->create = ilcreate;
+ il->close = ilclose;
+ il->rcv = iliput;
+ il->ctl = nil; /* no protocol-specific ctl handler */
+ il->advise = iladvise;
+ il->stats = ilxstats;
+ il->inuse = ilinuse;
+ il->gc = ilgc;
+ il->ipproto = IP_ILPROTO;
+ il->nc = scalednconv();
+ il->ptclsize = sizeof(Ilcb); /* each conversation's ptcl area holds one Ilcb */
+ Fsproto(f, il);
+}
--- /dev/null
+++ b/os/ip.original/ip.c
@@ -1,0 +1,797 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+typedef struct IP IP;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+enum
+{
+ IP4HDR = 20, /* sizeof(Ip4hdr) */
+ IP6HDR = 40, /* sizeof(Ip6hdr) */
+ IP_HLEN4 = 0x05, /* Header length in words */
+ IP_DF = 0x4000, /* Don't fragment */
+ IP_MF = 0x2000, /* More fragments */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
+ IP_MAX = 64*1024, /* Maximum Internet packet size */
+};
+
+#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
+
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+struct Fragment4
+{
+ Block* blist; /* fragments collected so far, kept in offset order by ip4reassemble */
+ Fragment4* next; /* next queue on IP.flisthead4, or next free entry on IP.fragfree4 */
+ ulong src; /* src, dst and id together select the queue for an arriving fragment */
+ ulong dst;
+ ushort id;
+ ulong age; /* NOW value after which ip4reassemble discards the queue */
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff; /* byte offset of the fragment's data (ip4reassemble stores offset<<3) */
+ ushort flen; /* bytes of data in the fragment, i.e. packet length less IP4HDR */
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters, indexed by the enum above and named by statnames[] */
+
+ QLock fraglock4; /* held while flisthead4 and fragfree4 are in use */
+ Fragment4* flisthead4; /* v4 reassembly queues currently in use */
+ Fragment4* fragfree4; /* free v4 queue headers, set up by initfrag */
+ Ref id4; /* incref'd by ipoput4 for the id of each locally generated packet */
+
+ QLock fraglock6; /* presumably the v6 counterparts of the four fields above — TODO confirm */
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
+/*
+ * This sleazy macro relies on the media header size being
+ * larger than sizeof(Ipfrag). ip4reassemble pads the block when it is not.
+ */
+#define BKFG(xp) ((Ipfrag*)((xp)->base))
+
+ushort ipcsum(uchar*);
+Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
+void ipfragfree4(IP*, Fragment4*);
+Fragment4* ipfragallo4(IP*);
+
+
+void
+ip_init_6(Fs *f)
+{
+	V6params *p;
+	Routerparams *rp;
+
+	p = smalloc(sizeof(V6params));
+	rp = &p->rp;
+
+	/* router parameter defaults */
+	rp->maxraint = 600000;		/* millisecs */
+	rp->minraint = 200000;
+	rp->routerlt = 3*rp->maxraint;
+	rp->ttl = MAXTTL;
+	rp->mflag = 0;			/* default not managed */
+	rp->oflag = 0;
+	rp->linkmtu = 0;		/* no mtu sent */
+	rp->reachtime = 0;
+	rp->rxmitra = 0;
+
+	p->hp.rxmithost = 1000;		/* v6 RETRANS_TIMER */
+	p->cdrouter = -1;		/* negative: no v6rlist entry selected */
+	f->v6p = p;
+}
+
+void
+initfrag(IP *ip, int size)
+{
+	Fragment4 *q4;
+	Fragment6 *q6;
+	int i;
+
+	/* v4: allocate size queue headers as one array and
+	 * chain them, in order, into the fragfree4 list */
+	q4 = (Fragment4*)malloc(sizeof(Fragment4) * size);
+	if(q4 == nil)
+		panic("initfrag");
+	ip->fragfree4 = q4;
+	for(i = 0; i < size; i++)
+		q4[i].next = &q4[i+1];
+	q4[size-1].next = nil;
+
+	/* v6: the same, on fragfree6 */
+	q6 = (Fragment6*)malloc(sizeof(Fragment6) * size);
+	if(q6 == nil)
+		panic("initfrag");
+	ip->fragfree6 = q6;
+	for(i = 0; i < size; i++)
+		q6[i].next = &q6[i+1];
+	q6[size-1].next = nil;
+}
+
+void
+ip_init(Fs *f)
+{
+	IP *inst;
+
+	/* one IP instance per Fs, with 100 reassembly queues per ip version */
+	inst = smalloc(sizeof(IP));
+	initfrag(inst, 100);
+	f->ip = inst;
+	ip_init_6(f);	/* then the v6 parameter defaults */
+}
+
+void
+iprouting(Fs *f, int on)
+{
+	IP *ip;
+
+	ip = f->ip;
+	ip->iprouting = on;
+	ip->stats[Forwarding] = on != 0 ? 1 : 2;	/* 1 while routing, 2 otherwise */
+}
+
+int
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+ Ipifc *ifc;
+ uchar *gate;
+ ulong fragoff;
+ Block *xp, *nb;
+ Ip4hdr *eh, *feh;
+ int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
+ Route *r, *sr;
+ IP *ip;
+ int rv = 0;
+ /* send bp (which begins with an Ip4hdr) out the interface routing picks for eh->dst, fragmenting if needed */
+ ip = f->ip;
+
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)(bp->rp);
+
+ ip->stats[OutRequests]++;
+
+ /* Number of uchars in data and ip header to write */
+ len = blocklen(bp);
+
+ if(gating){ /* gating is set by ipiput4 when forwarding: the header's length field bounds len */
+ chunk = nhgets(eh->length);
+ if(chunk > len){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "short gated packet\n");
+ goto free;
+ }
+ if(chunk < len)
+ len = chunk;
+ }
+ if(len >= IP_MAX){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ goto free;
+ }
+
+ r = v4lookup(f, eh->dst, c);
+ if(r == nil){
+ ip->stats[OutNoRoutes]++;
+ netlog(f, Logip, "no interface %V\n", eh->dst);
+ rv = -1; /* the only case reported to the caller as failure */
+ goto free;
+ }
+
+ ifc = r->ifc;
+ if(r->type & (Rifc|Runi))
+ gate = eh->dst; /* next hop is the destination itself */
+ else
+ if(r->type & (Rbcast|Rmulti)) {
+ gate = eh->dst;
+ sr = v4lookup(f, eh->src, nil); /* send on the interface of the source address when it is one of ours */
+ if(sr != nil && (sr->type & Runi))
+ ifc = sr->ifc;
+ }
+ else
+ gate = r->v4.gate;
+
+ if(!gating)
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->ttl = ttl;
+ if(!gating)
+ eh->tos = tos;
+
+ if(!canrlock(ifc)) /* lock not available: packet is dropped and rv stays 0 */
+ goto free;
+ if(waserror()){ /* release the interface lock if anything below raises an error */
+ runlock(ifc);
+ nexterror();
+ }
+ if(ifc->m == nil)
+ goto raise;
+
+ /* Output NAT */
+ if(nato(bp, ifc, f) != 0)
+ goto raise;
+
+ /* If we dont need to fragment just send it */
+ medialen = ifc->maxtu - ifc->m->hsize;
+ if(len <= medialen) {
+ if(!gating)
+ hnputs(eh->id, incref(&ip->id4));
+ hnputs(eh->length, len);
+ if(!gating){
+ eh->frag[0] = 0;
+ eh->frag[1] = 0;
+ }
+ eh->cksum[0] = 0;
+ eh->cksum[1] = 0;
+ hnputs(eh->cksum, ipcsum(&eh->vihl));
+ ifc->m->bwrite(ifc, bp, V4, gate); /* bp is passed to the medium and not freed here */
+ runlock(ifc);
+ poperror();
+ return 0;
+ }
+
+if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst); /* NOTE(review): console print for every oversize local DF packet; looks like leftover debugging */
+
+ if(eh->frag[0] & (IP_DF>>8)){
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ icmpcantfrag(f, bp, medialen);
+ netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ goto raise;
+ }
+
+ seglen = (medialen - IP4HDR) & ~7; /* data bytes per fragment, rounded down to a multiple of 8 */
+ if(seglen < 8){
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ goto raise;
+ }
+
+ dlen = len - IP4HDR;
+ xp = bp;
+ if(gating)
+ lid = nhgets(eh->id);
+ else
+ lid = incref(&ip->id4);
+
+ offset = IP4HDR; /* advance xp/rp past the original header; eh still points at it */
+ while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ xp->rp += offset;
+
+ if(gating)
+ fragoff = nhgets(eh->frag)<<3; /* existing offset in bytes; flag bits land above bit 15 and return via fragoff>>3 */
+ else
+ fragoff = 0;
+ dlen += fragoff;
+ for(; fragoff < dlen; fragoff += seglen) {
+ nb = allocb(IP4HDR+seglen);
+ feh = (Ip4hdr*)(nb->rp);
+
+ memmove(nb->wp, eh, IP4HDR);
+ nb->wp += IP4HDR;
+
+ if((fragoff + seglen) >= dlen) { /* final piece: shorten it and do not add IP_MF */
+ seglen = dlen - fragoff;
+ hnputs(feh->frag, fragoff>>3);
+ }
+ else
+ hnputs(feh->frag, (fragoff>>3)|IP_MF);
+
+ hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->id, lid);
+
+ /* Copy up the data area */
+ chunk = seglen;
+ while(chunk) {
+ if(!xp) {
+ ip->stats[OutDiscards]++;
+ ip->stats[FragFails]++;
+ freeblist(nb);
+ netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ goto raise;
+ }
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ feh->cksum[0] = 0;
+ feh->cksum[1] = 0;
+ hnputs(feh->cksum, ipcsum(&feh->vihl));
+ ifc->m->bwrite(ifc, nb, V4, gate);
+ ip->stats[FragCreates]++;
+ }
+ ip->stats[FragOKs]++;
+raise:
+ runlock(ifc);
+ poperror();
+free:
+ freeblist(bp);
+ return rv;
+}
+
+void
+ipiput4(Fs *f, Ipifc *ifc, Block *bp)
+{
+ int hl;
+ int hop, tos, proto, olen;
+ Ip4hdr *h;
+ Proto *p;
+ ushort frag;
+ int notforme;
+ uchar *dp, v6dst[IPaddrlen];
+ IP *ip;
+ Route *r;
+ /* input of one packet from ifc: deliver it to a protocol, forward it, or free it; bp is consumed */
+ if(BLKIPVER(bp) != IP_VER4) {
+ ipiput6(f, ifc, bp);
+ return;
+ }
+
+ ip = f->ip;
+ ip->stats[InReceives]++;
+
+ /*
+ * Ensure we have all the header info in the first
+ * block. Make life easier for other protocols by
+ * collecting up to the first 64 bytes in the first block.
+ */
+ if(BLEN(bp) < 64) {
+ hl = blocklen(bp);
+ if(hl < IP4HDR)
+ hl = IP4HDR;
+ if(hl > 64)
+ hl = 64;
+ bp = pullupblock(bp, hl);
+ if(bp == nil)
+ return;
+ }
+
+ h = (Ip4hdr*)(bp->rp);
+
+ /* Input NAT */
+ nati(bp, ifc);
+
+ /* dump anything whose header doesn't checksum */
+ if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "ip: checksum error %V\n", h->src);
+ freeblist(bp);
+ return;
+ }
+ v4tov6(v6dst, h->dst);
+ notforme = ipforme(f, v6dst) == 0;
+
+ /* Check header length and version */
+ if((h->vihl&0x0F) != IP_HLEN4) {
+ hl = (h->vihl&0xF)<<2;
+ if(hl < (IP_HLEN4<<2)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
+ freeblist(bp);
+ return;
+ }
+ /* If this is not routed strip off the options */
+ if(notforme == 0) {
+ olen = nhgets(h->length);
+ dp = bp->rp + (hl - (IP_HLEN4<<2)); /* slide the fixed header up over the options */
+ memmove(dp, h, IP_HLEN4<<2);
+ bp->rp = dp;
+ h = (Ip4hdr*)(bp->rp);
+ h->vihl = (IP_VER4|IP_HLEN4);
+ hnputs(h->length, olen-hl+(IP_HLEN4<<2));
+ }
+ }
+
+ /* route */
+ if(notforme) {
+ Conv conv;
+
+ if(!ip->iprouting){
+ freeblist(bp); /* bp may still be a chain: only the first 64 bytes were pulled up */
+ return;
+ }
+
+ /* don't forward to source's network */
+ conv.r = nil;
+ r = v4lookup(f, h->dst, &conv);
+ if(r == nil || r->ifc == ifc){
+ ip->stats[OutDiscards]++;
+ freeblist(bp);
+ return;
+ }
+
+ /* don't forward if packet has timed out */
+ hop = h->ttl;
+ if(hop < 1) {
+ ip->stats[InHdrErrors]++;
+ icmpttlexceeded(f, ifc->lifc->local, bp);
+ freeblist(bp);
+ return;
+ }
+
+ /* reassemble if the interface expects it */
+if(r->ifc == nil) panic("nil route rfc");
+ if(r->ifc->reassemble){
+ frag = nhgets(h->frag);
+ if(frag) {
+ h->tos = 0; /* tos is reused to tell ip4reassemble whether IP_MF was set */
+ if(frag & IP_MF)
+ h->tos = 1;
+ bp = ip4reassemble(ip, frag, bp, h);
+ if(bp == nil)
+ return;
+ h = (Ip4hdr*)(bp->rp);
+ }
+ }
+
+ ip->stats[ForwDatagrams]++;
+ tos = h->tos;
+ hop = h->ttl;
+ ipoput4(f, bp, 1, hop - 1, tos, &conv); /* gating=1: forward with the ttl decremented */
+ return;
+ }
+
+ frag = nhgets(h->frag);
+ if(frag) {
+ h->tos = 0; /* same tos-as-IP_MF convention as in the forwarding path */
+ if(frag & IP_MF)
+ h->tos = 1;
+ bp = ip4reassemble(ip, frag, bp, h);
+ if(bp == nil)
+ return;
+ h = (Ip4hdr*)(bp->rp);
+ }
+
+ /* don't let any frag info go up the stack */
+ h->frag[0] = 0;
+ h->frag[1] = 0;
+
+ proto = h->proto;
+ p = Fsrcvpcol(f, proto);
+ if(p != nil && p->rcv != nil) {
+ ip->stats[InDelivers]++;
+ (*p->rcv)(p, ifc, bp);
+ return;
+ }
+ ip->stats[InDiscards]++;
+ ip->stats[InUnknownProtos]++;
+ freeblist(bp);
+}
+
+int
+ipstats(Fs *f, char *buf, int len)
+{
+	ulong *st;
+	char *out, *end;
+	int n;
+
+	/* DefaultTTL is a constant; it is stored again on every call */
+	st = f->ip->stats;
+	st[DefaultTTL] = MAXTTL;
+	end = buf + len;
+	out = buf;
+	for(n = 0; n < Nstats; n++)
+		out = seprint(out, end, "%s: %lud\n", statnames[n], st[n]);
+	return out - buf;
+}
+
+Block*
+ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+{
+ int fend;
+ ushort id;
+ Fragment4 *f, *fnext;
+ ulong src, dst;
+ Block *bl, **l, *last, *prev;
+ int ovlap, len, fragsize, pktposn;
+ /* offset is the raw frag field; ih->tos is set by ipiput4 to 1 when IP_MF was set. returns a whole packet or nil */
+ src = nhgetl(ih->src);
+ dst = nhgetl(ih->dst);
+ id = nhgets(ih->id);
+
+ /*
+ * block lists are too hard, pullupblock into a single block
+ */
+ if(bp->next){
+ bp = pullupblock(bp, blocklen(bp));
+ ih = (Ip4hdr*)(bp->rp);
+ }
+
+ qlock(&ip->fraglock4);
+
+ /*
+ * find a reassembly queue for this fragment
+ */
+ for(f = ip->flisthead4; f; f = fnext){
+ fnext = f->next; /* because ipfragfree4 changes the list */
+ if(f->src == src && f->dst == dst && f->id == id)
+ break;
+ if(f->age < NOW){ /* expire stale queues passed on the way */
+ ip->stats[ReasmTimeout]++;
+ ipfragfree4(ip, f);
+ }
+ }
+
+ /*
+ * if this isn't a fragmented packet, accept it
+ * and get rid of any fragments that might go
+ * with it.
+ */
+ if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+ if(f != nil) {
+ ipfragfree4(ip, f);
+ ip->stats[ReasmFails]++;
+ }
+ qunlock(&ip->fraglock4);
+ return bp;
+ }
+
+ if(bp->base+sizeof(Ipfrag) >= bp->rp){ /* BKFG keeps its Ipfrag at bp->base: make room in front of rp */
+ bp = padblock(bp, sizeof(Ipfrag));
+ bp->rp += sizeof(Ipfrag);
+ }
+
+ BKFG(bp)->foff = offset<<3; /* byte offset; the flag bits are shifted out of the ushort */
+ BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+
+ /* First fragment allocates a reassembly queue */
+ if(f == nil) {
+ f = ipfragallo4(ip);
+ f->id = id;
+ f->src = src;
+ f->dst = dst;
+
+ f->blist = bp;
+
+ qunlock(&ip->fraglock4);
+ ip->stats[ReasmReqds]++;
+ return nil;
+ }
+
+ /*
+ * find the new fragment's position in the queue
+ */
+ prev = nil;
+ l = &f->blist;
+ bl = f->blist;
+ while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ prev = bl;
+ l = &bl->next;
+ bl = bl->next;
+ }
+
+ /* Check overlap of a previous fragment - trim away as necessary */
+ if(prev) {
+ ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(ovlap > 0) {
+ if(ovlap >= BKFG(bp)->flen) { /* new fragment lies wholly inside prev: drop it */
+ freeblist(bp);
+ qunlock(&ip->fraglock4);
+ return nil;
+ }
+ BKFG(prev)->flen -= ovlap;
+ }
+ }
+
+ /* Link onto assembly queue */
+ bp->next = *l;
+ *l = bp;
+
+ /* Check to see if succeeding segments overlap */
+ if(bp->next) {
+ l = &bp->next;
+ fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ /* Take completely covered segments out */
+ while(*l) {
+ ovlap = fend - BKFG(*l)->foff;
+ if(ovlap <= 0)
+ break;
+ if(ovlap < BKFG(*l)->flen) { /* partial overlap: trim the front of the later fragment */
+ BKFG(*l)->flen -= ovlap;
+ BKFG(*l)->foff += ovlap;
+ /* move up ih hdrs */
+ memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
+ (*l)->rp += ovlap;
+ break;
+ }
+ last = (*l)->next;
+ (*l)->next = nil;
+ freeblist(*l);
+ *l = last;
+ }
+ }
+
+ /*
+ * look for a complete packet. if we get to a fragment
+ * without IP_MF set, we're done.
+ */
+ pktposn = 0;
+ for(bl = f->blist; bl; bl = bl->next) {
+ if(BKFG(bl)->foff != pktposn) /* gap before this fragment: not complete yet */
+ break;
+ if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
+ bl = f->blist;
+ len = nhgets(BLKIP(bl)->length);
+ bl->wp = bl->rp + len;
+
+ /* Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl; bl = bl->next) {
+ fragsize = BKFG(bl)->flen;
+ len += fragsize;
+ bl->rp += IP4HDR;
+ bl->wp = bl->rp + fragsize;
+ }
+
+ bl = f->blist;
+ f->blist = nil; /* detach the chain so ipfragfree4 does not free it */
+ ipfragfree4(ip, f);
+ ih = BLKIP(bl);
+ hnputs(ih->length, len);
+ qunlock(&ip->fraglock4);
+ ip->stats[ReasmOKs]++;
+ return bl;
+ }
+ pktposn += BKFG(bl)->flen;
+ }
+ qunlock(&ip->fraglock4);
+ return nil;
+}
+
+/*
+ * ipfragfree4 - release a reassembly queue and any fragments it holds;
+ * caller must hold fraglock4
+ */
+void
+ipfragfree4(IP *ip, Fragment4 *frag)
+{
+	Fragment4 **link;
+
+	/* discard whatever fragments were collected */
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->id = 0;
+	frag->src = 0;
+
+	/* unlink from the active list, if it is on it */
+	for(link = &ip->flisthead4; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	/* push onto the free list */
+	frag->next = ip->fragfree4;
+	ip->fragfree4 = frag;
+}
+
+/*
+ * ipfragallo4 - move a queue from the free list to the head of the active list; assume hold fraglock4
+ */
+Fragment4 *
+ipfragallo4(IP *ip)
+{
+	Fragment4 *q, *tail;
+
+	while(ip->fragfree4 == nil){
+		/* nothing free: reclaim the entry at the tail of the active list */
+		tail = ip->flisthead4;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree4(ip, tail);
+	}
+	q = ip->fragfree4;
+	ip->fragfree4 = q->next;
+	q->next = ip->flisthead4;
+	ip->flisthead4 = q;
+	q->age = NOW + 30000;	/* expiry, 30000 NOW units ahead */
+	return q;
+}
+
+ushort
+ipcsum(uchar *addr)
+{
+	uchar *end;
+	ulong acc;
+
+	/* the low nibble of the first byte gives the length in 4-byte words */
+	end = addr + ((addr[0]&0xf)<<2);
+
+	/* add up that many bytes as big-endian 16-bit words */
+	for(acc = 0; addr < end; addr += 2)
+		acc += addr[0]<<8 | addr[1];
+
+	/* fold the carries back in; the second fold absorbs a carry from the first */
+	acc = (acc & 0xffff) + (acc >> 16);
+	acc = (acc & 0xffff) + (acc >> 16);
+
+	/* complement; the ushort return keeps the low 16 bits */
+	return acc ^ 0xffff;
+}
--- /dev/null
+++ b/os/ip.original/ip.h
@@ -1,0 +1,709 @@
+typedef struct Conv Conv;
+typedef struct Fs Fs;
+typedef union Hwaddr Hwaddr;
+typedef struct IP IP;
+typedef struct IPaux IPaux;
+typedef struct Ipself Ipself;
+typedef struct Ipselftab Ipselftab;
+typedef struct Iplink Iplink;
+typedef struct Iplifc Iplifc;
+typedef struct Ipmulti Ipmulti;
+typedef struct IProuter IProuter;
+typedef struct Ipifc Ipifc;
+typedef struct Iphash Iphash;
+typedef struct Ipht Ipht;
+typedef struct Netlog Netlog;
+typedef struct Ifclog Ifclog;
+typedef struct Medium Medium;
+typedef struct Proto Proto;
+typedef struct Arpent Arpent;
+typedef struct Arp Arp;
+typedef struct Route Route;
+
+typedef struct Routerparams Routerparams;
+typedef struct Hostparams Hostparams;
+typedef struct V6router V6router;
+typedef struct V6params V6params;
+
+typedef struct Ip4hdr Ip4hdr;
+typedef struct Nat Nat;
+
+#pragma incomplete Arp
+#pragma incomplete Ifclog
+#pragma incomplete Ipself
+#pragma incomplete Ipselftab
+#pragma incomplete IP
+#pragma incomplete Netlog
+
+enum
+{
+ Addrlen= 64,
+ Maxproto= 20,
+ Nhash= 64,
+ Maxincall= 5,
+ Nchans= 16383,
+ MAClen= 16, /* longest mac address */
+
+ MAXTTL= 255,
+ DFLTTOS= 0,
+
+ IPaddrlen= 16,
+ IPv4addrlen= 4,
+ IPv4off= 12,
+ IPllen= 4,
+
+ /* ip versions */
+ V4= 4,
+ V6= 6,
+ IP_VER4= 0x40,
+ IP_VER6= 0x60,
+
+ /* 2^Lroot trees in the root table */
+ Lroot= 10,
+
+ Maxpath = 64,
+};
+
+enum
+{
+ Idle= 0,
+ Announcing= 1,
+ Announced= 2,
+ Connecting= 3,
+ Connected= 4,
+};
+
+/* on the wire packet header; multi-byte fields are uchar arrays accessed with nhgets/hnputs */
+struct Ip4hdr
+{
+ uchar vihl; /* Version (high nibble, IP_VER4) and header length in 4-byte words (low nibble) */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length, header included */
+ uchar id[2]; /* packet identification, shared by all fragments of a packet */
+ uchar frag[2]; /* Fragment information: IP_DF/IP_MF flags plus offset in 8-byte units */
+ uchar ttl; /* Time to live */
+ uchar proto; /* Protocol */
+ uchar cksum[2]; /* Header checksum */
+ uchar src[4]; /* IP source */
+ uchar dst[4]; /* IP destination */
+ uchar data[1]; /* start of data */
+};
+
+/*
+ * one per conversation directory
+ */
+struct Conv
+{
+ QLock;
+
+ int x; /* conversation index */
+ Proto* p;
+
+ int restricted; /* remote port is restricted */
+ uint ttl; /* max time to live */
+ uint tos; /* type of service */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
+
+ uchar ipversion;
+ uchar laddr[IPaddrlen]; /* local IP address */
+ uchar raddr[IPaddrlen]; /* remote IP address */
+ ushort lport; /* local port number */
+ ushort rport; /* remote port number */
+
+ char *owner; /* protections */
+ int perm;
+ int inuse; /* opens of listen/data/ctl */
+ int length;
+ int state;
+
+ /* udp specific */
+ int headers; /* data src/dst headers in udp */
+ int reliable; /* true if reliable udp */
+
+ Conv* incall; /* calls waiting to be listened for */
+ Conv* next;
+
+ Queue* rq; /* queued data waiting to be read */
+ Queue* wq; /* queued data waiting to be written */
+ Queue* eq; /* returned error packets */
+ Queue* sq; /* snooping queue */
+ Ref snoopers; /* number of processes with snoop open */
+
+ Rendez cr;
+ char cerr[ERRMAX];
+
+ QLock listenq;
+ Rendez listenr;
+
+ Ipmulti *multi; /* multicast bindings for this interface */
+
+ void* ptcl; /* protocol specific stuff */
+
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
+struct Medium
+{
+ char *name;
+ int hsize; /* medium header size */
+ int mintu; /* default min mtu */
+ int maxtu; /* default max mtu */
+ int maclen; /* mac address length */
+ void (*bind)(Ipifc*, int, char**);
+ void (*unbind)(Ipifc*);
+ void (*bwrite)(Ipifc *ifc, Block *b, int version, uchar *ip);
+
+ /* for arming interfaces to receive multicast */
+ void (*addmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*remmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* process packets written to 'data' */
+ void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
+
+ /* routes for router boards */
+ void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
+ void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
+ void (*flushroutes)(Ipifc *ifc);
+
+ /* for routing multicast groups */
+ void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
+ void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
+
+ /* address resolution */
+ void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
+ void (*areg)(Ipifc*, uchar*); /* register */
+
+ /* v6 address generation */
+ void (*pref2addr)(uchar *pref, uchar *ea);
+
+ int unbindonclose; /* if non-zero, unbind on last close */
+};
+
+/* logical interface associated with a physical one */
+struct Iplifc
+{
+ uchar local[IPaddrlen];
+ uchar mask[IPaddrlen];
+ uchar remote[IPaddrlen];
+ uchar net[IPaddrlen];
+ uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
+ uchar onlink; /* =1 => onlink, =0 offlink. */
+ uchar autoflag; /* v6 autonomous flag */
+ long validlt; /* v6 valid lifetime */
+ long preflt; /* v6 preferred lifetime */
+ long origint; /* time when addr was added */
+ Iplink *link; /* addresses linked to this lifc */
+ Iplifc *next;
+};
+
+/* binding twixt Ipself and Iplifc */
+struct Iplink
+{
+ Ipself *self;
+ Iplifc *lifc;
+ Iplink *selflink; /* next link for this local address */
+ Iplink *lifclink; /* next link for this ifc */
+ ulong expire;
+ Iplink *next; /* free list */
+ int ref;
+};
+
+/* rfc 2461, pp.40--43. */
+
+/* default values, one per stack */
+struct Routerparams {
+ int mflag;
+ int oflag;
+ int maxraint;
+ int minraint;
+ int linkmtu;
+ int reachtime;
+ int rxmitra;
+ int ttl;
+ int routerlt;
+};
+
+struct Hostparams {
+ int rxmithost;
+};
+
+struct Ipifc
+{
+ RWlock; /* ipoput4 holds this for reading (canrlock/runlock) while it uses m */
+
+ Conv *conv; /* link to its conversation structure */
+ char dev[64]; /* device we're attached to */
+ Medium *m; /* Media pointer; may be nil (ipoput4 checks before use) */
+ int maxtu; /* Maximum transfer unit */
+ int mintu; /* Minimum transfer unit */
+ int mbps; /* megabits per second */
+ void *arg; /* medium specific */
+ int reassemble; /* reassemble IP packets before forwarding */
+
+ /* these are used so that we can unbind on the fly */
+ Lock idlock;
+ uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
+ int ref; /* number of proc's using this ipifc */
+ Rendez wait; /* where unbinder waits for ref == 0 */
+ int unbinding;
+
+ uchar mac[MAClen]; /* MAC address */
+
+ Iplifc *lifc; /* logical interfaces on this physical one */
+
+ ulong in, out; /* message statistics */
+ ulong inerr, outerr; /* ... */
+
+ uchar sendra6; /* == 1 => send router advs on this ifc */
+ uchar recvra6; /* == 1 => recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40--43.
+ used only if node is router */
+};
+
+/*
+ * one per multicast-lifc pair used by a Conv
+ */
+struct Ipmulti
+{
+ uchar ma[IPaddrlen];
+ uchar ia[IPaddrlen];
+ Ipmulti *next;
+};
+
+/*
+ * hash table for 2 ip addresses + 2 ports
+ */
+enum
+{
+ Nipht= 521, /* convenient prime */
+
+ IPmatchexact= 0, /* match on 4 tuple */
+ IPmatchany, /* *!* */
+ IPmatchport, /* *!port */
+ IPmatchaddr, /* addr!* */
+ IPmatchpa, /* addr!port */
+};
+struct Iphash
+{
+ Iphash *next;
+ Conv *c;
+ int match;
+};
+struct Ipht
+{
+ Lock;
+ Iphash *tab[Nipht];
+};
+void iphtadd(Ipht*, Conv*);
+void iphtrem(Ipht*, Conv*);
+Conv* iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp);
+
+/*
+ * one per multiplexed protocol
+ */
+struct Proto
+{
+ QLock;
+ char* name; /* protocol name */
+ int x; /* protocol index */
+ int ipproto; /* ip protocol type */
+
+ char* (*connect)(Conv*, char**, int);
+ char* (*announce)(Conv*, char**, int);
+ char* (*bind)(Conv*, char**, int);
+ int (*state)(Conv*, char*, int);
+ void (*create)(Conv*);
+ void (*close)(Conv*);
+ void (*rcv)(Proto*, Ipifc*, Block*);
+ char* (*ctl)(Conv*, char**, int);
+ void (*advise)(Proto*, Block*, char*);
+ int (*stats)(Proto*, char*, int);
+ int (*local)(Conv*, char*, int);
+ int (*remote)(Conv*, char*, int);
+ int (*inuse)(Conv*);
+ int (*gc)(Proto*); /* returns true if any conversations are freed */
+
+ Fs *f; /* file system this proto is part of */
+ Conv **conv; /* array of conversations */
+ int ptclsize; /* size of per protocol ctl block */
+ int nc; /* number of conversations */
+ int ac;
+ Qid qid; /* qid for protocol directory */
+ ushort nextport;
+ ushort nextrport;
+
+ void *priv;
+};
+
+/*
+ * Stream for sending packets to user level
+ */
+struct IProuter {
+ QLock;
+ int opens;
+ Queue *q;
+};
+
+/*
+ * one per IP protocol stack
+ */
+struct Fs
+{
+ RWlock;
+ int dev;
+
+ int np;
+ Proto* p[Maxproto+1]; /* list of supported protocols */
+ Proto* t2p[256]; /* vector of all protocols */
+ Proto* ipifc; /* kludge for ipifcremroute & ipifcaddroute */
+ Proto* ipmux; /* kludge for finding an ip multiplexor */
+
+ IP *ip;
+ Ipselftab *self;
+ Arp *arp;
+ V6params *v6p;
+ IProuter iprouter;
+
+ Route *v4root[1<<Lroot]; /* v4 routing forest */
+ Route *v6root[1<<Lroot]; /* v6 routing forest */
+ Route *queue; /* used as temp when reinjecting routes */
+
+ Netlog *alog;
+ Ifclog *ilog;
+
+ char ndb[1024]; /* an ndb entry for this interface */
+ int ndbvers;
+ long ndbmtime;
+};
+
+/* one per default router known to host */
+struct V6router {
+ uchar inuse;
+ Ipifc *ifc;
+ int ifcid;
+ uchar routeraddr[IPaddrlen];
+ long ltorigin;
+ Routerparams rp;
+};
+
+struct V6params
+{
+ Routerparams rp; /* v6 params, one copy per node now */
+ Hostparams hp;
+ V6router v6rlist[3]; /* max 3 default routers, currently */
+ int cdrouter; /* uses only v6rlist[cdrouter] if */
+ /* cdrouter >= 0. */
+};
+
+
+int Fsconnected(Conv*, char*);
+Conv* Fsnewcall(Conv*, uchar*, ushort, uchar*, ushort, uchar);
+int Fspcolstats(char*, int);
+int Fsproto(Fs*, Proto*);
+int Fsbuiltinproto(Fs*, uchar);
+Conv* Fsprotoclone(Proto*, char*);
+Proto* Fsrcvpcol(Fs*, uchar);
+Proto* Fsrcvpcolx(Fs*, uchar);
+char* Fsstdconnect(Conv*, char**, int);
+char* Fsstdannounce(Conv*, char**, int);
+char* Fsstdbind(Conv*, char**, int);
+ulong scalednconv(void);
+void closeconv(Conv*);
+
+/*
+ * logging
+ */
+enum
+{
+ Logip= 1<<1,
+ Logtcp= 1<<2,
+ Logfs= 1<<3,
+ Logil= 1<<4,
+ Logicmp= 1<<5,
+ Logudp= 1<<6,
+ Logcompress= 1<<7,
+ Logilmsg= 1<<8,
+ Loggre= 1<<9,
+ Logppp= 1<<10,
+ Logtcprxmt= 1<<11,
+ Logigmp= 1<<12,
+ Logudpmsg= 1<<13,
+ Logipmsg= 1<<14,
+ Logrudp= 1<<15,
+ Logrudpmsg= 1<<16,
+ Logesp= 1<<17,
+ Logtcpwin= 1<<18,
+ Lognat= 1<<19,
+};
+
+void netloginit(Fs*);
+void netlogopen(Fs*);
+void netlogclose(Fs*);
+void netlogctl(Fs*, char*, int);
+long netlogread(Fs*, void*, ulong, long);
+void netlog(Fs*, int, char*, ...);
+void ifcloginit(Fs*);
+long ifclogread(Fs*, Chan *,void*, ulong, long);
+void ifclog(Fs*, uchar *, int);
+void ifclogopen(Fs*, Chan*);
+void ifclogclose(Fs*, Chan*);
+
+/*
+ * iproute.c
+ */
+typedef struct RouteTree RouteTree;
+typedef struct Routewalk Routewalk;
+typedef struct V4route V4route;
+typedef struct V6route V6route;
+
+enum
+{
+
+ /* type bits */
+ Rv4= (1<<0), /* this is a version 4 route */
+ Rifc= (1<<1), /* this route is a directly connected interface */
+ Rptpt= (1<<2), /* this route is a pt to pt interface */
+ Runi= (1<<3), /* a unicast self address */
+ Rbcast= (1<<4), /* a broadcast self address */
+ Rmulti= (1<<5), /* a multicast self address */
+ Rproxy= (1<<6), /* this route should be proxied */
+};
+
+struct Routewalk
+{
+ int o;
+ int h;
+ char* p;
+ char* e;
+ void* state;
+ void (*walk)(Route*, Routewalk*);
+};
+
+struct RouteTree
+{
+ Route* right;
+ Route* left;
+ Route* mid;
+ uchar depth;
+ uchar type; /* type bits: Rv4, Rifc, Rptpt, Runi, Rbcast, Rmulti, Rproxy */
+ uchar ifcid; /* must match ifc->ifcid */
+ Ipifc *ifc;
+ char tag[4];
+ int ref;
+};
+
+struct V4route
+{
+ ulong address;
+ ulong endaddress;
+ uchar gate[IPv4addrlen];
+};
+
+struct V6route
+{
+ ulong address[IPllen];
+ ulong endaddress[IPllen];
+ uchar gate[IPaddrlen];
+};
+
+struct Route
+{
+ RouteTree;
+
+ union {
+ V6route v6;
+ V4route v4;
+ };
+};
+extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
+extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
+extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
+extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+extern long routeread(Fs *f, char*, ulong, int);
+extern long routewrite(Fs *f, Chan*, char*, int);
+extern void routetype(int, char*);
+extern void ipwalkroutes(Fs*, Routewalk*);
+extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+
+/*
+ * devip.c
+ */
+
+/*
+ * Hanging off every ip channel's ->aux is the following structure.
+ * It maintains the state used by devip and iproute.
+ */
+struct IPaux
+{
+ char *owner; /* the user that did the attach */
+ char tag[4];
+};
+
+extern IPaux* newipaux(char*, char*);
+extern void setlport(Conv*);
+
+/*
+ * arp.c
+ */
+struct Arpent
+{
+ uchar ip[IPaddrlen];
+ uchar mac[MAClen];
+ Medium *type; /* media type */
+ Arpent* hash;
+ Block* hold;
+ Block* last;
+ uint ctime; /* time entry was created or refreshed */
+ uint utime; /* time entry was last used */
+ uchar state;
+ Arpent *nextrxt; /* re-transmit chain */
+ uint rtime; /* time for next retransmission */
+ uchar rxtsrem;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->ifcid */
+};
+
+extern void arpinit(Fs*);
+extern int arpread(Arp*, char*, ulong, int);
+extern int arpwrite(Fs*, char*, int);
+extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
+extern void arprelease(Arp*, Arpent *a);
+extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
+extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+
+/*
+ * ipaux.c
+ */
+
+extern int myetheraddr(uchar*, char*);
+extern ulong parseip(uchar*, char*);
+extern ulong parseipmask(uchar*, char*);
+extern char* v4parseip(uchar*, char*);
+extern void maskip(uchar *from, uchar *mask, uchar *to);
+extern int parsemac(uchar *to, char *from, int len);
+extern uchar* defmask(uchar*);
+extern int isv4(uchar*);
+extern void v4tov6(uchar *v6, uchar *v4);
+extern int v6tov4(uchar *v4, uchar *v6);
+extern int eipfmt(Fmt*);
+
+#define ipmove(x, y) memmove(x, y, IPaddrlen)
+#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
+
+#define ip4move(x, y) memmove(x, y, IPv4addrlen)
+#define ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
+
+extern uchar IPv4bcast[IPaddrlen];
+extern uchar IPv4bcastobs[IPaddrlen];
+extern uchar IPv4allsys[IPaddrlen];
+extern uchar IPv4allrouter[IPaddrlen];
+extern uchar IPnoaddr[IPaddrlen];
+extern uchar v4prefix[IPaddrlen];
+extern uchar IPallbits[IPaddrlen];
+
+#define NOW TK2MS(MACHP(0)->ticks)
+
+/*
+ * media
+ */
+extern Medium ethermedium;
+extern Medium nullmedium;
+extern Medium pktmedium;
+extern Medium tripmedium;
+
+/*
+ * ipifc.c
+ */
+extern Medium* ipfindmedium(char *name);
+extern void addipmedium(Medium *med);
+extern int ipforme(Fs*, uchar *addr);
+extern int iptentative(Fs*, uchar *addr);
+extern int ipisbm(uchar *);
+extern int ipismulticast(uchar *);
+extern Ipifc* findipifc(Fs*, uchar *remote, int type);
+extern void findprimaryip(Fs*, uchar*);
+extern void findlocalip(Fs*, uchar *local, uchar *remote);
+extern int ipv4local(Ipifc *ifc, uchar *addr);
+extern int ipv6local(Ipifc *ifc, uchar *addr);
+extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
+extern int ipismulticast(uchar *ip);
+extern int ipisbooting(void);
+extern int ipifccheckin(Ipifc *ifc, Medium *med);
+extern void ipifccheckout(Ipifc *ifc);
+extern int ipifcgrab(Ipifc *ifc);
+extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
+extern void ipifcremroute(Fs*, int, uchar*, uchar*);
+extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
+extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
+extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
+extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
+extern long ipselftabread(Fs*, char *a, ulong offset, int n);
+extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
+extern void ipsendra6(Fs *f, int on);
+
+/*
+ * ip.c
+ */
+extern void iprouting(Fs*, int);
+extern void icmpnoconv(Fs*, Block*);
+extern void icmpcantfrag(Fs*, Block*, int);
+extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern ushort ipcsum(uchar*);
+extern void ipiput4(Fs*, Ipifc*, Block*);
+extern void ipiput6(Fs*, Ipifc*, Block*);
+extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipstats(Fs*, char*, int);
+extern ushort ptclbsum(uchar*, int);
+extern ushort ptclcsum(Block*, int, int);
+extern void ip_init(Fs*);
+extern void update_mtucache(uchar*, ulong);
+extern ulong restrict_mtu(uchar*, ulong);
+
+/*
+ * bootp.c
+ */
+char* (*bootp)(Ipifc*);
+int (*bootpread)(char*, ulong, int);
+
+/*
+ * iprouter.c
+ */
+void useriprouter(Fs*, Ipifc*, Block*);
+void iprouteropen(Fs*);
+void iprouterclose(Fs*);
+long iprouterread(Fs*, void*, int);
+
+/*
+ * resolving inferno/plan9 differences
+ */
+Chan* commonfdtochan(int, int, int, int);
+char* commonuser(void);
+char* commonerror(void);
+
+/*
+ * chandial.c
+ */
+extern Chan* chandial(char*, char*, char*, Chan**);
+
+/*
+ * global to all of the stack
+ */
+extern void (*igmpreportfn)(Ipifc*, uchar*);
+
+/*
+ * nat.c
+ */
+extern int nato(Block*, Ipifc*, Fs*);
+extern void nati(Block*, Ipifc*);
+extern int natgc(uchar);
+
+extern int addnataddr(uchar*, uchar*, Iplifc*);
+extern int removenataddr(uchar*, uchar*, Iplifc*);
+extern void shownataddr(void);
+extern void flushnataddr(void);
--- /dev/null
+++ b/os/ip.original/ipaux.c
@@ -1,0 +1,729 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+#include "ipv6.h"
+
+/*
+ * well known IP addresses (v4 ones in 16-byte form: the v4prefix bytes followed by the 4 address bytes)
+ */
+uchar IPv4bcast[IPaddrlen] = {	/* 255.255.255.255 */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff
+};
+uchar IPv4allsys[IPaddrlen] = {	/* 224.0.0.1 */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xe0, 0, 0, 0x01
+};
+uchar IPv4allrouter[IPaddrlen] = {	/* 224.0.0.2 */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0xe0, 0, 0, 0x02
+};
+uchar IPallbits[IPaddrlen] = {	/* every bit set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff
+};
+
+uchar IPnoaddr[IPaddrlen];	/* no initializer, so all zero */
+
+/*
+ * prefix of all v4 addresses: 10 zero bytes then 0xff 0xff (the last 4 bytes are where the v4 address goes)
+ */
+uchar v4prefix[IPaddrlen] = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0xff, 0xff,
+ 0, 0, 0, 0
+};
+
+
+char *v6hdrtypes[Maxhdrtype] =	/* printable names, indexed by the header type constants used as designators below */
+{
+ [HBH] "HopbyHop",
+ [ICMP] "ICMP",
+ [IGMP] "IGMP",
+ [GGP] "GGP",
+ [IPINIP] "IP",
+ [ST] "ST",
+ [TCP] "TCP",
+ [UDP] "UDP",
+ [ISO_TP4] "ISO_TP4",
+ [RH] "Routinghdr",
+ [FH] "Fraghdr",
+ [IDRP] "IDRP",
+ [RSVP] "RSVP",
+ [AH] "Authhdr",
+ [ESP] "ESP",
+ [ICMPv6] "ICMPv6",
+ [NNH] "Nonexthdr",
+ [ISO_IP] "ISO_IP",
+ [IGRP] "IGRP",
+ [OSPF] "OSPF",
+};
+
+/*
+ * well known IPv6 addresses, their masks, and prefix lengths counted in bytes
+ */
+uchar v6Unspecified[IPaddrlen] = {	/* :: */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6loopback[IPaddrlen] = {	/* ::1 */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+uchar v6linklocal[IPaddrlen] = {	/* fe80:: */
+ 0xfe, 0x80, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6linklocalmask[IPaddrlen] = {	/* first 8 bytes set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6llpreflen = 8; // link-local prefix length in bytes (used as a memcmp length by defmask)
+uchar v6sitelocal[IPaddrlen] = {	/* fec0:: */
+ 0xfe, 0xc0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6sitelocalmask[IPaddrlen] = {	/* first 8 bytes set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6slpreflen = 6; // site-local prefix length in bytes (used as a memcmp length by defmask)
+uchar v6glunicast[IPaddrlen] = {
+ 0x08, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6multicast[IPaddrlen] = {	/* ff00:: */
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+uchar v6multicastmask[IPaddrlen] = {	/* first byte set */
+ 0xff, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6mcpreflen = 1; // multicast prefix length in bytes (used as a memcmp length by defmask)
+uchar v6allnodesN[IPaddrlen] = {	/* ff01::1 */
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+uchar v6allnodesNmask[IPaddrlen] = {	/* first 2 bytes set */
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aNpreflen = 2; // all nodes (N) prefix: 2 bytes, the same span as v6allnodesNmask
+uchar v6allnodesL[IPaddrlen] = {	/* ff02::1 */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01
+};
+uchar v6allnodesLmask[IPaddrlen] = {	/* first 2 bytes set */
+ 0xff, 0xff, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0
+};
+int v6aLpreflen = 2; // all nodes (L) prefix: 2 bytes, the same span as v6allnodesLmask
+uchar v6allroutersN[IPaddrlen] = {	/* ff01::2 */
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+uchar v6allroutersL[IPaddrlen] = {	/* ff02::2 */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+uchar v6allroutersS[IPaddrlen] = {	/* ff05::2 */
+ 0xff, 0x05, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
+uchar v6solicitednode[IPaddrlen] = {	/* ff02::1:ff00:0; ipv62smcast fills in the last 3 bytes */
+ 0xff, 0x02, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x01,
+ 0xff, 0, 0, 0
+};
+uchar v6solicitednodemask[IPaddrlen] = {	/* first 13 bytes set */
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0x0, 0x0, 0x0
+};
+int v6snpreflen = 13;	// solicited-node prefix length in bytes (used as a memcmp length by defmask)
+
+
+
+
+ushort
+ptclcsum(Block *bp, int offset, int len)
+{
+ uchar *addr;
+ ulong losum, hisum;
+ ushort csum;
+ int odd, blocklen, x;
+ /* checksum up to len bytes starting offset bytes into the Block chain bp; returns the complemented 16-bit sum, or 0 when offset runs off the chain */
+ /* Correct to front of data area */
+ while(bp != nil && offset && offset >= BLEN(bp)) {
+ offset -= BLEN(bp);
+ bp = bp->next;
+ }
+ if(bp == nil)
+ return 0;
+
+ addr = bp->rp + offset;
+ blocklen = BLEN(bp) - offset;
+
+ if(bp->next == nil) {	/* data ends in this block: sum it directly, clamping len to what is there */
+ if(blocklen < len)
+ len = blocklen;
+ return ~ptclbsum(addr, len) & 0xffff;
+ }
+
+ losum = 0;
+ hisum = 0;
+
+ odd = 0;	/* 1 while the bytes summed so far total an odd count */
+ while(len) {
+ x = blocklen;
+ if(len < x)
+ x = len;
+
+ csum = ptclbsum(addr, x);
+ if(odd)	/* this piece starts on an odd byte: keep its sum apart so its bytes can be exchanged below */
+ hisum += csum;
+ else
+ losum += csum;
+ odd = (odd+x) & 1;
+ len -= x;
+
+ bp = bp->next;
+ if(bp == nil)
+ break;
+ blocklen = BLEN(bp);
+ addr = bp->rp;
+ }
+
+ losum += hisum>>8;	/* add hisum with its low byte and upper part exchanged */
+ losum += (hisum&0xff)<<8;
+ while((csum = losum>>16) != 0)	/* fold carries back into the low 16 bits */
+ losum = csum + (losum & 0xffff);
+
+ return ~losum & 0xffff;
+}
+
+enum
+{
+ Isprefix= 16,	/* flag or'ed into the prefixvals entries that are legal mask bytes */
+};
+
+static uchar prefixvals[256] =	/* indexed by a mask byte: count of leading one bits | Isprefix; every other byte value stays 0 */
+{
+[0x00] 0 | Isprefix,
+[0x80] 1 | Isprefix,
+[0xC0] 2 | Isprefix,
+[0xE0] 3 | Isprefix,
+[0xF0] 4 | Isprefix,
+[0xF8] 5 | Isprefix,
+[0xFC] 6 | Isprefix,
+[0xFE] 7 | Isprefix,
+[0xFF] 8 | Isprefix,
+};
+
+int
+eipfmt(Fmt *f)
+{
+	char buf[5*8];
+	static char *efmt = "%.2ux%.2ux%.2ux%.2ux%.2ux%.2ux";	/* bytes promote to int, so no 'l' flag */
+	static char *ifmt = "%d.%d.%d.%d";
+	uchar *p, ip[16];
+	ulong *lp;
+	ushort s;
+	int i, j, n, eln, eli;
+	/* print verb handler: E ethernet address, I ip address, i v6 address held as 4 longs, V 4-byte v4 address, M ip mask */
+	switch(f->r) {
+	case 'E':		/* Ethernet address */
+		p = va_arg(f->args, uchar*);
+		return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
+
+	case 'I':		/* Ip address */
+		p = va_arg(f->args, uchar*);
+common:
+		if(memcmp(p, v4prefix, 12) == 0)
+			return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
+
+		/* find longest elision: eli is the byte offset of the longest run of zero 16-bit groups, eln its length in bytes */
+		eln = eli = -1;
+		for(i = 0; i < 16; i += 2){
+			for(j = i; j < 16; j += 2)
+				if(p[j] != 0 || p[j+1] != 0)
+					break;
+			if(j > i && j - i > eln){
+				eli = i;
+				eln = j - i;
+			}
+		}
+
+		/* print with possible elision; buf holds the longest form: 8 groups of 4 digits, 7 colons and a NUL */
+		n = 0;
+		for(i = 0; i < 16; i += 2){
+			if(i == eli){
+				n += sprint(buf+n, "::");
+				i += eln;
+				if(i >= 16)
+					break;
+			} else if(i != 0)
+				n += sprint(buf+n, ":");
+			s = (p[i]<<8) + p[i+1];
+			n += sprint(buf+n, "%ux", s);
+		}
+		return fmtstrcpy(f, buf);
+
+	case 'i':		/* v6 address as 4 longs */
+		lp = va_arg(f->args, ulong*);
+		for(i = 0; i < 4; i++)
+			hnputl(ip+4*i, *lp++);
+		p = ip;
+		goto common;
+
+	case 'V':		/* v4 ip address */
+		p = va_arg(f->args, uchar*);
+		return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
+
+	case 'M':		/* ip mask */
+		p = va_arg(f->args, uchar*);
+
+		/* look for a prefix mask: leading 0xff bytes, at most one partial byte, then zeros */
+		for(i = 0; i < 16; i++)
+			if(p[i] != 0xff)
+				break;
+		if(i < 16){
+			if((prefixvals[p[i]] & Isprefix) == 0)
+				goto common;	/* not a prefix: print it like an address */
+			for(j = i+1; j < 16; j++)
+				if(p[j] != 0)
+					goto common;
+			n = 8*i + (prefixvals[p[i]] & ~Isprefix);
+		} else
+			n = 8*16;
+
+		/* got one, use /xx format */
+		return fmtprint(f, "/%d", n);
+	}
+	return fmtstrcpy(f, "(eipfmt)");
+}
+
+#define CLASS(p) ((*(uchar*)(p))>>6)	/* top two bits of the first address byte */
+
+extern char*
+v4parseip(uchar *to, char *from)
+{
+ int i;
+ char *p;
+ /* parse up to four '.'-separated numbers (strtoul, base 0) into to[0..3]; returns the position where parsing stopped */
+ p = from;
+ for(i = 0; i < 4 && *p; i++){
+ to[i] = strtoul(p, &p, 0);
+ if(*p == '.')
+ p++;
+ }
+ switch(CLASS(to)){	/* fewer than four numbers given: move the trailing ones to the low-order bytes */
+ case 0: /* class A - 1 uchar net */
+ case 1:
+ if(i == 3){
+ to[3] = to[2];
+ to[2] = to[1];
+ to[1] = 0;
+ } else if(i == 2){
+ to[3] = to[1];
+ to[1] = 0;
+ }
+ break;
+ case 2: /* class B - 2 uchar net */
+ if(i == 3){
+ to[3] = to[2];
+ to[2] = 0;
+ }
+ break;
+ }
+ return p;
+}
+
+int
+isv4(uchar *ip)
+{
+	return !memcmp(ip, v4prefix, IPv4off);	/* 1 when ip begins with the first IPv4off bytes of v4prefix */
+}
+
+
+/*
+ * the following routines are unrolled with no memset's to speed
+ * up the usual case
+ */
+void
+v4tov6(uchar *v6, uchar *v4)	/* v6 = 10 zero bytes, 0xff 0xff, then the 4 bytes of v4 */
+{
+ v6[0] = 0;
+ v6[1] = 0;
+ v6[2] = 0;
+ v6[3] = 0;
+ v6[4] = 0;
+ v6[5] = 0;
+ v6[6] = 0;
+ v6[7] = 0;
+ v6[8] = 0;
+ v6[9] = 0;
+ v6[10] = 0xff;
+ v6[11] = 0xff;
+ v6[12] = v4[0];
+ v6[13] = v4[1];
+ v6[14] = v4[2];
+ v6[15] = v4[3];
+}
+
+int
+v6tov4(uchar *v4, uchar *v6)	/* copies v6[12..15] to v4 and returns 0 when v6 starts with 10 zero bytes and 0xff 0xff; otherwise zeroes v4 and returns -1 */
+{
+ if(v6[0] == 0
+ && v6[1] == 0
+ && v6[2] == 0
+ && v6[3] == 0
+ && v6[4] == 0
+ && v6[5] == 0
+ && v6[6] == 0
+ && v6[7] == 0
+ && v6[8] == 0
+ && v6[9] == 0
+ && v6[10] == 0xff
+ && v6[11] == 0xff)
+ {
+ v4[0] = v6[12];
+ v4[1] = v6[13];
+ v4[2] = v6[14];
+ v4[3] = v6[15];
+ return 0;
+ } else {
+ memset(v4, 0, 4);
+ return -1;
+ }
+}
+
+ulong
+parseip(uchar *to, char *from)
+{
+	int i, elipsis = 0, v4 = 1;
+	ulong x;
+	char *p, *op;
+	/* parse a v4 or v6 address string into the 16 bytes at to; returns the 4 low bytes (nhgetl) for a v4 address and 6 for a v6 one */
+	memset(to, 0, IPaddrlen);
+	p = from;
+	for(i = 0; i < 16 && *p; i+=2){
+		op = p;
+		x = strtoul(p, &p, 16);
+		if((*p == '.' && i <= IPaddrlen-4) || (*p == 0 && i == 0)){	/* dotted quad only where its 4 bytes still fit in to */
+			p = v4parseip(to+i, op);
+			i += 4;
+			break;
+		} else {
+			to[i] = x>>8;
+			to[i+1] = x;
+		}
+		if(*p == ':'){
+			v4 = 0;		/* any ':' makes this a v6 address */
+			if(*++p == ':'){
+				elipsis = i+2;	/* byte offset at which "::" has to be expanded */
+				p++;
+			}
+		}
+	}
+	if(i < 16){	/* short input: slide what followed the elision to the end and zero the gap */
+		memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
+		memset(&to[elipsis], 0, 16-i);
+	}
+	if(v4){
+		to[10] = to[11] = 0xff;
+		return nhgetl(to+12);
+	} else
+		return 6;
+}
+
+/*
+ * parse a mask given either as "/n" (n leading one bits, clamped to 0..128) or in address form;
+ * hack: an old style v4 mask (one whose first IPv4off bytes equal v4prefix) gets those bytes set to 0xff
+ */
+ulong
+parseipmask(uchar *to, char *from)
+{
+ ulong x;
+ int i;
+ uchar *p;
+
+ if(*from == '/'){
+ /* as a number of prefix bits */
+ i = atoi(from+1);
+ if(i < 0)
+ i = 0;
+ if(i > 128)
+ i = 128;
+ memset(to, 0, IPaddrlen);
+ for(p = to; i >= 8; i -= 8)
+ *p++ = 0xff;
+ if(i > 0)
+ *p = ~((1<<(8-i))-1);	/* the remaining i bits go at the top of the next byte */
+ x = nhgetl(to+IPv4off);
+ } else {
+ /* as a straight bit mask */
+ x = parseip(to, from);
+ if(memcmp(to, v4prefix, IPv4off) == 0)
+ memset(to, 0xff, IPv4off);
+ }
+ return x;
+}
+
+void
+maskip(uchar *from, uchar *mask, uchar *to)
+{
+	uchar *end;	/* one past the last byte of to */
+
+	for(end = to + IPaddrlen; to < end; )
+		*to++ = *from++ & *mask++;
+}
+
+uchar classmask[4][16] = {	/* default masks, one row per value of the top two bits of the first v4 byte (see defmask) */
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
+ 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
+};
+
+uchar*
+defmask(uchar *ip)	/* returns a pointer to a shared default mask for ip; callers must copy it rather than modify it */
+{
+ if(isv4(ip))
+ return classmask[ip[IPv4off]>>6];
+ else {
+ if(ipcmp(ip, v6loopback) == 0)
+ return IPallbits;
+ else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)	/* the *preflen values are byte counts */
+ return v6linklocalmask;
+ else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
+ return v6sitelocalmask;
+ else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
+ return v6solicitednodemask;
+ else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
+ return v6multicastmask;
+ return IPallbits;
+ }
+}
+
+void
+ipv62smcast(uchar *smcast, uchar *a)	/* smcast = v6solicitednode with its last 3 bytes taken from a */
+{
+ assert(IPaddrlen == 16);
+ memmove(smcast, v6solicitednode, IPaddrlen);
+ smcast[13] = a[13];
+ smcast[14] = a[14];
+ smcast[15] = a[15];
+}
+
+
+/*
+ * parse a hex mac address: pairs of hex digits, each optionally followed by ':'; to is zeroed first and the number of bytes stored (at most len) is returned
+ */
+int
+parsemac(uchar *to, char *from, int len)
+{
+	char pair[4];
+	char *s;
+	int n;
+
+	memset(to, 0, len);
+	n = 0;
+	for(s = from; n < len; n++){
+		/* stop when fewer than two characters remain */
+		if(s[0] == '\0' || s[1] == '\0')
+			break;
+		pair[0] = *s++;
+		pair[1] = *s++;
+		pair[2] = '\0';
+
+		to[n] = strtoul(pair, 0, 16);
+		/* skip an optional separator */
+		if(*s == ':')
+			s++;
+	}
+	return n;
+}
+
+/*
+ * hashing tcp, udp, ... connections: mixes the last byte of each address with both ports, in unsigned arithmetic so the result is always < Nhash
+ */
+ulong
+iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+	return (((ulong)sa[IPaddrlen-1]<<24) ^ ((ulong)sp<<16) ^ ((ulong)da[IPaddrlen-1]<<8) ^ (ulong)dp) % Nhash;
+}
+
+void
+iphtadd(Ipht *ht, Conv *c)	/* enter c in the table under the hash of its addresses and ports, tagged with how specific a match it needs */
+{
+ ulong hv;
+ Iphash *h;
+
+ hv = iphash(c->raddr, c->rport, c->laddr, c->lport);
+ h = smalloc(sizeof(*h));
+ if(ipcmp(c->raddr, IPnoaddr) != 0)
+ h->match = IPmatchexact;	/* a remote address is set */
+ else {
+ if(ipcmp(c->laddr, IPnoaddr) != 0){
+ if(c->lport == 0)
+ h->match = IPmatchaddr;	/* local address only */
+ else
+ h->match = IPmatchpa;	/* local address and port */
+ } else {
+ if(c->lport == 0)
+ h->match = IPmatchany;	/* neither local address nor port */
+ else
+ h->match = IPmatchport;	/* local port only */
+ }
+ }
+ h->c = c;
+
+ lock(ht);
+ h->next = ht->tab[hv];	/* push on the front of the bucket */
+ ht->tab[hv] = h;
+ unlock(ht);
+}
+
+void
+iphtrem(Ipht *ht, Conv *c)
+{
+	Iphash **link, *victim;
+
+	lock(ht);
+	link = &ht->tab[iphash(c->raddr, c->rport, c->laddr, c->lport)];
+	while((victim = *link) != nil){
+		if(victim->c == c){	/* unlink and free the first entry for c, then stop */
+			*link = victim->next;
+			free(victim);
+			break;
+		}
+		link = &victim->next;
+	}
+	unlock(ht);
+}
+
+/* look for a matching conversation with the following precedence
+ * connected && raddr,rport,laddr,lport
+ * announced && laddr,lport
+ * announced && *,lport
+ * announced && laddr,*
+ * announced && *,*
+ * (each step probes the bucket iphash gives for that pattern; returns nil when nothing matches)
+ */
+Conv*
+iphtlook(Ipht *ht, uchar *sa, ushort sp, uchar *da, ushort dp)
+{
+ ulong hv;
+ Iphash *h;
+ Conv *c;
+
+ /* exact 4 pair match (connection) */
+ hv = iphash(sa, sp, da, dp);
+ lock(ht);	/* held across all five probes; every return path below unlocks */
+ for(h = ht->tab[hv]; h != nil; h = h->next){
+ if(h->match != IPmatchexact)
+ continue;
+ c = h->c;
+ if(sp == c->rport && dp == c->lport
+ && ipcmp(sa, c->raddr) == 0 && ipcmp(da, c->laddr) == 0){
+ unlock(ht);
+ return c;
+ }
+ }
+
+ /* match local address and port */
+ hv = iphash(IPnoaddr, 0, da, dp);
+ for(h = ht->tab[hv]; h != nil; h = h->next){
+ if(h->match != IPmatchpa)
+ continue;
+ c = h->c;
+ if(dp == c->lport && ipcmp(da, c->laddr) == 0){
+ unlock(ht);
+ return c;
+ }
+ }
+
+ /* match just port */
+ hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
+ for(h = ht->tab[hv]; h != nil; h = h->next){
+ if(h->match != IPmatchport)
+ continue;
+ c = h->c;
+ if(dp == c->lport){
+ unlock(ht);
+ return c;
+ }
+ }
+
+ /* match local address */
+ hv = iphash(IPnoaddr, 0, da, 0);
+ for(h = ht->tab[hv]; h != nil; h = h->next){
+ if(h->match != IPmatchaddr)
+ continue;
+ c = h->c;
+ if(ipcmp(da, c->laddr) == 0){
+ unlock(ht);
+ return c;
+ }
+ }
+
+ /* look for something that matches anything */
+ hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
+ for(h = ht->tab[hv]; h != nil; h = h->next){
+ if(h->match != IPmatchany)
+ continue;
+ c = h->c;
+ unlock(ht);
+ return c;
+ }
+ unlock(ht);
+ return nil;
+}
--- /dev/null
+++ b/os/ip.original/ipifc.c
@@ -1,0 +1,1767 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+#define DPRINT if(0)print
+
+enum {
+ Maxmedia = 32,	/* size of media[] and the value given to ipifc->nc in ipifcinit */
+ Nself = Maxmedia*5,
+ NHASH = (1<<6),	/* number of chains in Ipselftab.hash */
+ NCACHE = 256,
+ QMAX = 64*1024-1,	/* limit passed to qopen for the interface queues in ipifccreate */
+};
+
+Medium *media[Maxmedia] =	/* registered media; addipmedium never fills the last slot, so ipfindmedium always meets a nil */
+{
+ 0
+};
+
+/*
+ * cache of local addresses (addresses we answer to)
+ */
+struct Ipself
+{
+ uchar a[IPaddrlen];
+ Ipself *hnext; /* next address in the hash table (NOTE(review): addselfcache links the hash chains through next, not hnext) */
+ Iplink *link; /* binding twixt Ipself and Ipifc */
+ ulong expire;
+ uchar type; /* type of address, as passed to addselfcache (Runi, Rbcast, Rmulti) */
+ int ref;
+ Ipself *next; /* free list; also the link addselfcache follows along a hash chain */
+};
+
+struct Ipselftab
+{
+ QLock;	/* taken with qlock(f->self) in addselfcache */
+ int inited;
+ int acceptall; /* true if an interface has the null address */
+ Ipself *hash[NHASH]; /* hash chains, indexed by hashipa() */
+};
+
+/*
+ * Multicast addresses are chained onto a Chan so that
+ * we can remove them when the Chan is closed.
+ */
+typedef struct Ipmcast Ipmcast;
+struct Ipmcast
+{
+ Ipmcast *next;	/* next entry on the chain */
+ uchar ma[IPaddrlen]; /* multicast address */
+ uchar ia[IPaddrlen]; /* interface address */
+};
+
+/* quick hash for ip addresses: the last two bytes taken as a 16-bit number, modulo NHASH */
+#define hashipa(a) ( ( ((a)[IPaddrlen-2]<<8) | (a)[IPaddrlen-1] )%NHASH )
+
+static char tifc[] = "ifc ";	/* tag passed to v4addroute/v6addroute for routes added by ipifcadd */
+
+static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
+static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
+static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
+static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
+static char* ipifcremlifc(Ipifc*, Iplifc*);
+
+/*
+ * link in a new medium: store med in the first free slot of media[]; the last slot is never used and a full table drops med silently
+ */
+void
+addipmedium(Medium *med)
+{
+	Medium **slot;
+
+	for(slot = media; slot < &media[nelem(media)-1]; slot++)
+		if(*slot == nil){
+			*slot = med;
+			return;
+		}
+}
+
+/*
+ * find the medium with this name; returns nil when media[] holds no such entry
+ */
+Medium*
+ipfindmedium(char *name)
+{
+	int i;
+
+	for(i = 0; media[i] != nil; i++)
+		if(strcmp(media[i]->name, name) == 0)
+			return media[i];
+	return nil;
+}
+
+/*
+ * attach a device (or pkt driver) to the interface: argv[1] names the medium, an optional argv[2] the device.
+ * called with c locked; returns nil or an error string (errors raised by the medium's bind are passed on with nexterror)
+ */
+static char*
+ipifcbind(Conv *c, char **argv, int argc)
+{
+ Ipifc *ifc;
+ Medium *m;
+
+ if(argc < 2)
+ return Ebadarg;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ /* bind the device to the interface */
+ m = ipfindmedium(argv[1]);
+ if(m == nil)
+ return "unknown interface type";
+
+ wlock(ifc);
+ if(ifc->m != nil){
+ wunlock(ifc);
+ return "interface already bound";
+ }
+ if(waserror()){	/* release the write lock if anything below raises an error */
+ wunlock(ifc);
+ nexterror();
+ }
+
+ /* do medium specific binding */
+ (*m->bind)(ifc, argc, argv);
+
+ /* set the bound device name */
+ if(argc > 2)
+ strncpy(ifc->dev, argv[2], sizeof(ifc->dev));
+ else
+ snprint(ifc->dev, sizeof ifc->dev, "%s%d", m->name, c->x);
+ ifc->dev[sizeof(ifc->dev)-1] = 0;	/* strncpy may leave dev unterminated */
+
+ /* set up parameters */
+ ifc->m = m;
+ ifc->mintu = ifc->m->mintu;
+ ifc->maxtu = ifc->m->maxtu;
+ if(ifc->m->unbindonclose == 0)
+ ifc->conv->inuse++;	/* undone by ipifcunbind */
+ ifc->rp.mflag = 0; // default not managed
+ ifc->rp.oflag = 0;
+ ifc->rp.maxraint = 600000; // millisecs
+ ifc->rp.minraint = 200000;
+ ifc->rp.linkmtu = 0; // no mtu sent
+ ifc->rp.reachtime = 0;
+ ifc->rp.rxmitra = 0;
+ ifc->rp.ttl = MAXTTL;
+ ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+
+ /* any ancillary structures (like routes) no longer pertain */
+ ifc->ifcid++;
+
+ /* reopen all the queues closed by a previous unbind (NOTE(review): ipifcunbind closes rq, wq and sq, while eq is reopened here and wq is not -- confirm) */
+ qreopen(c->rq);
+ qreopen(c->eq);
+ qreopen(c->sq);
+
+ wunlock(ifc);
+ poperror();
+
+ return nil;
+}
+
+/*
+ * detach a device from an interface, close the interface
+ * called with ifc->conv closed; returns nil, failures from ipifcremlifc are raised with error()
+ */
+static char*
+ipifcunbind(Ipifc *ifc)
+{
+ char *err;
+
+ if(waserror()){	/* error label is set up before the lock is taken on the next statement */
+ wunlock(ifc);
+ nexterror();
+ }
+ wlock(ifc);
+
+ /* dissociate routes */
+ if(ifc->m != nil && ifc->m->unbindonclose == 0)
+ ifc->conv->inuse--;	/* undo the increment made by ipifcbind */
+ ifc->ifcid++;
+
+ /* disassociate device */
+ if(ifc->m != nil && ifc->m->unbind)
+ (*ifc->m->unbind)(ifc);
+ memset(ifc->dev, 0, sizeof(ifc->dev));
+ ifc->arg = nil;
+ ifc->reassemble = 0;
+
+ /* close queues to stop queuing of packets */
+ qclose(ifc->conv->rq);
+ qclose(ifc->conv->wq);
+ qclose(ifc->conv->sq);
+
+ /* disassociate logical interfaces */
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ if(err)
+ error(err);
+ }
+
+ ifc->m = nil;	/* marks the interface unbound (see ipifcinuse) */
+ wunlock(ifc);
+ poperror();
+ return nil;
+}
+
+
+
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";	/* first status line, see ipifcstate */
+
+char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";	/* one line per address: local, mask, remote, validlt, preflt */
+
+
+static int
+ipifcstate(Conv *c, char *state, int n)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ int m;
+ /* format the interface status into state (n bytes): one sfixedformat line, then one slineformat line per address; returns the summed snprint counts */
+ ifc = (Ipifc*)c->ptcl;
+
+ m = snprint(state, n, sfixedformat,
+ ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
+ ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
+ ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
+ ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+
+ rlock(ifc);	/* only the address list is read under the lock; the fields above are read without it */
+ for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat,
+ lifc->local, lifc->mask, lifc->remote,
+ lifc->validlt, lifc->preflt);
+ if(ifc->lifc == nil)	/* no addresses: end with an empty line */
+ m += snprint(state+m, n - m, "\n");
+ runlock(ifc);
+ return m;
+}
+
+static int
+ipifclocal(Conv *c, char *state, int n)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Iplink *link;
+ int m;
+ /* write one line per address into state: the local address, " ->", then every self-cache address linked to it; returns the summed snprint counts */
+ ifc = (Ipifc*)c->ptcl;
+
+ m = 0;
+
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
+ for(link = lifc->link; link; link = link->lifclink)
+ m += snprint(state+m, n - m, " %-40.40I", link->self->a);
+ m += snprint(state+m, n - m, "\n");
+ }
+ runlock(ifc);
+ return m;
+}
+
+static int
+ipifcinuse(Conv *c)
+{
+	Medium *m;
+
+	m = ((Ipifc*)c->ptcl)->m;	/* nil until ipifcbind sets it and again after ipifcunbind */
+	return m != nil;
+}
+
+/*
+ * called when a process writes to an interface's 'data': takes one block off wq and gives it to the medium's pktin
+ */
+static void
+ipifckick(void *x)
+{
+ Conv *c = x;
+ Block *bp;
+ Ipifc *ifc;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+
+ ifc = (Ipifc*)c->ptcl;
+ if(!canrlock(ifc)){	/* do not wait for the lock: drop the block instead */
+ freeb(bp);
+ return;
+ }
+ if(waserror()){
+ runlock(ifc);
+ nexterror();
+ }
+ if(ifc->m == nil || ifc->m->pktin == nil)	/* unbound, or the medium takes no input this way */
+ freeb(bp);
+ else
+ (*ifc->m->pktin)(c->p->f, ifc, bp);
+ runlock(ifc);
+ poperror();
+}
+
+/*
+ * called when a new ipifc structure is created: opens the queues and marks the interface unbound
+ */
+static void
+ipifccreate(Conv *c)
+{
+ Ipifc *ifc;
+
+ c->rq = qopen(QMAX, 0, 0, 0);
+ c->sq = qopen(2*QMAX, 0, 0, 0);
+ c->wq = qopen(QMAX, Qkick, ipifckick, c);	/* ipifckick is registered as this queue's kick routine, with c as its argument */
+ ifc = (Ipifc*)c->ptcl;
+ ifc->conv = c;
+ ifc->unbinding = 0;
+ ifc->m = nil;
+ ifc->reassemble = 0;
+}
+
+/*
+ * called after last close of ipifc data or ctl: unbinds only when the medium sets unbindonclose
+ * called with c locked, we must unlock (NOTE(review): nothing in this body unlocks c -- confirm against the caller)
+ */
+static void
+ipifcclose(Conv *c)
+{
+ Ipifc *ifc;
+ Medium *m;
+
+ ifc = (Ipifc*)c->ptcl;
+ m = ifc->m;
+ if(m != nil && m->unbindonclose)
+ ipifcunbind(ifc);
+}
+
+/*
+ * change an interface's mtu: argv[1] must lie within the bound medium's mintu..maxtu, otherwise Ebadarg is returned
+ */
+char*
+ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+{
+	Medium *m = ifc->m;
+	int mtu;
+
+	if(argc < 2 || m == nil)
+		return Ebadarg;
+	mtu = strtoul(argv[1], 0, 0);
+	if(mtu >= m->mintu && mtu <= m->maxtu){
+		ifc->maxtu = mtu;
+		return nil;
+	}
+	return Ebadarg;
+}
+
+/*
+ * add an address to an interface: argv[1] is the address, optionally followed by mask, remote address, mtu and the word "proxy"; a non-nil lifcp supplies the v6 prefix parameters; returns nil or an error string
+ */
+char*
+ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp)
+{
+	uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+	uchar bcast[IPaddrlen], net[IPaddrlen];
+	Iplifc *lifc, **l;
+	int i, type, mtu;
+	Fs *f;
+	int sendnbrdisc = 0;
+
+	if(ifc->m == nil)
+		return "ipifc not yet bound to device";
+
+	f = ifc->conv->p->f;
+
+	type = Rifc;
+	memset(ip, 0, IPaddrlen);
+	memset(mask, 0, IPaddrlen);
+	memset(rem, 0, IPaddrlen);
+	switch(argc){
+	case 6:
+		if(strcmp(argv[5], "proxy") == 0)
+			type |= Rproxy;
+		/* fall through */
+	case 5:
+		mtu = strtoul(argv[4], 0, 0);
+		if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
+			ifc->maxtu = mtu;
+		/* fall through */
+	case 4:
+		parseip(ip, argv[1]);
+		parseipmask(mask, argv[2]);
+		parseip(rem, argv[3]);
+		maskip(rem, mask, net);
+		break;
+	case 3:
+		parseip(ip, argv[1]);
+		parseipmask(mask, argv[2]);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	case 2:
+		parseip(ip, argv[1]);
+		memmove(mask, defmask(ip), IPaddrlen);
+		maskip(ip, mask, rem);
+		maskip(rem, mask, net);
+		break;
+	default:
+		return Ebadarg;
+		break;
+	}
+	if(isv4(ip))
+		tentative = 0;	/* tentative addresses apply to v6 only */
+	wlock(ifc);
+
+	/* ignore if this is already a local address for this ifc */
+	for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
+		if(ipcmp(lifc->local, ip) == 0) {
+			if(lifc->tentative != tentative)
+				lifc->tentative = tentative;
+			if(lifcp != nil) {
+				lifc->onlink = lifcp->onlink;
+				lifc->autoflag = lifcp->autoflag;
+				lifc->validlt = lifcp->validlt;
+				lifc->preflt = lifcp->preflt;
+				lifc->origint = lifcp->origint;
+			}
+			goto out;
+		}
+	}
+
+	/* add the address to the list of logical ifc's for this ifc */
+	lifc = smalloc(sizeof(Iplifc));
+	ipmove(lifc->local, ip);
+	ipmove(lifc->mask, mask);
+	ipmove(lifc->remote, rem);
+	ipmove(lifc->net, net);
+	lifc->tentative = tentative;
+	if(lifcp != nil) {
+		lifc->onlink = lifcp->onlink;
+		lifc->autoflag = lifcp->autoflag;
+		lifc->validlt = lifcp->validlt;
+		lifc->preflt = lifcp->preflt;
+		lifc->origint = lifcp->origint;
+	}
+	else {		// default values
+		lifc->onlink = 1;
+		lifc->autoflag = 1;
+		lifc->validlt = 0xffffffff;
+		lifc->preflt = 0xffffffff;
+		lifc->origint = NOW / 1000;	/* was NOW / 10^3, which C reads as (NOW/10) xor 3 */
+	}
+	lifc->next = nil;
+
+	for(l = &ifc->lifc; *l; l = &(*l)->next)
+		;
+	*l = lifc;	/* append at the end of the chain */
+
+	/* check for point-to-point interface */
+	if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
+	if(ipcmp(mask, IPallbits) == 0)
+		type |= Rptpt;
+
+	/* add local routes */
+	if(isv4(ip))
+		v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
+	else
+		v6addroute(f, tifc, rem, mask, rem, type);
+
+	addselfcache(f, ifc, lifc, ip, Runi);
+
+	if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
+		ipifcregisterproxy(f, ifc, rem);
+		goto out;
+	}
+
+	if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+		/* add subnet directed broadcast address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add subnet directed network address to the self cache */
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed broadcast address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) | ~mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		/* add network directed network address to the self cache */
+		memmove(mask, defmask(ip), IPaddrlen);
+		for(i = 0; i < IPaddrlen; i++)
+			bcast[i] = (ip[i] & mask[i]) & mask[i];
+		addselfcache(f, ifc, lifc, bcast, Rbcast);
+
+		addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
+	}
+	else {
+		if(ipcmp(ip, v6loopback) == 0) {
+			/* add node-local mcast address */
+			addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
+
+			/* add route for all node multicast */
+			v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+		}
+
+		/* add all nodes multicast address */
+		addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
+
+		/* add route for all nodes multicast */
+		v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
+
+		/* add solicited-node multicast address */
+		ipv62smcast(bcast, ip);
+		addselfcache(f, ifc, lifc, bcast, Rmulti);
+
+		sendnbrdisc = 1;
+	}
+
+	/* register the address on this network for address resolution */
+	if(isv4(ip) && ifc->m->areg != nil)
+		(*ifc->m->areg)(ifc, ip);
+
+out:
+	wunlock(ifc);
+	if(tentative && sendnbrdisc)	/* new tentative v6 address: send a neighbour solicitation for it once the lock is dropped */
+		icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+	return nil;
+}
+
+/*
+ * remove a logical interface from an ifc: unlink it, drop its self-cache entries and routes, free it
+ * always called with ifc wlock'd; returns nil or an error string
+ */
+static char*
+ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+{
+ Iplifc **l;
+ Fs *f;
+
+ f = ifc->conv->p->f;
+
+ /*
+ * find the logical interface on this interface's chain and
+ * remove it from the chain. a nil lifc is never found, so it
+ * yields the error below.
+ */
+ for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
+ ;
+ if(*l == nil)
+ return "address not on this interface";
+ *l = lifc->next;
+
+ /* disassociate any addresses */
+ while(lifc->link)
+ remselfcache(f, ifc, lifc, lifc->link->self->a);
+
+ /* remove the route for this logical interface */
+ if(isv4(lifc->local))
+ v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
+ else {
+ v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(ipcmp(lifc->local, v6loopback) == 0)
+ /* remove route for all node multicast */
+ v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
+ else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
+ /* remove route for all link multicast */
+ v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ }
+
+ free(lifc);
+ return nil;
+
+}
+
+/*
+ * remove an address from an interface: argv[1] address, argv[2] mask, optional argv[3] remote address.
+ * called with c locked
+ */
+char*
+ipifcrem(Ipifc *ifc, char **argv, int argc)
+{
+ uchar ip[IPaddrlen];
+ uchar mask[IPaddrlen];
+ uchar rem[IPaddrlen];
+ Iplifc *lifc;
+ char *rv;
+
+ if(argc < 3)
+ return Ebadarg;
+
+ parseip(ip, argv[1]);
+ parseipmask(mask, argv[2]);
+ if(argc < 4)
+ maskip(ip, mask, rem);	/* no remote given: use the masked local address, as ipifcadd does */
+ else
+ parseip(rem, argv[3]);
+
+ wlock(ifc);
+
+ /*
+ * find address on this interface and remove from chain.
+ * for pt to pt we actually specify the remote address as the
+ * address to remove.
+ */
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
+ if (memcmp(ip, lifc->local, IPaddrlen) == 0
+ && memcmp(mask, lifc->mask, IPaddrlen) == 0
+ && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ break;
+ }
+
+ rv = ipifcremlifc(ifc, lifc);	/* lifc is nil when nothing matched; ipifcremlifc then returns its error string */
+ wunlock(ifc);
+ return rv;
+}
+
+/*
+ * distribute routes to active interfaces like the
+ * TRIP linecards: every bound medium that has an addroute hook is called
+ */
+void
+ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
+{
+	Medium *m;
+	Conv **tab, *c;
+	int i, n;
+
+	n = f->ipifc->nc;
+	tab = f->ipifc->conv;
+	for(i = 0; i < n; i++){
+		c = tab[i];
+		if(c == nil)
+			continue;
+		/* skip interfaces that are unbound or whose medium lacks the hook */
+		m = ((Ipifc*)c->ptcl)->m;
+		if(m != nil && m->addroute != nil)
+			m->addroute((Ipifc*)c->ptcl, vers, addr, mask, gate, type);
+	}
+}
+
+void
+ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
+{
+	Medium *m;
+	Conv **tab, *c;
+	int i, n;
+
+	n = f->ipifc->nc;
+	tab = f->ipifc->conv;
+	for(i = 0; i < n; i++){
+		c = tab[i];
+		if(c == nil)
+			continue;
+		/* skip interfaces that are unbound or whose medium lacks the hook */
+		m = ((Ipifc*)c->ptcl)->m;
+		if(m != nil && m->remroute != nil)
+			m->remroute((Ipifc*)c->ptcl, vers, addr, mask);
+	}
+}
+
+/*
+ * associate an address with the interface. This wipes out any previous
+ * addresses. This is a macro that means, remove all the old interfaces
+ * and add a new one. returns nil or an error string.
+ */
+static char*
+ipifcconnect(Conv* c, char **argv, int argc)
+{
+ char *err;
+ Ipifc *ifc;
+
+ ifc = (Ipifc*)c->ptcl;
+
+ if(ifc->m == nil)
+ return "ipifc not yet bound to device";
+
+ if(waserror()){	/* error label is set up before the lock is taken on the next statement */
+ wunlock(ifc);
+ nexterror();
+ }
+ wlock(ifc);
+ while(ifc->lifc){
+ err = ipifcremlifc(ifc, ifc->lifc);
+ if(err)
+ error(err);
+ }
+ wunlock(ifc);
+ poperror();
+
+ err = ipifcadd(ifc, argv, argc, 0, nil);	/* ipifcadd takes the write lock itself, hence the unlock above */
+ if(err)
+ return err;
+
+ Fsconnected(c, nil);
+
+ return nil;
+}
+
+char*
+ipifcsetpar6(Ipifc *ifc, char **argv, int argc)	/* argv[1..] are name/value pairs; pairs before an unknown name are still applied when Ebadarg is returned */
+{
+ int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+
+ argsleft = argc - 1;
+ i = 1;
+
+ if(argsleft % 2 != 0)
+ return Ebadarg;
+
+ while (argsleft > 1) {
+ if(strcmp(argv[i],"recvra")==0)
+ ifc->recvra6 = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i],"sendra")==0)
+ ifc->sendra6 = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i],"mflag")==0)
+ ifc->rp.mflag = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i],"oflag")==0)
+ ifc->rp.oflag = (atoi(argv[i+1]) != 0);
+ else if(strcmp(argv[i],"maxraint")==0)
+ ifc->rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"minraint")==0)
+ ifc->rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"linkmtu")==0)
+ ifc->rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"reachtime")==0)
+ ifc->rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"rxmitra")==0)
+ ifc->rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"ttl")==0)
+ ifc->rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i],"routerlt")==0)
+ ifc->rp.routerlt = atoi(argv[i+1]);
+ else
+ return Ebadarg;
+
+ argsleft -= 2;
+ i += 2;
+ }
+
+ // consistency check: only maxraint and minraint are put back to their values on entry
+ if(ifc->rp.maxraint < ifc->rp.minraint) {
+ ifc->rp.maxraint = vmax;
+ ifc->rp.minraint = vmin;
+ return Ebadarg;
+ }
+
+ return nil;
+}
+
+char*
+ipifcsendra6(Ipifc *ifc, char **argv, int argc)
+{
+	/* argv[1], when present and non-zero, turns sendra6 on; otherwise it is turned off */
+	if(argc > 1 && atoi(argv[1]) != 0)
+		ifc->sendra6 = 1;
+	else
+		ifc->sendra6 = 0;
+
+	return nil;
+}
+
+char*
+ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
+{
+	/* argv[1], when present and non-zero, turns recvra6 on; otherwise it is turned off */
+	if(argc > 1 && atoi(argv[1]) != 0)
+		ifc->recvra6 = 1;
+	else
+		ifc->recvra6 = 0;
+
+	return nil;
+}
+
+char*
+ipifcnat(Ipifc *ifc, char **argv, int argc)	/* "nat show", "nat flush", "nat add src mask dst" or "nat remove src mask dst"; returns nil or an error string */
+{
+ uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
+ Iplifc *lifc;
+
+ if(argc == 2){
+ if((strcmp(argv[1], "show") == 0)){
+ shownataddr();
+ return nil;
+ }else if((strcmp(argv[1], "flush") == 0)){
+ flushnataddr();
+ return nil;
+ }else
+ return Ebadarg;
+ }
+
+ if(argc != 5)
+ return Ebadarg;
+
+ if (parseip(src, argv[2]) == -1)	/* NOTE(review): parseip/parseipmask in this tree's ipaux.c return ulong with no error value, so these -1 tests look ineffective -- confirm */
+ return Ebadip;
+
+ if (parseipmask(mask, argv[3]) == -1)
+ return Ebadip;
+
+ if (parseip(dst, argv[4]) == -1)
+ return Ebadip;
+
+ if((lifc=iplocalonifc(ifc, dst)) == nil)	/* dst is looked up with iplocalonifc and rejected when that returns nil */
+ return Ebadip;
+
+ if(strcmp(argv[1], "add") == 0){
+ if(addnataddr(src, mask, lifc) != 0)
+ return Ebadarg;
+ }else if(strcmp(argv[1], "remove") == 0){
+ if(removenataddr(src, mask, lifc) != 0)
+ return Ebadarg;
+ }else
+ return Ebadarg;
+
+ return nil;
+}
+
+/*
+ * non-standard control messages: dispatch on argv[0]; returns nil or an error string.
+ * called with c locked.
+ */
+static char*
+ipifcctl(Conv* c, char**argv, int argc)
+{
+	Ipifc *ifc;
+	int i;
+
+	ifc = (Ipifc*)c->ptcl;
+	if(strcmp(argv[0], "add") == 0)
+		return ipifcadd(ifc, argv, argc, 0, nil);
+	else if(strcmp(argv[0], "bootp") == 0)	/* bootp is a function pointer: do not call through it while it is nil */
+		return bootp != nil ? bootp(ifc) : "bootp not configured";
+	else if(strcmp(argv[0], "try") == 0)	/* like add, but the address starts out tentative */
+		return ipifcadd(ifc, argv, argc, 1, nil);
+	else if(strcmp(argv[0], "remove") == 0)
+		return ipifcrem(ifc, argv, argc);
+	else if(strcmp(argv[0], "unbind") == 0)
+		return ipifcunbind(ifc);
+	else if(strcmp(argv[0], "joinmulti") == 0)
+		return ipifcjoinmulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "leavemulti") == 0)
+		return ipifcleavemulti(ifc, argv, argc);
+	else if(strcmp(argv[0], "mtu") == 0)
+		return ipifcsetmtu(ifc, argv, argc);
+	else if(strcmp(argv[0], "reassemble") == 0){
+		ifc->reassemble = 1;
+		return nil;
+	}
+	else if(strcmp(argv[0], "iprouting") == 0){
+		i = 1;		/* no argument means on */
+		if(argc > 1)
+			i = atoi(argv[1]);
+		iprouting(c->p->f, i);
+		return nil;
+	}
+	else if(strcmp(argv[0], "addpref6") == 0)
+		return ipifcaddpref6(ifc, argv, argc);
+	else if(strcmp(argv[0], "setpar6") == 0)
+		return ipifcsetpar6(ifc, argv, argc);
+	else if(strcmp(argv[0], "sendra6") == 0)
+		return ipifcsendra6(ifc, argv, argc);
+	else if(strcmp(argv[0], "recvra6") == 0)
+		return ipifcrecvra6(ifc, argv, argc);
+	else if(strcmp(argv[0], "nat") == 0)
+		return ipifcnat(ifc, argv, argc);
+	return "unsupported ctl";
+}
+
+int ipifcstats(Proto *ipifc, char *buf, int len) /* stats hook: forwards to ipstats() for the owning Fs; return type made explicit */
+{
+ return ipstats(ipifc->f, buf, len);
+}
+
+void
+ipifcinit(Fs *f) /* build the "ipifc" Proto, fill in its hooks, and register it with f */
+{
+ Proto *ipifc;
+
+ ipifc = smalloc(sizeof(Proto));
+ ipifc->name = "ipifc";
+ ipifc->connect = ipifcconnect;
+ ipifc->announce = nil;
+ ipifc->bind = ipifcbind;
+ ipifc->state = ipifcstate;
+ ipifc->create = ipifccreate;
+ ipifc->close = ipifcclose;
+ ipifc->rcv = nil;
+ ipifc->ctl = ipifcctl;
+ ipifc->advise = nil;
+ ipifc->stats = ipifcstats;
+ ipifc->inuse = ipifcinuse;
+ ipifc->local = ipifclocal;
+ ipifc->ipproto = -1;
+ ipifc->nc = Maxmedia; /* bound used by the conv[] walks in findipifc and friends */
+ ipifc->ptclsize = sizeof(Ipifc); /* each conv's ptcl area is used as an Ipifc */
+
+ f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
+
+ Fsproto(f, ipifc);
+}
+
+/*
+ * add address a to the self routing cache for lifc, or bump the link's ref if it is already linked
+ * called with c locked; takes and releases the f->self qlock itself
+ */
+static void
+addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
+{
+ Ipself *p;
+ Iplink *lp;
+ int h;
+
+ qlock(f->self);
+
+ /* see if the address already exists */
+ h = hashipa(a);
+ for(p = f->self->hash[h]; p; p = p->next)
+ if(memcmp(a, p->a, IPaddrlen) == 0)
+ break;
+
+ /* allocate a local address and add to hash chain */
+ if(p == nil){
+ p = smalloc(sizeof(*p));
+ ipmove(p->a, a);
+ p->type = type; /* type is recorded only when the entry is first created */
+ p->next = f->self->hash[h];
+ f->self->hash[h] = p;
+
+ /* if the null address, accept all packets */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 1;
+ }
+
+ /* look for a link for this lifc */
+ for(lp = p->link; lp; lp = lp->selflink)
+ if(lp->lifc == lifc)
+ break;
+
+ /* allocate a lifc-to-local link and link to both */
+ if(lp == nil){
+ lp = smalloc(sizeof(*lp));
+ lp->ref = 1;
+ lp->lifc = lifc;
+ lp->self = p;
+ lp->selflink = p->link; /* push on the Ipself's list of links */
+ p->link = lp;
+ lp->lifclink = lifc->link; /* push on the lifc's list of links */
+ lifc->link = lp;
+
+ /* add to routing table */
+ if(isv4(a))
+ v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
+ else
+ v6addroute(f, tifc, a, IPallbits, a, type);
+
+ if((type & Rmulti) && ifc->m->addmulti != nil) /* let the medium know about a new multicast address */
+ (*ifc->m->addmulti)(ifc, a, lifc->local);
+ } else {
+ lp->ref++;
+ }
+
+ qunlock(f->self);
+}
+
+/*
+ * These structures are unlinked from their chains while
+ * other threads may be using them. To avoid excessive locking,
+ * just put them aside for a while before freeing them.
+ * called with f->self locked
+ */
+static Iplink *freeiplink;
+static Ipself *freeipself;
+
+static void
+iplinkfree(Iplink *p) /* park p on the deferred-free list and reap entries whose grace period has ended */
+{
+ Iplink **l, *np;
+ ulong now = NOW;
+
+ l = &freeiplink;
+ for(np = *l; np; np = *l){
+ if((long)(now - np->expire) >= 0){ /* was "expire > now", which freed entries still in their grace period and kept expired ones */
+ *l = np->next;
+ free(np);
+ continue;
+ }
+ l = &np->next;
+ }
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->next = nil;
+ *l = p; /* l now points at the tail link, so p is appended */
+}
+static void
+ipselffree(Ipself *p) /* park p on the deferred-free list and reap entries whose grace period has ended */
+{
+ Ipself **l, *np;
+ ulong now = NOW;
+
+ l = &freeipself;
+ for(np = *l; np; np = *l){
+ if((long)(now - np->expire) >= 0){ /* was "expire > now", which freed entries still in their grace period and kept expired ones */
+ *l = np->next;
+ free(np);
+ continue;
+ }
+ l = &np->next;
+ }
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->next = nil;
+ *l = p; /* l now points at the tail link, so p is appended */
+}
+
+/*
+ * Decrement reference for this address on this link.
+ * Unlink from selftab if this is the last ref; a missing entry or link is silently ignored.
+ * called with c locked
+ */
+static void
+remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
+{
+ Ipself *p, **l;
+ Iplink *link, **l_self, **l_lifc;
+
+ qlock(f->self);
+
+ /* find the unique selftab entry */
+ l = &f->self->hash[hashipa(a)];
+ for(p = *l; p; p = *l){
+ if(ipcmp(p->a, a) == 0)
+ break;
+ l = &p->next;
+ }
+
+ if(p == nil)
+ goto out;
+
+ /*
+ * walk down links from an ifc looking for one
+ * that matches the selftab entry
+ */
+ l_lifc = &lifc->link;
+ for(link = *l_lifc; link; link = *l_lifc){
+ if(link->self == p)
+ break;
+ l_lifc = &link->lifclink;
+ }
+
+ if(link == nil)
+ goto out;
+
+ /*
+ * walk down the links from the selftab looking for
+ * the one we just found
+ */
+ l_self = &p->link;
+ for(link = *l_self; link; link = *l_self){
+ if(link == *(l_lifc))
+ break;
+ l_self = &link->selflink;
+ }
+
+ if(link == nil) /* on the lifc chain but not on the selftab chain: the two lists disagree */
+ panic("remselfcache");
+
+ if(--(link->ref) != 0)
+ goto out;
+
+ if((p->type & Rmulti) && ifc->m->remmulti != nil)
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+
+ /* ref == 0, remove from both chains and free the link */
+ *l_lifc = link->lifclink;
+ *l_self = link->selflink;
+ iplinkfree(link);
+
+ if(p->link != nil) /* other lifcs still reference this address */
+ goto out;
+
+ /* remove from routing table */
+ if(isv4(a))
+ v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
+ else
+ v6delroute(f, a, IPallbits, 1);
+
+ /* no more links, remove from hash and free */
+ *l = p->next;
+ ipselffree(p);
+
+ /* if IPnoaddr, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
+out:
+ qunlock(f->self);
+}
+
+static char *stformat = "%-44.44I %2.2d %4.4s\n"; /* one line per self-table entry: address, link count, route type (see ipselftabread) */
+enum
+{
+ Nstformat= 41, /* NOTE(review): not referenced in the code visible here -- confirm whether it is still used */
+};
+
+long
+ipselftabread(Fs *f, char *cp, ulong offset, int n) /* format the self table into cp (at most n bytes); returns the byte count kept */
+{
+ int i, m, nifc, off;
+ Ipself *p;
+ Iplink *link;
+ char state[8];
+
+ m = 0;
+ off = offset;
+ qlock(f->self);
+ for(i = 0; i < NHASH && m < n; i++){
+ for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
+ nifc = 0;
+ for(link = p->link; link; link = link->selflink) /* count the links hanging off this address */
+ nifc++;
+ routetype(p->type, state);
+ m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ if(off > 0){ /* still skipping up to offset: discard what was just formatted */
+ off -= m;
+ m = 0;
+ }
+ }
+ }
+ qunlock(f->self);
+ return m;
+}
+
+int
+iptentative(Fs *f, uchar *addr) /* tentative flag of the first lifc linked to addr; 0 if addr is not in the self table */
+{
+ Ipself *p;
+
+ p = f->self->hash[hashipa(addr)]; /* NOTE(review): read without taking the f->self qlock, and p->link is not nil-checked */
+ for(; p; p = p->next){
+ if(ipcmp(addr, p->a) == 0) {
+ return p->link->lifc->tentative;
+ }
+ }
+ return 0;
+}
+
+/*
+ * returns the type stored in the self table for addr:
+ * 0 - no match
+ * Runi (also returned for any address while acceptall is set)
+ * Rbcast
+ * Rmulti
+ */
+int
+ipforme(Fs *f, uchar *addr)
+{
+ Ipself *p;
+
+ p = f->self->hash[hashipa(addr)];
+ for(; p; p = p->next){
+ if(ipcmp(addr, p->a) == 0)
+ return p->type;
+ }
+
+ /* hack to say accept anything */
+ if(f->self->acceptall)
+ return Runi;
+
+ return 0;
+}
+
+/*
+ * find the ifc on same net as the remote system (largest matching mask wins).
+ * If none, return the first configured ifc for broadcast/multicast types, else nil.
+ */
+Ipifc*
+findipifc(Fs *f, uchar *remote, int type)
+{
+ Ipifc *ifc, *x;
+ Iplifc *lifc;
+ Conv **cp, **e;
+ uchar gnet[IPaddrlen];
+ uchar xmask[IPaddrlen];
+
+ x = nil; memset(xmask, 0, IPaddrlen);
+
+ /* find most specific match */
+ e = &f->ipifc->conv[f->ipifc->nc];
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+
+ ifc = (Ipifc*)(*cp)->ptcl;
+
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(remote, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0){
+ if(x == nil || ipcmp(lifc->mask, xmask) > 0){ /* keep the candidate whose mask compares greatest */
+ x = ifc;
+ ipmove(xmask, lifc->mask);
+ }
+ }
+ }
+ }
+ if(x != nil)
+ return x;
+
+ /* for now for broadcast and multicast, just use first interface */
+ if(type & (Rbcast|Rmulti)){
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ if(ifc->lifc != nil) /* first interface that has any address configured */
+ return ifc;
+ }
+ }
+
+ return nil;
+}
+
+enum { /* v6 address classes; the numeric order matters because callers compare them with > to pick the "best" */
+ unknownv6,
+ multicastv6,
+ unspecifiedv6,
+ linklocalv6,
+ sitelocalv6,
+ globalv6,
+};
+
+int
+v6addrtype(uchar *addr) /* classify addr; tests run in the order global, link-local, multicast, site-local */
+{
+ if(isv6global(addr))
+ return globalv6;
+ if(islinklocal(addr))
+ return linklocalv6;
+ if(isv6mcast(addr))
+ return multicastv6;
+ if(issitelocal(addr))
+ return sitelocalv6;
+ return unknownv6; /* unspecifiedv6 is never returned from here */
+}
+
+#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/1000) ) || ( (lifc)->preflt == 0xffffffff )) /* address still preferred, or preflt is the 0xffffffff "forever" value; NOW/1000 replaces NOW/10^3, where ^ is XOR in C */
+
+static void
+findprimaryipv6(Fs *f, uchar *local) /* copy the best current v6 local address into local; v6Unspecified if none qualifies */
+{
+ Conv **cp, **e;
+ Ipifc *ifc;
+ Iplifc *lifc;
+ int atype, atypel;
+
+ ipmove(local, v6Unspecified);
+ atype = unspecifiedv6;
+
+ /* find "best" (global > sitelocal > link local > unspecified)
+ * local address; address must be current */
+
+ e = &f->ipifc->conv[f->ipifc->nc];
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ atypel = v6addrtype(lifc->local);
+ if(atypel > atype) /* strictly better class than what we already hold */
+ if(v6addrcurr(lifc)) {
+ ipmove(local, lifc->local);
+ atype = atypel;
+ if(atype == globalv6) /* cannot do better than global: stop searching */
+ return;
+ }
+ }
+ }
+}
+
+/*
+ * copies the first local address of the first interface that has one; local is untouched if none
+ */
+static void
+findprimaryipv4(Fs *f, uchar *local)
+{
+ Conv **cp, **e;
+ Ipifc *ifc;
+ Iplifc *lifc;
+
+ /* find first ifc local address */
+ e = &f->ipifc->conv[f->ipifc->nc];
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == 0)
+ continue;
+ ifc = (Ipifc*)(*cp)->ptcl;
+ if((lifc = ifc->lifc) != nil){ /* NOTE(review): the address is not checked with isv4() here */
+ ipmove(local, lifc->local);
+ return;
+ }
+ }
+}
+
+/*
+ * find the local address 'closest' to the remote system and copy it to
+ * local (nothing is returned); falls back to findprimaryipv4/findprimaryipv6
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Route *r;
+ uchar gate[IPaddrlen];
+ uchar gnet[IPaddrlen];
+ int version;
+ int atype = unspecifiedv6, atypel = unknownv6;
+
+ USED(atype);
+ USED(atypel);
+ qlock(f->ipifc);
+ r = v6lookup(f, remote, nil);
+ version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6; /* v4 if remote carries the v4-mapped prefix */
+
+ if(r != nil){
+ ifc = r->ifc;
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else {
+ ipmove(gate, r->v6.gate);
+ ipmove(local, v6Unspecified);
+ }
+
+ /* find ifc address closest to the gateway to use */
+ switch(version) {
+ case V4:
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(gate, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0){ /* first lifc whose network contains the gateway */
+ ipmove(local, lifc->local);
+ goto out;
+ }
+ }
+ break;
+ case V6:
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ atypel = v6addrtype(lifc->local);
+ maskip(gate, lifc->mask, gnet);
+ if(ipcmp(gnet, lifc->net) == 0)
+ if(atypel > atype)
+ if(v6addrcurr(lifc)) {
+ ipmove(local, lifc->local);
+ atype = atypel;
+ if(atype == globalv6)
+ break; /* leaves the for loop, not the switch */
+ }
+ }
+ if(atype > unspecifiedv6) /* found something better than unspecified on the routed interface */
+ goto out;
+ break;
+ default:
+ panic("findlocalip: version %d", version);
+ }
+ }
+
+ switch(version){ /* no route, or no suitable address on the routed interface */
+ case V4:
+ findprimaryipv4(f, local);
+ break;
+ case V6:
+ findprimaryipv6(f, local);
+ break;
+ default:
+ panic("findlocalip2: version %d", version);
+ }
+
+out:
+ qunlock(f->ipifc);
+}
+
+/*
+ * copy the first v4 address associated with an interface into addr (IPv4addrlen bytes); returns 1 if found, else 0
+ */
+int
+ipv4local(Ipifc *ifc, uchar *addr)
+{
+ Iplifc *lifc;
+
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(isv4(lifc->local)){
+ memmove(addr, lifc->local+IPv4off, IPv4addrlen); /* only the 4-byte v4 part is copied */
+ return 1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * copy the first non-tentative v6 address associated with an interface into addr; returns 1 if found, else 0
+ */
+int
+ipv6local(Ipifc *ifc, uchar *addr)
+{
+ Iplifc *lifc;
+
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!isv4(lifc->local) && !(lifc->tentative)){
+ ipmove(addr, lifc->local);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int
+ipv6anylocal(Ipifc *ifc, uchar *addr) /* like ipv6local but tentative addresses count; returns SRC_UNI or SRC_UNSPEC */
+{
+ Iplifc *lifc;
+
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!isv4(lifc->local)){
+ ipmove(addr, lifc->local);
+ return SRC_UNI;
+ }
+ }
+ return SRC_UNSPEC; /* addr is left untouched in this case */
+}
+
+/*
+ * see if this address is bound to the interface; returns the matching lifc or nil
+ */
+Iplifc*
+iplocalonifc(Ipifc *ifc, uchar *ip)
+{
+ Iplifc *lifc;
+
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ if(ipcmp(ip, lifc->local) == 0)
+ return lifc;
+ return nil;
+}
+
+
+/*
+ * See if we're proxying for this address on this interface; returns 1 if so, else 0
+ */
+int
+ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
+{
+ Route *r;
+ uchar net[IPaddrlen];
+ Iplifc *lifc;
+
+ /* see if this is a direct connected pt to pt address */
+ r = v6lookup(f, ip, nil);
+ if(r == nil)
+ return 0;
+ if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy)) /* both flags must be set on the route */
+ return 0;
+
+ /* see if this is on the right interface */
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * return multicast version if any: V4 for a first v4 byte in 0xe0..0xef, V6 for a first byte of 0xff, else 0
+ */
+int
+ipismulticast(uchar *ip)
+{
+ if(isv4(ip)){
+ if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+ return V4;
+ } else {
+ if(ip[0] == 0xff)
+ return V6;
+ }
+ return 0;
+}
+
+int
+ipisbm(uchar *ip) /* like ipismulticast, but an address equal to IPv4bcast also yields V4 */
+{
+ if(isv4(ip)){
+ if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
+ return V4;
+ if(ipcmp(ip, IPv4bcast) == 0)
+ return V4;
+ } else {
+ if(ip[0] == 0xff)
+ return V6;
+ }
+ return 0;
+}
+
+
+/*
+ * add a multicast address (ma, joined via local address ia) to an interface, called with c locked
+ */
+void
+ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
+{
+ Ipifc *ifc;
+ Iplifc *lifc;
+ Conv **p;
+ Ipmulti *multi, **l;
+ Fs *f;
+
+ f = c->p->f;
+
+ for(l = &c->multi; *l; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0)
+ if(ipcmp(ia, (*l)->ia) == 0)
+ return; /* it's already there */
+
+ multi = *l = smalloc(sizeof(*multi)); /* l is at the tail of c->multi here, so this appends */
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+
+ for(p = f->ipifc->conv; *p; p++){ /* NOTE(review): stops at the first nil slot instead of bounding by nc like other walks */
+ if((*p)->inuse == 0)
+ continue;
+ ifc = (Ipifc*)(*p)->ptcl;
+ if(waserror()){
+ wunlock(ifc);
+ nexterror();
+ }
+ wlock(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ if(ipcmp(ia, lifc->local) == 0) /* register ma on every lifc whose local address is ia */
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ wunlock(ifc);
+ poperror();
+ }
+}
+
+
+/*
+ * remove a multicast address (ma, joined via ia) from an interface, called with c locked
+ */
+void
+ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
+{
+ Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Conv **p;
+ Ipifc *ifc;
+ Fs *f;
+
+ f = c->p->f;
+
+ for(l = &c->multi; *l; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0)
+ if(ipcmp(ia, (*l)->ia) == 0)
+ break;
+
+ multi = *l;
+ if(multi == nil)
+ return; /* we don't have it open */
+
+ *l = multi->next; /* unlink from c->multi; freed at the end */
+
+ for(p = f->ipifc->conv; *p; p++){ /* NOTE(review): stops at the first nil slot instead of bounding by nc like other walks */
+ if((*p)->inuse == 0)
+ continue;
+
+ ifc = (Ipifc*)(*p)->ptcl;
+ if(waserror()){
+ wunlock(ifc);
+ nexterror();
+ }
+ wlock(ifc);
+ for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ if(ipcmp(ia, lifc->local) == 0)
+ remselfcache(f, ifc, lifc, ma);
+ wunlock(ifc);
+ poperror();
+ }
+
+ free(multi);
+}
+
+/*
+ * make lifc's join and leave multicast groups -- currently a stub that ignores its arguments and returns nil
+ */
+static char*
+ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+{
+ USED(ifc, argv, argc);
+ return nil;
+}
+
+static char*
+ipifcleavemulti(Ipifc *ifc, char **argv, int argc) /* stub: ignores its arguments and returns nil */
+{
+ USED(ifc, argv, argc);
+ return nil;
+}
+
+static void
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip) /* announce ip on every other interface whose lifc remote net contains it */
+{
+ Conv **cp, **e;
+ Ipifc *nifc;
+ Iplifc *lifc;
+ Medium *m;
+ uchar net[IPaddrlen];
+
+ /* register the address on any network that will proxy for us */
+ e = &f->ipifc->conv[f->ipifc->nc];
+
+ if(!isv4(ip)) { // V6
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == nil)
+ continue;
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc) /* skip the interface the address belongs to */
+ continue;
+
+ rlock(nifc);
+ m = nifc->m;
+ if(m == nil || m->addmulti == nil) {
+ runlock(nifc);
+ continue;
+ }
+ for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
+ ipv62smcast(net, ip);
+ addselfcache(f, nifc, lifc, net, Rmulti);
+ arpenter(f, V6, ip, nifc->mac, 6, 0);
+ //(*m->addmulti)(nifc, net, ip);
+ break;
+ }
+ }
+ runlock(nifc);
+ }
+ return;
+ }
+ else { // V4
+ for(cp = f->ipifc->conv; cp < e; cp++){
+ if(*cp == nil)
+ continue;
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc)
+ continue;
+
+ rlock(nifc);
+ m = nifc->m;
+ if(m == nil || m->areg == nil){ /* medium has no areg hook: nothing to do on this interface */
+ runlock(nifc);
+ continue;
+ }
+ for(lifc = nifc->lifc; lifc; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0){
+ (*m->areg)(nifc, ip);
+ break;
+ }
+ }
+ runlock(nifc);
+ }
+ }
+}
+
+
+// added for new v6 mesg types: install gate as the v6 default route, tagged "ra"
+static void
+adddefroute6(Fs *f, uchar *gate, int force)
+{
+ Route *r;
+
+ r = v6lookup(f, v6Unspecified, nil);
+ if(r!=nil)
+ if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated
+ return; // by all other means take
+ // precedence over router annc
+
+ v6delroute(f, v6Unspecified, v6Unspecified, 1); // replace any existing default route
+ v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
+}
+
+enum
+{
+ Ngates = 3,
+};
+
+char*
+ipifcaddpref6(Ipifc *ifc, char**argv, int argc) /* "addpref6 prefix [plen [onlink [auto [validlt [preflt]]]]]"; nil or error string */
+{
+ uchar onlink = 1;
+ uchar autoflag = 1;
+ long validlt = 0xffffffff;
+ long preflt = 0xffffffff;
+ long origint = NOW / 1000; /* was NOW / 10^3: ^ is XOR in C, not exponentiation */
+ uchar prefix[IPaddrlen];
+ int plen = 64;
+ Iplifc *lifc;
+ char addr[40], preflen[6];
+ char *params[3];
+
+ switch(argc) {
+ case 7:
+ preflt = atoi(argv[6]);
+ /* fall through */
+ case 6:
+ validlt = atoi(argv[5]);
+ /* fall through */
+ case 5:
+ autoflag = atoi(argv[4]);
+ /* fall through */
+ case 4:
+ onlink = atoi(argv[3]);
+ /* fall through */
+ case 3:
+ plen = atoi(argv[2]); /* fall through */
+ case 2:
+ break;
+ default:
+ return Ebadarg;
+ }
+
+ if((parseip(prefix, argv[1])!=6) ||
+ (validlt < preflt) ||
+ (plen < 0) || (plen > 64) ||
+ (islinklocal(prefix))
+ )
+ return Ebadarg;
+
+ if(ifc->m->pref2addr == nil) /* checked before allocating so this error path cannot leak lifc */
+ return Ebadarg;
+ ifc->m->pref2addr(prefix, ifc->mac); /* medium turns the prefix into a full address in place */
+
+ lifc = smalloc(sizeof(Iplifc));
+ lifc->onlink = (onlink!=0);
+ lifc->autoflag = (autoflag!=0);
+ lifc->validlt = validlt;
+ lifc->preflt = preflt;
+ lifc->origint = origint;
+ /* NOTE(review): lifc is never freed here -- confirm that ipifcadd takes ownership of it */
+ sprint(addr, "%I", prefix);
+ sprint(preflen, "/%d", plen);
+ params[0] = "add";
+ params[1] = addr;
+ params[2] = preflen;
+
+ return ipifcadd(ifc, params, 3, 0, lifc); /* re-enters the ordinary "add addr /plen" path */
+}
+
--- /dev/null
+++ b/os/ip.original/ipmux.c
@@ -1,0 +1,839 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#define DPRINT if(0)print
+
+typedef struct Ipmuxrock Ipmuxrock;
+typedef struct Ipmux Ipmux;
+typedef struct Ip6hdr Ip6hdr;
+
+enum
+{
+ IPHDR = 20, /* sizeof(Ip4hdr) */
+};
+
+struct Ip6hdr
+{
+ uchar vcf[4]; /* version, class label, and flow label */
+ uchar ploadlen[2]; /* payload length */
+ uchar proto; /* next header, i.e. proto */
+ uchar ttl; /* hop limit, i.e. ttl */
+ uchar src[16]; /* IP source */
+ uchar dst[16]; /* IP destination */
+};
+
+
+enum
+{
+ Tproto,
+ Tdata,
+ Tiph,
+ Tdst,
+ Tsrc,
+ Tifc,
+
+ Cother = 0,
+ Cbyte, /* single byte */
+ Cmbyte, /* single byte with mask */
+ Cshort, /* single short */
+ Cmshort, /* single short with mask */
+ Clong, /* single long */
+ Cmlong, /* single long with mask */
+ Cifc,
+ Cmifc,
+};
+
+char *ftname[] =
+{
+[Tproto] "proto",
+[Tdata] "data",
+[Tiph] "iph",
+[Tdst] "dst",
+[Tsrc] "src",
+[Tifc] "ifc",
+};
+
+/*
+ * a node in the decision tree
+ */
+struct Ipmux
+{
+ Ipmux *yes; /* followed when this node's comparison matches */
+ Ipmux *no; /* followed when it does not */
+ uchar type; /* type of field(Txxxx) */
+ uchar ctype; /* type of comparison(Cxxxx) */
+ uchar len; /* length in bytes of item to compare */
+ uchar n; /* number of items val points to */
+ short off; /* offset of comparison */
+ short eoff; /* end offset of comparison */
+ uchar skiphdr; /* should offset start after ipheader */
+ uchar *val;
+ uchar *mask;
+ uchar *e; /* val+n*len*/
+
+ int ref; /* so we can garbage collect */
+ Conv *conv;
+};
+
+/*
+ * someplace to hold per conversation data
+ */
+struct Ipmuxrock
+{
+ Ipmux *chain;
+};
+
+static int ipmuxsprint(Ipmux*, int, char*, int);
+static void ipmuxkick(void *x);
+
+static char*
+skipwhite(char *p) /* return the first character at or after p that is neither space nor tab */
+{
+ while(*p == ' ' || *p == '\t')
+ p++;
+ return p;
+}
+
+static char*
+follows(char *p, char c) /* split p at the first c (overwritten with NUL); return the non-empty text after it, or nil */
+{
+ char *f;
+
+ f = strchr(p, c);
+ if(f == nil)
+ return nil;
+ *f++ = 0; /* terminates the left-hand part in place */
+ f = skipwhite(f);
+ if(*f == 0) /* nothing but white space after c */
+ return nil;
+ return f;
+}
+
+static Ipmux*
+parseop(char **pp) /* parse the field name (dst, src, ifc, proto, data[a:b], iph[a:b]); new Ipmux or nil */
+{
+ char *p = *pp; /* NOTE(review): *pp is never written back; the caller rescans from the start */
+ int type, off, end, len;
+ Ipmux *f;
+
+ p = skipwhite(p);
+ if(strncmp(p, "dst", 3) == 0){
+ type = Tdst;
+ off = offsetof(Ip4hdr, dst[0]);
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "src", 3) == 0){
+ type = Tsrc;
+ off = offsetof(Ip4hdr, src[0]);
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "ifc", 3) == 0){
+ type = Tifc;
+ off = -IPv4addrlen;
+ len = IPv4addrlen;
+ p += 3;
+ }
+ else if(strncmp(p, "proto", 5) == 0){
+ type = Tproto;
+ off = offsetof(Ip4hdr, proto);
+ len = 1;
+ p += 5;
+ }
+ else if(strncmp(p, "data", 4) == 0 || strncmp(p, "iph", 3) == 0){
+ if(strncmp(p, "data", 4) == 0) {
+ type = Tdata;
+ p += 4;
+ }
+ else {
+ type = Tiph;
+ p += 3;
+ }
+ p = skipwhite(p);
+ if(*p != '[')
+ return nil;
+ p++;
+ off = strtoul(p, &p, 0);
+ if(off < 0 || off > (64-IPHDR))
+ return nil;
+ p = skipwhite(p);
+ if(*p != ':')
+ end = off; /* "[n]" means the single byte n */
+ else {
+ p++;
+ p = skipwhite(p);
+ end = strtoul(p, &p, 0);
+ if(end < off) /* NOTE(review): end has no upper bound, and len is later stored in a uchar */
+ return nil;
+ p = skipwhite(p);
+ }
+ if(*p != ']')
+ return nil;
+ p++;
+ len = end - off + 1; /* the range is inclusive */
+ }
+ else
+ return nil;
+
+ f = smalloc(sizeof(*f));
+ f->type = type;
+ f->len = len;
+ f->off = off;
+ f->val = nil;
+ f->mask = nil;
+ f->n = 1;
+ f->ref = 1;
+ if(type == Tdata) /* only data[] offsets are counted from after the ip header */
+ f->skiphdr = 1;
+ else
+ f->skiphdr = 0;
+
+ return f;
+}
+
+static int
+htoi(char x) /* value of one hex digit; any other character yields 0 */
+{
+ if(x >= '0' && x <= '9')
+ x -= '0';
+ else if(x >= 'a' && x <= 'f')
+ x -= 'a' - 10;
+ else if(x >= 'A' && x <= 'F')
+ x -= 'A' - 10;
+ else
+ x = 0;
+ return x;
+}
+
+static int
+hextoi(char *p) /* byte value of the two hex digits p[0] (high nibble) and p[1] (low nibble) */
+{
+ return (htoi(p[0])<<4) | htoi(p[1]);
+}
+
+static void
+parseval(uchar *v, char *p, int len) /* convert up to len hex-digit pairs from p into bytes at v */
+{
+ while(*p && len-- > 0){
+ *v++ = hextoi(p);
+ p += 2; /* NOTE(review): an odd number of digits steps p past the terminating NUL */
+ }
+}
+
+static Ipmux*
+parsemux(char *p) /* parse one filter "field=val[|val...][&mask]" into a new Ipmux; nil on a syntax error */
+{
+ int n, nomask;
+ Ipmux *f;
+ char *val;
+ char *mask;
+ char *vals[20];
+ uchar *v;
+
+ /* parse operand */
+ f = parseop(&p);
+ if(f == nil)
+ return nil;
+
+ /* find value */
+ val = follows(p, '=');
+ if(val == nil)
+ goto parseerror;
+
+ /* parse mask */
+ mask = follows(val, '&');
+ if(mask != nil){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ f->mask = smalloc(f->len);
+ v4parseip(f->mask, mask);
+ break;
+ case Tdata:
+ case Tiph:
+ f->mask = smalloc(f->len);
+ parseval(f->mask, mask, f->len);
+ break;
+ default: /* Tproto: an explicit mask is rejected */
+ goto parseerror;
+ }
+ nomask = 0;
+ } else {
+ nomask = 1;
+ f->mask = smalloc(f->len); /* no mask given: use all ones so masked compares still work */
+ memset(f->mask, 0xff, f->len);
+ }
+
+ /* parse vals */
+ f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ if(f->n == 0)
+ goto parseerror;
+ f->val = smalloc(f->n*f->len);
+ v = f->val;
+ for(n = 0; n < f->n; n++){
+ switch(f->type){
+ case Tsrc:
+ case Tdst:
+ case Tifc:
+ v4parseip(v, vals[n]);
+ break;
+ case Tproto:
+ case Tdata:
+ case Tiph:
+ parseval(v, vals[n], f->len);
+ break;
+ }
+ v += f->len;
+ }
+
+ f->eoff = f->off + f->len;
+ f->e = f->val + f->n*f->len;
+ f->ctype = Cother; /* generic byte-wise compare unless a fast path below applies */
+ if(f->n == 1){
+ switch(f->len){
+ case 1:
+ f->ctype = nomask ? Cbyte : Cmbyte;
+ break;
+ case 2:
+ f->ctype = nomask ? Cshort : Cmshort;
+ break;
+ case 4:
+ if(f->type == Tifc)
+ f->ctype = nomask ? Cifc : Cmifc;
+ else
+ f->ctype = nomask ? Clong : Cmlong;
+ break;
+ }
+ }
+ return f;
+
+parseerror:
+ if(f->mask)
+ free(f->mask);
+ if(f->val)
+ free(f->val);
+ free(f);
+ return nil;
+}
+
+/*
+ * Compare relative ordering of two ipmuxs. This doesn't compare the
+ * values, just the fields being looked at.
+ *
+ * returns: <0 if a is a more specific match
+ * 0 if a and b are matching on the same fields
+ * >0 if b is a more specific match
+ */
+static int
+ipmuxcmp(Ipmux *a, Ipmux *b)
+{
+ int n;
+
+ /* compare types, lesser ones are more important */
+ n = a->type - b->type;
+ if(n != 0)
+ return n;
+
+ /* compare offsets, call earlier ones more specific */
+ n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) -
+ (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+ if(n != 0)
+ return n;
+
+ /* compare match lengths, longer ones are more specific */
+ n = b->len - a->len;
+ if(n != 0)
+ return n;
+
+ /*
+ * if we get here we have two entries matching
+ * the same bytes of the record. Now check
+ * the mask for equality. Longer masks are
+ * more specific.
+ */
+ if(a->mask != nil && b->mask == nil)
+ return -1;
+ if(a->mask == nil && b->mask != nil)
+ return 1;
+ if(a->mask != nil && b->mask != nil){
+ n = memcmp(b->mask, a->mask, a->len); /* lengths are equal here, so a->len covers both masks */
+ if(n != 0)
+ return n;
+ }
+ return 0;
+}
+
+/*
+ * Compare the values of two ipmuxs. We're assuming that ipmuxcmp
+ * returned 0 comparing them. Returns 0 only if both hold the same bytes.
+ */
+static int
+ipmuxvalcmp(Ipmux *a, Ipmux *b)
+{
+ int n;
+
+ n = b->len*b->n - a->len*a->n; /* different total value size: cannot be equal */
+ if(n != 0)
+ return n;
+ return memcmp(a->val, b->val, a->len*a->n);
+}
+
+/*
+ * add onto an existing ipmux chain in the canonical comparison
+ * order; the chain is linked through the yes pointers
+ */
+static void
+ipmuxchain(Ipmux **l, Ipmux *f)
+{
+ for(; *l; l = &(*l)->yes)
+ if(ipmuxcmp(f, *l) < 0) /* insert before the first node f orders ahead of */
+ break;
+ f->yes = *l;
+ *l = f;
+}
+
+/*
+ * copy a tree (recursively); val is duplicated, every other field is copied as-is
+ */
+static Ipmux*
+ipmuxcopy(Ipmux *f)
+{
+ Ipmux *nf;
+
+ if(f == nil)
+ return nil;
+ nf = smalloc(sizeof *nf);
+ *nf = *f; /* NOTE(review): this leaves nf->mask pointing at the same buffer as f->mask */
+ nf->no = ipmuxcopy(f->no);
+ nf->yes = ipmuxcopy(f->yes);
+ nf->val = smalloc(f->n*f->len);
+ nf->e = nf->val + f->len*f->n;
+ memmove(nf->val, f->val, f->n*f->len);
+ return nf;
+}
+
+static void
+ipmuxfree(Ipmux *f) /* free a single node and its val buffer; children and mask are not touched */
+{
+ if(f->val != nil)
+ free(f->val);
+ free(f);
+}
+
+static void
+ipmuxtreefree(Ipmux *f) /* free f and everything below it; recursion replaces a one-level free that leaked grandchildren */
+{
+ if(f == nil)
+ return;
+ if(f->no != nil)
+ ipmuxtreefree(f->no);
+ if(f->yes != nil)
+ ipmuxtreefree(f->yes);
+ ipmuxfree(f);
+}
+
+/*
+ * merge two trees; returns the root of the merged tree (a or b, reused in place)
+ */
+static Ipmux*
+ipmuxmerge(Ipmux *a, Ipmux *b)
+{
+ int n;
+ Ipmux *f;
+
+ if(a == nil)
+ return b;
+ if(b == nil)
+ return a;
+ n = ipmuxcmp(a, b);
+ if(n < 0){ /* a tests an earlier field: b must be merged under both outcomes of a */
+ f = ipmuxcopy(b);
+ a->yes = ipmuxmerge(a->yes, b);
+ a->no = ipmuxmerge(a->no, f);
+ return a;
+ }
+ if(n > 0){ /* mirror image: b tests the earlier field */
+ f = ipmuxcopy(a);
+ b->yes = ipmuxmerge(b->yes, a);
+ b->no = ipmuxmerge(b->no, f);
+ return b;
+ }
+ if(ipmuxvalcmp(a, b) == 0){ /* same test: share the node and count the extra user */
+ a->yes = ipmuxmerge(a->yes, b->yes);
+ a->no = ipmuxmerge(a->no, b->no);
+ a->ref++;
+ ipmuxfree(b);
+ return a;
+ }
+ a->no = ipmuxmerge(a->no, b); /* same field, different value: b goes down a's no side */
+ return a;
+}
+
+/*
+ * remove a chain from a demux tree. This is like merging except that
+ * we remove instead of insert. Returns 0 when the chain was removed, negative otherwise.
+ */
+static int
+ipmuxremove(Ipmux **l, Ipmux *f)
+{
+ int n, rv;
+ Ipmux *ft;
+
+ if(f == nil)
+ return 0; /* we've removed it all */
+ if(*l == nil)
+ return -1;
+
+ ft = *l;
+ n = ipmuxcmp(ft, f);
+ if(n < 0){
+ /* *l is matching an earlier field, descend both paths */
+ rv = ipmuxremove(&ft->yes, f);
+ rv += ipmuxremove(&ft->no, f);
+ return rv;
+ }
+ if(n > 0){
+ /* f represents an earlier field than *l, this should be impossible */
+ return -1;
+ }
+
+ /* if we get here f and *l are comparing the same fields */
+ if(ipmuxvalcmp(ft, f) != 0){
+ /* different values mean mutually exclusive */
+ return ipmuxremove(&ft->no, f);
+ }
+
+ /* we found a match */
+ if(--(ft->ref) == 0){
+ /*
+ * a dead node implies the whole yes side is also dead.
+ * since our chain is constrained to be on that side,
+ * we're done.
+ */
+ ipmuxtreefree(ft->yes);
+ *l = ft->no;
+ ipmuxfree(ft);
+ return 0;
+ }
+
+ /*
+ * free the rest of the chain. it is constrained to match the
+ * yes side.
+ */
+ return ipmuxremove(&ft->yes, f->yes);
+}
+
+/*
+ * connection request is a semicolon-separated list of filters
+ * e.g. proto=17;data[0:3]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ *
+ * there's no protection against overlapping specs.
+ */
+static char*
+ipmuxconnect(Conv *c, char **argv, int argc)
+{
+ int i, n;
+ char *field[10];
+ Ipmux *mux, *chain;
+ Ipmuxrock *r;
+ Fs *f;
+
+ f = c->p->f;
+
+ if(argc != 2)
+ return Ebadarg;
+
+ n = getfields(argv[1], field, nelem(field), 1, ";");
+ if(n <= 0)
+ return Ebadarg;
+
+ chain = nil;
+ mux = nil;
+ for(i = 0; i < n; i++){
+ mux = parsemux(field[i]);
+ if(mux == nil){
+ ipmuxtreefree(chain);
+ return Ebadarg;
+ }
+ ipmuxchain(&chain, mux);
+ }
+ if(chain == nil)
+ return Ebadarg;
+ mux->conv = c; /* NOTE(review): this is the last filter parsed, which ipmuxchain may not have put last */
+
+ /* save a copy of the chain so we can later remove it */
+ mux = ipmuxcopy(chain);
+ r = (Ipmuxrock*)(c->ptcl);
+ r->chain = chain; /* the original stays with the conv; the copy goes into the shared tree */
+
+ /* add the chain to the protocol demultiplexor tree */
+ wlock(f);
+ f->ipmux->priv = ipmuxmerge(f->ipmux->priv, mux);
+ wunlock(f);
+
+ Fsconnected(c, nil);
+ return nil;
+}
+
+static int
+ipmuxstate(Conv *c, char *state, int n) /* state hook: print this conversation's filter chain into state */
+{
+ Ipmuxrock *r;
+
+ r = (Ipmuxrock*)(c->ptcl);
+ return ipmuxsprint(r->chain, 0, state, n);
+}
+
+static void
+ipmuxcreate(Conv *c) /* create hook: open the read and write queues and start with an empty chain */
+{
+ Ipmuxrock *r;
+
+ c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->wq = qopen(64*1024, Qkick, ipmuxkick, c); /* writes are pushed down the stack by ipmuxkick */
+ r = (Ipmuxrock*)(c->ptcl);
+ r->chain = nil;
+}
+
+static char*
+ipmuxannounce(Conv*, char**, int) /* announce hook: always refuses with an error string */
+{
+ return "ipmux does not support announce";
+}
+
+static void
+ipmuxclose(Conv *c) /* close hook: shut the queues, clear addresses, and pull this conv's chain out of the tree */
+{
+ Ipmuxrock *r;
+ Fs *f = c->p->f;
+
+ r = (Ipmuxrock*)(c->ptcl);
+
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+
+ wlock(f);
+ ipmuxremove(&(c->p->priv), r->chain); /* the result is ignored */
+ wunlock(f);
+ ipmuxtreefree(r->chain); /* the conv's own saved copy of the chain */
+ r->chain = nil;
+}
+
+/*
+ * takes a fully formed ip packet and just passes it down
+ * the stack; the version nibble of the first byte selects ipoput4 or ipoput6
+ */
+static void
+ipmuxkick(void *x)
+{
+ Conv *c = x;
+ Block *bp;
+
+ bp = qget(c->wq);
+ if(bp == nil)
+ return;
+ else {
+ Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
+ if(((ih4->vihl)&0xF0) != 0x60) /* parenthesized: != binds tighter than &, so the old test was vihl & 1 */
+ ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
+ else {
+ Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
+ ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
+ }
+ }
+}
+
+static void
+ipmuxiput(Proto *p, Ipifc *ifc, Block *bp) /* rcv hook: run bp through the filter tree; queue it on the matching conv or hand it on */
+{
+ int len, hl;
+ Fs *f = p->f;
+ uchar *m, *h, *v, *e, *ve, *hp;
+ Conv *c;
+ Ipmux *mux;
+ Ip4hdr *ip;
+ Ip6hdr *ip6;
+
+ ip = (Ip4hdr*)bp->rp;
+ hl = (ip->vihl&0x0F)<<2; /* v4 header length in bytes, used for skiphdr offsets */
+
+ if(p->priv == nil) /* no filters installed at all */
+ goto nomatch;
+
+ h = bp->rp;
+ len = BLEN(bp);
+
+ /* run the v4 filter */
+ rlock(f);
+ c = nil;
+ mux = f->ipmux->priv;
+ while(mux != nil){
+ if(mux->eoff > len){ /* field would end past the packet: treat as no match */
+ mux = mux->no;
+ continue;
+ }
+ hp = h + mux->off + ((int)mux->skiphdr)*hl;
+ switch(mux->ctype){
+ case Cbyte:
+ if(*mux->val == *hp)
+ goto yes;
+ break;
+ case Cmbyte:
+ if((*hp & *mux->mask) == *mux->val)
+ goto yes;
+ break;
+ case Cshort:
+ if(*((ushort*)mux->val) == *(ushort*)hp)
+ goto yes;
+ break;
+ case Cmshort:
+ if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
+ goto yes;
+ break;
+ case Clong:
+ if(*((ulong*)mux->val) == *(ulong*)hp)
+ goto yes;
+ break;
+ case Cmlong:
+ if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+ goto yes;
+ break;
+ case Cifc: /* ifc filters compare the receiving interface's first local address, not packet bytes */
+ if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
+ goto yes;
+ break;
+ case Cmifc:
+ if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
+ goto yes;
+ break;
+ default: /* Cother: try each of the n values byte by byte under the mask */
+ v = mux->val;
+ for(e = mux->e; v < e; v = ve){
+ m = mux->mask;
+ hp = h + mux->off; /* NOTE(review): unlike the computation above, skiphdr*hl is not added here */
+ for(ve = v + mux->len; v < ve; v++){
+ if((*hp++ & *m++) != *v)
+ break;
+ }
+ if(v == ve)
+ goto yes;
+ }
+ }
+ mux = mux->no;
+ continue;
+yes:
+ if(mux->conv != nil) /* remember the most recent conv seen along the matching path */
+ c = mux->conv;
+ mux = mux->yes;
+ }
+ runlock(f);
+
+ if(c != nil){
+ /* tack on interface address */
+ bp = padblock(bp, IPaddrlen);
+ ipmove(bp->rp, ifc->lifc->local);
+ bp = concatblock(bp);
+ if(bp != nil)
+ if(qpass(c->rq, bp) < 0)
+ print("Q");
+ return;
+ }
+
+nomatch:
+ /* doesn't match any filter, hand it to the specific protocol handler */
+ ip = (Ip4hdr*)bp->rp;
+ if((ip->vihl&0xF0)==0x40) {
+ p = f->t2p[ip->proto];
+ } else {
+ ip6 = (Ip6hdr*)bp->rp;
+ p = f->t2p[ip6->proto];
+ }
+ if(p && p->rcv)
+ (*p->rcv)(p, ifc, bp);
+ else
+ freeblist(bp); /* nobody wants it */
+ return;
+}
+
+static int
+ipmuxsprint(Ipmux *mux, int level, char *buf, int len) /* print the tree rooted at mux into buf, indented by level; returns bytes written */
+{
+ int i, j, n;
+ uchar *v;
+
+ n = 0;
+ for(i = 0; i < level; i++)
+ n += snprint(buf+n, len-n, " ");
+ if(mux == nil){ /* an empty branch prints as a bare (indented) newline */
+ n += snprint(buf+n, len-n, "\n");
+ return n;
+ }
+ n += snprint(buf+n, len-n, "h[%d:%d]&",
+ mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])),
+ mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "=");
+ v = mux->val;
+ for(j = 0; j < mux->n; j++){ /* each value is followed by '|', including the last */
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", *v++);
+ n += snprint(buf+n, len-n, "|");
+ }
+ n += snprint(buf+n, len-n, "\n");
+ level++;
+ n += ipmuxsprint(mux->no, level, buf+n, len-n); /* the no branch is printed before the yes branch */
+ n += ipmuxsprint(mux->yes, level, buf+n, len-n);
+ return n;
+}
+
+static int
+ipmuxstats(Proto *p, char *buf, int len) /* stats hook: dump the whole filter tree under the Fs read lock */
+{
+ int n;
+ Fs *f = p->f;
+
+ rlock(f);
+ n = ipmuxsprint(p->priv, 0, buf, len);
+ runlock(f);
+
+ return n;
+}
+
+void
+ipmuxinit(Fs *f) /* build the "ipmux" Proto, fill in its hooks, and register it with f */
+{
+ Proto *ipmux;
+
+ ipmux = smalloc(sizeof(Proto));
+ ipmux->priv = nil; /* root of the filter tree; nil until the first connect */
+ ipmux->name = "ipmux";
+ ipmux->connect = ipmuxconnect;
+ ipmux->announce = ipmuxannounce;
+ ipmux->state = ipmuxstate;
+ ipmux->create = ipmuxcreate;
+ ipmux->close = ipmuxclose;
+ ipmux->rcv = ipmuxiput;
+ ipmux->ctl = nil;
+ ipmux->advise = nil;
+ ipmux->stats = ipmuxstats;
+ ipmux->ipproto = -1;
+ ipmux->nc = 64;
+ ipmux->ptclsize = sizeof(Ipmuxrock);
+
+ f->ipmux = ipmux; /* hack for Fsrcvpcol */
+
+ Fsproto(f, ipmux);
+}
--- /dev/null
+++ b/os/ip.original/iproute.c
@@ -1,0 +1,852 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+static void walkadd(Fs*, Route**, Route*);
+static void addnode(Fs*, Route**, Route*);
+static void calcd(Route*);
+
+/*
+ * these are used for all instances of IP:
+ * the free lists recycle Route nodes (chained through ->mid),
+ * routelock is write-locked around tree updates and read-locked
+ * by ipwalkroutes, and the generation counters are bumped on every
+ * add/delete so that routes cached in a Conv can be recognised as stale.
+ */
+Route* v4freelist;
+Route* v6freelist;
+RWlock routelock;
+ulong v4routegeneration, v6routegeneration;
+
+/*
+ * Return a node to the v4 or v6 free list (chosen by r->type).
+ * The left and right links are cleared; mid is reused as the
+ * free-list link.
+ */
+static void
+freeroute(Route *r)
+{
+	Route **head;
+
+	head = (r->type & Rv4) ? &v4freelist : &v6freelist;
+	r->left = nil;
+	r->right = nil;
+	r->mid = *head;
+	*head = r;
+}
+
+/*
+ * Get a zeroed route node of the given type, taking it from the
+ * matching free list when possible and from malloc otherwise.
+ * The node starts with ref == 1 and no interface.
+ * Panics if malloc fails.
+ */
+static Route*
+allocroute(int type)
+{
+	Route **head, *r;
+	int size;
+
+	head = &v6freelist;
+	size = sizeof(RouteTree) + sizeof(V6route);
+	if(type & Rv4){
+		head = &v4freelist;
+		size = sizeof(RouteTree) + sizeof(V4route);
+	}
+
+	r = *head;
+	if(r == nil){
+		r = malloc(size);
+		if(r == nil)
+			panic("out of routing nodes");
+	} else
+		*head = r->mid;
+
+	memset(r, 0, size);
+	r->type = type;
+	r->ifc = nil;
+	r->ref = 1;
+	return r;
+}
+
+/*
+ * Push subtree r onto the work queue *q.  The queue cell is itself
+ * a Route node: ->left carries the subtree, ->mid links the cells.
+ * A nil subtree is ignored.
+ */
+static void
+addqueue(Route **q, Route *r)
+{
+	Route *cell;
+
+	if(r != nil){
+		cell = allocroute(r->type);
+		cell->left = r;
+		cell->mid = *q;
+		*q = cell;
+	}
+}
+
+/*
+ * compare 2 v6 addresses held as IPllen longs, most significant
+ * first; returns 1, -1 or 0 like a three-way comparison.
+ */
+static int
+lcmp(ulong *a, ulong *b)
+{
+	int i;
+
+	for(i = 0; i < IPllen; i++){
+		if(a[i] != b[i])
+			return a[i] > b[i] ? 1 : -1;
+	}
+	return 0;
+}
+
+/*
+ * compare 2 v4 or v6 ranges; the result describes a relative to b
+ */
+enum
+{
+ Rpreceeds, /* a ends before b starts */
+ Rfollows, /* a starts after b ends */
+ Requals, /* same start and end */
+ Rcontains, /* a covers all of b and is not equal to it */
+ Rcontained, /* everything else */
+};
+
+/*
+ * Classify the address range of a against that of b.
+ * The address family is taken from a->type only; b is read
+ * through the same union member.
+ */
+static int
+rangecompare(Route *a, Route *b)
+{
+ if(a->type & Rv4){
+ if(a->v4.endaddress < b->v4.address)
+ return Rpreceeds;
+
+ if(a->v4.address > b->v4.endaddress)
+ return Rfollows;
+
+ if(a->v4.address <= b->v4.address
+ && a->v4.endaddress >= b->v4.endaddress){
+ if(a->v4.address == b->v4.address
+ && a->v4.endaddress == b->v4.endaddress)
+ return Requals;
+ return Rcontains;
+ }
+ return Rcontained;
+ }
+
+ /* v6: same tests using lcmp on the long arrays */
+ if(lcmp(a->v6.endaddress, b->v6.address) < 0)
+ return Rpreceeds;
+
+ if(lcmp(a->v6.address, b->v6.endaddress) > 0)
+ return Rfollows;
+
+ if(lcmp(a->v6.address, b->v6.address) <= 0
+ && lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
+ if(lcmp(a->v6.address, b->v6.address) == 0
+ && lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
+ return Requals;
+ return Rcontains;
+ }
+
+ return Rcontained;
+}
+
+/*
+ * Copy the gateway address of new into old; the address
+ * family is decided by new->type.
+ */
+static void
+copygate(Route *old, Route *new)
+{
+	if(new->type & Rv4){
+		memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
+		return;
+	}
+	memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+}
+
+/*
+ * Re-insert every node of the subtree p into the tree at *root.
+ * Each node is detached from its left and right children before
+ * being handed to addnode; its mid link is left as it is.
+ */
+static void
+walkadd(Fs *f, Route **root, Route *p)
+{
+	Route *lsub, *rsub;
+
+	lsub = p->left;
+	rsub = p->right;
+	p->left = nil;
+	p->right = nil;
+	addnode(f, root, p);
+	if(lsub != nil)
+		walkadd(f, root, lsub);
+	if(rsub != nil)
+		walkadd(f, root, rsub);
+}
+
+/*
+ * Recompute p->depth as one more than the deepest of its
+ * left, right and mid children.  A nil p is ignored.
+ */
+static void
+calcd(Route *p)
+{
+	Route *kid;
+	int deepest;
+
+	if(p == nil)
+		return;
+
+	deepest = 0;
+	kid = p->left;
+	if(kid != nil)
+		deepest = kid->depth;
+	kid = p->right;
+	if(kid != nil && kid->depth > deepest)
+		deepest = kid->depth;
+	kid = p->mid;
+	if(kid != nil && kid->depth > deepest)
+		deepest = kid->depth;
+	p->depth = deepest+1;
+}
+
+/*
+ * balance the tree at the current node:
+ * if the left and right depths differ by more than one, do a single
+ * rotation that lifts the deeper child into *cur; in every case the
+ * depth of the affected nodes is recomputed.
+ * *cur must not be nil.
+ */
+static void
+balancetree(Route **cur)
+{
+ Route *p, *l, *r;
+ int dl, dr;
+
+ /*
+ * if left and right are
+ * too out of balance,
+ * rotate tree node
+ */
+ p = *cur;
+ dl = 0; if(l = p->left) dl = l->depth;
+ dr = 0; if(r = p->right) dr = r->depth;
+
+ if(dl > dr+1) {
+ /* rotate right: l becomes the subtree root, p its right child */
+ p->left = l->right;
+ l->right = p;
+ *cur = l;
+ calcd(p);
+ calcd(l);
+ } else
+ if(dr > dl+1) {
+ /* rotate left: r becomes the subtree root, p its left child */
+ p->right = r->left;
+ r->left = p;
+ *cur = r;
+ calcd(p);
+ calcd(r);
+ } else
+ calcd(p);
+}
+
+/*
+ * add a new node to the tree rooted at *cur.
+ * Disjoint ranges go left or right, a range contained in the tree
+ * node goes down the mid link, an equal range updates the existing
+ * node (and new is freed), and a range that contains the tree node
+ * replaces it, with the displaced subtree queued on f->queue for
+ * the caller to re-insert.  The subtree is rebalanced on the way out.
+ */
+static void
+addnode(Fs *f, Route **cur, Route *new)
+{
+ Route *p;
+
+ p = *cur;
+ if(p == 0) {
+ *cur = new;
+ new->depth = 1;
+ return;
+ }
+
+ switch(rangecompare(new, p)){
+ case Rpreceeds:
+ addnode(f, &p->left, new);
+ break;
+ case Rfollows:
+ addnode(f, &p->right, new);
+ break;
+ case Rcontains:
+ /*
+ * if new node is superset
+ * of tree node,
+ * replace tree node and
+ * queue tree node to be
+ * merged into root.
+ */
+ *cur = new;
+ new->depth = 1;
+ addqueue(&f->queue, p);
+ break;
+ case Requals:
+ /*
+ * supersede the old entry if the old one isn't
+ * a local interface.
+ */
+ if((p->type & Rifc) == 0){
+ p->type = new->type;
+ p->ifcid = -1; /* forces the interface to be looked up again */
+ copygate(p, new);
+ } else if(new->type & Rifc)
+ p->ref++; /* same interface route added again: count it */
+ freeroute(new);
+ break;
+ case Rcontained:
+ addnode(f, &p->mid, new);
+ break;
+ }
+
+ balancetree(cur);
+}
+
+/*
+ * index into f->v4root for address a: the top 5 bits are masked off
+ * and the following bits are kept (Lroot is defined elsewhere).
+ */
+#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
+
+/*
+ * Add the v4 route a/mask via gate with the given type bits and tag.
+ * a, mask and gate are IPv4addrlen-byte addresses in network order.
+ * One node is inserted into every root bucket the range spans.
+ * sizeof(p->tag) bytes are copied from tag, so tag must be at least
+ * that long.
+ */
+void
+v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+ Route *p;
+ ulong sa;
+ ulong m;
+ ulong ea;
+ int h, eh;
+
+ /* sa..ea is the inclusive address range covered by the route */
+ m = nhgetl(mask);
+ sa = nhgetl(a) & m;
+ ea = sa | ~m;
+
+ eh = V4H(ea);
+ for(h=V4H(sa); h<=eh; h++) {
+ p = allocroute(Rv4 | type);
+ p->v4.address = sa;
+ p->v4.endaddress = ea;
+ memmove(p->v4.gate, gate, sizeof(p->v4.gate));
+ memmove(p->tag, tag, sizeof(p->tag));
+
+ wlock(&routelock);
+ addnode(f, &f->v4root[h], p);
+ /* re-insert subtrees that addnode displaced onto f->queue */
+ while(p = f->queue) {
+ f->queue = p->mid;
+ walkadd(f, &f->v4root[h], p->left);
+ freeroute(p);
+ }
+ wunlock(&routelock);
+ }
+ /* invalidates routes cached in conversations (see v4lookup) */
+ v4routegeneration++;
+
+ ipifcaddroute(f, Rv4, a, mask, gate, type);
+}
+
+/*
+ * index into f->v6root, computed from the last long of the address
+ * the same way V4H does for a v4 address.
+ */
+#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
+/* true for an all-zero address and mask whose tag is not "ra" */
+#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+
+/*
+ * Add the v6 route a/mask via gate with the given type bits and tag.
+ * a, mask and gate are IPaddrlen-byte addresses in network order.
+ * One node is inserted into every root bucket the range spans.
+ * sizeof(p->tag) bytes are copied from tag, so tag must be at least
+ * that long.
+ */
+void
+v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+{
+ Route *p;
+ ulong sa[IPllen], ea[IPllen];
+ ulong x, y;
+ int h, eh;
+
+ /*
+ if(ISDFLT(a, mask, tag))
+ f->v6p->cdrouter = -1;
+ */
+
+
+ /* sa..ea is the inclusive address range, one long at a time */
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ sa[h] = x & y;
+ ea[h] = x | ~y;
+ }
+
+ eh = V6H(ea);
+ for(h = V6H(sa); h <= eh; h++) {
+ p = allocroute(type);
+ memmove(p->v6.address, sa, IPaddrlen);
+ memmove(p->v6.endaddress, ea, IPaddrlen);
+ memmove(p->v6.gate, gate, IPaddrlen);
+ memmove(p->tag, tag, sizeof(p->tag));
+
+ wlock(&routelock);
+ addnode(f, &f->v6root[h], p);
+ /* re-insert subtrees that addnode displaced onto f->queue */
+ while(p = f->queue) {
+ f->queue = p->mid;
+ walkadd(f, &f->v6root[h], p->left);
+ freeroute(p);
+ }
+ wunlock(&routelock);
+ }
+ /* invalidates routes cached in conversations (see v6lookup) */
+ v6routegeneration++;
+
+ ipifcaddroute(f, 0, a, mask, gate, type);
+}
+
+/*
+ * Find the node whose range equals that of r, starting at *cur.
+ * Returns the address of the link that points to the node, so the
+ * caller can unlink it, or nil when there is no exact match.
+ */
+Route**
+looknode(Route **cur, Route *r)
+{
+	Route *p;
+
+	while((p = *cur) != nil){
+		switch(rangecompare(r, p)){
+		case Requals:
+			return cur;
+		case Rcontains:
+			/* r is wider than this node: it cannot be below it */
+			return nil;
+		case Rpreceeds:
+			cur = &p->left;
+			break;
+		case Rfollows:
+			cur = &p->right;
+			break;
+		case Rcontained:
+			cur = &p->mid;
+			break;
+		}
+	}
+	return nil;
+}
+
+/*
+ * Remove the v4 route a/mask from every root bucket its range spans.
+ * A node is only unlinked when its reference count drops to zero;
+ * its three subtrees are then re-inserted into the bucket.
+ * routelock is taken here only if dolock is set; otherwise the
+ * caller is expected to hold it (routeflush is called that way).
+ */
+void
+v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+ Route **r, *p;
+ Route rt;
+ int h, eh;
+ ulong m;
+
+ /* rt is a search key: only the range and the type are filled in */
+ m = nhgetl(mask);
+ rt.v4.address = nhgetl(a) & m;
+ rt.v4.endaddress = rt.v4.address | ~m;
+ rt.type = Rv4;
+
+ eh = V4H(rt.v4.endaddress);
+ for(h=V4H(rt.v4.address); h<=eh; h++) {
+ if(dolock)
+ wlock(&routelock);
+ r = looknode(&f->v4root[h], &rt);
+ if(r) {
+ p = *r;
+ if(--(p->ref) == 0){
+ *r = 0;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+ /* put the orphaned subtrees back into this bucket */
+ while(p = f->queue) {
+ f->queue = p->mid;
+ walkadd(f, &f->v4root[h], p->left);
+ freeroute(p);
+ }
+ }
+ }
+ if(dolock)
+ wunlock(&routelock);
+ }
+ v4routegeneration++;
+
+ ipifcremroute(f, Rv4, a, mask);
+}
+
+/*
+ * Remove the v6 route a/mask from every root bucket its range spans.
+ * A node is only unlinked when its reference count drops to zero;
+ * its three subtrees are then re-inserted into the bucket.
+ * routelock is taken here only if dolock is set; otherwise the
+ * caller is expected to hold it (routeflush is called that way).
+ */
+void
+v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+{
+ Route **r, *p;
+ Route rt;
+ int h, eh;
+ ulong x, y;
+
+ /* rt is a search key: only the range and the type are filled in */
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ rt.v6.address[h] = x & y;
+ rt.v6.endaddress[h] = x | ~y;
+ }
+ rt.type = 0;
+
+ eh = V6H(rt.v6.endaddress);
+ for(h=V6H(rt.v6.address); h<=eh; h++) {
+ if(dolock)
+ wlock(&routelock);
+ r = looknode(&f->v6root[h], &rt);
+ if(r) {
+ p = *r;
+ if(--(p->ref) == 0){
+ *r = 0;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+ /* put the orphaned subtrees back into this bucket */
+ while(p = f->queue) {
+ f->queue = p->mid;
+ walkadd(f, &f->v6root[h], p->left);
+ freeroute(p);
+ }
+ }
+ }
+ if(dolock)
+ wunlock(&routelock);
+ }
+ v6routegeneration++;
+
+ ipifcremroute(f, 0, a, mask);
+}
+
+/*
+ * Find the most specific v4 route for the IPv4addrlen-byte address a.
+ * If c is given, its cached route is returned while the route
+ * generation is unchanged, and the result of a fresh lookup is
+ * cached in it.  Returns nil when no interface can be found for the
+ * matching route; may also return nil when nothing matches.
+ * NOTE(review): routelock is not taken here - confirm callers hold it
+ * or that unlocked reads are intended.
+ */
+Route*
+v4lookup(Fs *f, uchar *a, Conv *c)
+{
+ Route *p, *q;
+ ulong la;
+ uchar gate[IPaddrlen];
+ Ipifc *ifc;
+
+ if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
+ return c->r;
+
+ la = nhgetl(a);
+ q = nil;
+ /* every hit is remembered, then the narrower ranges under mid are tried */
+ for(p=f->v4root[V4H(la)]; p;)
+ if(la >= p->v4.address) {
+ if(la <= p->v4.endaddress) {
+ q = p;
+ p = p->mid;
+ } else
+ p = p->right;
+ } else
+ p = p->left;
+
+ /* (re)bind the route to an interface if it has none or the binding is stale */
+ if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+ if(q->type & Rifc) {
+ /* interface route: search by the route's own address */
+ hnputl(gate+IPv4off, q->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else
+ v4tov6(gate, q->v4.gate);
+ ifc = findipifc(f, gate, q->type);
+ if(ifc == nil)
+ return nil;
+ q->ifc = ifc;
+ q->ifcid = ifc->ifcid;
+ }
+
+ if(c != nil){
+ c->r = q;
+ c->rgen = v4routegeneration;
+ }
+
+ return q;
+}
+
+/*
+ * Find the most specific route for the IPaddrlen-byte address a.
+ * Addresses carrying the v4 prefix are first tried in the v4 tables;
+ * only if that yields nothing are the v6 tables searched.
+ * If c is given, its cached route is returned while the v6 route
+ * generation is unchanged, and the result of a fresh lookup is
+ * cached in it.  Returns nil when no interface can be found for the
+ * matching route; may also return nil when nothing matches.
+ */
+Route*
+v6lookup(Fs *f, uchar *a, Conv *c)
+{
+ Route *p, *q;
+ ulong la[IPllen];
+ int h;
+ ulong x, y;
+ uchar gate[IPaddrlen];
+ Ipifc *ifc;
+
+ if(memcmp(a, v4prefix, IPv4off) == 0){
+ q = v4lookup(f, a+IPv4off, c);
+ if(q != nil)
+ return q;
+ }
+
+ if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
+ return c->r;
+
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+
+ q = 0;
+ for(p=f->v6root[V6H(la)]; p;){
+ /* la below the start of the range: go left */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ /* la above the end of the range: go right */
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ /* inside the range: remember it and try the narrower ranges */
+ q = p;
+ p = p->mid;
+next: ;
+ }
+
+ /* (re)bind the route to an interface if it has none or the binding is stale */
+ if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
+ if(q->type & Rifc) {
+ for(h = 0; h < IPllen; h++)
+ hnputl(gate+4*h, q->v6.address[h]);
+ ifc = findipifc(f, gate, q->type);
+ } else
+ ifc = findipifc(f, q->v6.gate, q->type);
+ if(ifc == nil)
+ return nil;
+ q->ifc = ifc;
+ q->ifcid = ifc->ifcid;
+ }
+ if(c != nil){
+ c->r = q;
+ c->rgen = v6routegeneration;
+ }
+
+ return q;
+}
+
+/*
+ * Render the route type bits as a 4 character, blank padded,
+ * NUL terminated string in p (which must hold 5 bytes):
+ * '4' or '6', then 'i' for an interface route, then one of
+ * 'u', 'b', 'm', then 'p' for point to point.
+ */
+void
+routetype(int type, char *p)
+{
+	int i;
+
+	memset(p, ' ', 4);
+	p[4] = 0;
+
+	i = 0;
+	p[i++] = (type & Rv4) ? '4' : '6';
+	if(type & Rifc)
+		p[i++] = 'i';
+	if(type & Runi)
+		p[i++] = 'u';
+	else if(type & Rbcast)
+		p[i++] = 'b';
+	else if(type & Rmulti)
+		p[i++] = 'm';
+	if(type & Rptpt)
+		p[i] = 'p';
+}
+
+/* one line of route output: address, mask, gateway, type, tag, interface */
+char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
+
+/*
+ * Unpack route r into printable pieces: addr, mask and gate are
+ * filled as IPaddrlen-byte addresses (v4 routes get the v4 prefix,
+ * and their mask is all ones over the prefix part), t receives the
+ * routetype string (5 bytes), and *nifc the conversation index of
+ * the bound interface or -1 if there is none.
+ */
+void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+{
+ int i;
+
+ if(r->type & Rv4){
+ memmove(addr, v4prefix, IPv4off);
+ hnputl(addr+IPv4off, r->v4.address);
+ memset(mask, 0xff, IPv4off);
+ /* the mask is rebuilt from the bits that start and end have in common */
+ hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+ memmove(gate, v4prefix, IPv4off);
+ memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
+ } else {
+ for(i = 0; i < IPllen; i++){
+ hnputl(addr + 4*i, r->v6.address[i]);
+ hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ }
+ memmove(gate, r->v6.gate, IPaddrlen);
+ }
+
+ routetype(r->type, t);
+
+ if(r->ifc)
+ *nifc = r->ifc->conv->x;
+ else
+ *nifc = -1;
+}
+
+/*
+ * this code is not in rr to reduce stack size
+ *
+ * Format route r as one rformat line at rw->p (bounded by rw->e).
+ * While rw->o is negative, -rw->o bytes of output are still to be
+ * skipped (the read offset): a line that lies entirely inside the
+ * skipped region is dropped, a line that straddles its end has its
+ * tail moved to the front of the buffer.
+ */
+static void
+sprintroute(Route *r, Routewalk *rw)
+{
+ int nifc, n;
+ char t[5], *iname, ifbuf[5];
+ uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+ char *p;
+
+ convroute(r, addr, mask, gate, t, &nifc);
+ iname = "-";
+ if(nifc != -1) {
+ iname = ifbuf;
+ snprint(ifbuf, sizeof ifbuf, "%d", nifc);
+ }
+ p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+ if(rw->o < 0){
+ n = p - rw->p;
+ if(n > -rw->o){
+ /* keep only the part of the line beyond the offset */
+ memmove(rw->p, rw->p-rw->o, n+rw->o);
+ rw->p = p + rw->o;
+ }
+ rw->o += n;
+ } else
+ rw->p = p;
+}
+
+/*
+ * In-order walk (left, node, mid, right) of the tree at r, calling
+ * rw->walk on each node that belongs to root bucket rw->h; a route
+ * is present in every bucket it spans, so this reports it once.
+ * Returns 0 as soon as the output buffer is full, 1 otherwise.
+ */
+static int
+rr(Route *r, Routewalk *rw)
+{
+	int h;
+
+	if(rw->e <= rw->p)
+		return 0;
+	if(r == nil)
+		return 1;
+
+	if(!rr(r->left, rw))
+		return 0;
+
+	h = (r->type & Rv4) ? V4H(r->v4.address) : V6H(r->v6.address);
+	if(h == rw->h)
+		rw->walk(r, rw);
+
+	if(!rr(r->mid, rw))
+		return 0;
+	return rr(r->right, rw);
+}
+
+/*
+ * Apply rw->walk to every v4 and then every v6 route of f, under
+ * the routelock read lock, stopping early once the buffer
+ * described by rw->p and rw->e is full.
+ */
+void
+ipwalkroutes(Fs *f, Routewalk *rw)
+{
+	rlock(&routelock);
+	if(rw->e > rw->p){
+		rw->h = 0;
+		while(rw->h < nelem(f->v4root) && rr(f->v4root[rw->h], rw) != 0)
+			rw->h++;
+	}
+	if(rw->e > rw->p){
+		rw->h = 0;
+		while(rw->h < nelem(f->v6root) && rr(f->v6root[rw->h], rw) != 0)
+			rw->h++;
+	}
+	runlock(&routelock);
+}
+
+/*
+ * Read handler for the route table: format the routes into p,
+ * skipping the first offset bytes of output and producing at most
+ * n bytes.  Returns the number of bytes placed in p.
+ */
+long
+routeread(Fs *f, char *p, ulong offset, int n)
+{
+	Routewalk walk;
+
+	walk.walk = sprintroute;
+	walk.o = -offset;
+	walk.p = p;
+	walk.e = p+n;
+
+	ipwalkroutes(f, &walk);
+
+	return walk.p - p;
+}
+
+/*
+ * this code is not in routeflush to reduce stack size
+ *
+ * Delete route r by converting it back to address and mask
+ * and calling the delete routine of its address family.
+ */
+void
+delroute(Fs *f, Route *r, int dolock)
+{
+	uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
+	char t[5];
+	int nifc;
+
+	convroute(r, addr, mask, gate, t, &nifc);
+	if((r->type & Rv4) == 0){
+		v6delroute(f, addr, mask, dolock);
+		return;
+	}
+	v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
+}
+
+/*
+ * recurse (mid, left, right, then the node itself) until one route
+ * is deleted; interface routes are never deleted, and a non-nil tag
+ * restricts deletion to routes carrying that tag.
+ * returns 0 if nothing is deleted, 1 otherwise.
+ * the delete is done without locking, so the caller holds routelock.
+ */
+int
+routeflush(Fs *f, Route *r, char *tag)
+{
+	if(r == nil)
+		return 0;
+	if(routeflush(f, r->mid, tag)
+	|| routeflush(f, r->left, tag)
+	|| routeflush(f, r->right, tag))
+		return 1;
+	if(r->type & Rifc)
+		return 0;
+	if(tag != nil && strncmp(tag, r->tag, sizeof(r->tag)) != 0)
+		return 0;
+	delroute(f, r, 0);
+	return 1;
+}
+
+/*
+ * Write handler for the route table.  p[0..n) holds one command:
+ *	flush [tag]		delete all non-interface routes (with that tag)
+ *	remove addr mask	delete one route
+ *	add addr mask gate	add a route, tagged with the writer's tag
+ *	tag name		set the tag used by later adds on this Chan
+ * Unknown commands are ignored.  Raises Ebadarg on an empty command,
+ * on too few arguments, and on "tag" without a Chan.
+ * Returns n.
+ */
+long
+routewrite(Fs *f, Chan *c, char *p, int n)
+{
+	int h, changed;
+	char *tag;
+	Cmdbuf *cb;
+	uchar addr[IPaddrlen];
+	uchar mask[IPaddrlen];
+	uchar gate[IPaddrlen];
+	IPaux *a, *na;
+
+	cb = parsecmd(p, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	/* an empty write has no f[0] to compare against */
+	if(cb->nf < 1)
+		error(Ebadarg);
+
+	if(strcmp(cb->f[0], "flush") == 0){
+		/* no tag argument means flush regardless of tag */
+		tag = nil;
+		if(cb->nf > 1)
+			tag = cb->f[1];
+		/*
+		 * routeflush deletes at most one route per call because
+		 * deleting reshapes the tree; repeat until nothing changes.
+		 */
+		for(h = 0; h < nelem(f->v4root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v4root[h], tag);
+				wunlock(&routelock);
+			}
+		for(h = 0; h < nelem(f->v6root); h++)
+			for(changed = 1; changed;){
+				wlock(&routelock);
+				changed = routeflush(f, f->v6root[h], tag);
+				wunlock(&routelock);
+			}
+	} else if(strcmp(cb->f[0], "remove") == 0){
+		if(cb->nf < 3)
+			error(Ebadarg);
+		parseip(addr, cb->f[1]);
+		parseipmask(mask, cb->f[2]);
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
+		else
+			v6delroute(f, addr, mask, 1);
+	} else if(strcmp(cb->f[0], "add") == 0){
+		if(cb->nf < 4)
+			error(Ebadarg);
+		parseip(addr, cb->f[1]);
+		parseipmask(mask, cb->f[2]);
+		parseip(gate, cb->f[3]);
+		tag = "none";
+		if(c != nil){
+			a = c->aux;
+			tag = a->tag;
+		}
+		if(memcmp(addr, v4prefix, IPv4off) == 0)
+			v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+		else
+			v6addroute(f, tag, addr, mask, gate, 0);
+	} else if(strcmp(cb->f[0], "tag") == 0) {
+		/* the tag lives in the Chan, so there must be one */
+		if(cb->nf < 2 || c == nil)
+			error(Ebadarg);
+
+		a = c->aux;
+		na = newipaux(a->owner, cb->f[1]);
+		c->aux = na;
+		free(a);
+	}
+
+	poperror();
+	free(cb);
+	return n;
+}
--- /dev/null
+++ b/os/ip.original/iprouter.c
@@ -1,0 +1,56 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../ip/ip.h"
+
+IProuter iprouter;
+
+/*
+ * User level routing.  Ip packets we don't know what to do with
+ * come here.
+ *
+ * If the user level router has its queue open, the packet is
+ * prefixed with the IPaddrlen-byte local address of the receiving
+ * interface and passed to that queue; otherwise it is freed.
+ * The iprouter lock is released on every path.
+ * NOTE(review): the not-open path uses freeb, which frees a single
+ * block - confirm packets never arrive here as a block chain.
+ */
+void
+useriprouter(Fs *f, Ipifc *ifc, Block *bp)
+{
+	qlock(&f->iprouter);
+	if(f->iprouter.q != nil){
+		bp = padblock(bp, IPaddrlen);
+		if(bp == nil){
+			/* do not leave the router locked for ever */
+			qunlock(&f->iprouter);
+			return;
+		}
+		ipmove(bp->rp, ifc->lifc->local);
+		qpass(f->iprouter.q, bp);
+	}else
+		freeb(bp);
+	qunlock(&f->iprouter);
+}
+
+/*
+ * Count one more opener of the user level router.  The queue is
+ * created on first use and reopened when the count goes from
+ * zero back to one.
+ */
+void
+iprouteropen(Fs *f)
+{
+	qlock(&f->iprouter);
+	f->iprouter.opens++;
+	if(f->iprouter.q != nil){
+		if(f->iprouter.opens == 1)
+			qreopen(f->iprouter.q);
+	} else
+		f->iprouter.q = qopen(64*1024, 0, 0, 0);
+	qunlock(&f->iprouter);
+}
+
+/*
+ * Drop one opener of the user level router; the queue is
+ * closed (not freed) when the last opener goes away.
+ */
+void
+iprouterclose(Fs *f)
+{
+	qlock(&f->iprouter);
+	if(--f->iprouter.opens == 0)
+		qclose(f->iprouter.q);
+	qunlock(&f->iprouter);
+}
+
+/*
+ * Read up to n bytes of queued packets from the user level router
+ * queue into a; returns what qread returns.
+ * The queue is used without taking the iprouter lock and without a
+ * nil check - presumably only called after iprouteropen; verify.
+ */
+long
+iprouterread(Fs *f, void *a, int n)
+{
+ return qread(f->iprouter.q, a, n);
+}
--- /dev/null
+++ b/os/ip.original/ipv6.c
@@ -1,0 +1,747 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+enum
+{
+ IP4HDR = 20, /* sizeof(Ip4hdr) */
+ IP6HDR = 40, /* sizeof(Ip6hdr) */
+ IP_HLEN4 = 0x05, /* Header length in words */
+ IP_DF = 0x4000, /* Don't fragment */
+ IP_MF = 0x2000, /* More fragments */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
+ IP_MAX = (32*1024), /* Maximum Internet packet size */
+};
+
+/*
+ * IPV6CLASS combines the low nibble of vcf[0] with the high nibble of vcf[1].
+ * NOTE(review): ipoput6 stores tos as vcf[0] |= tos>>4, vcf[1] = tos<<4,
+ * which would be undone by shifts of 4 rather than 2 - confirm intent.
+ */
+#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
+/* the version nibble (still in the high bits) of the header at xp->rp */
+#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
+/*
+ * This sleazy macro is stolen shamelessly from ip.c, see comment there.
+ * It overlays an Ipfrag on the very start of the block's buffer;
+ * ip6reassemble makes sure there is room in front of rp first.
+ */
+#define BKFG(xp) ((Ipfrag*)((xp)->base))
+
+typedef struct IP IP;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
+typedef struct Ipfrag Ipfrag;
+
+Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
+void ipfragfree6(IP*, Fragment6*);
+Fragment6* ipfragallo6(IP*);
+static Block* procxtns(IP *ip, Block *bp, int doreasm);
+int unfraglen(Block *bp, uchar *nexthdr, int setfh);
+Block* procopts(Block *bp);
+
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nstats,
+};
+
+/* printable names of the counters, indexed by the MIB II counter enum */
+static char *statnames[] =
+{
+[Forwarding] "Forwarding",
+[DefaultTTL] "DefaultTTL",
+[InReceives] "InReceives",
+[InHdrErrors] "InHdrErrors",
+[InAddrErrors] "InAddrErrors",
+[ForwDatagrams] "ForwDatagrams",
+[InUnknownProtos] "InUnknownProtos",
+[InDiscards] "InDiscards",
+[InDelivers] "InDelivers",
+[OutRequests] "OutRequests",
+[OutDiscards] "OutDiscards",
+[OutNoRoutes] "OutNoRoutes",
+[ReasmTimeout] "ReasmTimeout",
+[ReasmReqds] "ReasmReqds",
+[ReasmOKs] "ReasmOKs",
+[ReasmFails] "ReasmFails",
+[FragOKs] "FragOKs",
+[FragFails] "FragFails",
+[FragCreates] "FragCreates",
+};
+
+/*
+ * reassembly queue for IPv4 fragments; not used by the code in
+ * this part of the file - presumably mirrors Fragment6 (confirm in ip.c)
+ */
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+/* one reassembly queue: the fragments received so far for one (src, dst, id) */
+struct Fragment6
+{
+ Block* blist; /* fragments, kept sorted by offset */
+ Fragment6* next; /* link in the active or the free list of the IP */
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id; /* identification from the fragment header */
+ ulong age; /* time after which the queue is discarded */
+};
+
+/* per-fragment bookkeeping kept at the base of the Block (see BKFG) */
+struct Ipfrag
+{
+ ushort foff; /* offset of this fragment's data in the packet */
+ ushort flen; /* number of data bytes in this fragment */
+};
+
+/* an instance of IP */
+struct IP
+{
+ ulong stats[Nstats]; /* MIB II counters */
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6; /* protects the two v6 lists below */
+ Fragment6* flisthead6; /* active reassembly queues, newest first */
+ Fragment6* fragfree6; /* unused Fragment6 structures */
+ Ref id6; /* source of fragment identifications in ipoput6 */
+
+ int iprouting; /* true if we route like a gateway */
+};
+
+/*
+ * Send the IPv6 packet bp, whose header is already in place at bp->rp.
+ * gating is set when forwarding a packet for someone else: the
+ * version/class bytes are then left alone and the length is taken
+ * from the header.  ttl is stored as the hop limit; tos is stored as
+ * the traffic class when not gating.  c, if not nil, is used by
+ * v6lookup to cache the route.
+ * Packets larger than the interface allows are fragmented here,
+ * except gated packets on an interface with reassemble unset, for
+ * which icmppkttoobig6 is called instead.
+ * bp is always consumed: it is either handed to the medium's bwrite
+ * or freed.  Returns -1 when there is no route and 0 in every other
+ * case, including the other drops.
+ */
+int
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+{
+ int tentative;
+ Ipifc *ifc;
+ uchar *gate, nexthdr;
+ Ip6hdr *eh;
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
+ Route *r, *sr;
+ Fraghdr6 fraghdr;
+ Block *xp, *nb;
+ IP *ip;
+ int rv = 0;
+
+ ip = f->ip;
+
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)(bp->rp);
+
+ ip->stats[OutRequests]++;
+
+ /* Number of uchars in data and ip header to write */
+ len = blocklen(bp);
+
+ tentative = iptentative(f, eh->src);
+ if(tentative){
+ netlog(f, Logip, "reject tx of packet with tentative src address\n");
+ goto free;
+ }
+
+ if(gating){
+ /* trust the header's payload length, ignoring any trailing bytes */
+ chunk = nhgets(eh->ploadlen);
+ if(chunk > len){
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "short gated packet\n");
+ goto free;
+ }
+ if(chunk + IPV6HDR_LEN < len)
+ len = chunk + IPV6HDR_LEN;
+ }
+
+ if(len >= IP_MAX){
+// print("len > IP_MAX, free\n");
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ goto free;
+ }
+
+ r = v6lookup(f, eh->dst, c);
+ if(r == nil){
+// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ ip->stats[OutNoRoutes]++;
+ netlog(f, Logip, "no interface %I\n", eh->dst);
+ rv = -1;
+ goto free;
+ }
+
+ /* pick the next hop: the destination itself unless the route names a gateway */
+ ifc = r->ifc;
+ if(r->type & (Rifc|Runi))
+ gate = eh->dst;
+ else
+ if(r->type & (Rbcast|Rmulti)) {
+ gate = eh->dst;
+ /* send broadcast/multicast out of the interface owning the source address */
+ sr = v6lookup(f, eh->src, nil);
+ if(sr != nil && (sr->type & Runi))
+ ifc = sr->ifc;
+ }
+ else
+ gate = r->v6.gate;
+
+ if(!gating)
+ eh->vcf[0] = IP_VER6;
+ eh->ttl = ttl;
+ if(!gating) {
+ eh->vcf[0] |= (tos >> 4);
+ eh->vcf[1] = (tos << 4);
+ }
+
+ /* the packet is dropped rather than waiting for the interface lock */
+ if(!canrlock(ifc)) {
+ goto free;
+ }
+
+ if(waserror()){
+ runlock(ifc);
+ nexterror();
+ }
+
+ if(ifc->m == nil) {
+ goto raise;
+ }
+
+ /* If we dont need to fragment just send it */
+ medialen = ifc->maxtu - ifc->m->hsize;
+ if(len <= medialen) {
+ hnputs(eh->ploadlen, len-IPV6HDR_LEN);
+ ifc->m->bwrite(ifc, bp, V6, gate);
+ runlock(ifc);
+ poperror();
+ return 0;
+ }
+
+ if(gating)
+ if(ifc->reassemble <= 0) {
+
+ /* v6 intermediate nodes are not supposed to fragment pkts;
+ we fragment if ifc->reassemble is turned on; an exception
+ needed for nat.
+ */
+
+ ip->stats[OutDiscards]++;
+ icmppkttoobig6(f, ifc, bp);
+ netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ goto raise;
+ }
+
+ /* start v6 fragmentation */
+ /* uflen bytes of headers are repeated in front of every fragment */
+ uflen = unfraglen(bp, &nexthdr, 1);
+ if(uflen > medialen) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ goto raise;
+ }
+
+ /* flen: bytes to be split up; seglen: data per fragment, a multiple of 8 */
+ flen = len - uflen;
+ seglen = (medialen - (uflen + IP6FHDR)) & ~7;
+ if(seglen < 8) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ goto raise;
+ }
+
+ lid = incref(&ip->id6);
+ fraghdr.nexthdr = nexthdr;
+ fraghdr.res = 0;
+ hnputl(fraghdr.id, lid);
+
+ /* advance xp/rp to the first byte after the unfragmentable part */
+ xp = bp;
+ offset = uflen;
+ while (xp != nil && offset && offset >= BLEN(xp)) {
+ offset -= BLEN(xp);
+ xp = xp->next;
+ }
+ xp->rp += offset;
+
+ fragoff = 0;
+ morefrags = 1;
+
+ for(; fragoff < flen; fragoff += seglen) {
+ nb = allocb(uflen + IP6FHDR + seglen);
+
+ /* the last fragment is shorter and has the M flag clear */
+ if(fragoff + seglen >= flen) {
+ seglen = flen - fragoff;
+ morefrags = 0;
+ }
+
+ hnputs(eh->ploadlen, seglen+IP6FHDR);
+ memmove(nb->wp, eh, uflen);
+ nb->wp += uflen;
+
+ hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+ fraghdr.offsetRM[1] |= morefrags;
+ memmove(nb->wp, &fraghdr, IP6FHDR);
+ nb->wp += IP6FHDR;
+
+ /* Copy data */
+ chunk = seglen;
+ while (chunk) {
+ if(!xp) {
+ ip->stats[OutDiscards]++;
+ ip->stats[FragFails]++;
+ freeblist(nb);
+ netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ goto raise;
+ }
+ blklen = chunk;
+ if(BLEN(xp) < chunk)
+ blklen = BLEN(xp);
+ memmove(nb->wp, xp->rp, blklen);
+
+ nb->wp += blklen;
+ xp->rp += blklen;
+ chunk -= blklen;
+ if(xp->rp == xp->wp)
+ xp = xp->next;
+ }
+
+ ifc->m->bwrite(ifc, nb, V6, gate);
+ ip->stats[FragCreates]++;
+ }
+ ip->stats[FragOKs]++;
+
+ /* common exit for paths that hold the interface lock; bp is freed below */
+raise:
+ runlock(ifc);
+ poperror();
+free:
+ freeblist(bp);
+ return rv;
+}
+
+/*
+ * Input side of IPv6: called with a packet bp received on ifc.
+ * Packets that are not for this host are forwarded with ipoput6 if
+ * the stack is routing and dropped otherwise; packets for this host
+ * are reassembled if necessary and handed to the rcv routine of the
+ * protocol named by the header.  bp is consumed in every case.
+ */
+void
+ipiput6(Fs *f, Ipifc *ifc, Block *bp)
+{
+	int hl;
+	int hop, tos;
+	uchar proto;
+	Ip6hdr *h;
+	Proto *p;
+	int notforme;
+	int tentative;
+	uchar v6dst[IPaddrlen];
+	IP *ip;
+	Route *r, *sr;
+
+	ip = f->ip;
+	ip->stats[InReceives]++;
+
+	/*
+	 *  Ensure we have all the header info in the first
+	 *  block.  Make life easier for other protocols by
+	 *  collecting up to the first 64 bytes in the first block.
+	 */
+	if(BLEN(bp) < 64) {
+		hl = blocklen(bp);
+		if(hl < IP6HDR)
+			hl = IP6HDR;
+		if(hl > 64)
+			hl = 64;
+		bp = pullupblock(bp, hl);
+		if(bp == nil)
+			return;
+	}
+
+	h = (Ip6hdr *)(bp->rp);
+
+	memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
+	notforme = ipforme(f, v6dst) == 0;
+	tentative = iptentative(f, v6dst);
+
+	/* only ICMPv6 is accepted on an address that is still tentative */
+	if(tentative && (h->proto != ICMPv6)) {
+		print("tentative addr, drop\n");
+		freeblist(bp);
+		return;
+	}
+
+	/* Check header version */
+	if(BLKIPVER(bp) != IP_VER6) {
+		ip->stats[InHdrErrors]++;
+		/* log the version number itself: the high nibble of vcf[0] */
+		netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>4);
+		freeblist(bp);
+		return;
+	}
+
+	/* route */
+	if(notforme) {
+		if(!ip->iprouting){
+			/* bp may be a chain; free all of it */
+			freeblist(bp);
+			return;
+		}
+		/* don't forward to source's network */
+		sr = v6lookup(f, h->src, nil);
+		r = v6lookup(f, h->dst, nil);
+
+		if(r == nil || sr == r){
+			ip->stats[OutDiscards]++;
+			freeblist(bp);
+			return;
+		}
+
+		/* don't forward if packet has timed out */
+		hop = h->ttl;
+		if(hop < 1) {
+			ip->stats[InHdrErrors]++;
+			icmpttlexceeded6(f, ifc, bp);
+			freeblist(bp);
+			return;
+		}
+
+		/* process headers & reassemble if the interface expects it */
+		bp = procxtns(ip, bp, r->ifc->reassemble);
+
+		if(bp == nil)
+			return;
+
+		ip->stats[ForwDatagrams]++;
+		/* reassembly may have returned a different block */
+		h = (Ip6hdr *) (bp->rp);
+		tos = IPV6CLASS(h);
+		hop = h->ttl;
+		ipoput6(f, bp, 1, hop-1, tos, nil);
+		return;
+	}
+
+	/* reassemble & process headers if needed */
+	bp = procxtns(ip, bp, 1);
+
+	if(bp == nil)
+		return;
+
+	h = (Ip6hdr *) (bp->rp);
+	proto = h->proto;
+	p = Fsrcvpcol(f, proto);
+	if(p != nil && p->rcv != nil) {
+		ip->stats[InDelivers]++;
+		(*p->rcv)(p, ifc, bp);
+		return;
+	}
+
+	ip->stats[InDiscards]++;
+	ip->stats[InUnknownProtos]++;
+	freeblist(bp);
+}
+
+/*
+ * ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
+ *
+ * Release a reassembly queue: free the fragments it holds, clear
+ * its source address and id, unlink it from the active list if it
+ * is there and put it on the free list.
+ */
+void
+ipfragfree6(IP *ip, Fragment6 *frag)
+{
+	Fragment6 **link;
+
+	if(frag->blist != nil)
+		freeblist(frag->blist);
+	frag->blist = nil;
+	frag->id = 0;
+	memset(frag->src, 0, IPaddrlen);
+
+	for(link = &ip->flisthead6; *link != nil; link = &(*link)->next){
+		if(*link == frag){
+			*link = frag->next;
+			break;
+		}
+	}
+
+	frag->next = ip->fragfree6;
+	ip->fragfree6 = frag;
+}
+
+/*
+ * ipfragallo6 - copied from ipfragalloc4
+ *
+ * Take a reassembly queue from the free list, recycling the last
+ * entry of the active list while the free list is empty, and put it
+ * at the head of the active list with a fresh expiry time.
+ * Called with fraglock6 held (ip6reassemble does so).
+ */
+Fragment6*
+ipfragallo6(IP *ip)
+{
+	Fragment6 *fr, *tail;
+
+	while(ip->fragfree6 == nil){
+		/* free last entry on fraglist */
+		tail = ip->flisthead6;
+		while(tail->next != nil)
+			tail = tail->next;
+		ipfragfree6(ip, tail);
+	}
+
+	fr = ip->fragfree6;
+	ip->fragfree6 = fr->next;
+	fr->next = ip->flisthead6;
+	ip->flisthead6 = fr;
+	fr->age = NOW + 30000;
+
+	return fr;
+}
+
+/*
+ * Deal with the extension headers of an incoming packet: if a
+ * fragment header follows the unfragmentable part and doreasm is
+ * set, feed the packet to ip6reassemble; then let procopts look at
+ * packets that carry destination options or other extension headers.
+ * Returns the packet to carry on with, or nil if it was absorbed by
+ * reassembly.
+ */
+static Block*
+procxtns(IP *ip, Block *bp, int doreasm)
+{
+	int ufl;
+	uchar nxt;
+
+	ufl = unfraglen(bp, &nxt, 0);
+	if(nxt == FH && doreasm){
+		bp = ip6reassemble(ip, ufl, bp, (Ip6hdr*)bp->rp);
+		if(bp == nil)
+			return nil;
+		ufl = unfraglen(bp, &nxt, 0);
+	}
+
+	if(nxt == DOH || ufl > IP6HDR)
+		bp = procopts(bp);
+
+	return bp;
+}
+
+
+/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ *
+ * Note that the packet is also modified when setfh == 0: if the header
+ * following the unfragmentable part is a fragment header, the nexthdr
+ * field that announced it is overwritten with the fragment header's own
+ * nexthdr byte.
+ * All headers are read through bp->rp without checking them against the
+ * length of the block.
+ */
+int
+unfraglen(Block *bp, uchar *nexthdr, int setfh)
+{
+ uchar *p, *q;
+ int ufl, hs;
+
+ /* q tracks the nexthdr field of the last header seen, p the header after it */
+ p = bp->rp;
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ *nexthdr = *q;
+ ufl = IP6HDR;
+ p += ufl;
+
+ for(;;) {
+ if(*nexthdr == HBH || *nexthdr == RH) {
+ *nexthdr = *p;
+ /* the length byte counts 8-byte units beyond the first 8 bytes */
+ hs = ((int)*(p+1) + 1) * 8;
+ ufl += hs;
+ q = p;
+ p += hs;
+ }
+ else
+ break;
+ }
+
+ if(*nexthdr == FH)
+ *q = *p;
+
+ if(setfh)
+ *q = FH;
+
+ return ufl;
+}
+
+/* placeholder for option processing: returns the packet unchanged */
+Block*
+procopts(Block *bp)
+{
+ return bp;
+}
+
+/*
+ * Reassemble IPv6 fragments.
+ * bp is a packet whose fragment header sits uflen bytes after bp->rp
+ * and ih is its IPv6 header.  The fragment is added to the queue
+ * for its (src, dst, id); queues that have expired are discarded
+ * while searching.  Returns the complete packet once every fragment
+ * up to the one without the M flag has arrived, the packet itself if
+ * it is not really fragmented, and nil otherwise (the fragment was
+ * queued or dropped).
+ * Fragment lengths are taken from the header without being checked
+ * against the block - NOTE(review): worth validating.
+ */
+Block*
+ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+{
+	int fend, offset;
+	uint id;
+	Fragment6 *f, *fnext;
+	Fraghdr6 *fraghdr;
+	uchar src[IPaddrlen], dst[IPaddrlen];
+	Block *bl, **l, *last, *prev;
+	int ovlap, len, fragsize, pktposn;
+
+	/*
+	 *  block lists are too hard, pullupblock into a single block.
+	 *  this is done before any pointer into the packet is taken,
+	 *  since pullupblock may hand back a different block.
+	 */
+	if(bp->next){
+		bp = pullupblock(bp, blocklen(bp));
+		if(bp == nil)
+			return nil;
+		ih = (Ip6hdr *)(bp->rp);
+	}
+
+	fraghdr = (Fraghdr6 *) (bp->rp + uflen);
+	memmove(src, ih->src, IPaddrlen);
+	memmove(dst, ih->dst, IPaddrlen);
+	id = nhgetl(fraghdr->id);
+	offset = nhgets(fraghdr->offsetRM) & ~7;
+
+	qlock(&ip->fraglock6);
+
+	/*
+	 *  find a reassembly queue for this fragment
+	 */
+	for(f = ip->flisthead6; f; f = fnext){
+		fnext = f->next;
+		if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+			break;
+		if(f->age < NOW){
+			ip->stats[ReasmTimeout]++;
+			ipfragfree6(ip, f);
+		}
+	}
+
+	/*
+	 *  if this isn't a fragmented packet, accept it
+	 *  and get rid of any fragments that might go
+	 *  with it.
+	 */
+	if(nhgets(fraghdr->offsetRM)==0) {	// first frag is also the last
+		if(f != nil) {
+			ipfragfree6(ip, f);
+			ip->stats[ReasmFails]++;
+		}
+		qunlock(&ip->fraglock6);
+		return bp;
+	}
+
+	/* make room for the Ipfrag that BKFG keeps at the base of the block */
+	if(bp->base+sizeof(Ipfrag) >= bp->rp){
+		bp = padblock(bp, sizeof(Ipfrag));
+		bp->rp += sizeof(Ipfrag);
+		/* padblock may have moved the packet to a new block */
+		ih = (Ip6hdr *)(bp->rp);
+	}
+
+	BKFG(bp)->foff = offset;
+	BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+
+	/* First fragment allocates a reassembly queue */
+	if(f == nil) {
+		f = ipfragallo6(ip);
+		f->id = id;
+		memmove(f->src, src, IPaddrlen);
+		memmove(f->dst, dst, IPaddrlen);
+
+		f->blist = bp;
+
+		qunlock(&ip->fraglock6);
+		ip->stats[ReasmReqds]++;
+		return nil;
+	}
+
+	/*
+	 *  find the new fragment's position in the queue
+	 */
+	prev = nil;
+	l = &f->blist;
+	bl = f->blist;
+	while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+		prev = bl;
+		l = &bl->next;
+		bl = bl->next;
+	}
+
+	/* Check overlap of a previous fragment - trim away as necessary */
+	if(prev) {
+		ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+		if(ovlap > 0) {
+			/* the new fragment adds nothing: drop it */
+			if(ovlap >= BKFG(bp)->flen) {
+				freeblist(bp);
+				qunlock(&ip->fraglock6);
+				return nil;
+			}
+			BKFG(prev)->flen -= ovlap;
+		}
+	}
+
+	/* Link onto assembly queue */
+	bp->next = *l;
+	*l = bp;
+
+	/* Check to see if succeeding segments overlap */
+	if(bp->next) {
+		l = &bp->next;
+		fend = BKFG(bp)->foff + BKFG(bp)->flen;
+
+		/* Take completely covered segments out */
+
+		while(*l) {
+			ovlap = fend - BKFG(*l)->foff;
+
+			if(ovlap <= 0)
+				break;
+			if(ovlap < BKFG(*l)->flen) {
+				BKFG(*l)->flen -= ovlap;
+				BKFG(*l)->foff += ovlap;
+				/* move up ih hdrs */
+				memmove((*l)->rp + ovlap, (*l)->rp, uflen);
+				(*l)->rp += ovlap;
+				break;
+			}
+			last = (*l)->next;
+			(*l)->next = nil;
+			freeblist(*l);
+			*l = last;
+		}
+	}
+
+	/*
+	 *  look for a complete packet.  if we get to a fragment
+	 *  with the trailing bit of fraghdr->offsetRM[1] set, we're done.
+	 */
+	pktposn = 0;
+	for(bl = f->blist; bl; bl = bl->next) {
+		/* a hole: the packet is not complete yet */
+		if(BKFG(bl)->foff != pktposn)
+			break;
+
+		fraghdr = (Fraghdr6 *) (bl->rp + uflen);
+		if((fraghdr->offsetRM[1] & 1) == 0) {
+
+			bl = f->blist;
+
+			/* get rid of frag header in first fragment */
+
+			memmove(bl->rp + IP6FHDR, bl->rp, uflen);
+			bl->rp += IP6FHDR;
+			len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
+			bl->wp = bl->rp + len + IP6HDR;
+
+			/* Pullup all the fragment headers and
+			 * return a complete packet
+			 */
+			for(bl = bl->next; bl; bl = bl->next) {
+				fragsize = BKFG(bl)->flen;
+				len += fragsize;
+				bl->rp += uflen + IP6FHDR;
+				bl->wp = bl->rp + fragsize;
+			}
+
+			/* detach the chain so that ipfragfree6 does not free it */
+			bl = f->blist;
+			f->blist = nil;
+			ipfragfree6(ip, f);
+			ih = (Ip6hdr*)(bl->rp);
+			hnputs(ih->ploadlen, len);
+			qunlock(&ip->fraglock6);
+			ip->stats[ReasmOKs]++;
+			return bl;
+		}
+		pktposn += BKFG(bl)->flen;
+	}
+	qunlock(&ip->fraglock6);
+	return nil;
+}
+
--- /dev/null
+++ b/os/ip.original/ipv6.h
@@ -1,0 +1,186 @@
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+/* rfc 3513 defines the address prefixes */
+#define isv6mcast(addr) ((addr)[0] == 0xff)
+#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
+#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
+#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
+
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+
+/* from RFC 2460 */
+
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
+
+struct Ip6hdr {
+ uchar vcf[4]; // version:4, traffic class:8, flow label:20
+ uchar ploadlen[2]; // payload length: packet length - 40
+ uchar proto; // next header type
+ uchar ttl; // hop limit
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+};
+
+struct Opthdr {
+ uchar nexthdr;
+ uchar len;
+};
+
+struct Routinghdr {
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
+
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; // Offset, Res, M flag
+ uchar id[4];
+};
+
+
+enum { /* Header Types */
+ HBH = 0, //?
+ ICMP = 1,
+ IGMP = 2,
+ GGP = 3,
+ IPINIP = 4,
+ ST = 5,
+ TCP = 6,
+ UDP = 17,
+ ISO_TP4 = 29,
+ RH = 43,
+ FH = 44,
+ IDRP = 45,
+ RSVP = 46,
+ AH = 51,
+ ESP = 52,
+ ICMPv6 = 58,
+ NNH = 59,
+ DOH = 60,
+ ISO_IP = 80,
+ IGRP = 88,
+ OSPF = 89,
+
+ Maxhdrtype = 256,
+};
+
+
+enum {
+ // multicast flgs and scop
+
+ well_known_flg = 0,
+ transient_flg = 1,
+
+ node_local_scop = 1,
+ link_local_scop = 2,
+ site_local_scop = 5,
+ org_local_scop = 8,
+ global_scop = 14,
+
+ // various prefix lengths
+
+ SOLN_PREF_LEN = 13,
+
+ // icmpv6 unreach codes
+ icmp6_no_route = 0,
+ icmp6_ad_prohib = 1,
+ icmp6_unassigned = 2,
+ icmp6_adr_unreach = 3,
+ icmp6_port_unreach = 4,
+ icmp6_unkn_code = 5,
+
+ // various flags & constants
+
+ v6MINTU = 1280,
+ HOP_LIMIT = 255,
+ ETHERHDR_LEN = 14,
+ IPV6HDR_LEN = 40,
+ IPV4HDR_LEN = 20,
+
+ // option types
+
+ SRC_LLADDRESS = 1,
+ TARGET_LLADDRESS = 2,
+ PREFIX_INFO = 3,
+ REDIR_HEADER = 4,
+ MTU_OPTION = 5,
+
+ SRC_UNSPEC = 0,
+ SRC_UNI = 1,
+ TARG_UNI = 2,
+ TARG_MULTI = 3,
+
+ t_unitent = 1,
+ t_uniproxy = 2,
+ t_unirany = 3,
+
+ // Router constants (all times in milliseconds)
+
+ MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000,
+ MAX_INITIAL_RTR_ADVERTISEMENTS = 3,
+ MAX_FINAL_RTR_ADVERTISEMENTS = 3,
+ MIN_DELAY_BETWEEN_RAS = 3000,
+ MAX_RA_DELAY_TIME = 500,
+
+ // Host constants
+
+ MAX_RTR_SOLICITATION_DELAY = 1000,
+ RTR_SOLICITATION_INTERVAL = 4000,
+ MAX_RTR_SOLICITATIONS = 3,
+
+ // Node constants
+
+ MAX_MULTICAST_SOLICIT = 3,
+ MAX_UNICAST_SOLICIT = 3,
+ MAX_ANYCAST_DELAY_TIME = 1000,
+ MAX_NEIGHBOR_ADVERTISEMENT = 3,
+ REACHABLE_TIME = 30000,
+ RETRANS_TIMER = 1000,
+ DELAY_FIRST_PROBE_TIME = 5000,
+
+};
+
+extern void ipv62smcast(uchar *, uchar *);
+extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
+
+extern uchar v6allnodesN[IPaddrlen];
+extern uchar v6allnodesL[IPaddrlen];
+extern uchar v6allroutersN[IPaddrlen];
+extern uchar v6allroutersL[IPaddrlen];
+extern uchar v6allnodesNmask[IPaddrlen];
+extern uchar v6allnodesLmask[IPaddrlen];
+extern uchar v6allroutersS[IPaddrlen];
+extern uchar v6solicitednode[IPaddrlen];
+extern uchar v6solicitednodemask[IPaddrlen];
+extern uchar v6Unspecified[IPaddrlen];
+extern uchar v6loopback[IPaddrlen];
+extern uchar v6loopbackmask[IPaddrlen];
+extern uchar v6linklocal[IPaddrlen];
+extern uchar v6linklocalmask[IPaddrlen];
+extern uchar v6sitelocal[IPaddrlen];
+extern uchar v6sitelocalmask[IPaddrlen];
+extern uchar v6glunicast[IPaddrlen];
+extern uchar v6multicast[IPaddrlen];
+extern uchar v6multicastmask[IPaddrlen];
+
+extern int v6llpreflen;
+extern int v6slpreflen;
+extern int v6lbpreflen;
+extern int v6mcpreflen;
+extern int v6snpreflen;
+extern int v6aNpreflen;
+extern int v6aLpreflen;
+
+extern int ReTransTimer;
--- /dev/null
+++ b/os/ip.original/kernel.h
@@ -1,0 +1,10 @@
+extern int kclose(int);
+extern int kdial(char*, char*, char*, int*);
+extern int kannounce(char*, char*);
+extern void kerrstr(char*);
+extern void kgerrstr(char*);
+extern int kopen(char*, int);
+extern long kread(int, void*, long);
+extern long kseek(int, vlong, int);
+extern long kwrite(int, void*, long);
+extern void kwerrstr(char *, ...);
--- /dev/null
+++ b/os/ip.original/loopbackmedium.c
@@ -1,0 +1,121 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum
+{
+ Maxtu= 16*1024,
+};
+
+typedef struct LB LB;
+struct LB
+{
+ Proc *readp;
+ Queue *q;
+ Fs *f;
+};
+
+static void loopbackread(void *a);
+
+static void
+loopbackbind(Ipifc *ifc, int, char**)
+{
+	LB *lb;
+
+	/* per-interface state; loopbackunbind frees it through ifc->arg */
+	lb = smalloc(sizeof(*lb));
+	lb->f = ifc->conv->p->f;
+	/* TO DO: make queue size a function of kernel memory */
+	lb->q = qopen(128*1024, Qmsg, nil, nil);
+	ifc->arg = lb;
+	ifc->mbps = 1000;
+	/* the reader drains lb->q and hands each block to ipiput4 */
+	kproc("loopbackread", loopbackread, ifc, 0);
+}
+
+static void
+loopbackunbind(Ipifc *ifc)
+{
+	LB *lb = ifc->arg;
+
+	/* poke the reader kproc, if it has recorded itself in lb->readp */
+	if(lb->readp)
+		postnote(lb->readp, 1, "unbind", 0);
+	/* wait for reader to die: its error handler zeroes lb->readp */
+	while(lb->readp != 0)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	/* clean up */
+	qfree(lb->q);
+	free(lb);
+}
+
+static void
+loopbackbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	LB *lb;
+
+	lb = ifc->arg;
+	if(qpass(lb->q, bp) < 0)	/* hand the block to the reader's queue */
+		ifc->outerr++;
+	ifc->out++;	/* counted whether or not qpass succeeded */
+}
+
+static void
+loopbackread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	LB *lb;
+
+	ifc = a;
+	lb = ifc->arg;
+	lb->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		lb->readp = 0;	/* loopbackunbind polls this to know we are gone */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = qbread(lb->q, Maxtu);
+		if(bp == nil)
+			continue;
+		ifc->in++;
+		if(!canrlock(ifc)){
+			freeb(bp);	/* read lock not available: drop the block */
+			continue;
+		}
+		if(waserror()){
+			runlock(ifc);	/* give the lock back before passing the error on */
+			nexterror();
+		}
+		if(ifc->lifc == nil)
+			freeb(bp);	/* no logical interface configured: drop */
+		else
+			ipiput4(lb->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+Medium loopbackmedium =
+{
+.hsize= 0,
+.mintu= 0,
+.maxtu= Maxtu,
+.maclen= 0,
+.name= "loopback",
+.bind= loopbackbind,
+.unbind= loopbackunbind,
+.bwrite= loopbackbwrite,
+};
+
+void
+loopbackmediumlink(void)
+{
+ addipmedium(&loopbackmedium);
+}
--- /dev/null
+++ b/os/ip.original/nat.c
@@ -1,0 +1,549 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+typedef struct NatProto NatProto;
+typedef struct NatAddr NatAddr;
+
+/*
+ * NAT.
+ */
+struct Nat
+{
+ uchar src[IPv4addrlen]; /* Source address */
+ uchar sport[2]; /* Source port */
+ uchar lport[2]; /* Local port */
+ uchar proto; /* Protocol */
+ long time; /* Time */
+ Conv *conv; /* Conversation */
+ Nat *next; /* Next node */
+};
+
+/*
+ * Protocol list.
+ */
+struct NatProto
+{
+ uchar proto; /* Protocol */
+ int sport; /* Source port offset */
+ int dport; /* Destination port offset */
+ int cksum; /* Checksum offset */
+ int timeout; /* Timeout */
+};
+
+/*
+ * Address list.
+ */
+struct NatAddr
+{
+ uchar src[IPaddrlen]; /* Source address */
+ uchar mask[IPaddrlen]; /* Source address mask */
+ uchar net[IPaddrlen]; /* Source network address */
+ Iplifc *dst; /* Destination interface */
+ NatAddr *next; /* Next node */
+};
+
+static Nat *head = nil;
+static NatAddr *addrhead = nil;
+
+/*
+ * Timeouts for ICMP, TCP and UDP are respectively confirmed
+ * in RFC 5508, RFC 5382 and RFC 4787.
+ */
+static NatProto prototab[] =
+{
+ { 1, 4, 4, 2, 60*1000 }, /* ICMP */
+ { 6, 0, 2, 16, (2*60*60+4*60)*1000 }, /* TCP */
+ { 17, 0, 2, 6, 2*60*1000 }, /* UDP */
+ { 40, 6, 8, 0, 10*30*1000 }, /* IL */
+ { 255, 0, 2, 6, 2*60*1000 }, /* RUDP */
+ { 0 }
+};
+
+NatProto* parseproto(uchar);
+void natprepend(Nat*);
+Nat* natexistout(uchar*, uchar, uchar*);
+Nat* natexistin(uchar, uchar*);
+int natdelete(uchar*, uchar, uchar*);
+int natpurge(uchar);
+Nat* natlport(Proto*, Ip4hdr*, uchar*);
+int natgc(uchar);
+void checksumadjust(uchar*, uchar*, int, uchar*, int);
+Iplifc* natonifco(Ipifc*, Ip4hdr*);
+Iplifc* natonifci(Ipifc*);
+void nataddrprepend(NatAddr*);
+NatAddr* nataddrexist(uchar*, uchar*, Iplifc*);
+int addnataddr(uchar*, uchar*, Iplifc*);
+int removenataddr(uchar*, uchar*, Iplifc*);
+void shownataddr(void);
+void flushnataddr(void);
+
+/*
+ * Look proto up in prototab; return nil if the protocol is not listed.
+ */
+NatProto*
+parseproto(uchar proto)
+{
+	NatProto *ent;
+
+	ent = prototab;
+	while(ent->proto != 0 && ent->proto != proto)
+		ent++;
+
+	return ent->proto != 0 ? ent : nil;
+}
+
+/*
+ * Output NAT: rewrite the source address and port of b when natonifco says so.
+ * Return 0 if left alone or translated, -1 if no mapping or unknown protocol.
+ */
+int
+nato(Block *b, Ipifc *ifc, Fs *f)
+{
+	Nat *n;		/* NAT table */
+	NatProto *np;	/* Protocol list */
+	Iplifc *lifc;	/* Logical interface */
+	Ip4hdr *h;	/* Source IPv4 header */
+	Proto *p;	/* New protocol */
+	uchar *laddr;	/* Local address on Iplifc */
+	uchar *sport;	/* Source port */
+	uchar *cksum;	/* Source checksum */
+
+	h = (Ip4hdr*)(b->rp);
+
+	/* Verify on which logical interface NAT is enabled,
+		and if this source address must be translated */
+	if((lifc=natonifco(ifc, h)) == nil)
+		return 0;
+
+	laddr = lifc->local+IPv4off;
+	p = Fsrcvpcolx(f, h->proto);	/* passed to natlport without a nil check here */
+
+	if(ip4cmp(h->src, laddr) != 0){
+		if((np=parseproto(h->proto)) != nil){
+			/* Protocol layer; NOTE(review): offsets assume the header is exactly sizeof(Ip4hdr) -- confirm IP options cannot occur */
+			sport = (b->rp)+sizeof(Ip4hdr)+np->sport;
+			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
+			if((n = natlport(p, h, sport)) == nil)
+				return -1;
+			memmove(sport, n->lport, 2);
+			checksumadjust(cksum, n->sport, 2, n->lport, 2);	/* NOTE(review): also applied when the stored checksum is zero -- confirm for UDP */
+			if(np->proto != 1)
+				/* ICMP checksum doesn't include IP header */
+				checksumadjust(cksum, n->src, IPv4addrlen,
+					laddr, IPv4addrlen);
+			/* IP layer */
+			ip4move(h->src, laddr);
+			checksumadjust(h->cksum, n->src, IPv4addrlen,
+				h->src, IPv4addrlen);
+			return 0;
+		}else{
+			netlog(f, Lognat, "nat: unknown protocol %d\n", h->proto);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Input NAT: if (proto, destination port) matches a mapping, restore its source.
+ */
+void
+nati(Block *b, Ipifc *ifc)
+{
+	Nat *n;		/* NAT table */
+	NatProto *np;	/* Protocol list */
+	Ip4hdr *h;	/* Source IPv4 header */
+	uchar *lport;	/* Our local port, and dst port for the packet */
+	uchar *cksum;	/* Source checksum */
+
+	h = (Ip4hdr*)(b->rp);
+
+	/* Verify if NAT is enabled on this interface */
+	if(natonifci(ifc) == nil)
+		return;
+
+	if((np=parseproto(h->proto)) != nil){
+		lport = (b->rp)+sizeof(Ip4hdr)+np->dport;
+		if((n=natexistin(h->proto, lport)) != nil){
+			/* Protocol layer: fix the checksum first, while lport still holds the old value */
+			cksum = (b->rp)+sizeof(Ip4hdr)+np->cksum;
+			checksumadjust(cksum, lport, 2, n->sport, 2);
+			memmove(lport, n->sport, 2);
+			if(np->proto != 1)
+				/* ICMP checksum doesn't include IP header */
+				checksumadjust(cksum, h->dst, IPv4addrlen,
+					n->src, IPv4addrlen);
+			/* IP layer: same order, checksum before the address is overwritten */
+			checksumadjust(h->cksum, h->dst, IPv4addrlen,
+				n->src, IPv4addrlen);
+			ip4move(h->dst, n->src);
+		}
+	}
+}
+
+/*
+ * Add Nat to Nat list.
+ */
+void
+natprepend(Nat *n)
+{
+ n->next = head;
+ head = n;
+}
+
+/*
+ * Find the mapping for an outgoing (src, sport, proto); refresh its timestamp.
+ */
+Nat*
+natexistout(uchar *src, uchar proto, uchar *sport)
+{
+	Nat *ent;	/* Current node */
+
+	for(ent = head; ent != nil; ent = ent->next){
+		if(ip4cmp(src, ent->src) != 0)
+			continue;
+		if(memcmp(sport, ent->sport, 2) != 0 || proto != ent->proto)
+			continue;
+		ent->time = NOW;
+		return ent;
+	}
+	return nil;
+}
+
+/*
+ * Find the mapping for an incoming (proto, lport); refresh its timestamp.
+ */
+Nat*
+natexistin(uchar proto, uchar *lport)
+{
+	Nat *ent;	/* Current node */
+
+	for(ent = head; ent != nil; ent = ent->next){
+		if(memcmp(lport, ent->lport, 2) != 0 || proto != ent->proto)
+			continue;
+		ent->time = NOW;
+		return ent;
+	}
+
+	return nil;
+}
+
+/*
+ * Remove the mapping for (src, sport, proto) and close its conversation.
+ * Return -1 if it doesn't exist.
+ */
+int
+natdelete(uchar src[IPv4addrlen], uchar proto, uchar sport[2])
+{
+	Nat **link;	/* Link that points at the current node */
+	Nat *ent;	/* Current node */
+
+	link = &head;
+	while((ent = *link) != nil){
+		if(ip4cmp(src, ent->src) == 0 &&
+			memcmp(sport, ent->sport, 2) == 0 &&
+			proto == ent->proto)
+			break;
+		link = &ent->next;
+	}
+
+	if(ent == nil)
+		return -1;
+
+	/* splice the node out, whether it was first or further down */
+	*link = ent->next;
+
+	closeconv(ent->conv);
+	free(ent);
+	return 0;
+}
+
+/*
+ * Purge every mapping of the given protocol; return how many were removed.
+ */
+int
+natpurge(uchar proto)
+{
+	Nat **l, *c;	/* Link being examined, current node */
+	int n;		/* Number of purged connections */
+
+	for(n = 0, l = &head; (c = *l) != nil; )
+		if(c->proto == proto){
+			*l = c->next;	/* unlink before releasing */
+			closeconv(c->conv);
+			free(c);
+			n++;
+		}else
+			l = &c->next;	/* other protocols stay listed, not dropped unfreed */
+	return n;
+}
+
+/*
+ * Return the mapping for this flow, creating one (and its Conv) if necessary.
+ */
+Nat*
+natlport(Proto *p, Ip4hdr *h, uchar *sport)
+{
+	Nat *n;		/* New NAT node */
+	Conv *s;	/* New conversation */
+
+	if((n=natexistout(h->src, h->proto, sport)) != nil || p == nil)
+		return n;	/* existing mapping, or nil when there is no Proto to clone from */
+	qlock(p);
+	s = Fsprotoclone(p, "network");
+	qunlock(p);
+	if(s == nil)
+		error(Enodev);
+	setlport(s);
+	if((n = malloc(sizeof(Nat))) == nil){
+		closeconv(s);	/* don't strand the Conv that was just cloned */
+		return nil;
+	}
+	ip4move(n->src, h->src);
+	memmove(n->sport, sport, 2);
+	memmove(n->lport, &s->lport, 2);	/* NOTE(review): copies s->lport as stored in memory -- confirm byte order */
+	n->proto = h->proto;
+	n->time = NOW;
+	n->conv = s;
+	natprepend(n);
+	return n;
+}
+
+/*
+ * Nat list garbage collector: drop idle mappings; purge proto if none of its own expired.
+ */
+int
+natgc(uchar proto){
+	Nat *p;		/* Precedent node */
+	Nat *c;		/* Current node */
+	NatProto *np;	/* Protocol list */
+	int n;		/* Number of garbage collected connections */
+
+	n = 0;
+	p = nil;
+	c = head;
+
+	while(c != nil){
+		/* age each entry by its own protocol's timeout; unknown protocols are dropped */
+		np = parseproto(c->proto);
+		if(np == nil || NOW - c->time > np->timeout){
+			if(p == nil){
+				head = head->next;
+				if(proto == c->proto)
+					n++;
+				closeconv(c->conv);
+				free(c);
+				p = nil;
+				c = head;
+			}else{
+				p->next = c->next;
+				if(proto == c->proto)
+					n++;
+				closeconv(c->conv);
+				free(c);
+				c = p->next;
+			}
+		}else{
+			p = c;
+			c = c->next;
+		}
+	}
+
+	if(n == 0)	/* Prevent Conv saturation */
+		n = natpurge(proto);
+
+	return n;
+}
+
+/*
+ * Function checksumadjust from RFC 3022: patch chksum for optr[olen] -> nptr[nlen].
+ */
+void
+checksumadjust(uchar *chksum, uchar *optr, int olen, uchar *nptr, int nlen)
+{
+	long x, old, new;
+
+	x=chksum[0]*256+chksum[1];	/* stored checksum, high byte first */
+	x=~x & 0xffff;	/* undo the complement to get the 16-bit sum */
+	while(olen){	/* steps by 2 and stops only at 0, so olen must be even */
+		old=optr[0]*256+optr[1];
+		optr+=2;
+		x-=old & 0xffff;	/* take the old word out of the sum */
+		if(x<=0){
+			x--;
+			x&=0xffff;
+		}
+		olen-=2;
+	}
+	while(nlen){	/* likewise nlen must be even */
+		new=nptr[0]*256+nptr[1];
+		nptr+=2;
+		x+=new & 0xffff;	/* add the new word in */
+		if(x & 0x10000){	/* carry out of 16 bits is folded back in */
+			x++;
+			x&=0xffff;
+		}
+		nlen-=2;
+	}
+	x=~x & 0xffff;	/* complement again and store back, high byte first */
+	chksum[0]=x/256;
+	chksum[1]=x & 0xff;
+}
+
+/*
+ * Add NatAddr to NatAddr list.
+ */
+void
+nataddrprepend(NatAddr *na)
+{
+ na->next = addrhead;
+ addrhead = na;
+}
+
+/*
+ * Return the NatAddr matching (src, mask, dst), or nil if there is none.
+ */
+NatAddr*
+nataddrexist(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *ent;	/* Current node */
+
+	for(ent = addrhead; ent != nil; ent = ent->next){
+		if(ipcmp(src, ent->src) != 0 || ipcmp(mask, ent->mask) != 0)
+			continue;
+		if(dst == ent->dst)
+			return ent;
+	}
+	return nil;
+}
+
+/*
+ * Create a new NatAddr.
+ * Return -1 if it already exists or cannot be allocated.
+ */
+int
+addnataddr(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *na;		/* New address node */
+	uchar net[IPaddrlen];	/* Network address */
+
+	maskip(src, mask, net);
+
+	if(nataddrexist(src, mask, dst) != nil)
+		return -1;
+
+	na = malloc(sizeof(NatAddr));
+	if(na == nil)	/* out of memory: leave the list untouched */
+		return -1;
+	ipmove(na->src, src);
+	ipmove(na->mask, mask);
+	ipmove(na->net, net);
+	na->dst = dst;
+	nataddrprepend(na);
+	return 0;
+}
+
+/*
+ * Remove a NatAddr and release its memory.
+ * Return -1 if it doesn't exist.
+ */
+int
+removenataddr(uchar *src, uchar *mask, Iplifc *dst)
+{
+	NatAddr *c;	/* Current node */
+	NatAddr *p;	/* Precedent node */
+
+	for(p=nil, c=addrhead; c!=nil; p=c, c=c->next)
+		if(ipcmp(src, c->src) == 0 &&
+			ipcmp(mask, c->mask) == 0 &&
+			dst == c->dst)
+			break;
+
+	if(c == nil)
+		return -1;
+
+	if(p == nil)
+		addrhead = addrhead->next;
+	else
+		p->next = c->next;
+	free(c);	/* malloc'd by addnataddr; unlinking alone leaked it */
+	return 0;
+}
+
+/*
+ * Display NatAddr list.
+ */
+void
+shownataddr(void)
+{
+ NatAddr *c; /* Current node */
+
+ for(c=addrhead; c!=nil; c=c->next)
+ print("%I %V %I\n", c->src, c->mask+IPv4off, c->dst->local);
+}
+
+/*
+ * Flush NatAddr list.
+ */
+void
+flushnataddr(void)
+{
+ NatAddr *c; /* Current node */
+
+ while((c=addrhead) != nil){
+ addrhead = addrhead->next;
+ free(c);
+ }
+}
+
+/*
+ * Return logical interface if NAT is enabled on this interface,
+ * and the source address must be translated.
+ */
+Iplifc*
+natonifco(Ipifc *ifc, Ip4hdr* h)
+{
+ NatAddr *na; /* Address list */
+ Iplifc *lifc; /* Logical interface */
+ uchar src[IPaddrlen]; /* Source address */
+ uchar net[IPaddrlen]; /* Source network address */
+
+ for(lifc=ifc->lifc; lifc!=nil; lifc=lifc->next)
+ for(na=addrhead; na; na=na->next)
+ if(lifc == na->dst){
+ /* NAT enabled on this logical interface */
+ v4tov6(src, h->src);
+ maskip(src, na->mask, net);
+ if(ipcmp(net, na->net) == 0)
+ /* Source address must be translated */
+ return lifc;
+ }
+
+ return nil;
+}
+
+/*
+ * Return the first logical interface of ifc that a NatAddr names, else nil.
+ */
+Iplifc*
+natonifci(Ipifc *ifc)
+{
+	NatAddr *ent;	/* Address list cursor */
+	Iplifc *lifc;	/* Logical interface */
+
+	for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+		ent = addrhead;
+		while(ent != nil && ent->dst != lifc)
+			ent = ent->next;
+		if(ent != nil)
+			return lifc;	/* NAT enabled on this logical interface */
+	}
+	return nil;
+}
--- /dev/null
+++ b/os/ip.original/netdevmedium.c
@@ -1,0 +1,153 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+static void netdevbind(Ipifc *ifc, int argc, char **argv);
+static void netdevunbind(Ipifc *ifc);
+static void netdevbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void netdevread(void *a);
+
+typedef struct Netdevrock Netdevrock;
+struct Netdevrock
+{
+ Fs *f; /* file system we belong to */
+ Proc *readp; /* reading process */
+ Chan *mchan; /* Data channel */
+};
+
+Medium netdevmedium =
+{
+.name= "netdev",
+.hsize= 0,
+.mintu= 0,
+.maxtu= 64000,
+.maclen= 0,
+.bind= netdevbind,
+.unbind= netdevunbind,
+.bwrite= netdevbwrite,
+.unbindonclose= 0,
+};
+
+/*
+ *  called to bind an IP ifc to a generic network device
+ *  called with ifc qlock'd; argv[2] names the device file to open
+ */
+static void
+netdevbind(Ipifc *ifc, int argc, char **argv)
+{
+	Chan *mchan;
+	Netdevrock *er;
+
+	/* argv[2] is used below, so three fields are the minimum */
+	if(argc < 3)
+		error(Ebadarg);
+	mchan = namec(argv[2], Aopen, ORDWR, 0);
+
+	er = smalloc(sizeof(*er));
+	er->mchan = mchan;
+	er->f = ifc->conv->p->f;
+
+	ifc->arg = er;
+
+	kproc("netdevread", netdevread, ifc, 0);
+}
+
+/*
+ *  called with ifc wlock'd; stops the reader and frees the rock in ifc->arg
+ */
+static void
+netdevunbind(Ipifc *ifc)
+{
+	Netdevrock *er = ifc->arg;
+
+	if(er->readp != nil)
+		postnote(er->readp, 1, "unbind", 0);
+
+	/* wait for readers to die: netdevread clears er->readp before exiting */
+	while(er->readp != nil)
+		tsleep(&up->sleep, return0, 0, 300);
+
+	if(er->mchan != nil)
+		cclose(er->mchan);
+
+	free(er);
+}
+
+/*
+ *  called by ipoput with a single block to write
+ */
+static void
+netdevbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	Netdevrock *er = ifc->arg;
+
+	if(bp->next)
+		bp = concatblock(bp);	/* the device gets one contiguous block */
+	if(BLEN(bp) < ifc->mintu)
+		bp = adjustblock(bp, ifc->mintu);	/* bring short packets up to mintu */
+
+	devtab[er->mchan->type]->bwrite(er->mchan, bp, 0);
+	ifc->out++;
+}
+
+/*
+ *  process to read from the device
+ */
+static void
+netdevread(void *a)
+{
+	Ipifc *ifc;
+	Block *bp;
+	Netdevrock *er;
+	char *argv[1];
+
+	ifc = a;
+	er = ifc->arg;
+	er->readp = up;	/* hide identity under a rock for unbind */
+	if(waserror()){
+		er->readp = nil;	/* netdevunbind polls this to know we are gone */
+		pexit("hangup", 1);
+	}
+	for(;;){
+		bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
+		if(bp == nil){
+			/*
+			 * get here if mchan is a pipe and other side hangs up
+			 * clean up this interface & get out
+			 * ZZZ is this a good idea?
+			 */
+			poperror();
+			er->readp = nil;
+			argv[0] = "unbind";
+			if(!waserror())	/* an error raised by the ctl call is swallowed here */
+				ifc->conv->p->ctl(ifc->conv, argv, 1);
+			pexit("hangup", 1);
+		}
+		if(!canrlock(ifc)){
+			freeb(bp);	/* read lock not available: drop the block */
+			continue;
+		}
+		if(waserror()){
+			runlock(ifc);	/* give the lock back before passing the error on */
+			nexterror();
+		}
+		ifc->in++;
+		if(ifc->lifc == nil)
+			freeb(bp);	/* no logical interface configured: drop */
+		else
+			ipiput4(er->f, ifc, bp);
+		runlock(ifc);
+		poperror();
+	}
+}
+
+void
+netdevmediumlink(void)
+{
+ addipmedium(&netdevmedium);
+}
--- /dev/null
+++ b/os/ip.original/netlog.c
@@ -1,0 +1,263 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../ip/ip.h"
+
+enum {
+ Nlog = 4*1024,
+};
+
+/*
+ * action log
+ */
+struct Netlog {
+ Lock;
+ int opens;
+ char* buf;
+ char *end;
+ char *rptr;
+ int len;
+
+ int logmask; /* mask of things to debug */
+ uchar iponly[IPaddrlen]; /* ip address to print debugging for */
+ int iponlyset;
+
+ QLock;
+ Rendez;
+};
+
+typedef struct Netlogflag {
+ char* name;
+ int mask;
+} Netlogflag;
+
+static Netlogflag flags[] =
+{
+ { "ppp", Logppp, },
+ { "ip", Logip, },
+ { "fs", Logfs, },
+ { "tcp", Logtcp, },
+ { "il", Logil, },
+ { "icmp", Logicmp, },
+ { "udp", Logudp, },
+ { "compress", Logcompress, },
+ { "ilmsg", Logil|Logilmsg, },
+ { "gre", Loggre, },
+ { "tcpwin", Logtcp|Logtcpwin, },
+ { "tcprxmt", Logtcp|Logtcprxmt, },
+ { "udpmsg", Logudp|Logudpmsg, },
+ { "ipmsg", Logip|Logipmsg, },
+ { "esp", Logesp, },
+ { nil, 0, },
+};
+
+char Ebadnetctl[] = "too few arguments for netlog control message";
+
+enum
+{
+ CMset,
+ CMclear,
+ CMonly,
+};
+
+static
+Cmdtab routecmd[] = {
+ CMset, "set", 0,
+ CMclear, "clear", 0,
+ CMonly, "only", 0,
+};
+
+void
+netloginit(Fs *f)
+{
+ f->alog = smalloc(sizeof(Netlog));
+}
+
+void
+netlogopen(Fs *f)
+{
+	lock(f->alog);	/* the first open sets up the ring buffer; later opens only count */
+	if(waserror()){
+		unlock(f->alog);
+		nexterror();
+	}
+	if(f->alog->opens == 0){
+		if(f->alog->buf == nil && (f->alog->buf = malloc(Nlog)) == nil)
+			error(Enomem);	/* don't build rptr/end from a nil buffer */
+		f->alog->rptr = f->alog->buf;
+		f->alog->end = f->alog->buf + Nlog;
+	}
+	f->alog->opens++;
+	unlock(f->alog);
+	poperror();
+}
+
+void
+netlogclose(Fs *f)
+{
+	lock(f->alog);	/* drop one open; the last close releases the buffer */
+	if(waserror()){
+		unlock(f->alog);
+		nexterror();
+	}
+	f->alog->opens--;
+	if(f->alog->opens == 0){
+		free(f->alog->buf);
+		f->alog->buf = nil;	/* netlogopen allocates again when it sees nil */
+	}
+	unlock(f->alog);
+	poperror();
+}
+
+static int
+netlogready(void *a)
+{
+ Fs *f = a;
+
+ return f->alog->len;
+}
+
+long
+netlogread(Fs *f, void *a, ulong, long n)
+{
+	int i, d;
+	char *p, *rptr;
+
+	qlock(f->alog);	/* held across the whole read, including the sleep */
+	if(waserror()){
+		qunlock(f->alog);
+		nexterror();
+	}
+
+	for(;;){
+		lock(f->alog);
+		if(f->alog->len){
+			if(n > f->alog->len)
+				n = f->alog->len;	/* return at most what is buffered */
+			d = 0;
+			rptr = f->alog->rptr;
+			f->alog->rptr += n;
+			if(f->alog->rptr >= f->alog->end){
+				d = f->alog->rptr - f->alog->end;	/* bytes that wrapped past end */
+				f->alog->rptr = f->alog->buf + d;
+			}
+			f->alog->len -= n;
+			unlock(f->alog);	/* NOTE(review): the copy below runs after the spin lock is released */
+
+			i = n-d;
+			p = a;
+			memmove(p, rptr, i);	/* i bytes up to end ... */
+			memmove(p+i, f->alog->buf, d);	/* ... then d bytes from the start */
+			break;
+		}
+		else
+			unlock(f->alog);
+
+		sleep(f->alog, netlogready, f);	/* netlogready is true once len is non-zero */
+	}
+
+	qunlock(f->alog);
+	poperror();
+
+	return n;
+}
+
+void
+netlogctl(Fs *f, char* s, int n)
+{
+	int i, set;
+	Netlogflag *fp;
+	Cmdbuf *cb;
+	Cmdtab *ct;
+
+	/* parse "set|clear flag ..." or "only addr" and apply it to f->alog */
+	cb = parsecmd(s, n);
+	if(waserror()){
+		free(cb);
+		nexterror();
+	}
+
+	if(cb->nf < 2)
+		error(Ebadnetctl);
+
+	ct = lookupcmd(cb, routecmd, nelem(routecmd));
+	SET(set);
+	switch(ct->index){
+	case CMset:
+		set = 1;
+		break;
+
+	case CMclear:
+		set = 0;
+		break;
+
+	case CMonly:
+		parseip(f->alog->iponly, cb->f[1]);
+		if(ipcmp(f->alog->iponly, IPnoaddr) == 0)
+			f->alog->iponlyset = 0;
+		else
+			f->alog->iponlyset = 1;
+		free(cb);
+		poperror();	/* pair the waserror above on this early return too */
+		return;
+
+	default:
+		cmderror(cb, "unknown ip control message");
+	}
+
+	for(i = 1; i < cb->nf; i++){
+		for(fp = flags; fp->name; fp++)
+			if(strcmp(fp->name, cb->f[i]) == 0)
+				break;
+		if(fp->name == nil)
+			continue;
+		if(set)
+			f->alog->logmask |= fp->mask;
+		else
+			f->alog->logmask &= ~fp->mask;
+	}
+
+	free(cb);
+	poperror();
+}
+
+void
+netlog(Fs *f, int mask, char *fmt, ...)
+{
+	char buf[128], *t, *fp;
+	int i, n;
+	va_list arg;
+
+	if(!(f->alog->logmask & mask))	/* this class of message is not enabled */
+		return;
+
+	if(f->alog->opens == 0)	/* nobody has the log open */
+		return;
+
+	va_start(arg, fmt);
+	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
+	va_end(arg);
+
+	lock(f->alog);
+	i = f->alog->len + n - Nlog;	/* bytes by which the ring would overflow */
+	if(i > 0){
+		f->alog->len -= i;	/* discard the oldest i bytes to make room */
+		f->alog->rptr += i;
+		if(f->alog->rptr >= f->alog->end)
+			f->alog->rptr = f->alog->buf + (f->alog->rptr - f->alog->end);
+	}
+	t = f->alog->rptr + f->alog->len;	/* write position; wrapped inside the loop */
+	fp = buf;
+	f->alog->len += n;
+	while(n-- > 0){
+		if(t >= f->alog->end)
+			t = f->alog->buf + (t - f->alog->end);
+		*t++ = *fp++;
+	}
+	unlock(f->alog);
+
+	wakeup(f->alog);	/* rouse a reader sleeping in netlogread */
+}
--- /dev/null
+++ b/os/ip.original/nullmedium.c
@@ -1,0 +1,39 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+static void
+nullbind(Ipifc*, int, char**)
+{
+ error("cannot bind null device");
+}
+
+static void
+nullunbind(Ipifc*)
+{
+}
+
+static void
+nullbwrite(Ipifc*, Block*, int, uchar*)
+{
+ error("nullbwrite");
+}
+
+Medium nullmedium =
+{
+.name= "null",
+.bind= nullbind,
+.unbind= nullunbind,
+.bwrite= nullbwrite,
+};
+
+void
+nullmediumlink(void)
+{
+ addipmedium(&nullmedium);
+}
--- /dev/null
+++ b/os/ip.original/pktmedium.c
@@ -1,0 +1,79 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+
+static void pktbind(Ipifc*, int, char**);
+static void pktunbind(Ipifc*);
+static void pktbwrite(Ipifc*, Block*, int, uchar*);
+static void pktin(Fs*, Ipifc*, Block*);
+
+Medium pktmedium =
+{
+.name= "pkt",
+.hsize= 14,
+.mintu= 40,
+.maxtu= 4*1024,
+.maclen= 6,
+.bind= pktbind,
+.unbind= pktunbind,
+.bwrite= pktbwrite,
+.pktin= pktin,
+.unbindonclose= 1,
+};
+
+/*
+ *  called to bind an IP ifc to the pkt medium; there is nothing to set up
+ *  called with ifc wlock'd
+ */
+static void
+pktbind(Ipifc*, int, char**)
+{
+}
+
+/*
+ * called with ifc wlock'd
+ */
+static void
+pktunbind(Ipifc*)
+{
+}
+
+/*
+ *  called by ipoput with a single packet to write
+ */
+static void
+pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+	/* enqueue onto the conversation's rq */
+	bp = concatblock(bp);
+	if(ifc->conv->snoopers.ref > 0)	/* someone is snooping: queue a copy on sq */
+		qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+	qpass(ifc->conv->rq, bp);
+}
+
+/*
+ *  called with ifc rlocked when someone writes to 'data'
+ */
+static void
+pktin(Fs *f, Ipifc *ifc, Block *bp)
+{
+	if(ifc->lifc == nil)
+		freeb(bp);	/* no logical interface configured: drop */
+	else {
+		if(ifc->conv->snoopers.ref > 0)	/* someone is snooping: queue a copy on sq */
+			qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
+		ipiput4(f, ifc, bp);
+	}
+}
+
+void
+pktmediumlink(void)
+{
+ addipmedium(&pktmedium);
+}
--- /dev/null
+++ b/os/ip.original/plan9.c
@@ -1,0 +1,36 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+
+/*
+ * some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+ return up->env->user;
+}
+
+Chan*
+commonfdtochan(int fd, int mode, int a, int b)
+{
+ return fdtochan(up->env->fgrp, fd, mode, a, b);
+}
+
+char*
+commonerror(void)
+{
+ return up->env->errstr;
+}
+
+int
+postnote(Proc *p, int, char *, int)
+{
+	swiproc(p, 0);	/* the note arguments are ignored; only swiproc(p, 0) is issued */
+	return 0;	/* always reports success */
+}
--- /dev/null
+++ b/os/ip.original/ppp.c
@@ -1,0 +1,1656 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include <libcrypt.h>
+#include <kernel.h>
+#include "ip.h"
+#include "ppp.h"
+
+int nocompress; /* nonzero: config does not offer, and getopts rejects, Pvjctcp compression */
+Ipaddr pppdns[2]; /* copies of ppp->dns1 and ppp->dns2, stored by newstate when ipcp opens */
+
+/*
+ * FCS lookup table (rfc 1331); getframe and putframe index it with (fcs ^ byte) & 0xff
+ */
+ushort fcstab[256] =
+{
+ 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf,
+ 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7,
+ 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e,
+ 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876,
+ 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd,
+ 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5,
+ 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c,
+ 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974,
+ 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb,
+ 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3,
+ 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a,
+ 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72,
+ 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9,
+ 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1,
+ 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738,
+ 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70,
+ 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7,
+ 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff,
+ 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036,
+ 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e,
+ 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5,
+ 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd,
+ 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134,
+ 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c,
+ 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3,
+ 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb,
+ 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232,
+ 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a,
+ 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1,
+ 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9,
+ 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 0xa022, 0x92b9, 0x8330,
+ 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78
+};
+
+static char *snames[] = /* printable state names; newstate indexes this with p->state and the new state */
+{
+ "Sclosed",
+ "Sclosing",
+ "Sreqsent",
+ "Sackrcvd",
+ "Sacksent",
+ "Sopened",
+};
+
+static void init(PPP*);
+static void setphase(PPP*, int);
+static void pinit(PPP*, Pstate*);
+static void ppptimer(void*);
+static void ptimer(PPP*, Pstate*);
+static int getframe(PPP*, Block**);
+static Block* putframe(PPP*, int, Block*);
+static uchar* escapebyte(PPP*, ulong, uchar*, ushort*);
+static void config(PPP*, Pstate*, int);
+static int getopts(PPP*, Pstate*, Block*);
+static void rejopts(PPP*, Pstate*, Block*, int);
+static void newstate(PPP*, Pstate*, int);
+static void rcv(PPP*, Pstate*, Block*);
+static void getchap(PPP*, Block*);
+static void getpap(PPP*, Block*);
+static void sendpap(PPP*);
+static void getlqm(PPP*, Block*);
+static void putlqm(PPP*);
+static void hangup(PPP*);
+static void remove(PPP*);
+
+static int validv4(Ipaddr);
+static void invalidate(Ipaddr);
+static void ipconnect(PPP *);
+static void setdefroute(PPP *, Ipaddr);
+static void printopts(PPP *, Pstate*, Block*, int);
+static void sendtermreq(PPP*, Pstate*);
+/* format err with a trailing newline and hand it to qproduce on the conversation's eq */
+static void
+errlog(PPP *ppp, char *err)
+{
+	char line[64];
+	int len;
+
+	len = snprint(line, sizeof line, "%s\n", err);
+	qproduce(ppp->ifc->conv->eq, line, len);
+}
+/* first call allocates buffers, protocol state and the timer kproc; every call (re)starts lcp negotiation */
+static void
+init(PPP* ppp)
+{
+ if(ppp->inbuf == nil){ /* inbuf doubles as the "already set up" marker */
+ ppp->inbuf = allocb(4096);
+ ppp->outbuf = allocb(4096);
+
+ ppp->lcp = malloc(sizeof(Pstate));
+ ppp->ipcp = malloc(sizeof(Pstate));
+ if(ppp->lcp == nil || ppp->ipcp == nil) /* NOTE(review): inbuf stays set on this path, so a later init would skip setup and use a nil lcp/ipcp */
+ error("ppp init: malloc");
+
+ ppp->lcp->proto = Plcp;
+ ppp->lcp->state = Sclosed;
+ ppp->ipcp->proto = Pipcp;
+ ppp->ipcp->state = Sclosed;
+
+ kproc("ppptimer", ppptimer, ppp, KPDUPPG|KPDUPFDG); /* ppptimer drives retransmits and lqm reports */
+ }
+
+ pinit(ppp, ppp->lcp); /* sends the first lcp config request */
+ setphase(ppp, Plink);
+}
+/* record the new link phase and run its entry action; the old phase matters only when entering Plink */
+static void
+setphase(PPP *ppp, int phase)
+{
+ int oldphase;
+
+ oldphase = ppp->phase;
+
+ ppp->phase = phase;
+ switch(phase){
+ default:
+ panic("ppp: unknown phase %d", phase);
+ case Pdead:
+ /* restart or exit? */
+ pinit(ppp, ppp->lcp); /* as written, always restarts lcp negotiation */
+ setphase(ppp, Plink);
+ break;
+ case Plink:
+ /* link down */
+ switch(oldphase) {
+ case Pnet:
+ newstate(ppp, ppp->ipcp, Sclosed); /* leaving the network phase closes ipcp */
+ }
+ break;
+ case Pauth:
+ if(ppp->usepap)
+ sendpap(ppp);
+ else if(!ppp->usechap) /* neither pap nor chap selected: go straight to Pnet */
+ setphase(ppp, Pnet);
+ break;
+ case Pnet:
+ pinit(ppp, ppp->ipcp); /* start ipcp negotiation */
+ break;
+ case Pterm:
+ /* what? */
+ break;
+ }
+}
+/* reset negotiation state for protocol p, send a fresh config request and enter Sreqsent */
+static void
+pinit(PPP *ppp, Pstate *p)
+{
+ p->timeout = 0;
+
+ switch(p->proto){
+ case Plcp:
+ ppp->magic = TK2MS(MACHP(0)->ticks); /* magic number taken from the tick counter */
+ ppp->xctlmap = 0xffffffff; /* putframe escapes every char below 0x20 until getopts stores another map */
+ ppp->period = 0;
+ p->optmask = 0xffffffff; /* offer every option; rejopts clears bits the peer refuses */
+ ppp->rctlmap = 0;
+ ppp->ipcp->state = Sclosed; /* restarting lcp also resets ipcp */
+ ppp->ipcp->optmask = 0xffffffff;
+
+ /* quality goo */
+ ppp->timeout = 0;
+ memset(&ppp->in, 0, sizeof(ppp->in));
+ memset(&ppp->out, 0, sizeof(ppp->out));
+ memset(&ppp->pin, 0, sizeof(ppp->pin));
+ memset(&ppp->pout, 0, sizeof(ppp->pout));
+ memset(&ppp->sin, 0, sizeof(ppp->sin));
+ break;
+ case Pipcp:
+ if(ppp->localfrozen == 0) /* frozen addresses were supplied to pppopen and are kept */
+ invalidate(ppp->local);
+ if(ppp->remotefrozen == 0)
+ invalidate(ppp->remote);
+ p->optmask = 0xffffffff;
+ ppp->ctcp = compress_init(ppp->ctcp);
+ ppp->usedns = 3; /* bits 1 and 2: config asks for Oipdns and Oipdns2 */
+ invalidate(ppp->dns1);
+ invalidate(ppp->dns2);
+ break;
+ }
+ p->confid = p->rcvdconfid = -1;
+ config(ppp, p, 1);
+ newstate(ppp, p, Sreqsent);
+}
+
+/*
+ * change protocol p to a new state; lcp transitions drive the link phase, an ipcp open configures the interface
+ */
+static void
+newstate(PPP *ppp, Pstate *p, int state)
+{
+ netlog(ppp->f, Logppp, "%ux %ux %s->%s ctlmap %lux/%lux flags %ux mtu %d mru %d\n", ppp, p->proto,
+ snames[p->state], snames[state], ppp->rctlmap, ppp->xctlmap, p->flags,
+ ppp->mtu, ppp->mru);
+
+ if(p->proto == Plcp) {
+ if(state == Sopened)
+ setphase(ppp, Pauth);
+ else if(state == Sclosed)
+ setphase(ppp, Pdead);
+ else if(p->state == Sopened) /* lcp is leaving Sopened */
+ setphase(ppp, Plink);
+ }
+
+ if(p->proto == Pipcp && state == Sopened && validv4(ppp->local) && validv4(ppp->remote)){
+ netlog(ppp->f, Logppp, "pppnewstate: local %I remote %I\n", ppp->local, ppp->remote);
+ ipmove(pppdns[0], ppp->dns1);
+ ipmove(pppdns[1], ppp->dns2);
+ ipconnect(ppp);
+ /* if this is the only network, set up a default route */
+// if(ppp->ifc->link==nil) /* how??? */
+ setdefroute(ppp, ppp->remote);
+ errlog(ppp, Enoerror);
+ }
+
+ p->state = state; /* assigned last: the calls above see the old state, and setphase may itself call newstate on p first */
+}
+/* release the buffers and protocol state hanging off ppp, then ppp itself */
+static void
+remove(PPP *ppp)
+{
+	if(ppp->outbuf != nil){
+		freeb(ppp->outbuf);
+		ppp->outbuf = nil;
+	}
+	if(ppp->inbuf != nil){
+		freeb(ppp->inbuf);
+		ppp->inbuf = nil;
+	}
+	free(ppp->lcp);
+	ppp->lcp = nil;
+	free(ppp->ctcp);
+	ppp->ctcp = nil;
+	free(ppp->ipcp);
+	ppp->ipcp = nil;
+	free(ppp);
+}
+/* hang up the line, then free ppp; ppp must not be used after this returns */
+void
+pppclose(PPP *ppp)
+{
+ hangup(ppp);
+ remove(ppp); /* NOTE(review): nothing visible here stops the ppptimer kproc, which still holds ppp - confirm */
+}
+/* debugging aid: print the leading bytes of b (as many as fit in x) as hex pairs */
+static void
+dumpblock(Block *b)
+{
+	char x[256];
+	int i;
+
+	for(x[0] = 0, i = 0; i < (sizeof(x)-1)/3 && b->rp+i < b->wp; i++)	/* x starts empty so an empty block prints "" */
+		sprint(&x[3*i], "%2.2ux ", b->rp[i]);
+	print("%s\n", x);
+}
+
+/* read one good frame from dchan: returns its protocol with the payload in *info, or 0 with *info nil when the read fails */
+static int
+getframe(PPP *ppp, Block **info)
+{
+ uchar *p, *from, *to;
+ int n, len, proto;
+ ulong c;
+ ushort fcs;
+ Block *buf, *b;
+
+ buf = ppp->inbuf;
+ for(;;){
+ /* read till we hit a frame byte or run out of room */
+ for(p = buf->rp; buf->wp < buf->lim;){
+ for(; p < buf->wp; p++)
+ if(*p == HDLC_frame)
+ goto break2;
+
+ len = buf->lim - buf->wp;
+ n = 0;
+ if(ppp->dchan != nil)
+ n = kchanio(ppp->dchan, buf->wp, len, OREAD);
+ netlog(ppp->f, Logppp, "ppp kchanio %d bytes\n", n);
+ if(n <= 0){
+ buf->wp = buf->rp; /* discard whatever was buffered */
+// if(n < 0)
+// print("ppp kchanio(%s) returned %d: %r",
+// ppp->dchan->path->elem, n);
+ *info = nil;
+ return 0;
+ }
+ buf->wp += n;
+ }
+break2:
+
+ /* copy into block, undoing escapes, and calculating fcs */
+ fcs = PPP_initfcs;
+ b = allocb(p - buf->rp);
+ to = b->wp;
+ for(from = buf->rp; from != p;){
+ c = *from++;
+ if(c == HDLC_esc){
+ if(from == p) /* escape byte with nothing after it: stop */
+ break;
+ c = *from++ ^ 0x20;
+ } else if((c < 0x20) && (ppp->rctlmap & (1 << c))) /* unescaped control char covered by rctlmap: skip it */
+ continue;
+ *to++ = c;
+ fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
+ }
+
+ /* copy down what's left in buffer */
+ p++; /* step over the frame byte */
+ memmove(buf->rp, p, buf->wp - p);
+ n = p - buf->rp;
+ buf->wp -= n;
+ b->wp = to - 2; /* leave the two trailing fcs bytes out of the payload */
+
+ /* return to caller if checksum matches */
+ if(fcs == PPP_goodfcs){
+ if(b->rp[0] == PPP_addr && b->rp[1] == PPP_ctl) /* address/control bytes are optional */
+ b->rp += 2;
+ proto = *b->rp++;
+ if((proto & 0x1) == 0) /* an even first byte means a second protocol byte follows */
+ proto = (proto<<8) | *b->rp++;
+ if(b->rp < b->wp){ /* only frames with a non-empty payload are returned */
+ ppp->in.bytes += n;
+ ppp->in.packets++;
+ *info = b;
+ return proto;
+ }
+ } else if(BLEN(b) > 0){
+ ppp->ifc->inerr++;
+ ppp->in.discards++;
+ netlog(ppp->f, Logppp, "len %d/%d cksum %ux (%ux %ux %ux %ux)\n",
+ BLEN(b), BLEN(buf), fcs, b->rp[0],
+ b->rp[1], b->rp[2], b->rp[3]);
+ }
+
+ freeblist(b); /* bad or empty frame: drop it and look for the next one */
+ }
+ *info = nil; /* not reached: the loop above only exits by return */
+ return 0;
+}
+
+/* send a PPP frame: prepend protocol/address bytes to b, escape and checksum it into outbuf, write that to dchan; returns b (padblock may have replaced it) */
+static Block *
+putframe(PPP *ppp, int proto, Block *b)
+{
+ Block *buf;
+ uchar *to, *from;
+ ushort fcs;
+ ulong ctlmap;
+ int c;
+ Block *bp;
+
+ if(ppp->dchan == nil){
+ netlog(ppp->f, Logppp, "putframe: dchan down\n");
+ errlog(ppp, Ehungup);
+ return b;
+ }
+ netlog(ppp->f, Logppp, "putframe %ux %d %d (%d bytes)\n", proto, b->rp[0], b->rp[1], BLEN(b));
+
+ ppp->out.packets++;
+
+ if(proto == Plcp) /* lcp frames always escape every control character */
+ ctlmap = 0xffffffff;
+ else
+ ctlmap = ppp->xctlmap;
+
+ /* make sure we have head room */
+ if(b->rp - b->base < 4){
+ b = padblock(b, 4);
+ b->rp += 4;
+ }
+
+ /* add in the protocol and address, we'd better have left room */
+ from = b->rp; /* header bytes are written below rp; b->rp itself is not moved */
+ *--from = proto;
+ if(!(ppp->lcp->flags&Fpc) || proto > 0x100 || proto == Plcp) /* high protocol byte is omitted only when Fpc was agreed */
+ *--from = proto>>8;
+ if(!(ppp->lcp->flags&Fac) || proto == Plcp){ /* address/control bytes are omitted only when Fac was agreed */
+ *--from = PPP_ctl;
+ *--from = PPP_addr;
+ }
+
+ qlock(&ppp->outlock); /* outbuf is shared by every sender */
+ buf = ppp->outbuf;
+
+ /* escape and checksum the body */
+ fcs = PPP_initfcs;
+ to = buf->rp; /* NOTE(review): no check that the escaped frame fits in outbuf (allocated as 4096 bytes in init) */
+
+ *to++ = HDLC_frame;
+
+ for(bp = b; bp; bp = bp->next){
+ if(bp != b) /* the first block starts at the header built above, later ones at their own rp */
+ from = bp->rp;
+ for(; from < bp->wp; from++){
+ c = *from;
+ if(c == HDLC_frame || c == HDLC_esc
+ || (c < 0x20 && ((1<<c) & ctlmap))){
+ *to++ = HDLC_esc;
+ *to++ = c ^ 0x20;
+ } else
+ *to++ = c;
+ fcs = (fcs >> 8) ^ fcstab[(fcs ^ c) & 0xff];
+ }
+ }
+
+ /* add on and escape the checksum */
+ fcs = ~fcs;
+ c = fcs; /* compared and stored as a byte below: this sends the low half first */
+ if(c == HDLC_frame || c == HDLC_esc
+ || (c < 0x20 && ((1<<c) & ctlmap))){
+ *to++ = HDLC_esc;
+ *to++ = c ^ 0x20;
+ } else
+ *to++ = c;
+ c = fcs>>8;
+ if(c == HDLC_frame || c == HDLC_esc
+ || (c < 0x20 && ((1<<c) & ctlmap))){
+ *to++ = HDLC_esc;
+ *to++ = c ^ 0x20;
+ } else
+ *to++ = c;
+
+ /* add frame marker and send */
+ *to++ = HDLC_frame;
+ buf->wp = to;
+ if(ppp->dchan == nil){ /* rechecked: dchan may have gone away since the test at the top */
+ netlog(ppp->f, Logppp, "putframe: dchan down\n");
+ errlog(ppp, Ehungup);
+ }else{
+ kchanio(ppp->dchan, buf->rp, BLEN(buf), OWRITE);
+ ppp->out.bytes += BLEN(buf);
+ }
+
+ qunlock(&ppp->outlock);
+ return b;
+}
+/* the Lcpmsg header 4 bytes below b->wp: correct right after alloclcp, before anything is appended */
+#define IPB2LCP(b) ((Lcpmsg*)((b)->wp-4))
+/* allocate a block for an lcp-format message and fill in its code and id; callers set the length field */
+static Block*
+alloclcp(int code, int id, int len)
+{
+	Lcpmsg *hdr;
+	Block *blk;
+
+	/*
+	 * len covers the whole message: the 4-byte header plus what the caller appends
+	 */
+	blk = allocb(len);
+
+	hdr = (Lcpmsg*)blk->wp;
+	hdr->id = id;
+	hdr->code = code;
+	blk->wp += 4;
+
+	return blk;
+}
+/* append a 5-byte option: type, length, 2-byte protocol, algorithm */
+static void
+putao(Block *b, int type, int aproto, int alg)
+{
+	b->wp[0] = type;
+	b->wp[1] = 5;
+	hnputs(b->wp+2, aproto);
+	b->wp[4] = alg;
+	b->wp += 5;
+}
+/* append a 6-byte option: type, length, 4-byte value */
+static void
+putlo(Block *b, int type, ulong val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 6;
+	hnputl(b->wp+2, val);
+	b->wp += 6;
+}
+/* append a 6-byte option: type, length, v4 form of val */
+static void
+putv4o(Block *b, int type, Ipaddr val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 6;
+	/* a failed conversion is ignored; the panic that */
+	/* once handled it is disabled in the original */
+	v6tov4(b->wp+2, val);
+	b->wp += 6;
+}
+/* append a 4-byte option: type, length, 2-byte value */
+static void
+putso(Block *b, int type, ulong val)
+{
+	b->wp[0] = type;
+	b->wp[1] = 4;
+	hnputs(b->wp+2, val);
+	b->wp += 4;
+}
+/* append a 2-byte option that carries no value: type, length */
+static void
+puto(Block *b, int type)
+{
+ *b->wp++ = type;
+ *b->wp++ = 2;
+}
+
+/*
+ * send configuration request for p; newid nonzero starts a new id and rearms the timeout, zero resends under p->confid
+ */
+static void
+config(PPP *ppp, Pstate *p, int newid)
+{
+ Block *b;
+ Lcpmsg *m;
+ int id;
+
+ if(newid){
+ id = ++(p->id);
+ p->confid = id;
+ p->timeout = Timeout;
+ } else
+ id = p->confid;
+ b = alloclcp(Lconfreq, id, 256);
+ m = IPB2LCP(b); /* must be taken before any option moves b->wp */
+ USED(m);
+
+ switch(p->proto){ /* only options whose optmask bit is still set are offered */
+ case Plcp:
+ if(p->optmask & Fmagic)
+ putlo(b, Omagic, ppp->magic);
+ if(p->optmask & Fmtu)
+ putso(b, Omtu, ppp->mru);
+ if(p->optmask & Fac)
+ puto(b, Oac);
+ if(p->optmask & Fpc)
+ puto(b, Opc);
+ if(p->optmask & Fctlmap)
+ putlo(b, Octlmap, 0); /* we don't want anything escaped */
+ break;
+ case Pipcp:
+ if((p->optmask & Fipaddr) /*&& validv4(ppp->local)*/)
+ putv4o(b, Oipaddr, ppp->local);
+ if(!nocompress && (p->optmask & Fipcompress)){
+ *b->wp++ = Oipcompress;
+ *b->wp++ = 6;
+ hnputs(b->wp, Pvjctcp);
+ b->wp += 2;
+ *b->wp++ = MAX_STATES-1;
+ *b->wp++ = 1;
+ }
+ if(ppp->usedns & 1)
+ putlo(b, Oipdns, 0);
+ if(ppp->usedns & 2)
+ putlo(b, Oipdns2, 0);
+ break;
+ }
+
+ hnputs(m->len, BLEN(b)); /* length field is filled in once all options are appended */
+ b = putframe(ppp, p->proto, b);
+ freeblist(b);
+}
+
+/*
+ * parse a configuration request and send an ack, nak or reject packet.
+ * an option length below 2 (the type and length bytes) is malformed.
+ * returns:	-1 if request was syntactically incorrect (nothing is sent)
+ *		 0 if packet was accepted
+ *		 1 if packet was nak'ed or rejected
+ */
+static int
+getopts(PPP *ppp, Pstate *p, Block *b)
+{
+	Lcpmsg *m, *repm;
+	Lcpopt *o;
+	uchar *cp;
+	ulong rejecting, nacking, flags, proto;
+	ulong mtu, ctlmap, period;
+	ulong x;
+	Block *repb;
+	Ipaddr ipaddr;
+
+	rejecting = 0;
+	nacking = 0;
+	flags = 0;
+
+	/* defaults */
+	invalidate(ipaddr);
+	mtu = ppp->mtu;
+
+	ctlmap = 0xffffffff;
+	period = 0;
+
+	m = (Lcpmsg*)b->rp;
+	repb = alloclcp(Lconfack, m->id, 3*BLEN(b));	/* a nak entry (up to 6 bytes) may answer an option as short as 2 */
+	repm = IPB2LCP(repb);
+
+	/* copy options into ack packet */
+	memmove(repm->data, m->data, b->wp - m->data);
+	repb->wp += b->wp - m->data;
+
+	/* look for options we don't recognize or like */
+	for(cp = m->data; cp < b->wp; cp += o->len){
+		o = (Lcpopt*)cp;
+		if(cp + o->len > b->wp || o->len < 2){
+			freeblist(repb);
+			netlog(ppp->f, Logppp, "ppp %s: bad option length %ux\n", ppp->ifc->dev,
+				o->type);
+			return -1;
+		}
+
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			case Oac:
+				flags |= Fac;
+				continue;
+			case Opc:
+				flags |= Fpc;
+				continue;
+			case Omtu:
+				mtu = nhgets(o->data);
+				if(mtu < ppp->ifc->m->mintu){
+					netlog(ppp->f, Logppp, "bogus mtu %d\n", mtu);
+					mtu = ppp->ifc->m->mintu;
+				}
+				continue;
+			case Omagic:
+				if(ppp->magic == nhgetl(o->data))
+					netlog(ppp->f, Logppp, "ppp: possible loop\n");
+				continue;
+			case Octlmap:
+				ctlmap = nhgetl(o->data);
+				continue;
+			case Oquality:
+				proto = nhgets(o->data);
+				if(proto != Plqm)
+					break;
+				x = nhgetl(o->data+2)*10;
+				period = (x+Period-1)/Period;
+				continue;
+			case Oauth:
+				proto = nhgets(o->data);
+				if(proto == Ppap && ppp->chapname[0] && ppp->secret[0]){
+					ppp->usepap = 1;
+					netlog(ppp->f, Logppp, "PPP %s: select PAP\n", ppp->ifc->dev);
+					continue;
+				}
+				if(proto != Pchap || o->data[2] != APmd5){
+					if(!nacking){
+						nacking = 1;
+						repb->wp = repm->data;
+						repm->code = Lconfnak;
+					}
+					putao(repb, Oauth, Pchap, APmd5);
+				}
+				else
+					ppp->usechap = 1;
+				ppp->usepap = 0;
+				continue;
+			}
+			break;
+		case Pipcp:
+			switch(o->type){
+			case Oipaddr:
+				v4tov6(ipaddr, o->data);
+				if(!validv4(ppp->remote))
+					continue;
+				if(!validv4(ipaddr) && !rejecting){
+					/* other side requesting an address */
+					if(!nacking){
+						nacking = 1;
+						repb->wp = repm->data;
+						repm->code = Lconfnak;
+					}
+					putv4o(repb, Oipaddr, ppp->remote);
+				}
+				continue;
+			case Oipcompress:
+				proto = nhgets(o->data);
+				if(nocompress || proto != Pvjctcp || compress_negotiate(ppp->ctcp, o->data+2) < 0)
+					break;
+				flags |= Fipcompress;
+				continue;
+			}
+			break;
+		}
+
+		/* come here if option is not recognized */
+		if(!rejecting){
+			rejecting = 1;
+			repb->wp = repm->data;
+			repm->code = Lconfrej;
+		}
+		netlog(ppp->f, Logppp, "ppp %s: bad %ux option %d\n", ppp->ifc->dev, p->proto, o->type);
+		memmove(repb->wp, o, o->len);
+		repb->wp += o->len;
+	}
+
+	/* permanent changes only after we know that we liked the packet */
+	if(!rejecting && !nacking){
+		switch(p->proto){
+		case Plcp:
+			netlog(ppp->f, Logppp, "Plcp: mtu: %d %d x:%lux/r:%lux %lux\n", mtu, ppp->mtu, ppp->xctlmap, ppp->rctlmap, ctlmap);
+			ppp->period = period;
+			ppp->xctlmap = ctlmap;
+			if(mtu > Maxmtu)
+				mtu = Maxmtu;
+			if(mtu < Minmtu)
+				mtu = Minmtu;
+			ppp->mtu = mtu;
+			break;
+		case Pipcp:
+			if(validv4(ipaddr) && ppp->remotefrozen == 0)
+				ipmove(ppp->remote, ipaddr);
+			break;
+		}
+		p->flags = flags;
+	}
+
+	hnputs(repm->len, BLEN(repb));
+	repb = putframe(ppp, p->proto, repb);
+	freeblist(repb);
+
+	return rejecting || nacking;
+}
+
+/*
+ *  parse configuration rejection (or nak): stop sending anything that they
+ *  don't like, except that naks for ctlmap and ipcp addresses supply values.
+ */
+static void
+rejopts(PPP *ppp, Pstate *p, Block *b, int code)
+{
+	Lcpmsg *m;
+	Lcpopt *o;
+
+	/* just give up trying what the other side doesn't like */
+	m = (Lcpmsg*)b->rp;
+	for(b->rp = m->data; b->rp < b->wp; b->rp += o->len){
+		o = (Lcpopt*)b->rp;
+		if(b->rp + o->len > b->wp || o->len == 0){
+			netlog(ppp->f, Logppp, "ppp %s: bad roption length %ux\n", ppp->ifc->dev,
+				o->type);
+			return;
+		}
+
+		if(code == Lconfrej){
+			if(o->type < 8*sizeof(p->optmask))
+				p->optmask &= ~(1<<o->type);
+			if(o->type == Oipdns)
+				ppp->usedns &= ~1;
+			else if(o->type == Oipdns2)
+				ppp->usedns &= ~2;
+			netlog(ppp->f, Logppp, "ppp %s: %ux rejecting %d\n", ppp->ifc->dev, p->proto,
+				o->type);
+			continue;
+		}
+
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			case Octlmap:
+				ppp->rctlmap = nhgetl(o->data);
+				break;
+			default:
+				if(o->type < 8*sizeof(p->optmask))
+					p->optmask &= ~(1<<o->type);
+			}
+			break;	/* lcp option numbers must not fall into the ipcp cases below */
+		case Pipcp:
+			switch(o->type){
+			case Oipaddr:
+				if(!validv4(ppp->local))
+					v4tov6(ppp->local, o->data);
+//				if(o->type < 8*sizeof(p->optmask))
+//					p->optmask &= ~(1<<o->type);
+				break;
+			case Oipdns:
+				if(!validv4(ppp->dns1))
+					v4tov6(ppp->dns1, o->data);
+				ppp->usedns &= ~1;
+				break;
+			case Oipdns2:
+				if(!validv4(ppp->dns2))
+					v4tov6(ppp->dns2, o->data);
+				ppp->usedns &= ~2;
+				break;
+			default:
+				if(o->type < 8*sizeof(p->optmask))
+					p->optmask &= ~(1<<o->type);
+				break;
+			}
+			break;
+		}
+	}
+}
+
+
+/*
+ *  put a message through the lcp or ipcp state machine.  They are
+ *  very similar.  b is consumed: every path frees it.
+ */
+static void
+rcv(PPP *ppp, Pstate *p, Block *b)
+{
+	ulong len;
+	int err;
+	Lcpmsg *m;
+
+	if(BLEN(b) < 4){
+		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+	m = (Lcpmsg*)b->rp;
+	len = nhgets(m->len);
+	if(BLEN(b) < len){
+		netlog(ppp->f, Logppp, "ppp %s: short lcp message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+
+	netlog(ppp->f, Logppp, "ppp: %ux rcv %d len %d id %d/%d/%d\n",
+		p->proto, m->code, len, m->id, p->confid, p->id);
+
+	if(p->proto != Plcp && ppp->lcp->state != Sopened){
+		netlog(ppp->f, Logppp, "ppp: non-lcp with lcp not open\n");
+		freeblist(b);
+		return;
+	}
+
+	qlock(ppp);
+	switch(m->code){
+	case Lconfreq:
+		/* flush the output queue (cchan stays nil when pppopen could not open the ctl file) */
+		if(p->state == Sopened && p->proto == Plcp && ppp->cchan != nil)
+			kchanio(ppp->cchan, "f", 1, OWRITE);
+
+		printopts(ppp, p, b, 0);
+		err = getopts(ppp, p, b);
+		if(err < 0)
+			break;
+
+		if(m->id == p->rcvdconfid)
+			break;			/* don't change state for duplicates */
+		p->rcvdconfid = m->id;
+
+		switch(p->state){
+		case Sackrcvd:
+			if(err)
+				break;
+			newstate(ppp, p, Sopened);
+			break;
+		case Sclosed:
+		case Sopened:
+			config(ppp, p, 1);
+			if(err == 0)
+				newstate(ppp, p, Sacksent);
+			else
+				newstate(ppp, p, Sreqsent);
+			break;
+		/* below: our own request is already outstanding, so only the reply just sent matters */
+		case Sreqsent:
+		case Sacksent:
+			if(err == 0)
+				newstate(ppp, p, Sacksent);
+			else
+				newstate(ppp, p, Sreqsent);
+			break;
+		}
+		break;
+	case Lconfack:
+		if(p->confid != m->id){
+			/* ignore if it isn't the message we're sending */
+			netlog(ppp->f, Logppp, "ppp: dropping confack\n");
+			break;
+		}
+		p->confid = -1;		/* ignore duplicates */
+		p->id++;		/* avoid sending duplicates */
+
+		switch(p->state){
+		case Sopened:
+		case Sackrcvd:
+			config(ppp, p, 1);
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sreqsent:
+			newstate(ppp, p, Sackrcvd);
+			break;
+		case Sacksent:
+			newstate(ppp, p, Sopened);
+			break;
+		}
+		break;
+	case Lconfrej:
+	case Lconfnak:
+		if(p->confid != m->id) {
+			/* ignore if it isn't the message we're sending */
+			netlog(ppp->f, Logppp, "ppp: dropping confrej or confnak\n");
+			break;
+		}
+		p->confid = -1;		/* ignore duplicates */
+		p->id++;		/* avoid sending duplicates */
+
+		switch(p->state){
+		case Sopened:
+		case Sackrcvd:
+			config(ppp, p, 1);
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sreqsent:
+		case Sacksent:
+			printopts(ppp, p, b, 0);
+			rejopts(ppp, p, b, m->code);
+			config(ppp, p, 1);
+			break;
+		}
+		break;
+	case Ltermreq:
+		m->code = Ltermack;
+		b = putframe(ppp, p->proto, b);
+
+		switch(p->state){
+		case Sackrcvd:
+		case Sacksent:
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sopened:
+			newstate(ppp, p, Sclosing);
+			break;
+		}
+		break;
+	case Ltermack:
+		if(p->termid != m->id)	/* ignore if it isn't the message we're sending */
+			break;
+
+		if(p->proto == Plcp)
+			ppp->ipcp->state = Sclosed;
+		switch(p->state){
+		case Sclosing:
+			newstate(ppp, p, Sclosed);
+			break;
+		case Sackrcvd:
+			newstate(ppp, p, Sreqsent);
+			break;
+		case Sopened:
+			config(ppp, p, 0);
+			newstate(ppp, p, Sreqsent);
+			break;
+		}
+		break;
+	case Lcoderej:
+		netlog(ppp->f, Logppp, "ppp %s: code reject %d\n", ppp->ifc->dev, m->data[0]);
+		break;
+	case Lprotorej:
+		netlog(ppp->f, Logppp, "ppp %s: proto reject %lux\n", ppp->ifc->dev, nhgets(m->data));
+		break;
+	case Lechoreq:
+		m->code = Lechoack;
+		b = putframe(ppp, p->proto, b);
+		break;
+	case Lechoack:
+	case Ldiscard:
+		/* nothing to do */
+		break;
+	}
+
+	qunlock(ppp);
+	freeblist(b);
+}
+
+/*
+ * timer for protocol state machine: resend the pending request each tick until p->timeout runs out, then close
+ */
+static void
+ptimer(PPP *ppp, Pstate *p)
+{
+ if(p->state == Sopened || p->state == Sclosed) /* nothing outstanding in these states */
+ return;
+
+ p->timeout--;
+ switch(p->state){
+ case Sclosing:
+ sendtermreq(ppp, p);
+ break;
+ case Sreqsent:
+ case Sacksent:
+ if(p->timeout <= 0){
+ if(p->proto && ppp->cchan != nil)
+ kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
+ newstate(ppp, p, Sclosed);
+ } else {
+ config(ppp, p, 0); /* resend under the same id */
+ }
+ break;
+ case Sackrcvd:
+ if(p->timeout <= 0){
+ if(p->proto && ppp->cchan != nil)
+ kchanio(ppp->cchan, "f", 1, OWRITE); /* flush output queue */
+ newstate(ppp, p, Sclosed);
+ }
+ else {
+ config(ppp, p, 0);
+ newstate(ppp, p, Sreqsent); /* unlike the case above, fall back to Sreqsent after resending */
+ }
+ break;
+ }
+}
+
+/*
+ * timer for ppp: kproc body started by init; wakes every Period to run the protocol timers and lqm reports
+ */
+static void
+ppptimer(void *arg)
+{
+ PPP *ppp;
+
+ ppp = arg;
+ ppp->timep = up;
+ if(waserror()){ /* any error raised in the loop below ends this kproc */
+ netlog(ppp->f, Logppp, "ppptimer: %I: %s\n", ppp->local, up->env->errstr);
+ ppp->timep = 0;
+ pexit("hangup", 1);
+ }
+ for(;;){
+ tsleep(&up->sleep, return0, nil, Period);
+ if(ppp->pppup){ /* idle while the link is marked down */
+ qlock(ppp);
+
+ ptimer(ppp, ppp->lcp);
+ if(ppp->lcp->state == Sopened) /* ipcp is only timed once lcp is open */
+ ptimer(ppp, ppp->ipcp);
+
+ if(ppp->period && --(ppp->timeout) <= 0){ /* period is the lqm interval stored by getopts, in ticks of this loop */
+ ppp->timeout = ppp->period;
+ putlqm(ppp);
+ }
+
+ qunlock(ppp);
+ }
+ }
+}
+/* write "add 0 0 <gate>" to this stack's iproute file; failures are silently ignored */
+static void
+setdefroute(PPP *ppp, Ipaddr gate)
+{
+	char cmd[128], file[128];
+	int len, rfd;
+
+	snprint(file, sizeof(file), "#I%d/iproute", ppp->f->dev);
+	if((rfd = kopen(file, ORDWR)) < 0)
+		return;
+
+	len = snprint(cmd, sizeof cmd, "add 0 0 %I", gate);
+	kwrite(rfd, cmd, len);
+	kclose(rfd);
+}
+/* write a connect request with the local and remote addresses to our interface's ctl file */
+static void
+ipconnect(PPP *ppp)
+{
+	char cmd[128], file[128];
+	int len, cfd;
+
+	snprint(file, sizeof(file), "#I%d/ipifc/%d/ctl", ppp->f->dev, ppp->ifc->conv->x);
+	if((cfd = kopen(file, ORDWR)) < 0)
+		return;
+
+	len = snprint(cmd, sizeof cmd, "connect %I 255.255.255.255 %I", ppp->local, ppp->remote);
+	if(kwrite(cfd, cmd, len) != len)
+		print("ppp ipconnect: %s: %r\n", cmd);
+	kclose(cfd);
+}
+/* configure ppp for line dev, open it and start negotiation; returns ppp, or nil if dev can't be opened */
+PPP*
+pppopen(PPP *ppp, char *dev,
+	Ipaddr ipaddr, Ipaddr remip,
+	int mtu, int framing,
+	char *chapname, char *secret)
+{
+	int fd, cfd;
+	char ctl[Maxpath];
+
+	invalidate(ppp->remote);
+	invalidate(ppp->local);
+	invalidate(ppp->dns1);
+	invalidate(ppp->dns2);
+	ppp->mtu = Defmtu;
+	ppp->mru = mtu;
+	ppp->framing = framing;
+
+	if(remip != nil && validv4(remip)){
+		ipmove(ppp->remote, remip);
+		ppp->remotefrozen = 1;	/* pinit and getopts leave a frozen address alone */
+	}
+	if(ipaddr != nil && validv4(ipaddr)){
+		ipmove(ppp->local, ipaddr);
+		ppp->localfrozen = 1;
+	}
+
+	/* authentication goo: zero-fill, then copy at most size-1 bytes so both strings stay terminated */
+	memset(ppp->secret, 0, sizeof(ppp->secret));
+	if(secret != nil)
+		strncpy(ppp->secret, secret, sizeof(ppp->secret)-1);
+	memset(ppp->chapname, 0, sizeof(ppp->chapname));
+	if(chapname != nil)
+		strncpy(ppp->chapname, chapname, sizeof(ppp->chapname)-1);
+
+	if(strchr(dev, '!'))	/* a '!' marks a dial string rather than a file name */
+		fd = kdial(dev, nil, nil, nil);
+	else
+		fd = kopen(dev, ORDWR);
+	if(fd < 0){
+		netlog(ppp->f, Logppp, "ppp: can't open %s\n", dev);
+		return nil;
+	}
+	ppp->dchan = fdtochan(up->env->fgrp, fd, ORDWR, 0, 1);
+	kclose(fd);
+
+	/* set up serial line */
+/* XXX this stuff belongs in application, not driver */
+	snprint(ctl, sizeof(ctl), "%sctl", dev);	/* bounded: dev may be longer than Maxpath */
+	cfd = kopen(ctl, ORDWR);
+	if(cfd >= 0){	/* otherwise cchan stays as it was; users must check it for nil */
+		ppp->cchan = fdtochan(up->env->fgrp, cfd, ORDWR, 0, 1);
+		kclose(cfd);
+		kchanio(ppp->cchan, "m1", 2, OWRITE);	/* cts/rts flow control/fifo's) on */
+		kchanio(ppp->cchan, "q64000", 6, OWRITE);/* increas q size to 64k */
+		kchanio(ppp->cchan, "n1", 2, OWRITE);	/* nonblocking writes on */
+		kchanio(ppp->cchan, "r1", 2, OWRITE);	/* rts on */
+		kchanio(ppp->cchan, "d1", 2, OWRITE);	/* dtr on */
+	}
+
+	ppp->pppup = 1;
+	init(ppp);
+	return ppp;
+}
+/* under the ppp lock: report Ehungup, flush and hang up the line if it is up, and close both channels */
+static void
+hangup(PPP *ppp)
+{
+ qlock(ppp);
+ if(waserror()){ /* release the lock before passing any error on */
+ qunlock(ppp);
+ nexterror();
+ }
+ netlog(ppp->f, Logppp, "PPP Hangup\n");
+ errlog(ppp, Ehungup);
+ if(ppp->pppup && ppp->cchan != nil){
+ kchanio(ppp->cchan, "f", 1, OWRITE); /* flush */
+ kchanio(ppp->cchan, "h", 1, OWRITE); /* hangup */
+ }
+ cclose(ppp->dchan); /* NOTE(review): either channel may be nil here - confirm cclose accepts nil */
+ cclose(ppp->cchan);
+ ppp->dchan = nil; /* getframe and putframe test dchan for nil */
+ ppp->cchan = nil;
+ ppp->pppup = 0; /* makes ppptimer idle */
+ qunlock(ppp);
+ poperror();
+}
+
+/* return next input IP packet, handling control frames on the way; returns nil once getframe yields no block */
+Block*
+pppread(PPP *ppp)
+{
+ Block *b;
+ int proto;
+ Lcpmsg *m;
+
+ for(;;){
+ proto = getframe(ppp, &b);
+ if(b == nil)
+ return nil;
+ netlog(ppp->f, Logppp, "ppp: read proto %d len %d\n", proto, blocklen(b));
+ switch(proto){ /* every handler below consumes b */
+ case Plcp:
+ rcv(ppp, ppp->lcp, b);
+ break;
+ case Pipcp:
+ rcv(ppp, ppp->ipcp, b);
+ break;
+ case Pip:
+ if(ppp->ipcp->state == Sopened)
+ return b;
+ freeblist(b); /* ip before ipcp is open is dropped */
+ break;
+ case Plqm:
+ getlqm(ppp, b);
+ break;
+ case Pchap:
+ getchap(ppp, b);
+ break;
+ case Ppap:
+ getpap(ppp, b);
+ break;
+ case Pvjctcp:
+ case Pvjutcp:
+ if(ppp->ipcp->state == Sopened){
+ b = tcpuncompress(ppp->ctcp, b, proto, ppp->f);
+ if(b != nil)
+ return b;
+ }
+ freeblist(b);
+ break;
+ default:
+ netlog(ppp->f, Logppp, "unknown proto %ux\n", proto);
+ if(ppp->lcp->state == Sopened){
+ /* reject the protocol */
+ b->rp -= 6; /* room for code, id, length and the 2-byte rejected protocol; assumes the block has that much headroom - TODO confirm */
+ m = (Lcpmsg*)b->rp;
+ m->code = Lprotorej;
+ m->id = ++ppp->lcp->id;
+ hnputs(m->data, proto);
+ hnputs(m->len, BLEN(b));
+ b = putframe(ppp, Plcp, b);
+ }
+ freeblist(b);
+ break;
+ }
+ }
+ return nil; /* compiler confused */
+}
+
+/* transmit an IP packet; consumes b and returns its length, whether or not it was sent */
+int
+pppwrite(PPP *ppp, Block *b)
+{
+	ushort proto;
+	int r;
+
+	qlock(ppp);
+
+	/*
+	 * can't send ip packets till we're established, and not at
+	 * all once the link has hung up; such packets fall through
+	 * untouched to be measured and freed below
+	 */
+	if(ppp->ipcp->state == Sopened && ppp->dchan != nil){
+		/* flatten first, or else compression will barf */
+		b = concatblock(b);
+
+		/* with compression negotiated, compress() supplies the protocol number */
+		if(ppp->ipcp->flags & Fipcompress)
+			proto = compress(ppp->ctcp, b, ppp->f);
+		else
+			proto = Pip;
+		b = putframe(ppp, proto, b);
+	}
+
+	qunlock(ppp);
+
+	r = blocklen(b);
+	netlog(ppp->f, Logppp, "ppp wrt len %d\n", r);
+
+	freeblist(b);
+	return r;
+}
+
+/*
+ * link quality management: take in a peer's report (ignored unless exactly one Qualpkt long)
+ */
+static void
+getlqm(PPP *ppp, Block *b)
+{
+	Qualpkt *q;
+
+	if(BLEN(b) == sizeof(Qualpkt)){
+		q = (Qualpkt*)b->rp;
+		ppp->in.reports++;
+
+		/* the peer's own counters, as carried in the report */
+		ppp->pout.reports = nhgetl(q->peeroutreports);
+		ppp->pout.packets = nhgetl(q->peeroutpackets);
+		ppp->pout.bytes = nhgetl(q->peeroutbytes);
+		ppp->pin.reports = nhgetl(q->peerinreports);
+		ppp->pin.packets = nhgetl(q->peerinpackets);
+		ppp->pin.discards = nhgetl(q->peerindiscards);
+		ppp->pin.errors = nhgetl(q->peerinerrors);
+		ppp->pin.bytes = nhgetl(q->peerinbytes);
+
+		/* save our numbers at time of reception */
+		memmove(&ppp->sin, &ppp->in, sizeof(Qualstats));
+	}
+	freeblist(b);
+	if(ppp->period == 0)
+		putlqm(ppp);
+}
+static void
+putlqm(PPP *ppp)
+{ /* build a link quality report from our counters and send it as a Plqm frame */
+ Qualpkt *p;
+ Block *b;
+
+ b = allocb(sizeof(Qualpkt));
+ b->wp += sizeof(Qualpkt);
+ p = (Qualpkt*)b->rp;
+ hnputl(p->magic, 0);
+
+ /* hearsay (what he last told us) */
+ hnputl(p->lastoutreports, ppp->pout.reports);
+ hnputl(p->lastoutpackets, ppp->pout.packets);
+ hnputl(p->lastoutbytes, ppp->pout.bytes);
+
+ /* our numbers at time of last reception */
+ hnputl(p->peerinreports, ppp->sin.reports);
+ hnputl(p->peerinpackets, ppp->sin.packets);
+ hnputl(p->peerindiscards, ppp->sin.discards);
+ hnputl(p->peerinerrors, ppp->sin.errors);
+ hnputl(p->peerinbytes, ppp->sin.bytes);
+
+ /* our numbers now */
+ hnputl(p->peeroutreports, ppp->out.reports+1); /* +1: this report is counted only after it is sent */
+ hnputl(p->peeroutpackets, ppp->out.packets+1);
+ hnputl(p->peeroutbytes, ppp->out.bytes+53/*hack*/);
+
+ b = putframe(ppp, Plqm, b);
+ freeblist(b);
+ ppp->out.reports++;
+}
+
+/*
+ *  challenge response dialog: answer a challenge with md5(id, secret, challenge); consumes b
+ */
+static void
+getchap(PPP *ppp, Block *b)
+{
+	Lcpmsg *m;
+	int len, vlen, n;
+	char md5buf[512];
+
+	m = (Lcpmsg*)b->rp;
+	len = nhgets(m->len);
+	if(len < 4 || BLEN(b) < len){	/* must at least hold the 4-byte header */
+		netlog(ppp->f, Logppp, "ppp %s: short chap message\n", ppp->ifc->dev);
+		freeblist(b);
+		return;
+	}
+
+	switch(m->code){
+	case Cchallenge:
+		vlen = m->data[0];
+		if(vlen > len - 5){
+			netlog(ppp->f, Logppp, "PPP %s: bad challenge len\n", ppp->ifc->dev);
+			/* b is freed once, after the switch */
+			break;
+		}
+
+		netlog(ppp->f, Logppp, "PPP %s: CHAP Challenge\n", ppp->ifc->dev);
+		netlog(ppp->f, Logppp, "(secret %s chapname %s id %d)\n", ppp->secret, ppp->chapname, m->id);
+		/* create string to hash */
+		md5buf[0] = m->id;
+		strcpy(md5buf+1, ppp->secret);
+		n = strlen(ppp->secret) + 1;
+		memmove(md5buf+n, m->data+1, vlen);
+		n += vlen;
+		freeblist(b);
+
+		/* send reply */
+		len = 4 + 1 + 16 + strlen(ppp->chapname);
+		b = alloclcp(2, md5buf[0], len);
+		m = IPB2LCP(b);
+		m->data[0] = 16;
+		md5((uchar*)md5buf, n, m->data+1, 0);
+		memmove((char*)m->data+17, ppp->chapname, strlen(ppp->chapname));
+		hnputs(m->len, len);
+		b->wp += len-4;
+		b = putframe(ppp, Pchap, b);	/* the reply is what gets freed after the switch */
+		break;
+	case Cresponse:
+		netlog(ppp->f, Logppp, "PPP %s: chap response?\n", ppp->ifc->dev);
+		break;
+	case Csuccess:
+		netlog(ppp->f, Logppp, "PPP %s: chap succeeded\n", ppp->ifc->dev);
+		setphase(ppp, Pnet);
+		break;
+	case Cfailure:
+		netlog(ppp->f, Logppp, "PPP %s: chap failed: %.*s\n", ppp->ifc->dev, len-4, m->data);
+		errlog(ppp, Eperm);
+		break;
+	default:
+		netlog(ppp->f, Logppp, "PPP %s: chap code %d?\n", ppp->ifc->dev, m->code);
+		break;
+	}
+	freeblist(b);
+}
+
+/*
+ * password authentication protocol dialog: send chapname and secret as length-prefixed strings
+ * -- obsolete but all we know how to use with NT just now
+ */
+static void
+sendpap(PPP *ppp)
+{
+ Lcpmsg *m;
+ int clen, slen, len;
+ Block *b;
+ uchar *p;
+
+ clen = strlen(ppp->chapname);
+ slen = strlen(ppp->secret);
+ len = 4 + 1 + clen + 1 + slen; /* header, then two strings each preceded by a length byte */
+ ppp->papid = ++ppp->lcp->id;
+ b = alloclcp(Cpapreq, ppp->papid, len);
+ m = IPB2LCP(b);
+ p = m->data;
+ p[0] = clen;
+ memmove(p+1, ppp->chapname, clen);
+ p += clen + 1;
+ p[0] = slen;
+ memmove(p+1, ppp->secret, slen);
+ hnputs(m->len, len);
+ b->wp += len-4; /* the header's 4 bytes were already counted by alloclcp */
+ b = putframe(ppp, Ppap, b);
+ netlog(ppp->f, Logppp, "PPP %s: sent pap auth req (%d)\n", ppp->ifc->dev, len);
+ freeblist(b);
+}
+/* handle the peer's answer to our pap request: an ack moves on to Pnet, a nak reports Eperm; consumes b */
+static void
+getpap(PPP *ppp, Block *b)
+{
+ Lcpmsg *m;
+ int len;
+
+ m = (Lcpmsg*)b->rp;
+ len = nhgets(m->len);
+ if(BLEN(b) < len){
+ netlog(ppp->f, Logppp, "ppp %s: short pap message\n", ppp->ifc->dev);
+ freeblist(b);
+ return;
+ }
+
+ switch(m->code){
+ case Cpapreq:
+ netlog(ppp->f, Logppp, "PPP %s: pap request?\n", ppp->ifc->dev);
+ break;
+ case Cpapack:
+ netlog(ppp->f, Logppp, "PPP %s: PAP succeeded\n", ppp->ifc->dev);
+ setphase(ppp, Pnet);
+ break;
+ case Cpapnak:
+ if(m->data[0]) /* first data byte is the length of the message text that follows */
+ netlog(ppp->f, Logppp, "PPP %s: PAP failed: %.*s\n", ppp->ifc->dev, len-5, m->data+1);
+ else
+ netlog(ppp->f, Logppp, "PPP %s: PAP failed\n", ppp->ifc->dev);
+ errlog(ppp, Eperm);
+ break;
+ default:
+ netlog(ppp->f, Logppp, "PPP %s: pap code %d?\n", ppp->ifc->dev, m->code);
+ break;
+ }
+ freeblist(b);
+}
+
+/* log (via netlog) every option in the config message at b->rp, decoded according */
+/* to p->proto; send picks the "send"/"recv" label.  b is only read, never freed. */
+static void
+printopts(PPP *ppp, Pstate *p, Block *b, int send)
+{
+	Lcpmsg *m;
+	Lcpopt *o;
+	int proto, x, period;
+	uchar *cp;
+	char *code, *dir;
+
+	m = (Lcpmsg*)b->rp;
+	switch(m->code) {
+	default: code = "<unknown>"; break;
+	case Lconfreq: code = "confrequest"; break;
+	case Lconfack: code = "confack"; break;
+	case Lconfnak: code = "confnak"; break;
+	case Lconfrej: code = "confreject"; break;
+	}
+
+	dir = send ? "send" : "recv";
+
+	netlog(ppp->f, Logppp, "ppp: %s %s: id=%d\n", dir, code, m->id);
+
+	for(cp = m->data; cp < b->wp; cp += o->len){
+		o = (Lcpopt*)cp;
+		if(cp + 2 > b->wp || o->len < 2 || cp + o->len > b->wp){	/* type+len header must fit, and len must cover it */
+			netlog(ppp->f, Logppp, "\tbad option length %ux\n", o->type);
+			return;
+		}
+
+		switch(p->proto){
+		case Plcp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Omtu:
+				netlog(ppp->f, Logppp, "\tmtu = %d\n", nhgets(o->data));
+				break;
+			case Octlmap:
+				netlog(ppp->f, Logppp, "\tctlmap = %ux\n", nhgetl(o->data));
+				break;
+			case Oauth:
+				netlog(ppp->f, Logppp, "\tauth = ");	/* the protocol name follows in the next netlog */
+				proto = nhgets(o->data);
+				switch(proto) {
+				default:
+					netlog(ppp->f, Logppp, "unknown auth proto %d\n", proto);
+					break;
+				case Ppap:
+					netlog(ppp->f, Logppp, "password\n");
+					break;
+				case Pchap:
+					netlog(ppp->f, Logppp, "chap %ux\n", o->data[2]);
+					break;
+				}
+				break;
+			case Oquality:
+				proto = nhgets(o->data);
+				switch(proto) {
+				default:
+					netlog(ppp->f, Logppp, "\tunknown quality proto %d\n", proto);
+					break;
+				case Plqm:
+					x = nhgetl(o->data+2)*10;
+					period = (x+Period-1)/Period;
+					netlog(ppp->f, Logppp, "\tlqm period = %d\n", period);
+					break;
+				}
+				break;	/* was missing: fell into Omagic and logged a bogus magic */
+			case Omagic:
+				netlog(ppp->f, Logppp, "\tmagic = %ux\n", nhgetl(o->data));
+				break;
+			case Opc:
+				netlog(ppp->f, Logppp, "\tprotocol compress\n");
+				break;
+			case Oac:
+				netlog(ppp->f, Logppp, "\taddr compress\n");
+				break;
+			}
+			break;
+		case Pccp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Ocoui:
+				netlog(ppp->f, Logppp, "\tOUI\n");
+				break;
+			case Ocstac:
+				netlog(ppp->f, Logppp, "\tstac LZS\n");
+				break;
+			case Ocmppc:
+				netlog(ppp->f, Logppp, "\tMicrosoft PPC len=%d %ux\n", o->len, nhgetl(o->data));
+				break;
+			}
+			break;
+		case Pecp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Oeoui:
+				netlog(ppp->f, Logppp, "\tOUI\n");
+				break;
+			case Oedese:
+				netlog(ppp->f, Logppp, "\tDES\n");
+				break;
+			}
+			break;
+		case Pipcp:
+			switch(o->type){
+			default:
+				netlog(ppp->f, Logppp, "\tunknown %d len=%d\n", o->type, o->len);
+				break;
+			case Oipaddrs:
+				netlog(ppp->f, Logppp, "\tip addrs - deprecated\n");
+				break;
+			case Oipcompress:
+				netlog(ppp->f, Logppp, "\tip compress\n");
+				break;
+			case Oipaddr:
+				netlog(ppp->f, Logppp, "\tip addr %V\n", o->data);
+				break;
+			case Oipdns:
+				netlog(ppp->f, Logppp, "\tdns addr %V\n", o->data);
+				break;
+			case Oipwins:
+				netlog(ppp->f, Logppp, "\twins addr %V\n", o->data);
+				break;
+			case Oipdns2:
+				netlog(ppp->f, Logppp, "\tdns2 addr %V\n", o->data);
+				break;
+			case Oipwins2:
+				netlog(ppp->f, Logppp, "\twins2 addr %V\n", o->data);
+				break;
+			}
+			break;
+		}
+	}
+}
+
+static void
+sendtermreq(PPP *ppp, Pstate *p)	/* send a terminate request for protocol p, then enter Sclosing */
+{
+	Block *b;
+	Lcpmsg *m;
+
+	p->termid = ++(p->id);	/* remember the id used for this terminate request */
+	b = alloclcp(Ltermreq, p->termid, 4);
+	m = IPB2LCP(b);
+	hnputs(m->len, 4);	/* header only, no data */
+	b = putframe(ppp, p->proto, b);	/* free what putframe hands back, as sendpap does */
+	freeblist(b);
+	newstate(ppp, p, Sclosing);
+}
+
+static void
+sendechoreq(PPP *ppp, Pstate *p)	/* send a header-only echo request for protocol p */
+{
+	Block *b;
+	Lcpmsg *m;
+
+	p->termid = ++(p->id);	/* NOTE(review): also overwrites termid -- intended, or copied from sendtermreq? */
+	b = alloclcp(Lechoreq, p->id, 4);
+	m = IPB2LCP(b);
+	hnputs(m->len, 4);
+	b = putframe(ppp, p->proto, b);	/* free what putframe hands back, as sendpap does */
+	freeblist(b);
+}
+
+/*
+ * non-zero when the first IPv4off bytes of addr equal v4prefix
+ */
+static int
+validv4(Ipaddr addr)
+{
+	return !memcmp(addr, v4prefix, IPv4off);
+}
+
+static void
+invalidate(Ipaddr addr) /* overwrite addr in place with IPnoaddr */
+{
+ ipmove(addr, IPnoaddr);
+}
--- /dev/null
+++ b/os/ip.original/ppp.h
@@ -1,0 +1,258 @@
+typedef struct PPP PPP;
+typedef struct Pstate Pstate;
+typedef struct Lcpmsg Lcpmsg;
+typedef struct Lcpopt Lcpopt;
+typedef struct Qualpkt Qualpkt;
+typedef struct Qualstats Qualstats;
+typedef struct Tcpc Tcpc;
+
+typedef uchar Ipaddr[IPaddrlen];
+
+enum
+{
+ HDLC_frame= 0x7e,
+ HDLC_esc= 0x7d,
+
+ /* PPP frame fields */
+ PPP_addr= 0xff,
+ PPP_ctl= 0x3,
+ PPP_initfcs= 0xffff,
+ PPP_goodfcs= 0xf0b8,
+
+ /* PPP phases (values of PPP.phase) */
+ Pdead= 0,
+ Plink, /* doing LCP */
+ Pauth, /* doing chap */
+ Pnet, /* doing IPCP, CCP */
+ Pterm, /* closing down */
+
+ /* PPP protocol types */
+ Pip= 0x21, /* internet */
+ Pvjctcp= 0x2d, /* compressing van jacobson tcp */
+ Pvjutcp= 0x2f, /* uncompressing van jacobson tcp */
+ Pcdata= 0xfd, /* compressed datagram */
+ Pipcp= 0x8021, /* ip control */
+ Pecp= 0x8053, /* encryption control */
+ Pccp= 0x80fd, /* compressed datagram control */
+ Plcp= 0xc021, /* link control */
+ Ppap= 0xc023, /* password auth. protocol */
+ Plqm= 0xc025, /* link quality monitoring */
+ Pchap= 0xc223, /* challenge/response */
+
+ /* LCP codes (Lcpmsg.code) */
+ Lconfreq= 1,
+ Lconfack= 2,
+ Lconfnak= 3,
+ Lconfrej= 4,
+ Ltermreq= 5,
+ Ltermack= 6,
+ Lcoderej= 7,
+ Lprotorej= 8,
+ Lechoreq= 9,
+ Lechoack= 10,
+ Ldiscard= 11,
+
+ /* Lcp configure options (Lcpopt.type) */
+ Omtu= 1,
+ Octlmap= 2,
+ Oauth= 3,
+ Oquality= 4,
+ Omagic= 5,
+ Opc= 7,
+ Oac= 8,
+ Obad= 12, /* for testing */
+
+ /* authentication protocols */
+ APmd5= 5,
+
+ /* lcp flags: bit number is the option number */
+ Fmtu= 1<<Omtu,
+ Fctlmap= 1<<Octlmap,
+ Fauth= 1<<Oauth,
+ Fquality= 1<<Oquality,
+ Fmagic= 1<<Omagic,
+ Fpc= 1<<Opc,
+ Fac= 1<<Oac,
+ Fbad= 1<<Obad,
+
+ /* Chap codes (same numbering space as the Pap codes below) */
+ Cchallenge= 1,
+ Cresponse= 2,
+ Csuccess= 3,
+ Cfailure= 4,
+
+ /* Pap codes */
+ Cpapreq= 1,
+ Cpapack= 2,
+ Cpapnak= 3,
+
+ /* link states (values of Pstate.state) */
+ Sclosed= 0,
+ Sclosing,
+ Sreqsent,
+ Sackrcvd,
+ Sacksent,
+ Sopened,
+
+ /* ccp configure options */
+ Ocoui= 0, /* proprietary compression */
+ Ocstac= 17, /* stac electronics LZS */
+ Ocmppc= 18, /* microsoft ppc */
+
+ /* ccp flags: bit number is the option number */
+ Fcoui= 1<<Ocoui,
+ Fcstac= 1<<Ocstac,
+ Fcmppc= 1<<Ocmppc,
+
+ /* ecp configure options */
+ Oeoui= 0, /* proprietary compression */
+ Oedese= 1, /* DES */
+
+ /* ecp flags: bit number is the option number */
+ Feoui= 1<<Oeoui,
+ Fedese= 1<<Oedese,
+
+ /* ipcp configure options */
+ Oipaddrs= 1,
+ Oipcompress= 2,
+ Oipaddr= 3,
+ Oipdns= 129,
+ Oipwins= 130,
+ Oipdns2= 131,
+ Oipwins2= 132,
+
+ /* ipcp flags (none are defined for options 129-132) */
+ Fipaddrs= 1<<Oipaddrs,
+ Fipcompress= 1<<Oipcompress,
+ Fipaddr= 1<<Oipaddr,
+
+ Period= 3*1000, /* period of retransmit process (in ms) */
+ Timeout= 10, /* xmit timeout (in Periods) */
+
+ MAX_STATES = 16, /* van jacobson compression states */
+ Defmtu= 1450, /* default that we will ask for */
+ Minmtu= 128, /* minimum that we will accept */
+ Maxmtu= 2000, /* maximum that we will accept */
+};
+
+
+struct Pstate /* negotiation state of one control protocol */
+{
+ int proto; /* protocol type */
+ int timeout; /* for current state */
+ int rxtimeout; /* for current retransmit */
+ ulong flags; /* options received */
+ uchar id; /* id of current message */
+ uchar confid; /* id of current config message */
+ uchar termid; /* id of current termination message */
+ uchar rcvdconfid; /* id of last conf message received */
+ uchar state; /* PPP link state */
+ ulong optmask; /* which options to request */
+ int echoack; /* received echo ack */
+ int echotimeout; /* echo timeout */
+};
+
+struct Qualstats /* one set of link quality counters; PPP keeps several */
+{
+ ulong reports;
+ ulong packets;
+ ulong bytes;
+ ulong discards;
+ ulong errors;
+};
+
+struct PPP /* state of one ppp link */
+{
+ QLock; /* unnamed lock member */
+
+ Chan* dchan; /* serial line */
+ Chan* cchan; /* serial line control */
+ int framing; /* non-zero to use framing characters */
+ Ipaddr local;
+ int localfrozen;
+ Ipaddr remote;
+ int remotefrozen;
+
+ int pppup;
+ Fs *f; /* file system we belong to */
+ Ipifc* ifc;
+ Proc* readp; /* reading process */
+ Proc* timep; /* timer process */
+ Block* inbuf; /* input buffer */
+ Block* outbuf; /* output buffer */
+ QLock outlock; /* and its lock */
+
+ ulong magic; /* magic number to detect loop backs */
+ ulong rctlmap; /* map of chars to ignore in rcvr */
+ ulong xctlmap; /* map of chars to escape in xmit */
+ int phase; /* PPP phase */
+ Pstate* lcp; /* lcp state */
+ Pstate* ipcp; /* ipcp state */
+ char secret[256]; /* md5 key */
+ char chapname[256]; /* chap system name */
+ Tcpc* ctcp;
+ ulong mtu; /* maximum xmit size */
+ ulong mru; /* maximum recv size */
+
+ int baud;
+ int usepap; /* authentication is PAP in every sense, not CHAP */
+ int papid; /* id of the last pap request sent */
+ int usechap;
+
+ /* rfc */
+ int usedns;
+ Ipaddr dns1;
+ Ipaddr dns2;
+
+ /* link quality monitoring */
+ int period; /* lqm period */
+ int timeout; /* time to next lqm packet */
+ Qualstats in; /* local */
+ Qualstats out;
+ Qualstats pin; /* peer */
+ Qualstats pout;
+ Qualstats sin; /* saved */
+};
+
+PPP* pppopen(PPP*, char*, Ipaddr, Ipaddr, int, int, char*, char*);
+Block* pppread(PPP*);
+int pppwrite(PPP*, Block*);
+void pppclose(PPP*);
+
+struct Lcpmsg /* header of a control message as laid out in a Block */
+{
+ uchar code;
+ uchar id;
+ uchar len[2]; /* 16-bit length, accessed with hnputs/nhgets */
+ uchar data[1]; /* first byte of the variable-length payload */
+};
+
+struct Lcpopt /* one option inside Lcpmsg.data */
+{
+ uchar type;
+ uchar len; /* printopts steps to the next option by this amount */
+ uchar data[1]; /* first byte of the option value */
+};
+
+struct Qualpkt /* presumably the link quality report packet layout -- TODO confirm against ppp.c */
+{
+ uchar magic[4];
+
+ uchar lastoutreports[4];
+ uchar lastoutpackets[4];
+ uchar lastoutbytes[4];
+ uchar peerinreports[4];
+ uchar peerinpackets[4];
+ uchar peerindiscards[4];
+ uchar peerinerrors[4];
+ uchar peerinbytes[4];
+ uchar peeroutreports[4];
+ uchar peeroutpackets[4];
+ uchar peeroutbytes[4];
+};
+
+ushort compress(Tcpc*, Block*, Fs*);
+Tcpc* compress_init(Tcpc*);
+int compress_negotiate(Tcpc*, uchar*);
+ushort tcpcompress(Tcpc*, Block*, Fs*);
+Block* tcpuncompress(Tcpc*, Block*, ushort, Fs*);
--- /dev/null
+++ b/os/ip.original/pppmedium.c
@@ -1,0 +1,192 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "kernel.h"
+#include "ppp.h"
+
+static void pppreader(void *a);
+static void pppbind(Ipifc *ifc, int argc, char **argv);
+static void pppunbind(Ipifc *ifc);
+static void pppbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
+static void deadremote(Ipifc *ifc);
+
+Medium pppmedium = /* registered with the ip stack by pppmediumlink */
+{
+.name= "ppp",
+.hsize= 4,
+.mintu= Minmtu, /* limits come from ppp.h */
+.maxtu= Maxmtu,
+.maclen= 0,
+.bind= pppbind,
+.unbind= pppunbind,
+.bwrite= pppbwrite,
+.unbindonclose= 0, /* don't unbind on last close */
+};
+
+/*
+ * called to bind an IP ifc to a ppp device (argv[2], handed to pppopen)
+ * called with ifc wlock'd
+ */
+static void
+pppbind(Ipifc *ifc, int argc, char **argv)
+{
+ PPP *ppp;
+ Ipaddr ipaddr, remip;
+ int mtu, framing;
+ char *chapname, *secret;
+
+ if(argc < 3)
+ error(Ebadarg);
+
+ ipmove(ipaddr, IPnoaddr); /* defaults used for absent or "-" arguments */
+ ipmove(remip, IPnoaddr);
+ mtu = Defmtu;
+ framing = 1;
+ chapname = nil;
+ secret = nil;
+
+ switch(argc){ /* every case deliberately falls through to the ones below it */
+ default: /* more than 9 args: the extras are ignored */
+ case 9:
+ if(argv[8][0] != '-')
+ secret = argv[8];
+ case 8:
+ if(argv[7][0] != '-')
+ chapname = argv[7];
+ case 7:
+ if(argv[6][0] != '-')
+ framing = strtoul(argv[6], 0, 0);
+ case 6:
+ if(argv[5][0] != '-')
+ mtu = strtoul(argv[5], 0, 0);
+ case 5:
+ if(argv[4][0] != '-')
+ parseip(remip, argv[4]);
+ case 4:
+ if(argv[3][0] != '-')
+ parseip(ipaddr, argv[3]);
+ case 3:
+ break;
+ }
+
+ ppp = smalloc(sizeof(*ppp));
+ ppp->ifc = ifc;
+ ppp->f = ifc->conv->p->f;
+ ifc->arg = ppp; /* pppunbind and pppreader find the PPP through ifc->arg */
+ if(waserror()){
+ pppunbind(ifc); /* NOTE(review): pppunbind does not free ppp itself; confirm pppclose does */
+ nexterror();
+ }
+ if(pppopen(ppp, argv[2], ipaddr, remip, mtu, framing, chapname, secret) == nil)
+ error("ppp open failed");
+ poperror();
+ kproc("pppreader", pppreader, ifc, KPDUPPG|KPDUPFDG);
+}
+
+static void
+pppreader(void *a) /* kproc body started by pppbind; a is the Ipifc */
+{
+ Ipifc *ifc;
+ Block *bp;
+ PPP *ppp;
+
+ ifc = a;
+ ppp = ifc->arg;
+ ppp->readp = up; /* hide identity under a rock for unbind */
+ setpri(PriHi);
+
+ if(waserror()){ /* any error raised below ends this process */
+ netlog(ppp->f, Logppp, "pppreader: %I: %s\n", ppp->local, up->env->errstr);
+ ppp->readp = 0; /* lets the wait loop in pppunbind finish */
+ deadremote(ifc);
+ pexit("hangup", 1);
+ }
+
+ for(;;){
+ bp = pppread(ppp);
+ if(bp == nil)
+ error("hungup");
+ if(!canrlock(ifc)){ /* could not get the read lock: drop the packet instead of blocking */
+ freeb(bp);
+ continue;
+ }
+ if(waserror()){ /* release the read lock before passing the error on */
+ runlock(ifc);
+ nexterror();
+ }
+ ifc->in++;
+ if(ifc->lifc == nil) /* no address on the interface: discard */
+ freeb(bp);
+ else
+ ipiput(ppp->f, ifc, bp);
+ runlock(ifc);
+ poperror();
+ }
+}
+
+/*
+ * called with ifc wlock'd; a no-op when ifc->arg is already nil
+ */
+static void
+pppunbind(Ipifc *ifc)
+{
+ PPP *ppp = ifc->arg;
+
+ if(ppp == nil)
+ return;
+ if(ppp->readp) /* ask both kprocs to stop */
+ postnote(ppp->readp, 1, "unbind", 0);
+ if(ppp->timep)
+ postnote(ppp->timep, 1, "unbind", 0);
+
+ /* wait for kprocs to die */
+ while(ppp->readp != 0 || ppp->timep != 0) /* polled every 300 ms; pppreader clears readp when it exits */
+ tsleep(&up->sleep, return0, 0, 300);
+
+ pppclose(ppp);
+ qclose(ifc->conv->eq);
+ ifc->arg = nil; /* NOTE(review): ppp is not freed here; confirm pppclose releases it */
+}
+
+/*
+ * called by ipoput with a single packet to write with ifc rlock'd;
+ * the version and next-hop arguments are unused, and pppwrite's result is ignored
+ */
+static void
+pppbwrite(Ipifc *ifc, Block *bp, int, uchar*)
+{
+ PPP *ppp = ifc->arg;
+
+ pppwrite(ppp, bp);
+ ifc->out++; /* counted whether or not pppwrite succeeded */
+}
+
+/*
+ * The other end hung up: unbind the interface by writing "unbind" to its
+ * ctl file.  A redundant unbind (when we are the ones hanging up) is harmless.
+ */
+static void
+deadremote(Ipifc *ifc)
+{
+	PPP *ppp;
+	char ctl[128];
+	int cfd;
+
+	ppp = ifc->arg;
+	snprint(ctl, sizeof ctl, "#I%d/ipifc/%d/ctl", ppp->f->dev, ifc->conv->x);
+	cfd = kopen(ctl, ORDWR);
+	if(cfd >= 0){
+		kwrite(cfd, "unbind", sizeof("unbind")-1);
+		kclose(cfd);
+	}
+}
+
+void
+pppmediumlink(void) /* register pppmedium with the ip stack */
+{
+ addipmedium(&pppmedium);
+}
--- /dev/null
+++ b/os/ip.original/ptclbsum.c
@@ -1,0 +1,72 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "ip.h"
+
+static short endian = 1; /* probe value for the run-time byte order test */
+static uchar* aendian = (uchar*)&endian; /* points at the first byte in memory of endian */
+#define LITTLE *aendian /* non-zero when the low-order byte is stored first */
+
+ushort
+ptclbsum(uchar *addr, int len) /* 16-bit sum with carries folded back in, over len bytes at addr */
+{
+ ulong losum, hisum, mdsum, x;
+ ulong t1, t2;
+
+ losum = 0;
+ hisum = 0;
+ mdsum = 0;
+
+ x = 0;
+ if((uintptr)addr & 1) { /* odd start address: take one byte first so the ushort loads start even */
+ if(len) {
+ hisum += addr[0];
+ len--;
+ addr++;
+ }
+ x = 1; /* remember that we started on an odd address */
+ }
+ while(len >= 16) { /* unrolled: eight 16-bit words per pass, loads interleaved with adds */
+ t1 = *(ushort*)(addr+0);
+ t2 = *(ushort*)(addr+2); mdsum += t1;
+ t1 = *(ushort*)(addr+4); mdsum += t2;
+ t2 = *(ushort*)(addr+6); mdsum += t1;
+ t1 = *(ushort*)(addr+8); mdsum += t2;
+ t2 = *(ushort*)(addr+10); mdsum += t1;
+ t1 = *(ushort*)(addr+12); mdsum += t2;
+ t2 = *(ushort*)(addr+14); mdsum += t1;
+ mdsum += t2;
+ len -= 16;
+ addr += 16;
+ }
+ while(len >= 2) { /* remaining whole words */
+ mdsum += *(ushort*)addr;
+ len -= 2;
+ addr += 2;
+ }
+ if(x) { /* odd start: the byte lanes are the other way round from the even case */
+ if(len)
+ losum += addr[0];
+ if(LITTLE)
+ losum += mdsum;
+ else
+ hisum += mdsum;
+ } else {
+ if(len) /* trailing odd byte */
+ hisum += addr[0];
+ if(LITTLE)
+ hisum += mdsum;
+ else
+ losum += mdsum;
+ }
+
+ losum += hisum >> 8; /* merge hisum into losum with its two low bytes swapped */
+ losum += (hisum & 0xff) << 8;
+ while(hisum = losum>>16) /* assignment intended: fold carries until the sum fits in 16 bits */
+ losum = hisum + (losum & 0xffff);
+
+ return losum & 0xffff;
+}
--- /dev/null
+++ b/os/ip.original/rudp.c
@@ -1,0 +1,1092 @@
+/*
+ * This protocol is compatible with UDP's packet format.
+ * It could be done over UDP if need be.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+#define DEBUG 0 /* set non-zero to enable DPRINT */
+#define DPRINT if(DEBUG)print /* expands to a bare if: keep it out of unbraced if/else bodies */
+
+#define SEQDIFF(a,b) ( (a)>=(b)?\
+ (a)-(b):\
+ 0xffffffffUL-((b)-(a)) ) /* distance from b up to a, allowing for wraparound */
+#define INSEQ(a,start,end) ( (start)<=(end)?\
+ ((a)>(start)&&(a)<=(end)):\
+ ((a)>(start)||(a)<=(end)) ) /* true when a lies in (start, end], allowing for wraparound */
+#define UNACKED(r) SEQDIFF(r->sndseq, r->ackrcvd) /* packets sent but not yet acked */
+#define NEXTSEQ(a) ( (a)+1 == 0 ? 1 : (a)+1 ) /* successor that skips 0; the headers use 0 for "none" */
+
+enum
+{
+ UDP_HDRSIZE = 20, /* pseudo header + udp header */
+ UDP_PHDRSIZE = 12, /* pseudo header */
+ UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
+ UDP_IPHDR = 8, /* ip header bytes that precede the pseudo header in Udphdr */
+ IP_UDPPROTO = 254, /* ip protocol number rudp registers under (rudpinit) */
+ UDP_USEAD7 = 52, /* size of the "headers++4" user address block */
+ UDP_USEAD6 = 36, /* size of the "headers" user address block */
+ UDP_USEAD4 = 12,
+
+ Rudprxms = 200, /* retransmit once timeout exceeds this times xmits (ms) */
+ Rudptickms = 50, /* relackproc wakeup interval (ms) */
+ Rudpmaxxmit = 10,
+ Maxunacked = 100, /* senders block while more than this many packets are unacked */
+
+};
+
+#define Hangupgen 0xffffffff /* used only in hangup messages */
+
+typedef struct Udphdr Udphdr;
+struct Udphdr
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused; /* rudpiput saves this as the old ttl, then zeroes it for the checksum */
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+typedef struct Rudphdr Rudphdr;
+struct Rudphdr /* same leading layout as Udphdr, followed by the rudp fields */
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+
+ /* pseudo header starts here */
+ uchar Unused;
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[4]; /* Ip source */
+ uchar udpdst[4]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length (includes rudp header) */
+ uchar udpcksum[2]; /* Checksum */
+
+ /* rudp header */
+ uchar relseq[4]; /* id of this packet (or 0) */
+ uchar relsgen[4]; /* generation/time stamp */
+ uchar relack[4]; /* packet being acked (or 0) */
+ uchar relagen[4]; /* generation/time stamp */
+};
+
+
+/*
+ * one state structure per destination (address and port), kept on Rudpcb.r
+ */
+typedef struct Reliable Reliable;
+struct Reliable
+{
+ Ref; /* one reference for the list plus one per relstate caller */
+
+ Reliable *next;
+
+ uchar addr[IPaddrlen]; /* always V6 when put here */
+ ushort port;
+
+ Block *unacked; /* unacked msg list */
+ Block *unackedtail; /* and its tail */
+
+ int timeout; /* time since first unacked msg sent */
+ int xmits; /* number of times first unacked msg sent */
+
+ ulong sndseq; /* next packet to be sent */
+ ulong sndgen; /* and its generation */
+
+ ulong rcvseq; /* last packet received */
+ ulong rcvgen; /* and its generation */
+
+ ulong acksent; /* last ack sent */
+ ulong ackrcvd; /* last msg for which ack was rcvd */
+
+ /* flow control */
+ QLock lock;
+ Rendez vous; /* rudpkick sleeps here until flow() is true */
+ int blocked; /* set while a sender sleeps on vous */
+};
+
+
+
+/* MIB II counters; rudpstats prints them in the order declared here */
+typedef struct Rudpstats Rudpstats;
+struct Rudpstats
+{
+ ulong rudpInDatagrams;
+ ulong rudpNoPorts;
+ ulong rudpInErrors;
+ ulong rudpOutDatagrams;
+};
+
+typedef struct Rudppriv Rudppriv;
+struct Rudppriv /* per-Proto private data, allocated in rudpinit */
+{
+ Ipht ht; /* conversations added by rudpconnect and rudpannounce */
+
+ /* MIB counters */
+ Rudpstats ustats;
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+ ulong rxmits; /* # of retransmissions */
+ ulong orders; /* # of out of order pkts */
+
+ /* keeping track of the ack kproc */
+ int ackprocstarted; /* set once relackproc has been started */
+ QLock apl; /* held while starting it */
+};
+
+
+static ulong generation = 0; /* next sndgen to hand out; relstate seeds it from rand() while it is 0 */
+static Rendez rend; /* NOTE(review): not referenced in the code visible here */
+
+/*
+ * protocol specific part of Conv (reached through c->ptcl)
+ */
+typedef struct Rudpcb Rudpcb;
+struct Rudpcb
+{
+ QLock; /* unnamed lock member; taken with qlock(ucb) */
+ uchar headers; /* 0, 6 ("headers") or 7 ("headers++4"), set by rudpctl */
+ uchar randdrop; /* percentage of output packets to drop, set by rudpctl */
+ Reliable *r; /* list of per-destination state */
+};
+
+/*
+ * local functions
+ */
+void relsendack(Conv*, Reliable*, int);
+int reliput(Conv*, Block*, uchar*, ushort);
+Reliable *relstate(Rudpcb*, uchar*, ushort, char*);
+void relput(Reliable*);
+void relforget(Conv *, uchar*, int, int);
+void relackproc(void *);
+void relackq(Reliable *, Block*);
+void relhangup(Conv *, Reliable*);
+void relrexmit(Conv *, Reliable*);
+void relput(Reliable*);
+void rudpkick(void *x);
+
+static void
+rudpstartackproc(Proto *rudp)	/* start the relackproc kproc, once per Proto */
+{
+	Rudppriv *rpriv;
+	char kpname[KNAMELEN];
+
+	rpriv = rudp->priv;
+	if(rpriv->ackprocstarted == 0){	/* unlocked peek; rechecked under apl */
+		qlock(&rpriv->apl);
+		if(rpriv->ackprocstarted == 0){
+			snprint(kpname, sizeof kpname, "#I%drudpack", rudp->f->dev);	/* bounded: kpname holds only KNAMELEN bytes */
+			kproc(kpname, relackproc, rudp, 0);
+			rpriv->ackprocstarted = 1;
+		}
+		qunlock(&rpriv->apl);
+	}
+}
+
+static char*
+rudpconnect(Conv *c, char **argv, int argc)	/* returns nil on success, else the error string */
+{
+	char *e;
+	Rudppriv *upriv;
+
+	upriv = c->p->priv;
+	rudpstartackproc(c->p);
+	e = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, e);
+	if(e == nil)	/* like rudpannounce: only hash conversations that were set up successfully */
+		iphtadd(&upriv->ht, c);
+	return e;
+}
+
+
+static int
+rudpstate(Conv *c, char *state, int n)	/* "Open"/"Closed", then " addr/unacked" per destination */
+{
+	Rudpcb *cb;
+	Reliable *rel;
+	int used;
+
+	cb = (Rudpcb*)c->ptcl;
+	used = snprint(state, n, "%s", c->inuse?"Open":"Closed");
+	qlock(cb);
+	for(rel = cb->r; rel != nil; rel = rel->next)
+		used += snprint(state+used, n-used, " %I/%ld", rel->addr, UNACKED(rel));
+	qunlock(cb);
+	return used;
+}
+
+static char*
+rudpannounce(Conv *c, char** argv, int argc)
+{
+	Rudppriv *priv;
+	char *err;
+
+	rudpstartackproc(c->p);
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		/* announced: mark it connected and add it to the hash table */
+		priv = c->p->priv;
+		Fsconnected(c, nil);
+		iphtadd(&priv->ht, c);
+	}
+	return err;
+}
+
+static void
+rudpcreate(Conv *c) /* allocate the conversation's read and write queues */
+{
+ c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->wq = qopen(64*1024, Qkick, rudpkick, c); /* rudpkick(c) is this queue's kick routine */
+}
+
+static void
+rudpclose(Conv *c) /* tear down a conversation and all of its per-destination state */
+{
+ Rudpcb *ucb;
+ Reliable *r, *nr;
+ Rudppriv *upriv;
+
+ upriv = c->p->priv;
+ iphtrem(&upriv->ht, c);
+
+ /* force out any delayed acks */
+ ucb = (Rudpcb*)c->ptcl;
+ qlock(ucb);
+ for(r = ucb->r; r; r = r->next){
+ if(r->acksent != r->rcvseq)
+ relsendack(c, r, 0);
+ }
+ qunlock(ucb);
+
+ qclose(c->rq);
+ qclose(c->wq);
+ qclose(c->eq);
+ ipmove(c->laddr, IPnoaddr);
+ ipmove(c->raddr, IPnoaddr);
+ c->lport = 0;
+ c->rport = 0;
+
+ ucb->headers = 0; /* note: these two resets happen without ucb locked */
+ ucb->randdrop = 0;
+ qlock(ucb);
+ for(r = ucb->r; r; r = nr){
+ if(r->acksent != r->rcvseq) /* checked again: the lock was dropped since the loop above */
+ relsendack(c, r, 0);
+ nr = r->next; /* fetch next first: relput may free r */
+ relhangup(c, r);
+ relput(r);
+ }
+ ucb->r = 0;
+
+ qunlock(ucb);
+}
+
+/*
+ * pass bp to ipoput4 unless randdrop (a percentage) says to discard it
+ */
+static void
+doipoput(Conv *c, Fs *f, Block *bp, int x, int ttl, int tos)
+{
+	Rudpcb *cb;
+
+	cb = (Rudpcb*)c->ptcl;
+	if(cb->randdrop == 0 || nrand(100) >= cb->randdrop)
+		ipoput4(f, bp, x, ttl, tos, nil);
+	else
+		freeblist(bp);
+}
+
+int
+flow(void *v) /* sleep condition used by rudpkick; v is the Reliable */
+{
+ Reliable *r = v;
+
+ return UNACKED(r) <= Maxunacked; /* true once the sender may proceed */
+}
+
+/* queue kick routine (installed by rudpcreate): take one block off c->wq, prepend the ip, udp */
+/* and rudp headers, keep a copy for retransmission, send it, then apply flow control */
+void
+rudpkick(void *x)
+{
+	Conv *c = x;
+	Udphdr *uh;
+	ushort rport;
+	uchar laddr[IPaddrlen], raddr[IPaddrlen];
+	Block *bp;
+	Rudpcb *ucb;
+	Rudphdr *rh;
+	Reliable *r;
+	int dlen, ptcllen;
+	Rudppriv *upriv;
+	Fs *f;
+
+	upriv = c->p->priv;
+	f = c->p->f;
+
+	netlog(c->p->f, Logrudp, "rudp: kick\n");
+	bp = qget(c->wq);
+	if(bp == nil)
+		return;
+
+	ucb = (Rudpcb*)c->ptcl;
+	switch(ucb->headers) {
+	case 7:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD7);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		bp->rp += IPaddrlen;		/* Ignore ifc address */
+		rport = nhgets(bp->rp);
+		bp->rp += 2+2;			/* Ignore local port */
+		break;
+	case 6:
+		/* get user specified addresses */
+		bp = pullupblock(bp, UDP_USEAD6);
+		if(bp == nil)
+			return;
+		ipmove(raddr, bp->rp);
+		bp->rp += IPaddrlen;
+		ipmove(laddr, bp->rp);
+		bp->rp += IPaddrlen;
+		/* pick interface closest to dest */
+		if(ipforme(f, laddr) != Runi)
+			findlocalip(f, laddr, raddr);
+		rport = nhgets(bp->rp);
+		bp->rp += 4;			/* Ignore local port */
+		break;
+	default:
+		ipmove(raddr, c->raddr);
+		ipmove(laddr, c->laddr);
+		rport = c->rport;
+		break;
+	}
+
+	dlen = blocklen(bp);
+
+	/* Make space to fit rudp & ip header */
+	bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
+	if(bp == nil)
+		return;
+
+	uh = (Udphdr *)(bp->rp);
+	uh->vihl = IP_VER4;
+
+	rh = (Rudphdr*)uh;
+
+	ptcllen = dlen + (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	uh->Unused = 0;
+	uh->udpproto = IP_UDPPROTO;
+	uh->frag[0] = 0;
+	uh->frag[1] = 0;
+	hnputs(uh->udpplen, ptcllen);
+	switch(ucb->headers){
+	case 6:
+	case 7:
+		v6tov4(uh->udpdst, raddr);
+		hnputs(uh->udpdport, rport);
+		v6tov4(uh->udpsrc, laddr);
+		break;
+	default:
+		v6tov4(uh->udpdst, c->raddr);
+		hnputs(uh->udpdport, c->rport);
+		if(ipcmp(c->laddr, IPnoaddr) == 0)
+			findlocalip(f, c->laddr, c->raddr);
+		v6tov4(uh->udpsrc, c->laddr);
+		break;
+	}
+	hnputs(uh->udpsport, c->lport);
+	hnputs(uh->udplen, ptcllen);
+	uh->udpcksum[0] = 0;
+	uh->udpcksum[1] = 0;
+
+	qlock(ucb);
+	r = relstate(ucb, raddr, rport, "kick");
+	r->sndseq = NEXTSEQ(r->sndseq);
+	hnputl(rh->relseq, r->sndseq);
+	hnputl(rh->relsgen, r->sndgen);
+
+	hnputl(rh->relack, r->rcvseq);			/* ACK last rcvd packet */
+	hnputl(rh->relagen, r->rcvgen);
+
+	if(r->rcvseq != r->acksent)
+		r->acksent = r->rcvseq;
+
+	hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, dlen+UDP_RHDRSIZE));
+
+	relackq(r, bp);
+	qunlock(ucb);
+
+	upriv->ustats.rudpOutDatagrams++;
+
+	DPRINT("sent: %lud/%lud, %lud/%lud\n",
+		r->sndseq, r->sndgen, r->rcvseq, r->rcvgen);
+
+	doipoput(c, f, bp, 0, c->ttl, c->tos);
+
+	if(waserror()) {
+		qunlock(&r->lock);	/* unlock first: relput may free r */
+		relput(r);
+		nexterror();
+	}
+
+	/* flow control of sorts */
+	qlock(&r->lock);
+	if(UNACKED(r) > Maxunacked){
+		r->blocked = 1;
+		sleep(&r->vous, flow, r);
+		r->blocked = 0;
+	}
+
+	qunlock(&r->lock);
+	relput(r);
+	poperror();
+}
+
+/* input routine (Proto.rcv): verify the checksum, find the conversation, let reliput handle */
+/* the sequencing and acks, then queue the payload on c->rq.  bp is consumed on every path. */
+void
+rudpiput(Proto *rudp, Ipifc *ifc, Block *bp)
+{
+	int len, olen, ottl;
+	Udphdr *uh;
+	Conv *c;
+	Rudpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Rudppriv *upriv;
+	Fs *f;
+	uchar *p;
+
+	upriv = rudp->priv;
+	f = rudp->f;
+
+	upriv->ustats.rudpInDatagrams++;
+
+	uh = (Udphdr*)(bp->rp);
+
+	/* Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv())
+	 */
+	ottl = uh->Unused;
+	uh->Unused = 0;
+	len = nhgets(uh->udplen);
+	olen = nhgets(uh->udpplen);
+	hnputs(uh->udpplen, len);
+
+	v4tov6(raddr, uh->udpsrc);
+	v4tov6(laddr, uh->udpdst);
+	lport = nhgets(uh->udpdport);
+	rport = nhgets(uh->udpsport);
+
+	if(nhgets(uh->udpcksum)) {
+		if(ptclcsum(bp, UDP_IPHDR, len+UDP_PHDRSIZE)) {
+			upriv->ustats.rudpInErrors++;
+			upriv->csumerr++;
+			netlog(f, Logrudp, "rudp: checksum error %I\n", raddr);
+			DPRINT("rudp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+	}
+
+	qlock(rudp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.rudpNoPorts++;
+		qunlock(rudp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+		uh->Unused = ottl;
+		hnputs(uh->udpplen, olen);
+		icmpnoconv(f, bp);
+		freeblist(bp);
+		return;
+	}
+	ucb = (Rudpcb*)c->ptcl;
+	qlock(ucb);
+	qunlock(rudp);
+
+	if(reliput(c, bp, raddr, rport) < 0){
+		qunlock(ucb);
+		freeblist(bp);	/* bp may be a chain (see the concatblock below), so free all of it */
+		return;
+	}
+
+	/* Trim the packet down to data size */
+	len -= (UDP_RHDRSIZE-UDP_PHDRSIZE);
+	bp = trimblock(bp, UDP_IPHDR+UDP_RHDRSIZE, len);
+	if(bp == nil) {
+		netlog(f, Logrudp, "rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		DPRINT("rudp: len err %I.%d -> %I.%d\n",
+			raddr, rport, laddr, lport);
+		upriv->lenerr++;
+		qunlock(ucb);	/* was missing: this return left the conversation locked */
+		return;
+	}
+
+	netlog(f, Logrudpmsg, "rudp: %I.%d -> %I.%d l %d\n",
+		raddr, rport, laddr, lport, len);
+
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	case 6:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD6);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	default:
+		/* connection oriented rudp */
+		if(ipcmp(c->raddr, IPnoaddr) == 0){
+			/* save the src address in the conversation */
+			ipmove(c->raddr, raddr);
+			c->rport = rport;
+
+			/* reply with the same ip address (if not broadcast) */
+			if(ipforme(f, laddr) == Runi)
+				ipmove(c->laddr, laddr);
+			else
+				ipmove(c->laddr, ifc->lifc->local);	/* already IPaddrlen bytes, as cases 6 and 7 treat it */
+		}
+		break;
+	}
+	if(bp->next)
+		bp = concatblock(bp);
+
+	if(qfull(c->rq)) {
+		netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+	}
+	else
+		qpass(c->rq, bp);
+
+	qunlock(ucb);
+}
+
+static char *rudpunknown = "unknown rudp ctl request";
+
+char*
+rudpctl(Conv *c, char **f, int n)
+{
+	Rudpcb *cb;
+	uchar addr[IPaddrlen];
+	int v;
+
+	cb = (Rudpcb*)c->ptcl;
+	if(n < 1)
+		return rudpunknown;
+	if(strcmp(f[0], "headers++4") == 0){
+		cb->headers = 7;
+		return nil;
+	}
+	if(strcmp(f[0], "headers") == 0){
+		cb->headers = 6;
+		return nil;
+	}
+	if(strcmp(f[0], "hangup") == 0){
+		if(n < 3)
+			return "bad syntax";
+		parseip(addr, f[1]);
+		v = atoi(f[2]);
+		qlock(cb);
+		relforget(c, addr, v, 1);
+		qunlock(cb);
+		return nil;
+	}
+	if(strcmp(f[0], "randdrop") == 0){
+		v = n > 1 ? atoi(f[1]) : 10;	/* default is 10% */
+		if(v > 100 || v < 0)
+			return "illegal rudp drop rate";
+		cb->randdrop = v;
+		return nil;
+	}
+	return rudpunknown;
+}
+
+void
+rudpadvise(Proto *rudp, Block *bp, char *msg)
+{
+	Udphdr *h;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	h = (Udphdr*)(bp->rp);
+
+	v4tov6(dest, h->udpdst);
+	v4tov6(source, h->udpsrc);
+	psource = nhgets(h->udpsport);
+	pdest = nhgets(h->udpdport);
+
+	/* hang up the first conversation whose ports and addresses match the header */
+	for(p = rudp->conv; *p != nil; p++){
+		s = *p;
+		if(s->rport != pdest || s->lport != psource)
+			continue;
+		if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
+			continue;
+		/* found it */
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		break;
+	}
+	freeblist(bp);
+}
+
+int
+rudpstats(Proto *rudp, char *buf, int len)
+{
+	Rudppriv *priv;
+	Rudpstats *mib;
+
+	priv = rudp->priv;
+	mib = &priv->ustats;
+	/* four MIB counters, then retransmissions and out-of-order packets */
+	return snprint(buf, len, "%lud %lud %lud %lud %lud %lud\n",
+		mib->rudpInDatagrams, mib->rudpNoPorts,
+		mib->rudpInErrors, mib->rudpOutDatagrams,
+		priv->rxmits, priv->orders);
+}
+
+int
+rudpgc(Proto *rudp) /* Proto.gc hook: returns whatever natgc returns for this protocol number */
+{
+ return natgc(rudp->ipproto); /* NOTE(review): natgc is defined elsewhere; its effect is not visible here */
+}
+
+void
+rudpinit(Fs *fs) /* build the rudp Proto and register it with fs */
+{
+
+ Proto *rudp;
+
+ rudp = smalloc(sizeof(Proto));
+ rudp->priv = smalloc(sizeof(Rudppriv));
+ rudp->name = "rudp";
+ rudp->connect = rudpconnect;
+ rudp->announce = rudpannounce;
+ rudp->ctl = rudpctl;
+ rudp->state = rudpstate;
+ rudp->create = rudpcreate;
+ rudp->close = rudpclose;
+ rudp->rcv = rudpiput;
+ rudp->advise = rudpadvise;
+ rudp->stats = rudpstats;
+ rudp->gc = rudpgc;
+ rudp->ipproto = IP_UDPPROTO; /* 254, see the enum at the top of this file */
+ rudp->nc = 16;
+ rudp->ptclsize = sizeof(Rudpcb); /* size of the per-conversation c->ptcl area */
+
+ Fsproto(fs, rudp);
+}
+
+/*********************************************/
+/* Here start the reliable helper functions  */
+/*********************************************/
+/*
+ * append a private copy of bp to r's unacked list for possible retransmission
+ */
+void
+relackq(Reliable *r, Block *bp)
+{
+	Block *copy;
+
+	copy = copyblock(bp, blocklen(bp));
+	copy->list = nil;
+	if(r->unacked == nil){
+		/* list was empty: this becomes the head, so restart the timer */
+		r->timeout = 0;
+		r->xmits = 1;
+		r->unacked = copy;
+	} else
+		r->unackedtail->list = copy;
+	/* the tail is updated last so the else branch links from the old tail */
+	r->unackedtail = copy;
+}
+
+/*
+ * kproc: every Rudptickms, retransmit overdue unacked blocks and send delayed acks
+ */
+void
+relackproc(void *a)
+{
+	Proto *rudp;
+	Conv **cp, *c;
+	Rudpcb *cb;
+	Reliable *rel;
+
+	rudp = (Proto *)a;
+
+	for(;;){
+		tsleep(&up->sleep, return0, 0, Rudptickms);
+
+		for(cp = rudp->conv; *cp != nil; cp++){
+			c = *cp;
+			cb = (Rudpcb*)c->ptcl;
+			qlock(cb);
+
+			for(rel = cb->r; rel != nil; rel = rel->next){
+				if(rel->unacked != nil){
+					rel->timeout += Rudptickms;
+					if(rel->timeout > Rudprxms*rel->xmits)
+						relrexmit(c, rel);
+				}
+				if(rel->acksent != rel->rcvseq)
+					relsendack(c, rel, 0);
+			}
+			qunlock(cb);
+		}
+	}
+}
+
+/*
+ * get the state record for addr/port on ucb's list, creating and
+ * appending one if there is none.  the caller gets its own reference
+ * and gives it back with relput.
+ */
+Reliable*
+relstate(Rudpcb *ucb, uchar *addr, ushort port, char *from)
+{
+	Reliable *rel, **link;
+
+	for(link = &ucb->r; (rel = *link) != nil; link = &rel->next){
+		if(port == rel->port && memcmp(addr, rel->addr, IPaddrlen) == 0){
+			incref(rel);
+			return rel;
+		}
+	}
+
+	/* no state for this addr/port: link now points at the list's last next field */
+	/* the generation counter is seeded on first use; 0 means unseeded */
+	while(generation == 0)
+		generation = rand();
+
+	DPRINT("from %s new state %lud for %I!%ud\n",
+		from, generation, addr, port);
+
+	rel = smalloc(sizeof(Reliable));
+	memmove(rel->addr, addr, IPaddrlen);
+	rel->port = port;
+	rel->unacked = 0;
+	/* Hangupgen is reserved for hangup messages, so step over it */
+	if(generation == Hangupgen)
+		generation++;
+	rel->sndgen = generation++;
+	rel->sndseq = 0;
+	rel->ackrcvd = 0;
+	rel->rcvgen = 0;
+	rel->rcvseq = 0;
+	rel->acksent = 0;
+	rel->xmits = 0;
+	rel->timeout = 0;
+	rel->ref = 0;
+	incref(rel);	/* one reference for being in the list */
+	*link = rel;
+
+	incref(rel);	/* and one for the caller */
+	return rel;
+}
+
+void
+relput(Reliable *r) /* give back one reference; frees r when the count reaches zero */
+{
+ if(decref(r) == 0)
+ free(r);
+}
+
+/*
+ * drop the Reliable state for ip/port, if the conversation has one
+ */
+void
+relforget(Conv *c, uchar *ip, int port, int originator)
+{
+	Rudpcb *cb;
+	Reliable *rel, **link;
+
+	cb = (Rudpcb*)c->ptcl;
+
+	for(link = &cb->r; (rel = *link) != nil; link = &rel->next){
+		if(ipcmp(ip, rel->addr) != 0 || port != rel->port)
+			continue;
+		/* unlink first; only the originator sends the hangup ack */
+		*link = rel->next;
+		if(originator)
+			relsendack(c, rel, 1);
+		relhangup(c, rel);
+		relput(rel);	/* drop the reference the list held */
+		/* at most one record is removed per call */
+		return;
+	}
+}
+
+/*
+ * process a rcvd reliable packet. return -1 if not to be passed to user process,
+ * 0 otherwise.
+ *
+ * called with ucb locked.
+ */
+int
+reliput(Conv *c, Block *bp, uchar *addr, ushort port)
+{
+ Block *nbp;
+ Rudpcb *ucb;
+ Rudppriv *upriv;
+ Udphdr *uh;
+ Reliable *r;
+ Rudphdr *rh;
+ ulong seq, ack, sgen, agen, ackreal;
+ int rv = -1;
+
+ /* get fields */
+ uh = (Udphdr*)(bp->rp);
+ rh = (Rudphdr*)uh;
+ seq = nhgetl(rh->relseq);
+ sgen = nhgetl(rh->relsgen);
+ ack = nhgetl(rh->relack);
+ agen = nhgetl(rh->relagen);
+
+ upriv = c->p->priv;
+ ucb = (Rudpcb*)c->ptcl;
+ r = relstate(ucb, addr, port, "input"); /* takes a reference, dropped at out: */
+
+ DPRINT("rcvd %lud/%lud, %lud/%lud, r->sndgen = %lud\n",
+ seq, sgen, ack, agen, r->sndgen);
+
+ /* if acking an incorrect generation, ignore */
+ if(ack && agen != r->sndgen)
+ goto out;
+
+ /* Look for a hangup */
+ if(sgen == Hangupgen) {
+ if(agen == r->sndgen)
+ relforget(c, addr, port, 0);
+ goto out;
+ }
+
+ /* make sure we're not talking to a new remote side */
+ if(r->rcvgen != sgen){
+ if(seq != 0 && seq != 1)
+ goto out;
+
+ /* new connection */
+ if(r->rcvgen != 0){
+ DPRINT("new con r->rcvgen = %lud, sgen = %lud\n", r->rcvgen, sgen);
+ relhangup(c, r); /* also assigns r a new sndgen, hence the re-check below */
+ }
+ r->rcvgen = sgen;
+ }
+
+ /* dequeue acked packets */
+ if(ack && agen == r->sndgen){
+ ackreal = 0;
+ while(r->unacked != nil && INSEQ(ack, r->ackrcvd, r->sndseq)){
+ nbp = r->unacked;
+ r->unacked = nbp->list;
+ DPRINT("%lud/%lud acked, r->sndgen = %lud\n",
+ ack, agen, r->sndgen);
+ freeb(nbp);
+ r->ackrcvd = NEXTSEQ(r->ackrcvd);
+ ackreal = 1; /* at least one queued block was released */
+ }
+
+ /* flow control */
+ if(UNACKED(r) < Maxunacked/8 && r->blocked)
+ wakeup(&r->vous);
+
+ /*
+ * retransmit next packet if the acked packet
+ * was transmitted more than once
+ */
+ if(ackreal && r->unacked != nil){
+ r->timeout = 0;
+ if(r->xmits > 1){
+ r->xmits = 1;
+ relrexmit(c, r);
+ }
+ }
+
+ }
+
+ /* no message or input queue full */
+ if(seq == 0 || qfull(c->rq))
+ goto out;
+
+ /* refuse out of order delivery */
+ if(seq != NEXTSEQ(r->rcvseq)){
+ relsendack(c, r, 0); /* tell him we got it already */
+ upriv->orders++;
+ DPRINT("out of sequence %lud not %lud\n", seq, NEXTSEQ(r->rcvseq));
+ goto out;
+ }
+ r->rcvseq = seq;
+
+ rv = 0; /* in-sequence message: let the caller deliver it */
+out:
+ relput(r);
+ return rv;
+}
+
+void
+relsendack(Conv *c, Reliable *r, int hangup) /* send a data-less ack for r; non-zero hangup marks it as a hangup */
+{
+ Udphdr *uh;
+ Block *bp;
+ Rudphdr *rh;
+ int ptcllen;
+ Fs *f;
+
+ bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
+ if(bp == nil)
+ return;
+ bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
+ f = c->p->f;
+ uh = (Udphdr *)(bp->rp);
+ uh->vihl = IP_VER4;
+ rh = (Rudphdr*)uh; /* same bytes, accessed through the reliable-udp header type */
+
+ ptcllen = (UDP_RHDRSIZE-UDP_PHDRSIZE);
+ uh->Unused = 0;
+ uh->udpproto = IP_UDPPROTO;
+ uh->frag[0] = 0;
+ uh->frag[1] = 0;
+ hnputs(uh->udpplen, ptcllen);
+
+ v6tov4(uh->udpdst, r->addr);
+ hnputs(uh->udpdport, r->port);
+ hnputs(uh->udpsport, c->lport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr);
+ v6tov4(uh->udpsrc, c->laddr);
+ hnputs(uh->udplen, ptcllen);
+
+ if(hangup)
+ hnputl(rh->relsgen, Hangupgen);
+ else
+ hnputl(rh->relsgen, r->sndgen);
+ hnputl(rh->relseq, 0); /* sequence 0: reliput treats this as "no message" */
+ hnputl(rh->relagen, r->rcvgen);
+ hnputl(rh->relack, r->rcvseq);
+
+ if(r->acksent < r->rcvseq)
+ r->acksent = r->rcvseq;
+
+ uh->udpcksum[0] = 0;
+ uh->udpcksum[1] = 0;
+ hnputs(uh->udpcksum, ptclcsum(bp, UDP_IPHDR, UDP_RHDRSIZE));
+
+ DPRINT("sendack: %lud/%lud, %lud/%lud\n", 0L, r->sndgen, r->rcvseq, r->rcvgen);
+ doipoput(c, f, bp, 0, c->ttl, c->tos);
+}
+
+
+/*
+ * report a hangup on c->eq and reset r; called with ucb locked (and c locked if user initiated close)
+ */
+void
+relhangup(Conv *c, Reliable *r)
+{
+ int n;
+ Block *bp;
+ char hup[ERRMAX];
+
+ n = snprint(hup, sizeof(hup), "hangup %I!%d", r->addr, r->port);
+ qproduce(c->eq, hup, n);
+
+ /*
+ * dump any unacked outgoing messages
+ */
+ for(bp = r->unacked; bp != nil; bp = r->unacked){
+ r->unacked = bp->list;
+ bp->list = nil;
+ freeb(bp);
+ }
+
+ r->rcvgen = 0;
+ r->rcvseq = 0;
+ r->acksent = 0;
+ if(generation == Hangupgen) /* never hand out the hangup marker as a generation */
+ generation++;
+ r->sndgen = generation++;
+ r->sndseq = 0;
+ r->ackrcvd = 0;
+ r->xmits = 0;
+ r->timeout = 0;
+ wakeup(&r->vous); /* wake anything sleeping on r->vous */
+}
+
+/*
+ * resend the oldest unacked block of r, or hang up after too many tries; called with ucb locked
+ */
+void
+relrexmit(Conv *c, Reliable *r)
+{
+ Rudppriv *upriv;
+ Block *np;
+ Fs *f;
+
+ upriv = c->p->priv;
+ f = c->p->f;
+ r->timeout = 0;
+ if(r->xmits++ > Rudpmaxxmit){ /* compares the count before this attempt */
+ relhangup(c, r);
+ return;
+ }
+
+ upriv->rxmits++;
+ np = copyblock(r->unacked, blocklen(r->unacked)); /* send a copy; the original stays queued */
+ DPRINT("rxmit r->ackrvcd+1 = %lud\n", r->ackrcvd+1);
+ doipoput(c, f, np, 0, c->ttl, c->tos);
+}
--- /dev/null
+++ b/os/ip.original/tcp.c
@@ -1,0 +1,3194 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+
+enum
+{
+ QMAX = 64*1024-1,
+ IP_TCPPROTO = 6,
+
+ TCP4_IPLEN = 8,
+ TCP4_PHDRSIZE = 12,
+ TCP4_HDRSIZE = 20,
+ TCP4_TCBPHDRSZ = 40,
+ TCP4_PKT = TCP4_IPLEN+TCP4_PHDRSIZE,
+
+ TCP6_IPLEN = 0,
+ TCP6_PHDRSIZE = 40,
+ TCP6_HDRSIZE = 20,
+ TCP6_TCBPHDRSZ = 60,
+ TCP6_PKT = TCP6_IPLEN+TCP6_PHDRSIZE,
+
+ TcptimerOFF = 0,
+ TcptimerON = 1,
+ TcptimerDONE = 2,
+ MAX_TIME = (1<<20), /* Forever */
+ TCP_ACK = 50, /* Timed ack sequence in ms */
+ MAXBACKMS = 9*60*1000, /* longest backoff time (ms) before hangup */
+
+ URG = 0x20, /* Data marked urgent */
+ ACK = 0x10, /* Acknowledge is valid */
+ PSH = 0x08, /* Whole data pipe is pushed */
+ RST = 0x04, /* Reset connection */
+ SYN = 0x02, /* Pkt. is synchronise */
+ FIN = 0x01, /* Start close down */
+
+ EOLOPT = 0,
+ NOOPOPT = 1,
+ MSSOPT = 2,
+ MSS_LENGTH = 4, /* Length in bytes of the MSS option */
+ WSOPT = 3,
+ WS_LENGTH = 3, /* Length in bytes of the window scale option */
+ MSL2 = 10,
+ MSPTICK = 50, /* Milliseconds per timer tick */
+ DEF_MSS = 1460, /* Default maximum segment size */
+ DEF_MSS6 = 1280, /* Default maximum segment size (min) for v6 */
+ DEF_RTT = 500, /* Default round trip */
+ DEF_KAT = 120000, /* Default time (ms) between keep alives */
+ TCP_LISTEN = 0, /* Listen connection */
+ TCP_CONNECT = 1, /* Outgoing connection */
+ SYNACK_RXTIMER = 250, /* ms between SYNACK retransmits */
+
+ TCPREXMTTHRESH = 3, /* dupack threshold for rxt */
+
+ FORCE = 1,
+ CLONE = 2,
+ RETRAN = 4,
+ ACTIVE = 8,
+ SYNACK = 16,
+
+ LOGAGAIN = 3,
+ LOGDGAIN = 2,
+
+ Closed = 0, /* Connection states */
+ Listen,
+ Syn_sent,
+ Syn_received,
+ Established,
+ Finwait1,
+ Finwait2,
+ Close_wait,
+ Closing,
+ Last_ack,
+ Time_wait,
+
+ Maxlimbo = 1000, /* maximum procs waiting for response to SYN ACK */
+ NLHT = 256, /* hash table size, must be a power of 2 */
+ LHTMASK = NLHT-1,
+
+ HaveWS = 1<<8, /* or'ed into a scale value to mark the window scale option as present */
+};
+
+/* Printable state names; order must correspond to Closed..Time_wait in the enumeration above */
+char *tcpstates[] =
+{
+ "Closed", "Listen", "Syn_sent", "Syn_received",
+ "Established", "Finwait1", "Finwait2", "Close_wait",
+ "Closing", "Last_ack", "Time_wait"
+};
+
+typedef struct Tcptimer Tcptimer;
+struct Tcptimer
+{
+ Tcptimer *next; /* next/prev chain running timers on Tcppriv.timers */
+ Tcptimer *prev;
+ Tcptimer *readynext; /* chains timers that expired in one tcpackproc pass */
+ int state; /* TcptimerOFF, TcptimerON or TcptimerDONE */
+ int start; /* period in ticks; tcpgo copies it into count */
+ int count; /* ticks remaining; tcpackproc decrements it while ON */
+ void (*func)(void*); /* called with arg once count reaches zero */
+ void *arg;
+};
+
+/*
+ * v4 and v6 pseudo headers used for
+ * checksumming tcp
+ */
+typedef struct Tcp4hdr Tcp4hdr;
+struct Tcp4hdr
+{
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused;
+ uchar proto;
+ uchar tcplen[2]; /* tcp header plus data length (set in htontcp4) */
+ uchar tcpsrc[4];
+ uchar tcpdst[4];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length in the top bits, flag bits in the low byte */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+typedef struct Tcp6hdr Tcp6hdr;
+struct Tcp6hdr
+{
+ uchar vcf[4]; /* first byte holds IP_VER6 on the wire; htontcp6 borrows the field while checksumming */
+ uchar ploadlen[2]; /* tcp header plus data length */
+ uchar proto;
+ uchar ttl;
+ uchar tcpsrc[IPaddrlen];
+ uchar tcpdst[IPaddrlen];
+ uchar tcpsport[2];
+ uchar tcpdport[2];
+ uchar tcpseq[4];
+ uchar tcpack[4];
+ uchar tcpflag[2]; /* header length in the top bits, flag bits in the low byte */
+ uchar tcpwin[2];
+ uchar tcpcksum[2];
+ uchar tcpurg[2];
+ /* Options segment */
+ uchar tcpopt[1];
+};
+
+/*
+ * this represents the control info
+ * for a single packet. It is derived from
+ * a packet in ntohtcp{4,6}() and stuck into
+ * a packet in htontcp{4,6}().
+ */
+typedef struct Tcp Tcp;
+struct Tcp
+{
+ ushort source; /* source port */
+ ushort dest; /* destination port */
+ ulong seq;
+ ulong ack;
+ uchar flags; /* URG, ACK, PSH, RST, SYN, FIN bits */
+ ushort ws; /* window scale option (if not zero) */
+ ulong wnd;
+ ushort urg;
+ ushort mss; /* max segment size option (if not zero) */
+ ushort len; /* size of data */
+};
+
+/*
+ * this header is malloc'd to thread together fragments
+ * waiting to be coalesced
+ */
+typedef struct Reseq Reseq;
+struct Reseq
+{
+ Reseq *next;
+ Tcp seg; /* control info of the held segment */
+ Block *bp; /* its blocks; freed with freeblist in localclose */
+ ushort length;
+};
+
+/*
+ * the qlock in the Conv locks this structure
+ */
+typedef struct Tcpctl Tcpctl;
+struct Tcpctl
+{
+ uchar state; /* Connection state */
+ uchar type; /* Listening or active connection */
+ uchar code; /* Icmp code */
+ struct {
+ ulong una; /* Unacked data pointer */
+ ulong nxt; /* Next sequence expected */
+ ulong ptr; /* Data pointer */
+ ulong wnd; /* Tcp send window */
+ ulong urg; /* Urgent data pointer */
+ ulong wl2;
+ int scale; /* how much to right shift window in xmitted packets */
+ /* to implement tahoe and reno TCP */
+ ulong dupacks; /* number of duplicate acks rcvd */
+ int recovery; /* loss recovery flag */
+ ulong rxt; /* right window marker for recovery */
+ } snd;
+ struct {
+ ulong nxt; /* Receive pointer to next uchar slot */
+ ulong wnd; /* Receive window incoming */
+ ulong urg; /* Urgent pointer */
+ int blocked;
+ int una; /* unacked data segs */
+ int scale; /* how much to left shift window in rcved packets */
+ } rcv;
+ ulong iss; /* Initial sequence number */
+ int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
+ ulong cwind; /* Congestion window */
+ int scale; /* desired snd.scale */
+ ushort ssthresh; /* Slow start threshold */
+ int resent; /* Bytes just resent */
+ int irs; /* Initial received sequence */
+ ushort mss; /* Maximum segment size */
+ int rerecv; /* Overlap of data re-received */
+ ulong window; /* Receive window */
+ uchar backoff; /* Exponential backoff counter */
+ int backedoff; /* ms we've backed off for rexmits */
+ uchar flags; /* State flags */
+ Reseq *reseq; /* Resequencing queue */
+ Tcptimer timer; /* Activity timer */
+ Tcptimer acktimer; /* Acknowledge timer */
+ Tcptimer rtt_timer; /* Round trip timer */
+ Tcptimer katimer; /* keep alive timer */
+ ulong rttseq; /* Round trip sequence */
+ int srtt; /* Shortened round trip */
+ int mdev; /* Mean deviation of round trip */
+ int kacounter; /* count down for keep alive */
+ uint sndsyntime; /* time syn sent */
+ ulong time; /* time Finwait2 or Syn_received was sent */
+ int nochecksum; /* non-zero means don't send checksums */
+ int flgcnt; /* number of flags in the sequence (FIN,SYN) */
+
+ union {
+ Tcp4hdr tcp4hdr;
+ Tcp6hdr tcp6hdr;
+ } protohdr; /* prototype header */
+};
+
+/*
+ * New calls are put in limbo rather than having a conversation structure
+ * allocated. Thus, a SYN attack results in lots of limbo'd calls but not
+ * any real Conv structures mucking things up. Calls in limbo rexmit their
+ * SYN ACK every SYNACK_RXTIMER ms up to 4 times, i.e., they disappear after 1 second.
+ *
+ * In particular they aren't on a listener's queue so that they don't figure
+ * in the input queue limit.
+ *
+ * If 1/2 of a T3 was attacking SYN packets, we'd have a permanent queue
+ * of 70000 limbo'd calls. Not great for a linear list but doable. Therefore
+ * there is no hashing of this list.
+ */
+typedef struct Limbo Limbo;
+struct Limbo
+{
+ Limbo *next;
+
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+ ushort lport;
+ ushort rport;
+ ulong irs; /* initial received sequence */
+ ulong iss; /* initial sent sequence */
+ ushort mss; /* mss from the other end */
+ ushort rcvscale; /* how much to scale rcvd windows */
+ ushort sndscale; /* how much to scale sent windows */
+ ulong lastsend; /* last time we sent a synack */
+ uchar version; /* v4 or v6 */
+ uchar rexmits; /* number of retransmissions */
+};
+
+int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
+ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
+
+enum {
+ /* MIB stats; each constant indexes Tcppriv.stats and statnames */
+ MaxConn,
+ ActiveOpens,
+ PassiveOpens,
+ EstabResets,
+ CurrEstab,
+ InSegs,
+ OutSegs,
+ RetransSegs,
+ RetransTimeouts,
+ InErrs,
+ OutRsts,
+
+ /* non-MIB stats */
+ CsumErrs,
+ HlenErrs,
+ LenErrs,
+ OutOfOrder,
+
+ Nstats /* number of counters, not a counter itself */
+};
+
+static char *statnames[] = /* printable name of each counter, indexed by the stats enum */
+{
+[MaxConn] "MaxConn",
+[ActiveOpens] "ActiveOpens",
+[PassiveOpens] "PassiveOpens",
+[EstabResets] "EstabResets",
+[CurrEstab] "CurrEstab",
+[InSegs] "InSegs",
+[OutSegs] "OutSegs",
+[RetransSegs] "RetransSegs",
+[RetransTimeouts] "RetransTimeouts",
+[InErrs] "InErrs",
+[OutRsts] "OutRsts",
+[CsumErrs] "CsumErrs",
+[HlenErrs] "HlenErrs",
+[LenErrs] "LenErrs",
+[OutOfOrder] "OutOfOrder",
+};
+
+typedef struct Tcppriv Tcppriv;
+struct Tcppriv
+{
+ /* List of active timers; tl is held while the list or a timer's state changes */
+ QLock tl;
+ Tcptimer *timers;
+
+ /* hash table for matching conversations */
+ Ipht ht;
+
+ /* calls in limbo waiting for an ACK to our SYN ACK */
+ int nlimbo;
+ Limbo *lht[NLHT];
+
+ /* for keeping track of tcpackproc; apl serialises its one-time creation in tcpstart */
+ QLock apl;
+ int ackprocstarted;
+
+ ulong stats[Nstats]; /* counters, indexed by the stats enum */
+};
+
+/*
+ * Setting tcpporthogdefense to non-zero enables Dong Lin's
+ * solution to hijacked systems staking out ports as a form
+ * of DoS attack.
+ *
+ * To avoid stateless Conv hogs, we pick a sequence number at random. If
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
+ */
+int tcpporthogdefense = 0;
+
+int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+void localclose(Conv*, char*);
+void procsyn(Conv*, Tcp*);
+void tcpiput(Proto*, Ipifc*, Block*);
+void tcpoutput(Conv*);
+int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
+void tcpstart(Conv*, int);
+void tcptimeout(void*);
+void tcpsndsyn(Conv*, Tcpctl*);
+void tcprcvwin(Conv*);
+void tcpacktimer(void*);
+void tcpkeepalive(void*);
+void tcpsetkacounter(Tcpctl*);
+void tcprxmit(Conv*);
+void tcpsettimer(Tcpctl*);
+void tcpsynackrtt(Conv*);
+void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+
+static void limborexmit(Proto*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+
+void
+tcpsetstate(Conv *s, uchar newstate) /* move s to newstate, updating CurrEstab and the queues */
+{
+ Tcpctl *tcb;
+ uchar oldstate;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ oldstate = tcb->state;
+ if(oldstate == newstate)
+ return;
+
+ /* CurrEstab counts the conversations currently in Established */
+ if(oldstate == Established)
+ tpriv->stats[CurrEstab]--;
+ if(newstate == Established)
+ tpriv->stats[CurrEstab]++;
+
+ /**
+ print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
+ tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
+ **/
+
+ switch(newstate) {
+ case Closed:
+ qclose(s->rq);
+ qclose(s->wq);
+ qclose(s->eq);
+ break;
+
+ case Close_wait: /* Remote closes */
+ qhangup(s->rq, nil);
+ break;
+ }
+
+ tcb->state = newstate;
+
+ if(oldstate == Syn_sent && newstate != Closed) /* an outgoing call left Syn_sent without closing */
+ Fsconnected(s, nil);
+}
+
+static char*
+tcpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	/* let the standard connect code handle the arguments; open the call only if it accepted them */
+	err = Fsstdconnect(c, argv, argc);
+	if(err == nil)
+		tcpstart(c, TCP_CONNECT);
+
+	return err;
+}
+
+static int
+tcpstate(Conv *c, char *state, int n) /* format a one-line status summary of c into state (size n); returns snprint's result */
+{
+ Tcpctl *s;
+
+ s = (Tcpctl*)(c->ptcl);
+
+ /* note the pairing: swin is printed with rcv.scale and rwin with snd.scale */
+ return snprint(state, n,
+ "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ tcpstates[s->state],
+ c->rq ? qlen(c->rq) : 0,
+ c->wq ? qlen(c->wq) : 0,
+ s->srtt, s->mdev,
+ s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+ s->timer.start, s->timer.count, s->rerecv,
+ s->katimer.start, s->katimer.count);
+}
+
+static int
+tcpinuse(Conv *c)	/* non-zero unless the connection is in the Closed state */
+{
+	Tcpctl *tcb;
+
+	tcb = (Tcpctl*)c->ptcl;
+	return tcb->state != Closed;
+}
+
+static char*
+tcpannounce(Conv *c, char **argv, int argc)
+{
+	char *err;
+
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		/* arguments accepted: enter Listen, then call Fsconnected with no error string */
+		tcpstart(c, TCP_LISTEN);
+		Fsconnected(c, nil);
+	}
+	return err;
+}
+
+/*
+ * user close of c: hang up its queues and start the shutdown appropriate to the current state; always called with the q locked
+ */
+static void
+tcpclose(Conv *c)
+{
+ Tcpctl *tcb;
+
+ tcb = (Tcpctl*)c->ptcl;
+
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
+ qhangup(c->eq, nil);
+ qflush(c->rq);
+
+ switch(tcb->state) {
+ case Listen:
+ /*
+ * reset any incoming calls to this listener
+ */
+ Fsconnected(c, "Hangup");
+
+ localclose(c, nil);
+ break;
+ case Closed:
+ case Syn_sent:
+ localclose(c, nil);
+ break;
+ case Syn_received:
+ case Established:
+ tcb->flgcnt++; /* one more flag (the FIN) counted in the sequence */
+ tcb->snd.nxt++;
+ tcpsetstate(c, Finwait1);
+ tcpoutput(c);
+ break;
+ case Close_wait:
+ tcb->flgcnt++; /* as above, but the remote side has already closed */
+ tcb->snd.nxt++;
+ tcpsetstate(c, Last_ack);
+ tcpoutput(c);
+ break;
+ }
+}
+
+void
+tcpkick(void *x) /* kick routine of the write queue (see tcpcreate); x is the Conv */
+{
+ Conv *s = x;
+ Tcpctl *tcb;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ if(waserror()){ /* error raised below: release the lock before passing it on */
+ qunlock(s);
+ nexterror();
+ }
+ qlock(s);
+
+ switch(tcb->state) {
+ case Syn_sent:
+ case Syn_received:
+ case Established:
+ case Close_wait:
+ /*
+ * Push data
+ */
+ tcprcvwin(s);
+ tcpoutput(s);
+ break;
+ default:
+ localclose(s, "Hangup");
+ break;
+ }
+
+ qunlock(s);
+ poperror();
+}
+
+void
+tcprcvwin(Conv *s)	/* recompute the receive window; call with tcb locked */
+{
+	Tcpctl *tcb;
+	int room;
+
+	tcb = (Tcpctl*)s->ptcl;
+	room = tcb->window - qlen(s->rq);	/* window less what is already queued in rq */
+	if(room <= 0){
+		room = 0;
+		tcb->rcv.blocked = 1;
+	}
+	tcb->rcv.wnd = room;
+}
+
+void
+tcpacktimer(void *v) /* ack timer callback, also the read queue's kick (see tcpcreate); v is the Conv */
+{
+ Tcpctl *tcb;
+ Conv *s;
+
+ s = v;
+ tcb = (Tcpctl*)s->ptcl;
+
+ if(waserror()){ /* error raised below: release the lock before passing it on */
+ qunlock(s);
+ nexterror();
+ }
+ qlock(s);
+ if(tcb->state != Closed){
+ tcb->flags |= FORCE;
+ tcprcvwin(s);
+ tcpoutput(s);
+ }
+ qunlock(s);
+ poperror();
+}
+
+static void
+tcpcreate(Conv *c) /* open c's read and write queues */
+{
+ c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c); /* tcpacktimer is the read queue's kick */
+ c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c); /* tcpkick is the write queue's kick */
+}
+
+static void
+timerstate(Tcppriv *priv, Tcptimer *t, int newstate) /* set t's state, (un)linking it on priv->timers; callers in view hold priv->tl */
+{
+ if(newstate != TcptimerON){
+ if(t->state == TcptimerON){
+ // unchain
+ if(priv->timers == t){
+ priv->timers = t->next;
+ if(t->prev != nil) /* the head must not have a predecessor */
+ panic("timerstate1");
+ }
+ if(t->next)
+ t->next->prev = t->prev;
+ if(t->prev)
+ t->prev->next = t->next;
+ t->next = t->prev = nil;
+ }
+ } else {
+ if(t->state != TcptimerON){
+ // chain
+ if(t->prev != nil || t->next != nil) /* a stopped timer must not be linked */
+ panic("timerstate2");
+ t->prev = nil;
+ t->next = priv->timers; /* insert at the head */
+ if(t->next)
+ t->next->prev = t;
+ priv->timers = t;
+ }
+ }
+ t->state = newstate;
+}
+
+void
+tcpackproc(void *a) /* timer process for the protocol a: ticks every MSPTICK ms and fires expired timers */
+{
+ Tcptimer *t, *tp, *timeo;
+ Proto *tcp;
+ Tcppriv *priv;
+ int loop;
+
+ tcp = a;
+ priv = tcp->priv;
+
+ for(;;) {
+ tsleep(&up->sleep, return0, 0, MSPTICK);
+
+ qlock(&priv->tl);
+ timeo = nil;
+ loop = 0;
+ for(t = priv->timers; t != nil; t = tp) {
+ if(loop++ > 10000) /* sanity bound on the length of the chain */
+ panic("tcpackproc1");
+ tp = t->next; /* saved first: timerstate clears t->next */
+ if(t->state == TcptimerON) {
+ t->count--;
+ if(t->count == 0) {
+ timerstate(priv, t, TcptimerDONE);
+ t->readynext = timeo;
+ timeo = t;
+ }
+ }
+ }
+ qunlock(&priv->tl);
+
+ /* callbacks run after tl is released */
+ loop = 0;
+ for(t = timeo; t != nil; t = t->readynext) {
+ if(loop++ > 10000)
+ panic("tcpackproc2");
+ if(t->state == TcptimerDONE && t->func != nil && !waserror()){
+ (*t->func)(t->arg);
+ poperror();
+ }
+ }
+
+ limborexmit(tcp);
+ }
+}
+
+void
+tcpgo(Tcppriv *priv, Tcptimer *t)	/* (re)arm t for t->start ticks */
+{
+	/* a nil timer, or one whose period is zero, is never started */
+	if(t != nil && t->start != 0){
+		qlock(&priv->tl);
+		t->count = t->start;
+		timerstate(priv, t, TcptimerON);
+		qunlock(&priv->tl);
+	}
+}
+
+void
+tcphalt(Tcppriv *priv, Tcptimer *t)	/* stop t, unchaining it if it was running */
+{
+	if(t != nil){
+		/* the timer chain is only changed with priv->tl held */
+		qlock(&priv->tl);
+		timerstate(priv, t, TcptimerOFF);
+		qunlock(&priv->tl);
+	}
+}
+
+int
+backoff(int n)	/* 2^n, with n clamped to 0..30 so the shift is always defined for an int */
+{
+	return 1 << (n < 0 ? 0 : n > 30 ? 30 : n);
+}
+
+void
+localclose(Conv *s, char *reason) /* tear s down locally, passing reason to its queues; called with tcb locked */
+{
+ Tcpctl *tcb;
+ Reseq *rp,*rp1;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+ tcb = (Tcpctl*)s->ptcl;
+
+ iphtrem(&tpriv->ht, s);
+
+ tcphalt(tpriv, &tcb->timer);
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+
+ /* Flush reassembly queue; nothing more can arrive */
+ for(rp = tcb->reseq; rp != nil; rp = rp1) {
+ rp1 = rp->next;
+ freeblist(rp->bp);
+ free(rp);
+ }
+ tcb->reseq = nil;
+
+ if(tcb->state == Syn_sent) /* an outgoing call was still pending: report reason to it */
+ Fsconnected(s, reason);
+ if(s->state == Announced)
+ wakeup(&s->listenr);
+
+ qhangup(s->rq, reason);
+ qhangup(s->wq, reason);
+
+ tcpsetstate(s, Closed);
+}
+
+/* mtu (- medium, IP and TCP hdr len) of the interface found for addr, or the default; also sets *scale */
+int
+tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+{
+ Ipifc *ifc;
+ int mtu;
+
+ ifc = findipifc(tcp->f, addr, 0);
+ switch(version){
+ default: /* any other version is handled as V4 */
+ case V4:
+ mtu = DEF_MSS;
+ if(ifc != nil)
+ mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
+ break;
+ case V6:
+ mtu = DEF_MSS6;
+ if(ifc != nil)
+ mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
+ break;
+ }
+ /* the window scale grows with ifc->mbps; HaveWS marks the option as present */
+ if(ifc != nil){
+ if(ifc->mbps > 100)
+ *scale = HaveWS | 3;
+ else if(ifc->mbps > 10)
+ *scale = HaveWS | 1;
+ else
+ *scale = HaveWS | 0;
+ } else
+ *scale = HaveWS | 0;
+
+ return mtu;
+}
+
+void
+inittcpctl(Conv *s, int mode) /* reset s's Tcpctl to defaults and, unless listening, build its prototype header */
+{
+ Tcpctl *tcb;
+ Tcp4hdr* h4;
+ Tcp6hdr* h6;
+ int mss;
+
+ tcb = (Tcpctl*)s->ptcl;
+
+ memset(tcb, 0, sizeof(Tcpctl));
+
+ tcb->ssthresh = 65535;
+ tcb->srtt = tcp_irtt<<LOGAGAIN;
+ tcb->mdev = 0;
+
+ /* setup timers; start values are in ticks of MSPTICK ms */
+ tcb->timer.start = tcp_irtt / MSPTICK;
+ tcb->timer.func = tcptimeout;
+ tcb->timer.arg = s;
+ tcb->rtt_timer.start = MAX_TIME;
+ tcb->acktimer.start = TCP_ACK / MSPTICK;
+ tcb->acktimer.func = tcpacktimer;
+ tcb->acktimer.arg = s;
+ tcb->katimer.start = DEF_KAT / MSPTICK;
+ tcb->katimer.func = tcpkeepalive;
+ tcb->katimer.arg = s;
+
+ mss = DEF_MSS;
+
+ /* create a prototype(pseudo) header */
+ if(mode != TCP_LISTEN){
+ if(ipcmp(s->laddr, IPnoaddr) == 0)
+ findlocalip(s->p->f, s->laddr, s->raddr);
+
+ switch(s->ipversion){
+ case V4:
+ h4 = &tcb->protohdr.tcp4hdr;
+ memset(h4, 0, sizeof(*h4));
+ h4->proto = IP_TCPPROTO;
+ hnputs(h4->tcpsport, s->lport);
+ hnputs(h4->tcpdport, s->rport);
+ v6tov4(h4->tcpsrc, s->laddr);
+ v6tov4(h4->tcpdst, s->raddr);
+ break;
+ case V6:
+ h6 = &tcb->protohdr.tcp6hdr;
+ memset(h6, 0, sizeof(*h6));
+ h6->proto = IP_TCPPROTO;
+ hnputs(h6->tcpsport, s->lport);
+ hnputs(h6->tcpdport, s->rport);
+ ipmove(h6->tcpsrc, s->laddr);
+ ipmove(h6->tcpdst, s->raddr);
+ mss = DEF_MSS6;
+ break;
+ default:
+ panic("inittcpctl: version %d", s->ipversion);
+ }
+ }
+
+ tcb->mss = tcb->cwind = mss; /* the congestion window starts at one segment */
+
+ /* default is no window scaling */
+ tcb->window = QMAX;
+ tcb->rcv.wnd = QMAX;
+ tcb->rcv.scale = 0;
+ tcb->snd.scale = 0;
+ qsetlimit(s->rq, QMAX);
+}
+
+/*
+ * begin operating s as a listener (TCP_LISTEN) or caller (TCP_CONNECT); called with s qlocked
+ */
+void
+tcpstart(Conv *s, int mode)
+{
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	char kpname[KNAMELEN];
+
+	tpriv = s->p->priv;
+
+	/* first use of this protocol instance: create the timer process exactly once */
+	if(tpriv->ackprocstarted == 0){
+		qlock(&tpriv->apl);
+		if(tpriv->ackprocstarted == 0){
+			snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+			kproc(kpname, tcpackproc, s->p, 0);
+			tpriv->ackprocstarted = 1;
+		}
+		qunlock(&tpriv->apl);
+	}
+
+	tcb = (Tcpctl*)s->ptcl;
+
+	inittcpctl(s, mode);
+	iphtadd(&tpriv->ht, s);
+	switch(mode) {
+	case TCP_LISTEN:
+		tpriv->stats[PassiveOpens]++;
+		tcb->flags |= CLONE;
+		tcpsetstate(s, Listen);
+		break;
+
+	case TCP_CONNECT:
+		tpriv->stats[ActiveOpens]++;
+		tcb->flags |= ACTIVE;
+		tcpsndsyn(s, tcb);
+		tcpsetstate(s, Syn_sent);
+		tcpoutput(s);
+		break;
+	}
+}
+
+static char*
+tcpflag(ushort flag) /* render a tcpflag word as text; the result is a static buffer reused by every call */
+{
+ static char buf[128];
+
+ sprint(buf, "%d", flag>>10); /* Head len */
+ if(flag & URG)
+ strcat(buf, " URG");
+ if(flag & ACK)
+ strcat(buf, " ACK");
+ if(flag & PSH)
+ strcat(buf, " PSH");
+ if(flag & RST)
+ strcat(buf, " RST");
+ if(flag & SYN)
+ strcat(buf, " SYN");
+ if(flag & FIN)
+ strcat(buf, " FIN");
+
+ return buf;
+}
+
+Block *
+htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb) /* put a v6 tcp header built from tcph and ph in front of data (or in a new block); nil on failure */
+{
+ int dlen;
+ Tcp6hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ /* options are only sent on SYN segments; the header is padded to a multiple of 4 */
+ hdrlen = TCP6_HDRSIZE;
+ if(tcph->flags & SYN){
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3;
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP6_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP6_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp6hdr *)(data->rp);
+ memmove(h, ph, TCP6_TCBPHDRSZ);
+
+ /* compose pseudo tcp header, do cksum calculation */
+ hnputl(h->vcf, hdrlen + dlen); /* vcf temporarily carries the length */
+ h->ploadlen[0] = h->ploadlen[1] = h->proto = 0;
+ h->ttl = ph->proto; /* ttl temporarily carries the protocol */
+
+ /* copy in variable bits */
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws;
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP6_IPLEN, hdrlen+dlen+TCP6_PHDRSIZE);
+ hnputs(h->tcpcksum, csum);
+ }
+
+ /* move from pseudo header back to normal ip header */
+ memset(h->vcf, 0, 4);
+ h->vcf[0] = IP_VER6;
+ hnputs(h->ploadlen, hdrlen+dlen);
+ h->proto = ph->proto;
+
+ return data;
+}
+
+Block *
+htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb) /* put a v4 tcp header built from tcph and ph in front of data (or in a new block); nil on failure */
+{
+ int dlen;
+ Tcp4hdr *h;
+ ushort csum;
+ ushort hdrlen, optpad = 0;
+ uchar *opt;
+
+ /* options are only sent on SYN segments; the header is padded to a multiple of 4 */
+ hdrlen = TCP4_HDRSIZE;
+ if(tcph->flags & SYN){
+ if(tcph->mss)
+ hdrlen += MSS_LENGTH;
+ if(tcph->ws)
+ hdrlen += WS_LENGTH;
+ optpad = hdrlen & 3;
+ if(optpad)
+ optpad = 4 - optpad;
+ hdrlen += optpad;
+ }
+
+ if(data) {
+ dlen = blocklen(data);
+ data = padblock(data, hdrlen + TCP4_PKT);
+ if(data == nil)
+ return nil;
+ }
+ else {
+ dlen = 0;
+ data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
+ if(data == nil)
+ return nil;
+ data->wp += hdrlen + TCP4_PKT;
+ }
+
+ /* copy in pseudo ip header plus port numbers */
+ h = (Tcp4hdr *)(data->rp);
+ memmove(h, ph, TCP4_TCBPHDRSZ);
+
+ /* copy in variable bits */
+ hnputs(h->tcplen, hdrlen + dlen);
+ hnputl(h->tcpseq, tcph->seq);
+ hnputl(h->tcpack, tcph->ack);
+ hnputs(h->tcpflag, (hdrlen<<10) | tcph->flags);
+ hnputs(h->tcpwin, tcph->wnd>>(tcb != nil ? tcb->snd.scale : 0));
+ hnputs(h->tcpurg, tcph->urg);
+
+ if(tcph->flags & SYN){
+ opt = h->tcpopt;
+ if(tcph->mss != 0){
+ *opt++ = MSSOPT;
+ *opt++ = MSS_LENGTH;
+ hnputs(opt, tcph->mss);
+ opt += 2;
+ }
+ if(tcph->ws != 0){
+ *opt++ = WSOPT;
+ *opt++ = WS_LENGTH;
+ *opt++ = tcph->ws;
+ }
+ while(optpad-- > 0)
+ *opt++ = NOOPOPT;
+ }
+
+ if(tcb != nil && tcb->nochecksum){
+ h->tcpcksum[0] = h->tcpcksum[1] = 0;
+ } else {
+ csum = ptclcsum(data, TCP4_IPLEN, hdrlen+dlen+TCP4_PHDRSIZE);
+ hnputs(h->tcpcksum, csum);
+ }
+
+ return data;
+}
+
+/*
+ * unpack the tcp header of a v6 packet into tcph; returns the tcp header length or -1
+ */
+int
+ntohtcp6(Tcp *tcph, Block **bpp)
+{
+	Tcp6hdr *h;
+	uchar *optr;
+	ushort hdrlen, optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP6_PKT+TCP6_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp6hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset, in bytes */
+	if(hdrlen < TCP6_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->ploadlen) - hdrlen;
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp6hdr *)((*bpp)->rp);	/* the pullup may have replaced the block */
+	optr = h->tcpopt;
+	n = hdrlen - TCP6_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* a lone trailing byte has no length field */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ * unpack the tcp header of a v4 packet into tcph; returns the tcp header length or -1
+ */
+int
+ntohtcp4(Tcp *tcph, Block **bpp)
+{
+	Tcp4hdr *h;
+	uchar *optr;
+	ushort hdrlen, optlen;
+	int n;
+
+	*bpp = pullupblock(*bpp, TCP4_PKT+TCP4_HDRSIZE);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp4hdr *)((*bpp)->rp);
+	tcph->source = nhgets(h->tcpsport);
+	tcph->dest = nhgets(h->tcpdport);
+	tcph->seq = nhgetl(h->tcpseq);
+	tcph->ack = nhgetl(h->tcpack);
+
+	hdrlen = (h->tcpflag[0]>>2) & ~3;	/* data offset, in bytes */
+	if(hdrlen < TCP4_HDRSIZE) {
+		freeblist(*bpp);
+		return -1;
+	}
+	tcph->flags = h->tcpflag[1];
+	tcph->wnd = nhgets(h->tcpwin);
+	tcph->urg = nhgets(h->tcpurg);
+	tcph->mss = 0;
+	tcph->ws = 0;
+	tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
+
+	*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
+	if(*bpp == nil)
+		return -1;
+	h = (Tcp4hdr *)((*bpp)->rp);	/* the pullup may have replaced the block */
+	optr = h->tcpopt;
+	n = hdrlen - TCP4_HDRSIZE;
+	while(n > 0 && *optr != EOLOPT) {
+		if(*optr == NOOPOPT) {
+			n--;
+			optr++;
+			continue;
+		}
+		optlen = n > 1 ? optr[1] : 0;	/* a lone trailing byte has no length field */
+		if(optlen < 2 || optlen > n)
+			break;
+		switch(*optr) {
+		case MSSOPT:
+			if(optlen == MSS_LENGTH)
+				tcph->mss = nhgets(optr+2);
+			break;
+		case WSOPT:
+			if(optlen == WS_LENGTH && *(optr+2) <= 14)
+				tcph->ws = HaveWS | *(optr+2);
+			break;
+		}
+		n -= optlen;
+		optr += optlen;
+	}
+	return hdrlen;
+}
+
+/*
+ * For outgoing calls, generate an initial sequence
+ * number and put a SYN on the send queue
+ */
+void
+tcpsndsyn(Conv *s, Tcpctl *tcb)
+{
+	tcb->iss = ((ulong)nrand(1<<16)<<16)|nrand(1<<16);	/* two 16-bit draws; unsigned so the shift cannot overflow */
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss;
+	tcb->snd.ptr = tcb->rttseq;
+	tcb->snd.nxt = tcb->rttseq;
+	tcb->flgcnt++;	/* one more flag (the SYN) counted in the sequence */
+	tcb->flags |= FORCE;
+	tcb->sndsyntime = NOW;
+
+	/* set desired mss and scale */
+	tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+}
+
+void
+sndrst(Proto *tcp, uchar *source, uchar *dest, ushort length, Tcp *seg, uchar version, char *reason) /* answer the segment seg (from source to dest, length data bytes) with a RST; rewrites seg */
+{
+ Block *hbp;
+ uchar rflags;
+ Tcppriv *tpriv;
+ Tcp4hdr ph4;
+ Tcp6hdr ph6;
+
+ netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+
+ tpriv = tcp->priv;
+
+ if(seg->flags & RST) /* never answer a reset with a reset */
+ return;
+
+ /* make pseudo header; addresses and ports are swapped because this is a reply */
+ switch(version) {
+ case V4:
+ memset(&ph4, 0, sizeof(ph4));
+ ph4.vihl = IP_VER4;
+ v6tov4(ph4.tcpsrc, dest);
+ v6tov4(ph4.tcpdst, source);
+ ph4.proto = IP_TCPPROTO;
+ hnputs(ph4.tcplen, TCP4_HDRSIZE);
+ hnputs(ph4.tcpsport, seg->dest);
+ hnputs(ph4.tcpdport, seg->source);
+ break;
+ case V6:
+ memset(&ph6, 0, sizeof(ph6));
+ ph6.vcf[0] = IP_VER6;
+ ipmove(ph6.tcpsrc, dest);
+ ipmove(ph6.tcpdst, source);
+ ph6.proto = IP_TCPPROTO;
+ hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+ hnputs(ph6.tcpsport, seg->dest);
+ hnputs(ph6.tcpdport, seg->source);
+ break;
+ default:
+ panic("sndrst: version %d", version);
+ }
+
+ tpriv->stats[OutRsts]++;
+ rflags = RST;
+
+ /* convince the other end that this reset is in band */
+ if(seg->flags & ACK) {
+ seg->seq = seg->ack;
+ seg->ack = 0;
+ }
+ else {
+ rflags |= ACK;
+ seg->ack = seg->seq;
+ seg->seq = 0;
+ if(seg->flags & SYN) /* SYN and FIN each advance the ack by one, data by length */
+ seg->ack++;
+ seg->ack += length;
+ if(seg->flags & FIN)
+ seg->ack++;
+ }
+ seg->flags = rflags;
+ seg->wnd = 0;
+ seg->urg = 0;
+ seg->mss = 0;
+ seg->ws = 0;
+ switch(version) {
+ case V4:
+ hbp = htontcp4(seg, nil, &ph4, nil);
+ if(hbp == nil)
+ return;
+ ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ case V6:
+ hbp = htontcp6(seg, nil, &ph6, nil);
+ if(hbp == nil)
+ return;
+ ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+ break;
+ default:
+ panic("sndrst2: version %d", version);
+ }
+}
+
+/*
+ * send a reset to the remote side and close the conversation
+ * called with s qlocked; returns nil, or the error string if an error was raised
+ */
+char*
+tcphangup(Conv *s)
+{
+	Tcp seg;
+	Tcpctl *tcb;
+	Block *hbp;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(waserror())
+		return commonerror();
+	if(ipcmp(s->raddr, IPnoaddr) != 0) {
+		/* an error while sending the reset must not prevent the local close */
+		if(!waserror()){
+			/* the memset leaves wnd, urg, mss and ws zero */
+			memset(&seg, 0, sizeof seg);
+			seg.flags = RST | ACK;
+			seg.ack = tcb->rcv.nxt;
+			tcb->rcv.una = 0;
+			seg.seq = tcb->snd.ptr;
+			switch(s->ipversion) {
+			case V4:
+				tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+				hbp = htontcp4(&seg, nil, &tcb->protohdr.tcp4hdr, tcb);
+				if(hbp != nil)	/* htontcp4 returns nil when it cannot get a block */
+					ipoput4(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			case V6:
+				tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+				hbp = htontcp6(&seg, nil, &tcb->protohdr.tcp6hdr, tcb);
+				if(hbp != nil)
+					ipoput6(s->p->f, hbp, 0, s->ttl, s->tos, s);
+				break;
+			default:
+				panic("tcphangup: version %d", s->ipversion);
+			}
+			poperror();
+		}
+	}
+	localclose(s, nil);
+	poperror();
+	return nil;
+}
+
+/*
+ * (re)send a SYN ACK for the call held in limbo entry lp.
+ *
+ * Builds a pseudo header from the addresses and ports stored in lp,
+ * offers the mss (and, if the peer offered a scale, the window scale)
+ * computed by tcpmtu(), and records the send time in lp->lastsend.
+ * lp->sndscale is set as a side effect.
+ *
+ * Returns 0 once the packet has been handed to ip, -1 if no header
+ * block could be built (nothing is sent and lp->lastsend is untouched).
+ */
+int
+sndsynack(Proto *tcp, Limbo *lp)
+{
+	Block *hbp;
+	Tcp4hdr ph4;
+	Tcp6hdr ph6;
+	Tcp seg;
+	int scale;
+
+	/* make pseudo header */
+	switch(lp->version) {
+	case V4:
+		memset(&ph4, 0, sizeof(ph4));
+		ph4.vihl = IP_VER4;
+		v6tov4(ph4.tcpsrc, lp->laddr);
+		v6tov4(ph4.tcpdst, lp->raddr);
+		ph4.proto = IP_TCPPROTO;
+		hnputs(ph4.tcplen, TCP4_HDRSIZE);
+		hnputs(ph4.tcpsport, lp->lport);
+		hnputs(ph4.tcpdport, lp->rport);
+		break;
+	case V6:
+		memset(&ph6, 0, sizeof(ph6));
+		ph6.vcf[0] = IP_VER6;
+		ipmove(ph6.tcpsrc, lp->laddr);
+		ipmove(ph6.tcpdst, lp->raddr);
+		ph6.proto = IP_TCPPROTO;
+		hnputs(ph6.ploadlen, TCP6_HDRSIZE);
+		hnputs(ph6.tcpsport, lp->lport);
+		hnputs(ph6.tcpdport, lp->rport);
+		break;
+	default:
+		/* message now names this function, not sndrst */
+		panic("sndsynack: version %d", lp->version);
+	}
+
+	seg.seq = lp->iss;
+	seg.ack = lp->irs+1;
+	seg.flags = SYN|ACK;
+	seg.urg = 0;
+	seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+	seg.wnd = QMAX;
+
+	/* if the other side set scale, we should too */
+	if(lp->rcvscale){
+		seg.ws = scale;
+		lp->sndscale = scale;
+	} else {
+		seg.ws = 0;
+		lp->sndscale = 0;
+	}
+
+	switch(lp->version) {
+	case V4:
+		hbp = htontcp4(&seg, nil, &ph4, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput4(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	case V6:
+		hbp = htontcp6(&seg, nil, &ph6, nil);
+		if(hbp == nil)
+			return -1;
+		ipoput6(tcp->f, hbp, 0, MAXTTL, DFLTTOS, nil);
+		break;
+	default:
+		panic("sndsynack2: version %d", lp->version);
+	}
+	lp->lastsend = NOW;
+	return 0;
+}
+
+/* limbo hash bucket: last two address bytes plus the port, masked to the table size */
+#define hashipa(a, p) ( ( (a)[IPaddrlen-2] + (a)[IPaddrlen-1] + (p) )&LHTMASK )
+
+/*
+ * put a call into limbo and respond with a SYN ACK
+ *
+ * If an entry for the same addresses, ports and ip version already
+ * exists, only its irs is refreshed.  Otherwise a new entry is
+ * appended to the hash chain; when the table already holds Maxlimbo
+ * entries the head of the chain is recycled instead of allocating.
+ * If the SYN ACK cannot be sent the entry is removed again.
+ *
+ * called with proto locked
+ */
+static void
+limbo(Conv *s, uchar *source, uchar *dest, Tcp *seg, int version)
+{
+	Limbo *lp, **l;
+	Tcppriv *tpriv;
+	int h;
+
+	tpriv = s->p->priv;
+	h = hashipa(source, seg->source);
+
+	for(l = &tpriv->lht[h]; *l != nil; l = &lp->next){
+		lp = *l;
+		if(lp->lport != seg->dest || lp->rport != seg->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->raddr, source) != 0)
+			continue;
+		if(ipcmp(lp->laddr, dest) != 0)
+			continue;
+
+		/* each new SYN restarts the retransmits */
+		lp->irs = seg->seq;
+		break;
+	}
+	lp = *l;
+	if(lp == nil){
+		/* no match: l now points at the nil link that ends the chain */
+		if(tpriv->nlimbo >= Maxlimbo && tpriv->lht[h]){
+			/* table full: recycle the entry at the head of this chain */
+			lp = tpriv->lht[h];
+			tpriv->lht[h] = lp->next;
+			/*
+			 * if the recycled entry was also the tail, l points at
+			 * its own next field; relink through the bucket head,
+			 * otherwise the entry would point at itself and fall
+			 * out of the table while still being counted.
+			 */
+			if(l == &lp->next)
+				l = &tpriv->lht[h];
+			lp->next = nil;
+		} else {
+			lp = malloc(sizeof(*lp));
+			if(lp == nil)
+				return;
+			tpriv->nlimbo++;
+		}
+		*l = lp;
+		lp->version = version;
+		ipmove(lp->laddr, dest);
+		ipmove(lp->raddr, source);
+		lp->lport = seg->dest;
+		lp->rport = seg->source;
+		lp->mss = seg->mss;
+		lp->rcvscale = seg->ws;
+		lp->irs = seg->seq;
+		lp->iss = (nrand(1<<16)<<16)|nrand(1<<16);
+		/* a recycled entry must not inherit the old call's retransmit count */
+		lp->rexmits = 0;
+	}
+
+	if(sndsynack(s->p, lp) < 0){
+		*l = lp->next;
+		tpriv->nlimbo--;
+		free(lp);
+	}
+}
+
+/*
+ * resend SYN ACK's for calls in limbo.
+ *
+ * An entry is due when (rexmits+1)*SYNACK_RXTIMER has passed since its
+ * last send.  A due entry is dropped after more than 5 retransmits or
+ * when the SYN ACK cannot be sent; while more than 100 calls are in
+ * limbo, due entries are aged but nothing is resent.
+ *
+ * Does nothing if the proto lock cannot be taken without blocking.
+ */
+static void
+limborexmit(Proto *tcp)
+{
+	Tcppriv *tpriv;
+	Limbo **l, *lp;
+	int h;
+	int seen;
+	ulong now;
+
+	tpriv = tcp->priv;
+
+	if(!canqlock(tcp))
+		return;
+	seen = 0;	/* live entries examined so far */
+	now = NOW;
+	for(h = 0; h < NLHT && seen < tpriv->nlimbo; h++){
+		for(l = &tpriv->lht[h]; *l != nil && seen < tpriv->nlimbo; ){
+			lp = *l;
+
+			/* not due yet: keep it and step to the next entry */
+			if(now - lp->lastsend < (lp->rexmits+1)*SYNACK_RXTIMER){
+				seen++;
+				l = &lp->next;
+				continue;
+			}
+
+			/* give up after more than 5 retransmits */
+			if(++(lp->rexmits) > 5){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;	/* *l is already the next entry */
+			}
+
+			/* if we're being attacked, don't bother resending SYN ACK's */
+			if(tpriv->nlimbo > 100){
+				seen++;
+				l = &lp->next;
+				continue;
+			}
+
+			if(sndsynack(tcp, lp) < 0){
+				tpriv->nlimbo--;
+				*l = lp->next;
+				free(lp);
+				continue;
+			}
+
+			seen++;
+			l = &lp->next;
+		}
+	}
+	qunlock(tcp);
+}
+
+/*
+ * a RST arrived for a listener: if it matches a call in limbo
+ * (same ports, ip version and addresses) and its sequence number
+ * directly follows the SYN we recorded, discard that call.
+ * Only the first matching entry is considered.
+ *
+ * called with proto locked
+ */
+static void
+limborst(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Tcppriv *priv;
+	Limbo **link, *ent;
+
+	priv = s->p->priv;
+
+	link = &priv->lht[hashipa(src, segp->source)];
+	while((ent = *link) != nil){
+		if(ent->lport == segp->dest && ent->rport == segp->source && ent->version == version
+		&& ipcmp(ent->laddr, dst) == 0
+		&& ipcmp(ent->raddr, src) == 0){
+			/* RST can only follow the SYN */
+			if(segp->seq == ent->irs+1){
+				priv->nlimbo--;
+				*link = ent->next;
+				free(ent);
+			}
+			return;
+		}
+		link = &ent->next;
+	}
+}
+
+/*
+ * come here when we finally get an ACK to our SYN-ACK.
+ * lookup call in limbo. if found, create a new conversation
+ *
+ * s is the listening conversation; its control block is copied as the
+ * template for the new one.  Returns the new conversation, already in
+ * state Established and added to the conversation hash table, or nil
+ * if the segment is not a plain ACK, matches no limbo entry, carries
+ * the wrong seq/ack, or no new conversation could be created.
+ *
+ * called with proto locked
+ */
+static Conv*
+tcpincoming(Conv *s, Tcp *segp, uchar *src, uchar *dst, uchar version)
+{
+	Conv *new;
+	Tcpctl *tcb;
+	Tcppriv *tpriv;
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Limbo *lp, **l;
+	int h;
+
+	/* unless it's just an ack, it can't be someone coming out of limbo */
+	if((segp->flags & SYN) || (segp->flags & ACK) == 0)
+		return nil;
+
+	tpriv = s->p->priv;
+
+	/* find a call in limbo */
+	h = hashipa(src, segp->source);
+	for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
+		netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+			src, segp->source, lp->raddr, lp->rport,
+			dst, segp->dest, lp->laddr, lp->lport,
+			version, lp->version
+		);
+
+		if(lp->lport != segp->dest || lp->rport != segp->source || lp->version != version)
+			continue;
+		if(ipcmp(lp->laddr, dst) != 0)
+			continue;
+		if(ipcmp(lp->raddr, src) != 0)
+			continue;
+
+		/* we're assuming no data with the initial SYN */
+		if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
+			netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+				segp->seq, lp->irs+1, segp->ack, lp->iss+1);
+			/* wrong numbers: leave the entry in limbo */
+			lp = nil;
+		} else {
+			/* unlink it; from here on lp is ours to free */
+			tpriv->nlimbo--;
+			*l = lp->next;
+		}
+		break;
+	}
+	if(lp == nil)
+		return nil;
+
+	new = Fsnewcall(s, src, segp->source, dst, segp->dest, version);
+	if(new == nil){
+		/* the entry is already off the chain; don't leak it */
+		free(lp);
+		return nil;
+	}
+
+	/* start from the listener's control block, with fresh timers */
+	memmove(new->ptcl, s->ptcl, sizeof(Tcpctl));
+	tcb = (Tcpctl*)new->ptcl;
+	tcb->flags &= ~CLONE;
+	tcb->timer.arg = new;
+	tcb->timer.state = TcptimerOFF;
+	tcb->acktimer.arg = new;
+	tcb->acktimer.state = TcptimerOFF;
+	tcb->katimer.arg = new;
+	tcb->katimer.state = TcptimerOFF;
+	tcb->rtt_timer.arg = new;
+	tcb->rtt_timer.state = TcptimerOFF;
+
+	tcb->irs = lp->irs;
+	tcb->rcv.nxt = tcb->irs+1;
+	tcb->rcv.urg = tcb->rcv.nxt;
+
+	tcb->iss = lp->iss;
+	tcb->rttseq = tcb->iss;
+	tcb->snd.wl2 = tcb->iss;
+	tcb->snd.una = tcb->iss+1;
+	tcb->snd.ptr = tcb->iss+1;
+	tcb->snd.nxt = tcb->iss+1;
+	tcb->flgcnt = 0;
+	tcb->flags |= SYNACK;
+
+	/* our sending max segment size cannot be bigger than what he asked for */
+	if(lp->mss != 0 && lp->mss < tcb->mss)
+		tcb->mss = lp->mss;
+
+	/* window scaling */
+	tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
+
+	/* the congestion window always starts out as a single segment */
+	tcb->snd.wnd = segp->wnd;
+	tcb->cwind = tcb->mss;
+
+	/* set initial round trip time */
+	tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
+	tcpsynackrtt(new);
+
+	free(lp);
+
+	/* set up proto header */
+	switch(version){
+	case V4:
+		h4 = &tcb->protohdr.tcp4hdr;
+		memset(h4, 0, sizeof(*h4));
+		h4->proto = IP_TCPPROTO;
+		hnputs(h4->tcpsport, new->lport);
+		hnputs(h4->tcpdport, new->rport);
+		v6tov4(h4->tcpsrc, dst);
+		v6tov4(h4->tcpdst, src);
+		break;
+	case V6:
+		h6 = &tcb->protohdr.tcp6hdr;
+		memset(h6, 0, sizeof(*h6));
+		h6->proto = IP_TCPPROTO;
+		hnputs(h6->tcpsport, new->lport);
+		hnputs(h6->tcpdport, new->rport);
+		ipmove(h6->tcpsrc, dst);
+		ipmove(h6->tcpdst, src);
+		break;
+	default:
+		panic("tcpincoming: version %d", new->ipversion);
+	}
+
+	tcpsetstate(new, Established);
+
+	iphtadd(&tpriv->ht, new);
+
+	return new;
+}
+
+/*
+ * return 1 if x lies in the closed interval [low, high], else 0.
+ * when low > high the interval is taken to wrap around the top of
+ * the number space.
+ */
+int
+seq_within(ulong x, ulong low, ulong high)
+{
+	if(low <= high)
+		return low <= x && x <= high;
+
+	/* wrapped interval: x is at or above low, or at or below high */
+	return x >= low || x <= high;
+}
+
+/* x < y, judged by the sign of the difference x-y taken as an int */
+int
+seq_lt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff < 0;
+}
+
+/* x <= y, judged by the sign of the difference x-y taken as an int */
+int
+seq_le(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff <= 0;
+}
+
+/* x > y, judged by the sign of the difference x-y taken as an int */
+int
+seq_gt(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff > 0;
+}
+
+/* x >= y, judged by the sign of the difference x-y taken as an int */
+int
+seq_ge(ulong x, ulong y)
+{
+	int diff;
+
+	diff = (int)(x-y);
+	return diff >= 0;
+}
+
+/*
+ * use the time between the first SYN and its ack as the
+ * initial round trip time: seeds srtt and mdev from the time
+ * elapsed since sndsyntime and stops the round trip timer.
+ */
+void
+tcpsynackrtt(Conv *s)
+{
+	Tcpctl *cb;
+	int elapsed;
+
+	cb = (Tcpctl*)s->ptcl;
+
+	elapsed = NOW - cb->sndsyntime;
+	cb->srtt = elapsed<<LOGAGAIN;
+	cb->mdev = elapsed<<LOGDGAIN;
+
+	/* halt round trip timer */
+	tcphalt(s->p->priv, &cb->rtt_timer);
+}
+
+/*
+ * process the ack field of an incoming segment for conversation s:
+ * count duplicate acks (triggering a retransmit at TCPREXMTTHRESH),
+ * update the send window, grow the congestion window, take a round
+ * trip sample, discard acked data from the write queue and advance
+ * snd.una.  Returns early, without advancing snd.una, when the ack
+ * is beyond snd.nxt or does not move past snd.una.
+ */
+void
+update(Conv *s, Tcp *seg)
+{
+ int rtt, delta;
+ Tcpctl *tcb;
+ ulong acked;
+ ulong expand;
+ Tcppriv *tpriv;
+
+ tpriv = s->p->priv;
+ tcb = (Tcpctl*)s->ptcl;
+
+ /* if everything has been acked, force output(?) */
+ /* NOTE(review): the test is for an ack beyond snd.nxt, i.e. for data not yet sent */
+ if(seq_gt(seg->ack, tcb->snd.nxt)) {
+ tcb->flags |= FORCE;
+ return;
+ }
+
+ /* added by Dong Lin for fast retransmission */
+ if(seg->ack == tcb->snd.una
+ && tcb->snd.una != tcb->snd.nxt
+ && seg->len == 0
+ && seg->wnd == tcb->snd.wnd) {
+
+ /* this is a pure ack w/o window update */
+ netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
+ tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
+
+ if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
+ /*
+ * tahoe tcp rxt the packet, half sshthresh,
+ * and set cwnd to one packet
+ */
+ tcb->snd.recovery = 1;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcprxmit(s);
+ } else {
+ /* do reno tcp here. */
+ }
+ }
+
+ /*
+ * update window
+ */
+ if(seq_gt(seg->ack, tcb->snd.wl2)
+ || (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ tcb->snd.wnd = seg->wnd;
+ tcb->snd.wl2 = seg->ack;
+ }
+
+ /* nothing new is acknowledged: stop here */
+ if(!seq_gt(seg->ack, tcb->snd.una)){
+ /*
+ * don't let us hangup if sending into a closed window and
+ * we're still getting acks
+ */
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ tcb->backedoff = MAXBACKMS/4;
+ }
+ return;
+ }
+
+ /*
+ * any positive ack turns off fast rxt,
+ * (should we do new-reno on partial acks?)
+ */
+ if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ } else
+ netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
+
+ /* Compute the new send window size */
+ acked = seg->ack - tcb->snd.una;
+
+ /* avoid slow start and timers for SYN acks */
+ /* the first ack seen without SYNACK set is taken to cover our SYN: one unit of acked and flgcnt is dropped for it */
+ if((tcb->flags & SYNACK) == 0) {
+ tcb->flags |= SYNACK;
+ acked--;
+ tcb->flgcnt--;
+ goto done;
+ }
+
+ /* slow start as long as we're not recovering from lost packets */
+ if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
+ if(tcb->cwind < tcb->ssthresh) {
+ /* below ssthresh: grow by what was acked, at most one mss */
+ expand = tcb->mss;
+ if(acked < expand)
+ expand = acked;
+ }
+ else
+ expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+
+ /* clamp growth so cwind neither wraps nor exceeds the send window */
+ if(tcb->cwind + expand < tcb->cwind)
+ expand = tcb->snd.wnd - tcb->cwind;
+ if(tcb->cwind + expand > tcb->snd.wnd)
+ expand = tcb->snd.wnd - tcb->cwind;
+ tcb->cwind += expand;
+ }
+
+ /* Adjust the timers according to the round trip time */
+ if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ /* only take a sample when RETRAN is clear */
+ if((tcb->flags&RETRAN) == 0) {
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+ rtt = tcb->rtt_timer.start - tcb->rtt_timer.count;
+ if(rtt == 0)
+ rtt = 1; /* otherwise all close systems will rexmit in 0 time */
+ rtt *= MSPTICK;
+ if(tcb->srtt == 0) {
+ tcb->srtt = rtt << LOGAGAIN;
+ tcb->mdev = rtt << LOGDGAIN;
+ } else {
+ /* srtt and mdev are kept scaled by 2^LOGAGAIN and 2^LOGDGAIN */
+ delta = rtt - (tcb->srtt>>LOGAGAIN);
+ tcb->srtt += delta;
+ if(tcb->srtt <= 0)
+ tcb->srtt = 1;
+
+ delta = abs(delta) - (tcb->mdev>>LOGDGAIN);
+ tcb->mdev += delta;
+ if(tcb->mdev <= 0)
+ tcb->mdev = 1;
+ }
+ tcpsettimer(tcb);
+ }
+ }
+
+done:
+ /* drop acked bytes from the write queue; if fewer were queued than acked, one flgcnt unit is consumed */
+ if(qdiscard(s->wq, acked) < acked)
+ tcb->flgcnt--;
+
+ tcb->snd.una = seg->ack;
+ if(seq_gt(seg->ack, tcb->snd.urg))
+ tcb->snd.urg = seg->ack;
+
+ /* keep the retransmit timer running only while something is unacked */
+ if(tcb->snd.una != tcb->snd.nxt)
+ tcpgo(tpriv, &tcb->timer);
+ else
+ tcphalt(tpriv, &tcb->timer);
+
+ if(seq_lt(tcb->snd.ptr, tcb->snd.una))
+ tcb->snd.ptr = tcb->snd.una;
+
+ tcb->flags &= ~RETRAN;
+ tcb->backoff = 0;
+ tcb->backedoff = 0;
+}
+
+/*
+ * input routine for a received TCP packet held in bp.
+ *
+ * Verifies the checksum, converts the header into seg, trims the
+ * block to the data length, looks up the conversation (handing SYNs,
+ * RSTs and final ACKs for listeners to the limbo routines) and then
+ * runs the per-state input processing with the conversation locked.
+ * Segments that match no conversation are answered with a reset.
+ */
+void
+tcpiput(Proto *tcp, Ipifc*, Block *bp)
+{
+ Tcp seg;
+ Tcp4hdr *h4;
+ Tcp6hdr *h6;
+ int hdrlen;
+ Tcpctl *tcb;
+ ushort length;
+ uchar source[IPaddrlen], dest[IPaddrlen];
+ Conv *s;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = tcp->f;
+ tpriv = tcp->priv;
+
+ tpriv->stats[InSegs]++;
+
+ /* the same bytes are viewed as either header until the version is known */
+ h4 = (Tcp4hdr*)(bp->rp);
+ h6 = (Tcp6hdr*)(bp->rp);
+
+ if((h4->vihl&0xF0)==IP_VER4) {
+ version = V4;
+ length = nhgets(h4->length);
+ v4tov6(dest, h4->tcpdst);
+ v4tov6(source, h4->tcpsrc);
+
+ /* presumably turns the ip header into the checksum pseudo header — confirm against Tcp4hdr */
+ h4->Unused = 0;
+ hnputs(h4->tcplen, length-TCP4_PKT);
+ /* checksum is skipped when the block has Btcpck set or the checksum field is zero */
+ if(!(bp->flag & Btcpck) && (h4->tcpcksum[0] || h4->tcpcksum[1]) &&
+ ptclcsum(bp, TCP4_IPLEN, length-TCP4_IPLEN)) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp proto cksum\n");
+ freeblist(bp);
+ return;
+ }
+
+ hdrlen = ntohtcp4(&seg, &bp);
+ if(hdrlen < 0){
+ /* NOTE(review): bp is not freed here; presumably ntohtcp4 disposes of it on failure — confirm */
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen+TCP4_PKT;
+ bp = trimblock(bp, hdrlen+TCP4_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ return;
+ }
+ }
+ else {
+ int ttl = h6->ttl;
+ int proto = h6->proto;
+
+ version = V6;
+ length = nhgets(h6->ploadlen);
+ ipmove(dest, h6->tcpdst);
+ ipmove(source, h6->tcpsrc);
+
+ /*
+ * header fields are overwritten for the checksum and put back
+ * afterwards (ttl, proto and ploadlen are restored below)
+ */
+ h6->ploadlen[0] = h6->ploadlen[1] = h6->proto = 0;
+ h6->ttl = proto;
+ hnputl(h6->vcf, length);
+ /* NOTE(review): unlike the v4 path, Btcpck is not consulted here */
+ if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
+ ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+ tpriv->stats[CsumErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp proto cksum\n");
+ freeblist(bp);
+ return;
+ }
+ h6->ttl = ttl;
+ h6->proto = proto;
+ hnputs(h6->ploadlen, length);
+
+ hdrlen = ntohtcp6(&seg, &bp);
+ if(hdrlen < 0){
+ /* NOTE(review): as in the v4 path, bp is not freed here — confirm ntohtcp6 does it */
+ tpriv->stats[HlenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "bad tcp hdr len\n");
+ return;
+ }
+
+ /* trim the packet to the size claimed by the datagram */
+ length -= hdrlen;
+ bp = trimblock(bp, hdrlen+TCP6_PKT, length);
+ if(bp == nil){
+ tpriv->stats[LenErrs]++;
+ tpriv->stats[InErrs]++;
+ netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ return;
+ }
+ }
+
+ /* lock protocol while searching for a conversation */
+ qlock(tcp);
+
+ /* Look for a matching conversation */
+ s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
+ if(s == nil){
+ netlog(f, Logtcp, "iphtlook failed");
+ /* also reached by goto from the listener case below; the proto lock is held on entry */
+reset:
+ qunlock(tcp);
+ sndrst(tcp, source, dest, length, &seg, version, "no conversation");
+ freeblist(bp);
+ return;
+ }
+
+ /* if it's a listener, look for the right flags and get a new conv */
+ tcb = (Tcpctl*)s->ptcl;
+ if(tcb->state == Listen){
+ if(seg.flags & RST){
+ limborst(s, &seg, source, dest, version);
+ qunlock(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /* if this is a new SYN, put the call into limbo */
+ if((seg.flags & SYN) && (seg.flags & ACK) == 0){
+ limbo(s, source, dest, &seg, version);
+ qunlock(tcp);
+ freeblist(bp);
+ return;
+ }
+
+ /*
+ * if there's a matching call in limbo, tcpincoming will
+ * return it in state Syn_received
+ */
+ /* NOTE(review): tcpincoming in this file sets the new conversation to Established */
+ s = tcpincoming(s, &seg, source, dest, version);
+ if(s == nil)
+ goto reset;
+ }
+
+ /* The rest of the input state machine is run with the control block
+ * locked and implements the state machine directly out of the RFC.
+ * Out-of-band data is ignored - it was always a bad idea.
+ */
+ tcb = (Tcpctl*)s->ptcl;
+ if(waserror()){
+ qunlock(s);
+ nexterror();
+ }
+ /* take the conversation lock before dropping the protocol lock */
+ qlock(s);
+ qunlock(tcp);
+
+ /* fix up window */
+ seg.wnd <<= tcb->rcv.scale;
+
+ /* every input packet in puts off the keep alive time out */
+ tcpsetkacounter(tcb);
+
+ switch(tcb->state) {
+ case Closed:
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ case Syn_sent:
+ if(seg.flags & ACK) {
+ if(!seq_within(seg.ack, tcb->iss+1, tcb->snd.nxt)) {
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_sent");
+ goto raise;
+ }
+ }
+ if(seg.flags & RST) {
+ if(seg.flags & ACK)
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ if(seg.flags & SYN) {
+ procsyn(s, &seg);
+ if(seg.flags & ACK){
+ update(s, &seg);
+ tcpsynackrtt(s);
+ tcpsetstate(s, Established);
+ tcpsetscale(s, tcb, seg.ws, tcb->scale);
+ }
+ else {
+ tcb->time = NOW;
+ tcpsetstate(s, Syn_received); /* DLP - shouldn't this be a reset? */
+ }
+
+ /* data or a FIN came with the SYN: go on to the common processing below */
+ if(length != 0 || (seg.flags & FIN))
+ break;
+
+ freeblist(bp);
+ goto output;
+ }
+ else
+ freeblist(bp);
+
+ qunlock(s);
+ poperror();
+ return;
+ case Syn_received:
+ /* doesn't matter if it's the correct ack, we're just trying to set timing */
+ if(seg.flags & ACK)
+ tcpsynackrtt(s);
+ break;
+ }
+
+ /*
+ * One DOS attack is to open connections to us and then forget about them,
+ * thereby tying up a conv at no long term cost to the attacker.
+ * This is an attempt to defeat these stateless DOS attacks. See
+ * corresponding code in tcpsendka().
+ */
+ if(tcb->state != Syn_received && (seg.flags & RST) == 0){
+ if(tcpporthogdefense
+ && seq_within(seg.ack, tcb->snd.una-(1<<31), tcb->snd.una-(1<<29))){
+ print("stateless hog %I.%d->%I.%d f %ux %lux - %lux - %lux\n",
+ source, seg.source, dest, seg.dest, seg.flags,
+ tcb->snd.una-(1<<31), seg.ack, tcb->snd.una-(1<<29));
+ localclose(s, "stateless hog");
+ }
+ }
+
+ /* Cut the data to fit the receive window */
+ if(tcptrim(tcb, &seg, &bp, &length) == -1) {
+ netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ if(!(seg.flags & RST)) {
+ tcb->flags |= FORCE;
+ goto output;
+ }
+ qunlock(s);
+ poperror();
+ return;
+ }
+
+ /* Cannot accept so answer with a rst */
+ if(length && tcb->state == Closed) {
+ sndrst(tcp, source, dest, length, &seg, version, "sending to Closed");
+ goto raise;
+ }
+
+ /* The segment is beyond the current receive pointer so
+ * queue the data in the resequence queue
+ */
+ if(seg.seq != tcb->rcv.nxt)
+ if(length != 0 || (seg.flags & (SYN|FIN))) {
+ update(s, &seg);
+ /* bp is handed to addreseq here and not used again on this path */
+ if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
+ tcb->flags |= FORCE;
+ goto output;
+ }
+
+ /*
+ * keep looping till we've processed this packet plus any
+ * adjacent packets in the resequence queue
+ */
+ for(;;) {
+ if(seg.flags & RST) {
+ if(tcb->state == Established) {
+ tpriv->stats[EstabResets]++;
+ if(tcb->rcv.nxt != seg.seq)
+ print("out of order RST rcvd: %I.%d -> %I.%d, rcv.nxt %lux seq %lux\n", s->raddr, s->rport, s->laddr, s->lport, tcb->rcv.nxt, seg.seq);
+ }
+ localclose(s, Econrefused);
+ goto raise;
+ }
+
+ /* segments without ACK are dropped from here on */
+ if((seg.flags&ACK) == 0)
+ goto raise;
+
+ switch(tcb->state) {
+ case Syn_received:
+ if(!seq_within(seg.ack, tcb->snd.una+1, tcb->snd.nxt)){
+ sndrst(tcp, source, dest, length, &seg, version,
+ "bad seq in Syn_received");
+ goto raise;
+ }
+ update(s, &seg);
+ tcpsetstate(s, Established);
+ /* no break: falls through to Established, so update() runs a second time */
+ case Established:
+ case Close_wait:
+ update(s, &seg);
+ break;
+ case Finwait1:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0){
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcpsetkacounter(tcb);
+ tcb->time = NOW;
+ tcpsetstate(s, Finwait2);
+ tcb->katimer.start = MSL2 * (1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->katimer);
+ }
+ break;
+ case Finwait2:
+ update(s, &seg);
+ break;
+ case Closing:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000 / MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ break;
+ case Last_ack:
+ update(s, &seg);
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ localclose(s, nil);
+ goto raise;
+ }
+ /* no break: falls through to Time_wait when something is still outstanding */
+ case Time_wait:
+ tcb->flags |= FORCE;
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+ }
+
+ if((seg.flags&URG) && seg.urg) {
+ if(seq_gt(seg.urg + seg.seq, tcb->rcv.urg)) {
+ tcb->rcv.urg = seg.urg + seg.seq;
+ pullblock(&bp, seg.urg);
+ }
+ }
+ else
+ if(seq_gt(tcb->rcv.nxt, tcb->rcv.urg))
+ tcb->rcv.urg = tcb->rcv.nxt;
+
+ if(length == 0) {
+ if(bp != nil)
+ freeblist(bp);
+ }
+ else {
+ switch(tcb->state){
+ default:
+ /* Ignore segment text */
+ if(bp != nil)
+ freeblist(bp);
+ break;
+
+ case Syn_received:
+ case Established:
+ case Finwait1:
+ /* If we still have some data place on
+ * receive queue
+ */
+ if(bp) {
+ bp = packblock(bp);
+ if(bp == nil)
+ panic("tcp packblock");
+ qpassnolim(s->rq, bp);
+ bp = nil;
+
+ /*
+ * Force an ack every 2 data messages. This is
+ * a hack for rob to make his home system run
+ * faster.
+ *
+ * this also keeps the standard TCP congestion
+ * control working since it needs an ack every
+ * 2 max segs worth. This is not quite that,
+ * but under a real stream is equivalent since
+ * every packet has a max seg in it.
+ */
+ if(++(tcb->rcv.una) >= 2)
+ tcb->flags |= FORCE;
+ }
+ tcb->rcv.nxt += length;
+
+ /*
+ * update our rcv window
+ */
+ tcprcvwin(s);
+
+ /*
+ * turn on the acktimer if there's something
+ * to ack
+ */
+ if(tcb->acktimer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->acktimer);
+
+ break;
+ case Finwait2:
+ /* no process to read the data, send a reset */
+ if(bp != nil)
+ freeblist(bp);
+ sndrst(tcp, source, dest, length, &seg, version,
+ "send to Finwait2");
+ qunlock(s);
+ poperror();
+ return;
+ }
+ }
+
+ if(seg.flags & FIN) {
+ tcb->flags |= FORCE;
+
+ switch(tcb->state) {
+ case Syn_received:
+ case Established:
+ tcb->rcv.nxt++;
+ tcpsetstate(s, Close_wait);
+ break;
+ case Finwait1:
+ tcb->rcv.nxt++;
+ if(qlen(s->wq)+tcb->flgcnt == 0) {
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2*(1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ }
+ else
+ tcpsetstate(s, Closing);
+ break;
+ case Finwait2:
+ tcb->rcv.nxt++;
+ tcphalt(tpriv, &tcb->rtt_timer);
+ tcphalt(tpriv, &tcb->acktimer);
+ tcphalt(tpriv, &tcb->katimer);
+ tcpsetstate(s, Time_wait);
+ tcb->timer.start = MSL2 * (1000/MSPTICK);
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ case Close_wait:
+ case Closing:
+ case Last_ack:
+ break;
+ case Time_wait:
+ tcpgo(tpriv, &tcb->timer);
+ break;
+ }
+ }
+
+ /*
+ * get next adjacent segment from the resequence queue.
+ * dump/trim any overlapping segments
+ */
+ for(;;) {
+ if(tcb->reseq == nil)
+ goto output;
+
+ /* the head of the queue still lies beyond rcv.nxt: nothing more to take */
+ if(seq_ge(tcb->rcv.nxt, tcb->reseq->seg.seq) == 0)
+ goto output;
+
+ getreseq(tcb, &seg, &bp, &length);
+
+ if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ break;
+ }
+ }
+output:
+ tcpoutput(s);
+ qunlock(s);
+ poperror();
+ return;
+raise:
+ /* common exit for dropped segments: unlock, free the block, kick the conversation */
+ qunlock(s);
+ poperror();
+ freeblist(bp);
+ tcpkick(s);
+}
+
+/*
+ * send whatever the conversation currently may: new or retransmitted
+ * data from the write queue, SYN/FIN flags, or a bare ack when FORCE
+ * is set.  At most 100 segments are sent per call.
+ *
+ * always enters and exits with the s locked. We drop
+ * the lock to ipoput the packet so some care has to be
+ * taken by callers.
+ */
+void
+tcpoutput(Conv *s)
+{
+ Tcp seg;
+ int msgs;
+ Tcpctl *tcb;
+ Block *hbp, *bp;
+ int sndcnt, n;
+ ulong ssize, dsize, usable, sent;
+ Fs *f;
+ Tcppriv *tpriv;
+ uchar version;
+
+ f = s->p->f;
+ tpriv = s->p->priv;
+ version = s->ipversion;
+
+ for(msgs = 0; msgs < 100; msgs++) {
+ tcb = (Tcpctl*)s->ptcl;
+
+ /* nothing is ever sent from these states */
+ switch(tcb->state) {
+ case Listen:
+ case Closed:
+ case Finwait2:
+ return;
+ }
+
+ /* force an ack when a window has opened up */
+ if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
+ tcb->rcv.blocked = 0;
+ tcb->flags |= FORCE;
+ }
+
+ /* sndcnt: queued bytes plus pending flag units; sent: how much of that lies before snd.ptr */
+ sndcnt = qlen(s->wq)+tcb->flgcnt;
+ sent = tcb->snd.ptr - tcb->snd.una;
+
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
+ /* Compute usable segment based on offered window and limit
+ * window probes to one
+ */
+ if(tcb->snd.wnd == 0){
+ if(sent != 0) {
+ if((tcb->flags&FORCE) == 0)
+ break;
+// tcb->snd.ptr = tcb->snd.una;
+ }
+ usable = 1;
+ }
+ else {
+ usable = tcb->cwind;
+ if(tcb->snd.wnd < usable)
+ usable = tcb->snd.wnd;
+ usable -= sent;
+ }
+ /* ssize: sequence space this segment will occupy; dsize: bytes to pull from the queue */
+ ssize = sndcnt-sent;
+ if(ssize && usable < 2)
+ netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
+ tcb->snd.wnd, tcb->cwind);
+ if(usable < ssize)
+ ssize = usable;
+ if(tcb->mss < ssize)
+ ssize = tcb->mss;
+ dsize = ssize;
+ seg.urg = 0;
+
+ /* nothing to send and no forced ack: done */
+ if(ssize == 0)
+ if((tcb->flags&FORCE) == 0)
+ break;
+
+ tcb->flags &= ~FORCE;
+ tcprcvwin(s);
+
+ /* By default we will generate an ack */
+ tcphalt(tpriv, &tcb->acktimer);
+ tcb->rcv.una = 0;
+ seg.source = s->lport;
+ seg.dest = s->rport;
+ seg.flags = ACK;
+ seg.mss = 0;
+ seg.ws = 0;
+ switch(tcb->state){
+ case Syn_sent:
+ seg.flags = 0;
+ if(tcb->snd.ptr == tcb->iss){
+ /* the SYN takes one unit of ssize, so one less byte of data is pulled */
+ seg.flags |= SYN;
+ dsize--;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ case Syn_received:
+ /*
+ * don't send any data with a SYN/ACK packet
+ * because Linux rejects the packet in its
+ * attempt to solve the SYN attack problem
+ */
+ if(tcb->snd.ptr == tcb->iss){
+ seg.flags |= SYN;
+ dsize = 0;
+ ssize = 1;
+ seg.mss = tcb->mss;
+ seg.ws = tcb->scale;
+ }
+ break;
+ }
+ seg.seq = tcb->snd.ptr;
+ seg.ack = tcb->rcv.nxt;
+ seg.wnd = tcb->rcv.wnd;
+
+ /* Pull out data to send */
+ bp = nil;
+ if(dsize != 0) {
+ bp = qcopy(s->wq, dsize, sent);
+ /* fewer bytes than asked for: the shortfall is taken to be the FIN's unit */
+ if(BLEN(bp) != dsize) {
+ seg.flags |= FIN;
+ dsize--;
+ }
+ }
+
+ if(sent+dsize == sndcnt)
+ seg.flags |= PSH;
+
+ /* keep track of balance of resent data */
+ if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
+ n = tcb->snd.nxt - tcb->snd.ptr;
+ if(ssize < n)
+ n = ssize;
+ tcb->resent += n;
+ netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
+ s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
+ tpriv->stats[RetransSegs]++;
+ }
+
+ tcb->snd.ptr += ssize;
+
+ /* Pull up the send pointer so we can accept acks
+ * for this window
+ */
+ if(seq_gt(tcb->snd.ptr,tcb->snd.nxt))
+ tcb->snd.nxt = tcb->snd.ptr;
+
+ /* Build header, link data and compute cksum */
+ switch(version){
+ case V4:
+ tcb->protohdr.tcp4hdr.vihl = IP_VER4;
+ hbp = htontcp4(&seg, bp, &tcb->protohdr.tcp4hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ case V6:
+ tcb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+ hbp = htontcp6(&seg, bp, &tcb->protohdr.tcp6hdr, tcb);
+ if(hbp == nil) {
+ freeblist(bp);
+ return;
+ }
+ break;
+ default:
+ hbp = nil; /* to suppress a warning */
+ panic("tcpoutput: version %d", version);
+ }
+
+ /* Start the transmission timers if there is new data and we
+ * expect acknowledges
+ */
+ if(ssize != 0){
+ if(tcb->timer.state != TcptimerON)
+ tcpgo(tpriv, &tcb->timer);
+
+ /* If round trip timer isn't running, start it.
+ * measure the longest packet only in case the
+ * transmission time dominates RTT
+ */
+ if(tcb->rtt_timer.state != TcptimerON)
+ if(ssize == tcb->mss) {
+ tcpgo(tpriv, &tcb->rtt_timer);
+ tcb->rttseq = tcb->snd.ptr;
+ }
+ }
+
+ tpriv->stats[OutSegs]++;
+
+ /* put off the next keep alive */
+ tcpgo(tpriv, &tcb->katimer);
+
+ switch(version){
+ case V4:
+ if(ipoput4(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ case V6:
+ if(ipoput6(f, hbp, 0, s->ttl, s->tos, s) < 0){
+ /* a negative return means no route */
+ localclose(s, "no route");
+ }
+ break;
+ default:
+ panic("tcpoutput2: version %d", version);
+ }
+ /* every fourth pass, release the conversation lock and yield the processor */
+ if((msgs%4) == 1){
+ qunlock(s);
+ sched();
+ qlock(s);
+ }
+ }
+}
+
+/*
+ * send a keep alive, following the BSD convention (hack?) of
+ * resending the last uchar acked: the segment's sequence number lies
+ * just before snd.una (or much further back when tcpporthogdefense is
+ * set) and it carries one byte of data, except in Finwait2 where it
+ * carries a FIN and no data.
+ */
+void
+tcpsendka(Conv *s)
+{
+	Tcpctl *cb;
+	Tcp ka;
+	Block *pkt, *payload;
+
+	cb = (Tcpctl*)s->ptcl;
+
+	ka.source = s->lport;
+	ka.dest = s->rport;
+	ka.flags = ACK|PSH;
+	ka.urg = 0;
+	ka.mss = 0;
+	ka.ws = 0;
+	ka.seq = cb->snd.una-1;
+	if(tcpporthogdefense)
+		ka.seq = cb->snd.una-(1<<30)-nrand(1<<20);
+	ka.ack = cb->rcv.nxt;
+	ka.wnd = cb->rcv.wnd;
+	cb->rcv.una = 0;
+
+	payload = nil;
+	if(cb->state == Finwait2)
+		ka.flags |= FIN;
+	else{
+		/* one byte of data; only the write pointer is advanced */
+		payload = allocb(1);
+		payload->wp++;
+	}
+
+	if(isv4(s->raddr)){
+		/* Build header, link data and compute cksum */
+		cb->protohdr.tcp4hdr.vihl = IP_VER4;
+		pkt = htontcp4(&ka, payload, &cb->protohdr.tcp4hdr, cb);
+		if(pkt == nil){
+			freeblist(payload);
+			return;
+		}
+		ipoput4(s->p->f, pkt, 0, s->ttl, s->tos, s);
+		return;
+	}
+
+	/* Build header, link data and compute cksum */
+	cb->protohdr.tcp6hdr.vcf[0] = IP_VER6;
+	pkt = htontcp6(&ka, payload, &cb->protohdr.tcp6hdr, cb);
+	if(pkt == nil){
+		freeblist(payload);
+		return;
+	}
+	ipoput6(s->p->f, pkt, 0, s->ttl, s->tos, s);
+}
+
+/*
+ * set connection to time out after 12 minutes:
+ * kacounter becomes the number of keep alive timer periods that fit
+ * in that time, but never fewer than 3.
+ */
+void
+tcpsetkacounter(Tcpctl *tcb)
+{
+	enum {
+		Kaidlems	= 12 * 60 * 1000,
+		Kaminprobes	= 3,
+	};
+
+	tcb->kacounter = Kaidlems / (tcb->katimer.start*MSPTICK);
+	if(tcb->kacounter < Kaminprobes)
+		tcb->kacounter = Kaminprobes;
+}
+
+/*
+ * keep alive timer routine; v is the conversation.
+ * if we've run out of keep alives, close the connection with
+ * Etimedout; otherwise send another one and restart the timer.
+ * A conversation that is already Closed is left alone.
+ */
+void
+tcpkeepalive(void *v)
+{
+	Conv *conv;
+	Tcpctl *cb;
+
+	conv = v;
+	cb = (Tcpctl*)conv->ptcl;
+	if(waserror()){
+		qunlock(conv);
+		nexterror();
+	}
+	qlock(conv);
+	if(cb->state == Closed){
+		/* nothing to keep alive */
+	}else if(--(cb->kacounter) <= 0)
+		localclose(conv, Etimedout);
+	else{
+		tcpsendka(conv);
+		tcpgo(conv->p->priv, &cb->katimer);
+	}
+	qunlock(conv);
+	poperror();
+}
+
+/*
+ * start keepalive timer
+ *
+ * f[1], when present (n > 1), is parsed as the keep alive interval and
+ * divided by MSPTICK to give the timer period; values smaller than
+ * MSPTICK are ignored and the current period is kept.
+ * Returns nil on success or an error string if the connection is not
+ * Established.
+ */
+char*
+tcpstartka(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+	int x;
+
+	tcb = (Tcpctl*)s->ptcl;
+	if(tcb->state != Established)
+		return "connection must be in Established state";
+	if(n > 1){
+		x = atoi(f[1]);
+		if(x >= MSPTICK)
+			tcb->katimer.start = x/MSPTICK;
+	}
+	tcpsetkacounter(tcb);
+	tcpgo(s->p->priv, &tcb->katimer);
+
+	return nil;
+}
+
+/*
+ * turn checksums on/off
+ *
+ * f[1] is parsed with atoi: zero sets nochecksum, anything else
+ * clears it.  n is taken to be the number of fields in f, as in
+ * tcpstartka (assumption — confirm against the caller); a request
+ * without an argument is rejected instead of reading f[1].
+ * Returns nil on success or an error string.
+ */
+char*
+tcpsetchecksum(Conv *s, char **f, int n)
+{
+	Tcpctl *tcb;
+
+	if(n < 2)
+		return "checksum requires an argument";
+
+	tcb = (Tcpctl*)s->ptcl;
+	tcb->nochecksum = !atoi(f[1]);
+
+	return nil;
+}
+
+/*
+ * retransmit from the oldest unacknowledged byte: rewind the send
+ * pointer to snd.una, collapse the congestion window and the slow
+ * start threshold to one mss, and send.
+ */
+void
+tcprxmit(Conv *s)
+{
+	Tcpctl *cb;
+
+	cb = (Tcpctl*)s->ptcl;
+
+	/* start again at the first unacked byte */
+	cb->snd.ptr = cb->snd.una;
+	cb->flags |= RETRAN|FORCE;
+
+	/*
+	 * pull window down to a single packet
+	 */
+	cb->cwind = cb->mss;
+
+	/*
+	 * We should be halving the slow start threshhold (down to one
+	 * mss) but leaving it at mss seems to work well enough
+	 */
+	cb->ssthresh = cb->mss;
+
+	tcpoutput(s);
+}
+
+/*
+ * retransmit timer routine; arg is the conversation.
+ *
+ * In Time_wait the conversation is simply closed; in Closed nothing
+ * happens.  In any other state the time waited so far is added to
+ * backedoff and, once that reaches the limit (MAXBACKMS, or half of
+ * it in Syn_sent), the conversation is closed with Etimedout;
+ * otherwise the timer is reset and the data retransmitted.
+ */
+void
+tcptimeout(void *arg)
+{
+	Conv *conv;
+	Tcpctl *cb;
+	Tcppriv *priv;
+	int limit;
+
+	conv = (Conv*)arg;
+	priv = conv->p->priv;
+	cb = (Tcpctl*)conv->ptcl;
+
+	if(waserror()){
+		qunlock(conv);
+		nexterror();
+	}
+	qlock(conv);
+	if(cb->state == Time_wait)
+		localclose(conv, nil);
+	else if(cb->state != Closed){
+		cb->backoff++;
+		limit = MAXBACKMS;
+		if(cb->state == Syn_sent)
+			limit = MAXBACKMS/2;
+		cb->backedoff += cb->timer.start * MSPTICK;
+		if(cb->backedoff < limit){
+			netlog(conv->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", cb->snd.una, cb->timer.start, NOW);
+			tcpsettimer(cb);
+			tcprxmit(conv);
+			priv->stats[RetransTimeouts]++;
+			cb->snd.dupacks = 0;
+		}else
+			localclose(conv, Etimedout);
+	}
+	qunlock(conv);
+	poperror();
+}
+
+/*
+ * return 1 if seq falls inside the receive window,
+ * i.e. within [rcv.nxt, rcv.nxt+rcv.wnd-1], else 0
+ */
+int
+inwindow(Tcpctl *tcb, int seq)
+{
+	int inside;
+
+	inside = seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
+	return inside;
+}
+
+/*
+ * initialise the receive side of the control block from an
+ * incoming SYN (or SYN ACK) segment
+ */
+void
+procsyn(Conv *s, Tcp *seg)
+{
+	Tcpctl *ctl = (Tcpctl*)s->ptcl;
+
+	/* remember the sender's initial sequence number; the next one expected follows it */
+	ctl->irs = seg->seq;
+	ctl->rcv.nxt = seg->seq + 1;
+	ctl->rcv.urg = ctl->rcv.nxt;
+
+	/* never send segments larger than the mss the other side announced */
+	if(seg->mss != 0)
+	if(seg->mss < ctl->mss)
+		ctl->mss = seg->mss;
+
+	/* take the advertised window; congestion window begins at one segment */
+	ctl->snd.wnd = seg->wnd;
+	ctl->cwind = ctl->mss;
+
+	ctl->flags |= FORCE;
+}
+
+int /* 0 normally (including allocation failure); -1 after the queue overflowed and was discarded */
+addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{ /* insert segment seg/bp/length into tcb->reseq, kept sorted by starting sequence number */
+ Reseq *rp, *rp1;
+ int i, rqlen, qmax;
+
+ rp = malloc(sizeof(Reseq));
+ if(rp == nil){
+ freeblist(bp); /* bp is always consumed by addreseq, even on failure */
+ return 0;
+ }
+
+ rp->seg = *seg;
+ rp->bp = bp;
+ rp->length = length;
+
+ /* Place on reassembly list sorting by starting seq number */
+ rp1 = tcb->reseq;
+ if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) { /* new head of the list */
+ rp->next = rp1;
+ tcb->reseq = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++; /* counted only when it lands in front of a queued segment */
+ return 0;
+ }
+
+ rqlen = 0; /* sums the lengths of the entries walked past, up to the insertion point */
+ for(i = 0;; i++) {
+ rqlen += rp1->length;
+ if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) { /* insert after rp1 */
+ rp->next = rp1->next;
+ rp1->next = rp;
+ if(rp->next != nil)
+ tpriv->stats[OutOfOrder]++;
+ break;
+ }
+ rp1 = rp1->next;
+ }
+ qmax = QMAX<<tcb->rcv.scale; /* QMAX scaled by the receive window scale */
+ if(rqlen > qmax){
+ print("resequence queue > window: %d > %d\n", rqlen, qmax);
+ i = 0;
+ for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){ /* dump the first few entries for diagnosis */
+ print("%#lux %#lux %#ux\n", rp1->seg.seq,
+ rp1->seg.ack, rp1->seg.flags);
+ if(i++ > 10){
+ print("...\n");
+ break;
+ }
+ }
+
+ // delete entire reassembly queue; wait for retransmit.
+ // - should we be smarter and only delete the tail?
+ for(rp = tcb->reseq; rp != nil; rp = rp1){ /* this also frees the entry inserted above */
+ rp1 = rp->next;
+ freeblist(rp->bp);
+ free(rp);
+ }
+ tcb->reseq = nil;
+
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * remove the first entry of the resequence queue and hand its
+ * segment header, data block and length back through seg, bp and
+ * length.  the outputs are left untouched when the queue is empty.
+ */
+void
+getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{
+	Reseq *head;
+
+	head = tcb->reseq;
+	if(head != nil){
+		tcb->reseq = head->next;
+
+		*length = head->length;
+		*bp = head->bp;
+		*seg = head->seg;
+
+		free(head);
+	}
+}
+
+int /* 0 when the segment is kept (possibly trimmed); -1 when it is rejected and the block list freed */
+tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
+{ /* cut a received segment down to the part that lies inside the receive window */
+ ushort len;
+ uchar accept;
+ int dupcnt, excess;
+
+ accept = 0;
+ len = *length; /* len counts sequence space: data plus one each for SYN and FIN */
+ if(seg->flags & SYN)
+ len++;
+ if(seg->flags & FIN)
+ len++;
+
+ if(tcb->rcv.wnd == 0) { /* zero window: only an empty segment exactly at rcv.nxt is acceptable */
+ if(len == 0 && seg->seq == tcb->rcv.nxt)
+ return 0;
+ }
+ else {
+ /* Some part of the segment should be in the window */
+ if(inwindow(tcb,seg->seq))
+ accept++;
+ else
+ if(len != 0) { /* start is outside: accept if the end is inside, or the segment spans rcv.nxt */
+ if(inwindow(tcb, seg->seq+len-1) ||
+ seq_within(tcb->rcv.nxt, seg->seq,seg->seq+len-1))
+ accept++;
+ }
+ }
+ if(!accept) {
+ freeblist(*bp);
+ return -1;
+ }
+ dupcnt = tcb->rcv.nxt - seg->seq; /* sequence numbers in front of rcv.nxt, i.e. already received */
+ if(dupcnt > 0){
+ tcb->rerecv += dupcnt;
+ if(seg->flags & SYN){ /* a duplicated SYN uses up the first duplicate sequence number */
+ seg->flags &= ~SYN;
+ seg->seq++;
+
+ if(seg->urg > 1)
+ seg->urg--;
+ else
+ seg->flags &= ~URG;
+ dupcnt--;
+ }
+ if(dupcnt > 0){ /* drop the duplicated data from the front of the block list */
+ pullblock(bp, (ushort)dupcnt);
+ seg->seq += dupcnt;
+ *length -= dupcnt;
+
+ if(seg->urg > dupcnt)
+ seg->urg -= dupcnt;
+ else {
+ seg->flags &= ~URG;
+ seg->urg = 0;
+ }
+ }
+ }
+ excess = seg->seq + *length - (tcb->rcv.nxt + tcb->rcv.wnd); /* data past the right edge of the window */
+ if(excess > 0) {
+ tcb->rerecv += excess;
+ *length -= excess;
+ *bp = trimblock(*bp, 0, *length);
+ if(*bp == nil)
+ panic("presotto is a boofhead");
+ seg->flags &= ~FIN; /* the FIN sat beyond the data that was cut off */
+ }
+ return 0;
+}
+
+/*
+ * act on advice msg about the packet in bp.  the addresses and
+ * ports in bp are matched against each conversation with the
+ * packet's destination as the remote end; a matching conversation
+ * that is still in Syn_sent is closed with msg.  bp is freed
+ * before returning.
+ */
+void
+tcpadvise(Proto *tcp, Block *bp, char *msg)
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	Tcpctl *tcb;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	/* the same bytes are read as a v4 or a v6 header depending on the version bits */
+	h4 = (Tcp4hdr*)(bp->rp);
+	h6 = (Tcp6hdr*)(bp->rp);
+	if((h4->vihl&0xF0) != IP_VER4){
+		ipmove(dest, h6->tcpdst);
+		ipmove(source, h6->tcpsrc);
+		psource = nhgets(h6->tcpsport);
+		pdest = nhgets(h6->tcpdport);
+	} else {
+		v4tov6(dest, h4->tcpdst);
+		v4tov6(source, h4->tcpsrc);
+		psource = nhgets(h4->tcpsport);
+		pdest = nhgets(h4->tcpdport);
+	}
+
+	/* Look for a connection */
+	qlock(tcp);
+	for(p = tcp->conv; *p != nil; p++){
+		s = *p;
+		tcb = (Tcpctl*)s->ptcl;
+		if(s->rport != pdest || s->lport != psource)
+			continue;
+		if(tcb->state == Closed)
+			continue;
+		if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
+			continue;
+
+		/* found: take the conversation lock before letting go of the protocol lock */
+		qlock(s);
+		qunlock(tcp);
+		if(tcb->state == Syn_sent)
+			localclose(s, msg);
+		qunlock(s);
+		freeblist(bp);
+		return;
+	}
+	qunlock(tcp);
+	freeblist(bp);
+}
+
+/*
+ * set tcpporthogdefense from the strings "on" and "off";
+ * anything else yields an error string and leaves it alone.
+ */
+static char*
+tcpporthogdefensectl(char *val)
+{
+	if(strcmp(val, "on") == 0){
+		tcpporthogdefense = 1;
+		return nil;
+	}
+	if(strcmp(val, "off") == 0){
+		tcpporthogdefense = 0;
+		return nil;
+	}
+	return "unknown value for tcpporthogdefense";
+}
+
+/*
+ * dispatch a tcp control request; f[0..n-1] are its fields.
+ * returns nil on success or an error string.
+ *
+ * called with c qlocked
+ */
+char*
+tcpctl(Conv* c, char** f, int n)
+{
+	if(n == 1 && strcmp(f[0], "hangup") == 0)
+		return tcphangup(c);
+	if(n >= 1 && strcmp(f[0], "keepalive") == 0)
+		return tcpstartka(c, f, n);
+	if(n >= 1 && strcmp(f[0], "checksum") == 0){
+		/* tcpsetchecksum parses f[1], so it has to be there */
+		if(n < 2)
+			return "missing argument for checksum";
+		return tcpsetchecksum(c, f, n);
+	}
+	if(n >= 1 && strcmp(f[0], "tcpporthogdefense") == 0){
+		/* f[1] is only valid when a second field was supplied */
+		if(n < 2)
+			return "missing argument for tcpporthogdefense";
+		return tcpporthogdefensectl(f[1]);
+	}
+	return "unknown control request";
+}
+
+/*
+ * format one "name: value" line per statistic into buf (at most
+ * len bytes, as bounded by seprint) and return the length written.
+ */
+int
+tcpstats(Proto *tcp, char *buf, int len)
+{
+	Tcppriv *priv;
+	char *cur, *end;
+	int i;
+
+	priv = tcp->priv;
+	end = buf+len;
+	cur = buf;
+	i = 0;
+	while(i < Nstats){
+		cur = seprint(cur, end, "%s: %lud\n", statnames[i], priv->stats[i]);
+		i++;
+	}
+	return cur - buf;
+}
+
+/*
+ * reclaim conversations that look abandoned:
+ *	- Syn_received for more than 5 seconds (possibly a SYN attack)
+ *	- Finwait2 for more than 5 minutes
+ *
+ * neither test is really trustworthy, so this only runs when
+ * we have run out of channels.  conversations whose lock cannot
+ * be taken immediately are skipped.  returns the number closed
+ * here plus whatever natgc reported.
+ */
+int
+tcpgc(Proto *tcp)
+{
+	Conv *c;
+	Tcpctl *tcb;
+	int i, nc, n, stale;
+
+	n = natgc(tcp->ipproto);
+	nc = tcp->nc;
+	for(i = 0; i < nc; i++){
+		c = tcp->conv[i];
+		if(c == nil)
+			break;
+		if(!canqlock(c))
+			continue;
+
+		tcb = (Tcpctl*)c->ptcl;
+		stale = 0;
+		if(tcb->state == Syn_received)
+			stale = NOW - tcb->time > 5000;
+		else if(tcb->state == Finwait2)
+			stale = NOW - tcb->time > 5*60*1000;
+		if(stale){
+			localclose(c, "timed out");
+			n++;
+		}
+		qunlock(c);
+	}
+	return n;
+}
+
+/*
+ * set timer.start (in ticks) from the round trip estimate scaled
+ * by the current backoff, kept between half a second and 64 seconds.
+ */
+void
+tcpsettimer(Tcpctl *tcb)
+{
+	int ticks;
+
+	/* round trip dependency */
+	ticks = backoff(tcb->backoff) *
+		(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
+
+	/* clamp: no more than 64 seconds, no less than 1/2 second */
+	if(ticks > (64000/MSPTICK))
+		ticks = 64000/MSPTICK;
+	if(ticks < 500/MSPTICK)
+		ticks = 500/MSPTICK;
+
+	tcb->timer.start = ticks;
+}
+
+/*
+ * allocate the tcp Proto and its private state, fill in the
+ * protocol's entry points and hand it to Fsproto for fs.
+ */
+void
+tcpinit(Fs *fs)
+{
+	Proto *tcp;
+	Tcppriv *tpriv;
+
+	tcp = smalloc(sizeof(Proto));
+	tpriv = smalloc(sizeof(Tcppriv));
+	tcp->priv = tpriv;
+
+	/* identity and sizes */
+	tcp->name = "tcp";
+	tcp->ipproto = IP_TCPPROTO;
+	tcp->ptclsize = sizeof(Tcpctl);
+	tcp->nc = scalednconv();
+	tpriv->stats[MaxConn] = tcp->nc;
+
+	/* entry points */
+	tcp->connect = tcpconnect;
+	tcp->announce = tcpannounce;
+	tcp->ctl = tcpctl;
+	tcp->state = tcpstate;
+	tcp->create = tcpcreate;
+	tcp->close = tcpclose;
+	tcp->rcv = tcpiput;
+	tcp->advise = tcpadvise;
+	tcp->stats = tcpstats;
+	tcp->inuse = tcpinuse;
+	tcp->gc = tcpgc;
+
+	Fsproto(fs, tcp);
+}
+
+/*
+ * record the window scale factors.  a zero rcvscale turns scaling
+ * off in both directions; otherwise the low 8 bits of each factor
+ * are kept.  the window and the read queue limit become QMAX
+ * shifted by the send scale.
+ */
+void
+tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
+{
+	if(rcvscale == 0){
+		tcb->rcv.scale = 0;
+		tcb->snd.scale = 0;
+		tcb->window = QMAX;
+	} else {
+		tcb->rcv.scale = rcvscale & 0xff;
+		tcb->snd.scale = sndscale & 0xff;
+		tcb->window = QMAX<<tcb->snd.scale;
+	}
+	qsetlimit(s->rq, tcb->window);
+}
--- /dev/null
+++ b/os/ip.original/udp.c
@@ -1,0 +1,656 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+#include "ip.h"
+#include "ipv6.h"
+
+
+#define DPRINT if(0)print
+
+enum
+{
+ UDP_UDPHDR_SZ = 8, /* bytes in the udp header: sport, dport, len, cksum */
+
+ UDP4_PHDR_OFF = 8, /* offset handed to ptclcsum for v4; Udp4hdr.Unused sits at this offset */
+ UDP4_PHDR_SZ = 12, /* v4 pseudo header bytes added to the length given to ptclcsum */
+ UDP4_IPHDR_SZ = 20, /* v4 ip header bytes in front of the udp header */
+ UDP6_IPHDR_SZ = 40, /* v6 ip header bytes in front of the udp header */
+ UDP6_PHDR_SZ = 40, /* v6 pseudo header bytes added to the length given to ptclcsum */
+ UDP6_PHDR_OFF = 0, /* offset handed to ptclcsum for v6: the pseudo header overlays the ip header */
+
+ IP_UDPPROTO = 17, /* protocol number written into the ip headers */
+ UDP_USEAD7 = 52, /* size of the user address header when Udpcb.headers is 7 */
+ UDP_USEAD6 = 36, /* size of the user address header when Udpcb.headers is 6 */
+
+ Udprxms = 200, /* NOTE(review): this and the next two are not referenced by the udp code visible here */
+ Udptickms = 100,
+ Udpmaxxmit = 10,
+};
+
+typedef struct Udp4hdr Udp4hdr;
+struct Udp4hdr /* v4 ip header immediately followed by the udp header */
+{
+ /* ip header */
+ uchar vihl; /* Version and header length */
+ uchar tos; /* Type of service */
+ uchar length[2]; /* packet length */
+ uchar id[2]; /* Identification */
+ uchar frag[2]; /* Fragment information */
+ uchar Unused; /* saved and zeroed by udpiput while checksumming; first byte of the pseudo header */
+ uchar udpproto; /* Protocol */
+ uchar udpplen[2]; /* Header plus data length */
+ uchar udpsrc[IPv4addrlen]; /* Ip source */
+ uchar udpdst[IPv4addrlen]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+typedef struct Udp6hdr Udp6hdr;
+struct Udp6hdr { /* v6 ip header immediately followed by the udp header */
+ uchar viclfl[4]; /* first word of the v6 header; viclfl[0] is set to IP_VER6 on output */
+ uchar len[2]; /* length field; set to udp header plus data on output */
+ uchar nextheader; /* set to IP_UDPPROTO on output */
+ uchar hoplimit; /* temporarily holds IP_UDPPROTO while the pseudo header is checksummed */
+ uchar udpsrc[IPaddrlen]; /* Ip source */
+ uchar udpdst[IPaddrlen]; /* Ip destination */
+
+ /* udp header */
+ uchar udpsport[2]; /* Source port */
+ uchar udpdport[2]; /* Destination port */
+ uchar udplen[2]; /* data length */
+ uchar udpcksum[2]; /* Checksum */
+};
+
+/* MIB II counters */
+typedef struct Udpstats Udpstats;
+struct Udpstats
+{
+ ulong udpInDatagrams; /* incremented for every datagram entering udpiput */
+ ulong udpNoPorts; /* datagrams for which no conversation was found */
+ ulong udpInErrors; /* datagrams dropped for a bad checksum */
+ ulong udpOutDatagrams; /* datagrams handed to ipoput4/ipoput6 by udpkick */
+};
+
+typedef struct Udppriv Udppriv;
+struct Udppriv /* per-protocol private state, reached through Proto.priv */
+{
+ Ipht ht; /* table of conversations, searched with iphtlook in udpiput */
+
+ /* MIB counters */
+ Udpstats ustats;
+
+ /* non-MIB stats */
+ ulong csumerr; /* checksum errors */
+ ulong lenerr; /* short packet */
+};
+
+void (*etherprofiler)(char *name, int qlen);
+void udpkick(void *x, Block *bp);
+
+/*
+ * protocol specific part of Conv
+ */
+typedef struct Udpcb Udpcb;
+struct Udpcb
+{
+ QLock;
+ uchar headers; /* 0 by default; udpctl sets 6 for "oldheaders" and 7 for "headers" */
+};
+
+/*
+ * connect a conversation: run the standard connect, report the
+ * outcome with Fsconnected and, on success, enter the
+ * conversation in the protocol's hash table.
+ */
+static char*
+udpconnect(Conv *c, char **argv, int argc)
+{
+	char *err;
+	Udppriv *upriv;
+
+	upriv = c->p->priv;
+	err = Fsstdconnect(c, argv, argc);
+	Fsconnected(c, err);
+	if(err == nil)
+		iphtadd(&upriv->ht, c);
+	return err;
+}
+
+
+/*
+ * format the conversation's status into state: "Open" or "Closed"
+ * followed by the read and write queue lengths (0 for a missing queue).
+ */
+static int
+udpstate(Conv *c, char *state, int n)
+{
+	char *status;
+	int nin, nout;
+
+	status = "Closed";
+	if(c->inuse)
+		status = "Open";
+
+	nin = 0;
+	if(c->rq)
+		nin = qlen(c->rq);
+	nout = 0;
+	if(c->wq)
+		nout = qlen(c->wq);
+
+	return snprint(state, n, "%s qin %d qout %d", status, nin, nout);
+}
+
+/*
+ * announce a conversation: after a successful standard announce
+ * the conversation is marked connected and entered in the
+ * protocol's hash table.  a failure is returned to the caller as is.
+ */
+static char*
+udpannounce(Conv *c, char** argv, int argc)
+{
+	char *err;
+	Udppriv *upriv;
+
+	upriv = c->p->priv;
+	err = Fsstdannounce(c, argv, argc);
+	if(err == nil){
+		Fsconnected(c, nil);
+		iphtadd(&upriv->ht, c);
+	}
+	return err;
+}
+
+/*
+ * give a new conversation its queues: a 64k message-mode read
+ * queue, and a bypass write queue that hands blocks to udpkick.
+ */
+static void
+udpcreate(Conv *c)
+{
+	enum { Rqlimit = 64*1024 };
+
+	c->rq = qopen(Rqlimit, Qmsg, 0, 0);
+	c->wq = qbypass(udpkick, c);
+}
+
+/*
+ * shut a conversation down: take it out of the hash table, close
+ * its queues and reset addresses, ports and header mode.  the
+ * conversation is unlocked on the way out, so the caller is
+ * expected to hold its lock (the acquisition is not visible here).
+ */
+static void
+udpclose(Conv *c)
+{
+	Udppriv *upriv;
+	Udpcb *ucb;
+
+	upriv = c->p->priv;
+	iphtrem(&upriv->ht, c);
+
+	c->state = 0;
+	qclose(c->rq);
+	qclose(c->wq);
+	qclose(c->eq);
+
+	/* forget both endpoints */
+	ipmove(c->laddr, IPnoaddr);
+	c->lport = 0;
+	ipmove(c->raddr, IPnoaddr);
+	c->rport = 0;
+
+	ucb = (Udpcb*)c->ptcl;
+	ucb->headers = 0;
+
+	qunlock(c);
+}
+
+void
+udpkick(void *x, Block *bp) /* output one datagram; x is the Conv (registered via qbypass in udpcreate), bp the data */
+{
+ Conv *c = x;
+ Udp4hdr *uh4;
+ Udp6hdr *uh6;
+ ushort rport;
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
+ Udpcb *ucb;
+ int dlen, ptcllen;
+ Udppriv *upriv;
+ Fs *f;
+ int version;
+ Conv *rc;
+
+ upriv = c->p->priv;
+ f = c->p->f;
+
+ netlog(c->p->f, Logudp, "udp: kick\n");
+ if(bp == nil)
+ return;
+
+ ucb = (Udpcb*)c->ptcl;
+ switch(ucb->headers) { /* in a headers mode the data starts with the addresses to use */
+ case 7:
+ /* get user specified addresses */
+ bp = pullupblock(bp, UDP_USEAD7);
+ if(bp == nil)
+ return;
+ ipmove(raddr, bp->rp);
+ bp->rp += IPaddrlen;
+ ipmove(laddr, bp->rp);
+ bp->rp += IPaddrlen;
+ /* pick interface closest to dest */
+ if(ipforme(f, laddr) != Runi)
+ findlocalip(f, laddr, raddr);
+ bp->rp += IPaddrlen; /* Ignore ifc address */
+ rport = nhgets(bp->rp);
+ bp->rp += 2+2; /* Ignore local port */
+ break;
+ case 6: /* same layout as 7 but without the ifc address */
+ /* get user specified addresses */
+ bp = pullupblock(bp, UDP_USEAD6);
+ if(bp == nil)
+ return;
+ ipmove(raddr, bp->rp);
+ bp->rp += IPaddrlen;
+ ipmove(laddr, bp->rp);
+ bp->rp += IPaddrlen;
+ /* pick interface closest to dest */
+ if(ipforme(f, laddr) != Runi)
+ findlocalip(f, laddr, raddr);
+ rport = nhgets(bp->rp);
+ bp->rp += 2+2; /* Ignore local port */
+ break;
+ default: /* no user header: addresses and ports come from the conversation */
+ rport = 0;
+ break;
+ }
+
+ if(ucb->headers) { /* choose the ip version from the local address given by the user */
+ if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
+ ipcmp(laddr, IPnoaddr) == 0)
+ version = V4;
+ else
+ version = V6;
+ } else { /* choose the ip version from the conversation's addresses */
+ if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
+ memcmp(c->laddr, v4prefix, IPv4off) == 0)
+ || ipcmp(c->raddr, IPnoaddr) == 0)
+ version = V4;
+ else
+ version = V6;
+ }
+
+ dlen = blocklen(bp); /* payload length, measured after any user header was skipped */
+
+ /* fill in pseudo header and compute checksum */
+ switch(version){
+ case V4:
+ bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
+ if(bp == nil)
+ return;
+
+ uh4 = (Udp4hdr *)(bp->rp);
+ ptcllen = dlen + UDP_UDPHDR_SZ;
+ uh4->Unused = 0;
+ uh4->udpproto = IP_UDPPROTO;
+ uh4->frag[0] = 0;
+ uh4->frag[1] = 0;
+ hnputs(uh4->udpplen, ptcllen);
+ if(ucb->headers) {
+ v6tov4(uh4->udpdst, raddr);
+ hnputs(uh4->udpdport, rport);
+ v6tov4(uh4->udpsrc, laddr);
+ rc = nil; /* no conversation is passed to ipoput4 in headers mode */
+ } else {
+ v6tov4(uh4->udpdst, c->raddr);
+ hnputs(uh4->udpdport, c->rport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr); /* fills in the conversation's local address */
+ v6tov4(uh4->udpsrc, c->laddr);
+ rc = c;
+ }
+ hnputs(uh4->udpsport, c->lport);
+ hnputs(uh4->udplen, ptcllen);
+ uh4->udpcksum[0] = 0; /* checksum field is zero while the checksum is computed */
+ uh4->udpcksum[1] = 0;
+ hnputs(uh4->udpcksum,
+ ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ uh4->vihl = IP_VER4;
+ ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ break;
+
+ case V6:
+ bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
+ if(bp == nil)
+ return;
+
+ // using the v6 ip header to create pseudo header
+ // first then reset it to the normal ip header
+ uh6 = (Udp6hdr *)(bp->rp);
+ memset(uh6, 0, 8);
+ ptcllen = dlen + UDP_UDPHDR_SZ;
+ hnputl(uh6->viclfl, ptcllen); /* pseudo header: length goes in the first word */
+ uh6->hoplimit = IP_UDPPROTO; /* pseudo header: protocol goes in the hoplimit byte */
+ if(ucb->headers) {
+ ipmove(uh6->udpdst, raddr);
+ hnputs(uh6->udpdport, rport);
+ ipmove(uh6->udpsrc, laddr);
+ rc = nil;
+ } else {
+ ipmove(uh6->udpdst, c->raddr);
+ hnputs(uh6->udpdport, c->rport);
+ if(ipcmp(c->laddr, IPnoaddr) == 0)
+ findlocalip(f, c->laddr, c->raddr);
+ ipmove(uh6->udpsrc, c->laddr);
+ rc = c;
+ }
+ hnputs(uh6->udpsport, c->lport);
+ hnputs(uh6->udplen, ptcllen);
+ uh6->udpcksum[0] = 0;
+ uh6->udpcksum[1] = 0;
+ hnputs(uh6->udpcksum,
+ ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ memset(uh6, 0, 8); /* wipe the pseudo header fields and rebuild the real header start */
+ uh6->viclfl[0] = IP_VER6;
+ hnputs(uh6->len, ptcllen);
+ uh6->nextheader = IP_UDPPROTO;
+ ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ break;
+
+ default:
+ panic("udpkick: version %d", version);
+ }
+ upriv->ustats.udpOutDatagrams++;
+}
+
+/*
+ * receive a udp datagram.  bp starts with the ip header (v4 or
+ * v6) followed by the udp header; ifc is the interface it came in
+ * on.  the checksum is verified over a pseudo header built in
+ * place (and undone afterwards), the conversation is looked up,
+ * an announced conversation without headers mode spawns a new
+ * call, and the payload (prefixed by an address header in headers
+ * mode) is queued on the conversation's read queue.
+ */
+void
+udpiput(Proto *udp, Ipifc *ifc, Block *bp)
+{
+	int len;
+	Udp4hdr *uh4;
+	Udp6hdr *uh6;
+	Conv *c;
+	Udpcb *ucb;
+	uchar raddr[IPaddrlen], laddr[IPaddrlen];
+	ushort rport, lport;
+	Udppriv *upriv;
+	Fs *f;
+	int version;
+	int ottl, oviclfl, olen;
+	uchar *p;
+
+	upriv = udp->priv;
+	f = udp->f;
+	upriv->ustats.udpInDatagrams++;
+
+	uh4 = (Udp4hdr*)(bp->rp);
+	version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+
+	/*
+	 * Put back pseudo header for checksum
+	 * (remember old values for icmpnoconv())
+	 */
+	switch(version) {
+	case V4:
+		ottl = uh4->Unused;
+		uh4->Unused = 0;
+		len = nhgets(uh4->udplen);
+		olen = nhgets(uh4->udpplen);
+		hnputs(uh4->udpplen, len);
+
+		v4tov6(raddr, uh4->udpsrc);
+		v4tov6(laddr, uh4->udpdst);
+		lport = nhgets(uh4->udpdport);
+		rport = nhgets(uh4->udpsport);
+
+		/* a zero checksum field means the check is skipped */
+		if(nhgets(uh4->udpcksum)) {
+			if(ptclcsum(bp, UDP4_PHDR_OFF, len+UDP4_PHDR_SZ)) {
+				upriv->ustats.udpInErrors++;
+				netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+				DPRINT("udp: checksum error %I\n", raddr);
+				freeblist(bp);
+				return;
+			}
+		}
+		uh4->Unused = ottl;
+		hnputs(uh4->udpplen, olen);
+		break;
+	case V6:
+		uh6 = (Udp6hdr*)(bp->rp);
+		len = nhgets(uh6->udplen);
+		oviclfl = nhgetl(uh6->viclfl);
+		olen = nhgets(uh6->len);
+		ottl = uh6->hoplimit;
+		ipmove(raddr, uh6->udpsrc);
+		ipmove(laddr, uh6->udpdst);
+		lport = nhgets(uh6->udpdport);
+		rport = nhgets(uh6->udpsport);
+		memset(uh6, 0, 8);
+		hnputl(uh6->viclfl, len);
+		uh6->hoplimit = IP_UDPPROTO;
+		if(ptclcsum(bp, UDP6_PHDR_OFF, len+UDP6_PHDR_SZ)) {
+			upriv->ustats.udpInErrors++;
+			netlog(f, Logudp, "udp: checksum error %I\n", raddr);
+			DPRINT("udp: checksum error %I\n", raddr);
+			freeblist(bp);
+			return;
+		}
+		/* restore the real v6 header fields */
+		hnputl(uh6->viclfl, oviclfl);
+		hnputs(uh6->len, olen);
+		uh6->nextheader = IP_UDPPROTO;
+		uh6->hoplimit = ottl;
+		break;
+	default:
+		panic("udpiput: version %d", version);
+		return;	/* to avoid a warning */
+	}
+
+	qlock(udp);
+
+	c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
+	if(c == nil){
+		/* no conversation found */
+		upriv->ustats.udpNoPorts++;
+		qunlock(udp);
+		netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
+			laddr, lport);
+
+		switch(version){
+		case V4:
+			icmpnoconv(f, bp);
+			break;
+		case V6:
+			icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+			break;
+		default:
+			panic("udpiput2: version %d", version);
+		}
+
+		freeblist(bp);
+		return;
+	}
+	ucb = (Udpcb*)c->ptcl;
+
+	if(c->state == Announced){
+		if(ucb->headers == 0){
+			/* create a new conversation */
+			if(ipforme(f, laddr) != Runi) {
+				/*
+				 * the datagram was not addressed to one of our
+				 * unicast addresses, so use the interface's
+				 * local address for the new call instead.
+				 */
+				switch(version){
+				case V4:
+					/*
+					 * lifc->local is a full IPaddrlen address
+					 * (it is copied with ipmove below); v4tov6
+					 * wants the 4 byte v4 address, which sits
+					 * IPv4off bytes in.
+					 */
+					v4tov6(laddr, ifc->lifc->local+IPv4off);
+					break;
+				case V6:
+					ipmove(laddr, ifc->lifc->local);
+					break;
+				default:
+					panic("udpiput3: version %d", version);
+				}
+			}
+			c = Fsnewcall(c, raddr, rport, laddr, lport, version);
+			if(c == nil){
+				qunlock(udp);
+				freeblist(bp);
+				return;
+			}
+			iphtadd(&upriv->ht, c);
+			ucb = (Udpcb*)c->ptcl;
+		}
+	}
+
+	/* take the conversation lock before letting go of the protocol lock */
+	qlock(c);
+	qunlock(udp);
+
+	/*
+	 * Trim the packet down to data size
+	 */
+	len -= UDP_UDPHDR_SZ;
+	switch(version){
+	case V4:
+		bp = trimblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	case V6:
+		bp = trimblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ, len);
+		break;
+	default:
+		bp = nil;
+		panic("udpiput4: version %d", version);
+	}
+	if(bp == nil){
+		qunlock(c);
+		netlog(f, Logudp, "udp: len err %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		upriv->lenerr++;
+		return;
+	}
+
+	netlog(f, Logudpmsg, "udp: %I.%d -> %I.%d l %d\n", raddr, rport,
+		laddr, lport, len);
+
+	/* in a headers mode the reader gets the addresses in front of the data */
+	switch(ucb->headers){
+	case 7:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD7);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, laddr); p += IPaddrlen;
+		ipmove(p, ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	case 6:
+		/* pass the src address */
+		bp = padblock(bp, UDP_USEAD6);
+		p = bp->rp;
+		ipmove(p, raddr); p += IPaddrlen;
+		ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
+		hnputs(p, rport); p += 2;
+		hnputs(p, lport);
+		break;
+	}
+
+	if(bp->next)
+		bp = concatblock(bp);
+
+	/* drop the datagram rather than block when the reader is behind */
+	if(qfull(c->rq)){
+		qunlock(c);
+		netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
+			laddr, lport);
+		freeblist(bp);
+		return;
+	}
+
+	qpass(c->rq, bp);
+	qunlock(c);
+
+}
+
+/*
+ * handle a udp control request.  the single-field requests
+ * "oldheaders" and "headers" set the header mode to 6 and 7;
+ * everything else is rejected with an error string.
+ */
+char*
+udpctl(Conv *c, char **f, int n)
+{
+	Udpcb *ucb;
+	int mode;
+
+	ucb = (Udpcb*)c->ptcl;
+	if(n != 1)
+		return "unknown control request";
+
+	if(strcmp(f[0], "oldheaders") == 0)
+		mode = 6;
+	else if(strcmp(f[0], "headers") == 0)
+		mode = 7;
+	else
+		return "unknown control request";
+
+	ucb->headers = mode;
+	return nil;
+}
+
+/*
+ * act on advice msg about the packet in bp.  the addresses and
+ * ports in bp are matched against each conversation with the
+ * packet's destination as the remote end; a match has both its
+ * queues hung up with msg unless it has ignoreadvice set.  bp is
+ * freed before returning.
+ */
+void
+udpadvise(Proto *udp, Block *bp, char *msg)
+{
+	Udp4hdr *h4;
+	Udp6hdr *h6;
+	uchar source[IPaddrlen], dest[IPaddrlen];
+	ushort psource, pdest;
+	Conv *s, **p;
+
+	/* anything not marked as v6 is read as a v4 header */
+	h4 = (Udp4hdr*)(bp->rp);
+	if((h4->vihl&0xF0) == IP_VER6){
+		h6 = (Udp6hdr*)(bp->rp);
+		ipmove(dest, h6->udpdst);
+		ipmove(source, h6->udpsrc);
+		psource = nhgets(h6->udpsport);
+		pdest = nhgets(h6->udpdport);
+	} else {
+		v4tov6(dest, h4->udpdst);
+		v4tov6(source, h4->udpsrc);
+		psource = nhgets(h4->udpsport);
+		pdest = nhgets(h4->udpdport);
+	}
+
+	/* Look for a connection */
+	qlock(udp);
+	for(p = udp->conv; *p != nil; p++){
+		s = *p;
+		if(s->rport != pdest || s->lport != psource)
+			continue;
+		if(ipcmp(s->raddr, dest) != 0 || ipcmp(s->laddr, source) != 0)
+			continue;
+
+		/* a match that ignores advice ends the search */
+		if(s->ignoreadvice)
+			break;
+
+		qlock(s);
+		qunlock(udp);
+		qhangup(s->rq, msg);
+		qhangup(s->wq, msg);
+		qunlock(s);
+		freeblist(bp);
+		return;
+	}
+	qunlock(udp);
+	freeblist(bp);
+}
+
+/*
+ * format the four MIB counters into buf, one "Name: value" line
+ * each, and return what snprint returns.
+ */
+int
+udpstats(Proto *udp, char *buf, int len)
+{
+	Udppriv *upriv;
+	Udpstats *st;
+
+	upriv = udp->priv;
+	st = &upriv->ustats;
+	return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+		st->udpInDatagrams,
+		st->udpNoPorts,
+		st->udpInErrors,
+		st->udpOutDatagrams);
+}
+
+/*
+ * garbage collection hook: udp keeps nothing of its own to
+ * reclaim, so the work is left to natgc for this protocol number.
+ */
+int
+udpgc(Proto *udp)
+{
+	int n;
+
+	n = natgc(udp->ipproto);
+	return n;
+}
+
+/*
+ * allocate the udp Proto and its private state, fill in the
+ * protocol's entry points and hand it to Fsproto for fs.
+ */
+void
+udpinit(Fs *fs)
+{
+	Proto *udp;
+
+	udp = smalloc(sizeof(Proto));
+	udp->priv = smalloc(sizeof(Udppriv));
+
+	/* identity and sizes */
+	udp->name = "udp";
+	udp->ipproto = IP_UDPPROTO;
+	udp->nc = Nchans;
+	udp->ptclsize = sizeof(Udpcb);
+
+	/* entry points */
+	udp->connect = udpconnect;
+	udp->announce = udpannounce;
+	udp->ctl = udpctl;
+	udp->state = udpstate;
+	udp->create = udpcreate;
+	udp->close = udpclose;
+	udp->rcv = udpiput;
+	udp->advise = udpadvise;
+	udp->stats = udpstats;
+	udp->gc = udpgc;
+
+	Fsproto(fs, udp);
+}
--- a/os/ip/arp.c
+++ b/os/ip/arp.c
@@ -47,7 +47,8 @@
#define haship(s) ((s)[IPaddrlen-1]%NHASH)
-extern int ReTransTimer = RETRANS_TIMER;
+int ReTransTimer = RETRANS_TIMER;
+
static void rxmitproc(void *v);
void
@@ -57,145 +58,121 @@
f->arp->f = f;
f->arp->rxmt = nil;
f->arp->dropf = f->arp->dropl = nil;
- kproc("rxmitproc", rxmitproc, f->arp, 0);
+ kproc("rxmitproc", rxmitproc, f->arp);
}
-/*
- * create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
{
- uint t;
- Block *next, *xp;
- Arpent *a, *e, *f, **l;
- Medium *m = ifc->m;
- int empty;
+ Block *next;
- /* find oldest entry */
- e = &arp->cache[NCACHE];
- a = arp->cache;
- t = a->utime;
- for(f = a; f < e; f++){
- if(f->utime < t){
- t = f->utime;
- a = f;
- }
+ while(bp != nil){
+ next = bp->list;
+ freeblist(bp);
+ bp = next;
}
+}
- /* dump waiting packets */
- xp = a->hold;
- a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+ Arpent **l;
- if(isv4(a->ip)){
- while(xp){
- next = xp->list;
- freeblist(xp);
- xp = next;
+ for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+ if(*l == a){
+ *l = a->nextrxt;
+ break;
}
}
- else { // queue icmp unreachable for rxmitproc later on, w/o arp lock
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
+ a->nextrxt = nil;
+ return l;
+}
- for(next = xp->list; next; next = next->list)
- xp = next;
- arp->dropl = xp;
- wakeup(&arp->rxmtq);
- }
- }
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent **l;
+ Block *bp;
/* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
+ for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+ if(*l == a){
*l = a->hash;
break;
}
- l = &f->hash;
}
+ a->hash = nil;
- /* insert into new chain */
- l = &arp->hash[haship(ip)];
- a->hash = *l;
- *l = a;
+ /* dump waiting packets */
+ bp = a->hold;
+ a->hold = nil;
+ if(isv4(a->ip))
+ freeblistchain(bp);
+ else {
+ rxmtunchain(arp, a);
- memmove(a->ip, ip, sizeof(a->ip));
- a->utime = NOW;
- a->ctime = 0;
- a->type = m;
+ /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(bp != nil){
+ if(arp->dropf == nil)
+ arp->dropf = bp;
+ else
+ arp->dropl->list = bp;
+ arp->dropl = a->last;
- a->rtime = NOW + ReTransTimer;
- a->rxtsrem = MAX_MULTICAST_SOLICIT;
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
-
- /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
- if(!ipismulticast(a->ip) && addrxt){
- l = &arp->rxmt;
- empty = (*l==nil);
-
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
+ if(bp == arp->dropf)
+ wakeup(&arp->rxmtq);
}
- for(f = *l; f; f = f->nextrxt){
- l = &f->nextrxt;
- }
- *l = a;
- if(empty)
- wakeup(&arp->rxmtq);
}
+ a->last = nil;
- a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
- return a;
-}
+ a->state = 0;
+ a->rxtsrem = 0;
-/* called with arp qlocked */
+ a->utime = 0;
+ a->ctime = 0;
-void
-cleanarpent(Arp *arp, Arpent *a)
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ * create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
{
- Arpent *f, **l;
+ Arpent *a, *e, *f, **l;
+ ulong t;
- a->utime = 0;
- a->ctime = 0;
- a->type = 0;
- a->state = 0;
-
- /* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
- *l = a->hash;
- break;
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
}
- l = &f->hash;
}
+ cleanarpent(arp, a);
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
+ ipmove(a->ip, ip);
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ return a;
}
+
/*
* fill in the media address if we have it. Otherwise return an
* Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
{
int hash;
Arpent *a;
- Medium *type = ifc->m;
uchar v6ip[IPaddrlen];
if(version == V4){
@@ -217,30 +193,28 @@
qlock(arp);
hash = haship(ip);
- for(a = arp->hash[hash]; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
- if(type == a->type)
+ for(a = arp->hash[hash]; a != nil; a = a->hash){
+ if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
break;
}
-
if(a == nil){
- a = newarp6(arp, ip, ifc, (version != V4));
+ a = newarpent(arp, ip, ifc);
a->state = AWAIT;
}
a->utime = NOW;
if(a->state == AWAIT){
if(bp != nil){
- if(a->hold)
- a->last->list = bp;
- else
+ bp->list = nil;
+ if(a->hold == nil)
a->hold = bp;
+ else
+ a->last->list = bp;
a->last = bp;
- bp->list = nil;
}
return a; /* return with arp qlocked */
}
- memmove(mac, a->mac, a->type->maclen);
+ memmove(mac, a->mac, ifc->m->maclen);
/* remove old entries */
if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
{
Block *bp;
- Arpent *f, **l;
- if(!isv4(a->ip)){
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
memmove(a->mac, mac, type->maclen);
- a->type = type;
+ if(a->state == AWAIT && !isv4(a->ip)){
+ rxmtunchain(arp, a);
+ a->rxtsrem = 0;
+ }
a->state = AOK;
- a->utime = NOW;
+ a->ctime = a->utime = NOW;
bp = a->hold;
- a->hold = nil;
+ a->hold = a->last = nil;
qunlock(arp);
return bp;
}
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
{
- Arp *arp;
- Route *r;
- Arpent *a, *f, **l;
- Ipifc *ifc;
- Medium *type;
- Block *bp, *next;
uchar v6ip[IPaddrlen];
+ Block *bp, *next;
+ Arpent *a;
+ Route *r;
+ Arp *arp;
- arp = fs->arp;
+ if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+ return -1;
- if(n != 6){
-// print("arp: len = %d\n", n);
- return;
- }
-
switch(version){
case V4:
- r = v4lookup(fs, ip, nil);
+ r = v4lookup(fs, ip, ia, nil);
v4tov6(v6ip, ip);
ip = v6ip;
break;
case V6:
- r = v6lookup(fs, ip, nil);
+ r = v6lookup(fs, ip, ia, nil);
break;
default:
panic("arpenter: version %d", version);
- return; /* to supress warnings */
+ return -1; /* to supress warnings */
}
- if(r == nil){
-// print("arp: no route for entry\n");
- return;
- }
+ if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+ return -1;
- ifc = r->ifc;
- type = ifc->m;
-
+ arp = fs->arp;
qlock(arp);
- for(a = arp->hash[haship(ip)]; a; a = a->hash){
- if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+ if(a->ifc != ifc || a->ifcid != ifc->ifcid)
continue;
-
if(ipcmp(a->ip, ip) == 0){
- a->state = AOK;
- memmove(a->mac, mac, type->maclen);
-
- if(version == V6){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
- bp = a->hold;
- a->hold = nil;
if(version == V4)
ip += IPv4off;
- a->utime = NOW;
- a->ctime = a->utime;
- qunlock(arp);
-
- while(bp){
+ bp = arpresolve(arp, a, ifc->m, mac); /* unlocks arp */
+ for(; bp != nil; bp = next){
next = bp->list;
- if(ifc != nil){
- if(waserror()){
- runlock(ifc);
- nexterror();
- }
- rlock(ifc);
- if(ifc->m != nil)
- ifc->m->bwrite(ifc, bp, version, ip);
- else
- freeb(bp);
- runlock(ifc);
- poperror();
- } else
- freeb(bp);
- bp = next;
+ bp->list = nil;
+ if(waserror()){
+ freeblistchain(next);
+ break;
+ }
+ ipifcoput(ifc, bp, version, ip);
+ poperror();
}
- return;
+ return 1;
}
}
if(refresh == 0){
- a = newarp6(arp, ip, ifc, 0);
+ a = newarpent(arp, ip, ifc);
a->state = AOK;
- a->type = type;
- a->ctime = NOW;
- memmove(a->mac, mac, type->maclen);
+ a->ctime = a->utime = NOW;
+ memmove(a->mac, mac, n);
}
-
qunlock(arp);
+
+ return refresh == 0;
}
int
@@ -401,13 +325,12 @@
arpwrite(Fs *fs, char *s, int len)
{
int n;
- Route *r;
Arp *arp;
- Block *bp;
- Arpent *a, *fl, **l;
+ Arpent *a, *x;
Medium *m;
- char *f[4], buf[256];
- uchar ip[IPaddrlen], mac[MAClen];
+ Ipifc *ifc;
+ char *f[5], buf[256];
+ uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
arp = fs->arp;
@@ -420,7 +343,7 @@
if(len > 0 && buf[len-1] == '\n')
buf[len-1] = 0;
- n = getfields(buf, f, 4, 1, " ");
+ n = getfields(buf, f, nelem(f), 1, " ");
if(strcmp(f[0], "flush") == 0){
qlock(arp);
for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
memset(a->ip, 0, sizeof(a->ip));
memset(a->mac, 0, sizeof(a->mac));
a->hash = nil;
+ a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
a->state = 0;
+ a->rxtsrem = 0;
+ a->ctime = 0;
a->utime = 0;
- while(a->hold != nil){
- bp = a->hold->list;
- freeblist(a->hold);
- a->hold = bp;
- }
+ freeblistchain(a->hold);
+ a->hold = a->last = nil;
}
memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+ freeblistchain(arp->dropf);
+ arp->dropf = arp->dropl = nil;
arp->rxmt = nil;
- arp->dropf = nil;
- arp->dropl = nil;
qunlock(arp);
} else if(strcmp(f[0], "add") == 0){
switch(n){
@@ -446,64 +370,53 @@
default:
error(Ebadarg);
case 3:
- parseip(ip, f[1]);
- if(isv4(ip))
- r = v4lookup(fs, ip+IPv4off, nil);
- else
- r = v6lookup(fs, ip, nil);
- if(r == nil)
- error("Destination unreachable");
- m = r->ifc->m;
- n = parsemac(mac, f[2], m->maclen);
+ if(parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
case 4:
m = ipfindmedium(f[1]);
- if(m == nil)
+ if(m == nil || m->maclen == 0)
error(Ebadarp);
- parseip(ip, f[2]);
- n = parsemac(mac, f[3], m->maclen);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
+ case 5:
+ m = ipfindmedium(f[1]);
+ if(m == nil || m->maclen == 0)
+ error(Ebadarp);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ if(parseip(ia, f[4]) == -1)
+ error(Ebadip);
+ break;
}
-
- if(m->ares == nil)
- error(Ebadarp);
-
- m->ares(fs, V6, ip, mac, n, 0);
+ if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+ error("no interface");
+ rlock(ifc);
+ if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+ runlock(ifc);
+ error("destination unreachable");
+ }
+ runlock(ifc);
} else if(strcmp(f[0], "del") == 0){
- if(n != 2)
+ if (n != 2)
error(Ebadarg);
-
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
qlock(arp);
-
- l = &arp->hash[haship(ip)];
- for(a = *l; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
- *l = a->hash;
- break;
- }
- l = &a->hash;
+ for(a = arp->hash[haship(ip)]; a != nil; a = x){
+ x = a->hash;
+ if(ipcmp(ip, a->ip) == 0)
+ cleanarpent(arp, a);
}
-
- if(a){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(fl = *l; fl; fl = fl->nextrxt){
- if(fl == a){
- *l = a->nextrxt;
- break;
- }
- l = &fl->nextrxt;
- }
-
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
- memset(a->ip, 0, sizeof(a->ip));
- memset(a->mac, 0, sizeof(a->mac));
- }
qunlock(arp);
} else
error(Ebadarp);
@@ -511,13 +424,6 @@
return len;
}
-enum
-{
- Alinelen= 90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
static void
convmac(char *p, uchar *mac, int n)
{
@@ -526,136 +432,136 @@
}
int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
{
+ char mac[2*MAClen+1], *state, *mname, *p;
+ uchar ip[IPaddrlen], ia[IPaddrlen];
+ Ipifc *ifc;
Arpent *a;
- int n;
- char mac[2*MAClen+1];
+ long n, o;
- if(offset % Alinelen)
- return 0;
-
- offset = offset/Alinelen;
- len = len/Alinelen;
-
- n = 0;
+ p = s;
+ o = -offset;
for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
- if(a->state == 0)
+ if(a->state == 0 || (ifc = a->ifc) == nil)
continue;
- if(offset > 0){
- offset--;
+
+ rlock(ifc);
+ qlock(arp);
+ state = arpstate[a->state];
+ ipmove(ip, a->ip);
+ if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+ qunlock(arp);
+ runlock(ifc);
continue;
}
- len--;
- qlock(arp);
- convmac(mac, a->mac, a->type->maclen);
- n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ mname = ifc->m->name;
+ convmac(mac, a->mac, ifc->m->maclen);
qunlock(arp);
+ runlock(ifc);
+
+ n = snprint(up->genbuf, sizeof up->genbuf,
+ "%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+ mname, state, ip, mac, ia);
+ o += n;
+ if(o <= 0)
+ continue;
+ if(n > len)
+ break;
+ memmove(p, up->genbuf, n);
+ len -= n;
+ p += n;
}
- return n;
+ return p - s;
}
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
{
- uint sflag;
- Block *next, *xp;
- Arpent *a, *b, **l;
- Fs *f;
- uchar ipsrc[IPaddrlen];
- Ipifc *ifc = nil;
- long nrxt;
+ uchar targ[IPaddrlen], src[IPaddrlen];
+ Arpent **l;
- qlock(arp);
- f = arp->f;
+ a->ctime = NOW;
+ if(a->rxtsrem == 0)
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ else
+ a->rxtsrem--;
- a = arp->rxmt;
- if(a==nil){
- nrxt = 0;
- goto dodrops; //return nrxt;
- }
- nrxt = a->rtime - NOW;
- if(nrxt > 3*ReTransTimer/4)
- goto dodrops; //return nrxt;
+ /* put on end of re-transmit chain */
+ for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+ ;
+ *l = a;
- for(; a; a = a->nextrxt){
- ifc = a->ifc;
- assert(ifc != nil);
- if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
- xp = a->hold;
- a->hold = nil;
+ if(l == &f->arp->rxmt)
+ wakeup(&f->arp->rxmtq);
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
- }
+ /* try to use source address of original packet */
+ ipmove(targ, a->ip);
+ if(a->last != nil){
+ ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+ arprelease(f->arp, a);
- cleanarpent(arp, a);
- }
- else
- break;
+ if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+ goto send;
+ } else {
+ arprelease(f->arp, a);
}
- if(a == nil)
- goto dodrops;
+ if(!ipv6local(ifc, src, 0, targ))
+ return;
+send:
+ if(!waserror()){
+ icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+ poperror();
+ }
+}
+static void
+rxmitsols(Arp *arp)
+{
+ Block *next, *bp;
+ Arpent *a;
+ Ipifc *ifc;
+ Route *r;
- qunlock(arp); /* for icmpns */
- if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
- icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
-
- runlock(ifc);
- qlock(arp);
-
- /* put to the end of re-transmit chain */
- l = &arp->rxmt;
- for(b = *l; b; b = b->nextrxt){
- if(b == a){
- *l = a->nextrxt;
- break;
+ qlock(arp);
+ while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+ if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+ if(a->ifcid == ifc->ifcid){
+ ndpsendsol(arp->f, ifc, a); /* unlocks arp */
+ runlock(ifc);
+ qlock(arp);
+ continue;
+ }
+ runlock(ifc);
}
- l = &b->nextrxt;
+ cleanarpent(arp, a);
}
- for(b = *l; b; b = b->nextrxt){
- l = &b->nextrxt;
- }
- *l = a;
- a->rxtsrem--;
- a->nextrxt = nil;
- a->rtime = NOW + ReTransTimer;
-
- a = arp->rxmt;
- if(a==nil)
- nrxt = 0;
- else
- nrxt = a->rtime - NOW;
-
-dodrops:
- xp = arp->dropf;
- arp->dropf = nil;
- arp->dropl = nil;
+ bp = arp->dropf;
+ arp->dropf = arp->dropl = nil;
qunlock(arp);
- for(; xp; xp = next){
- next = xp->list;
- icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+ for(; bp != nil; bp = next){
+ next = bp->list;
+ bp->list = nil;
+ r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+ if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+ if(!waserror()){
+ icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+ poperror();
+ }
+ runlock(ifc);
+ }
+ freeblist(bp);
}
-
- return nrxt;
-
}
static int
rxready(void *v)
{
- Arp *arp = (Arp *) v;
- int x;
+ Arp *arp = (Arp *)v;
- x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
- return x;
+ return arp->rxmt != nil || arp->dropf != nil;
}
static void
@@ -662,20 +568,15 @@
rxmitproc(void *v)
{
Arp *arp = v;
- long wakeupat;
arp->rxmitp = up;
- //print("arp rxmitproc started\n");
if(waserror()){
- arp->rxmitp = 0;
+ arp->rxmitp = nil;
pexit("hangup", 1);
}
for(;;){
- wakeupat = rxmitsols(arp);
- if(wakeupat == 0)
- sleep(&arp->rxmtq, rxready, v);
- else if(wakeupat > ReTransTimer/4)
- tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ sleep(&arp->rxmtq, rxready, v);
+ rxmitsols(arp);
+ tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
}
}
-
--- /dev/null
+++ b/os/ip/chandial.c
@@ -1,0 +1,126 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../ip/ip.h"
+
+typedef struct DS DS;
+static Chan* call(char*, char*, DS*);
+static void _dial_string_parse(char*, DS*);
+
+enum
+{
+ Maxstring= 128,
+};
+
+struct DS
+{
+ char buf[Maxstring]; /* dist string */
+ char *netdir;
+ char *proto;
+ char *rem;
+ char *local; /* other args */
+ char *dir;
+ Chan **ctlp;
+};
+
+/*
+ * the dialstring is of the form '[/net/]proto!dest'
+ */
+Chan*
+chandial(char *dest, char *local, char *dir, Chan **ctlp)
+{
+ DS ds;
+ char clone[Maxpath];
+
+ ds.local = local;
+ ds.dir = dir;
+ ds.ctlp = ctlp;
+
+ _dial_string_parse(dest, &ds);
+ if(ds.netdir == nil)
+ ds.netdir = "/net";
+
+ /* no connection server, don't translate */
+ snprint(clone, sizeof(clone), "%s/%s/clone", ds.netdir, ds.proto);
+ return call(clone, ds.rem, &ds);
+}
+
+static Chan*
+call(char *clone, char *dest, DS *ds)
+{
+ int n;
+ Chan *dchan, *cchan;
+ char name[Maxpath], data[Maxpath], *p;
+
+ cchan = namec(clone, Aopen, ORDWR, 0);
+
+ /* get directory name */
+ if(waserror()){
+ cclose(cchan);
+ nexterror();
+ }
+ n = devtab[cchan->type]->read(cchan, name, sizeof(name)-1, 0);
+ name[n] = 0;
+ for(p = name; *p == ' '; p++)
+ ;
+ sprint(name, "%lud", strtoul(p, 0, 0));
+ p = strrchr(clone, '/');
+ *p = 0;
+ if(ds->dir)
+ snprint(ds->dir, Maxpath, "%s/%s", clone, name);
+ snprint(data, sizeof(data), "%s/%s/data", clone, name);
+
+ /* connect */
+ if(ds->local)
+ snprint(name, sizeof(name), "connect %s %s", dest, ds->local);
+ else
+ snprint(name, sizeof(name), "connect %s", dest);
+ devtab[cchan->type]->write(cchan, name, strlen(name), 0);
+
+ /* open data connection */
+ dchan = namec(data, Aopen, ORDWR, 0);
+ if(ds->ctlp)
+ *ds->ctlp = cchan;
+ else
+ cclose(cchan);
+ poperror();
+ return dchan;
+
+}
+
+/*
+ * parse a dial string
+ */
+static void
+_dial_string_parse(char *str, DS *ds)
+{
+ char *p, *p2;
+
+ strncpy(ds->buf, str, Maxstring);
+ ds->buf[Maxstring-1] = 0;
+
+ p = strchr(ds->buf, '!');
+ if(p == 0) {
+ ds->netdir = 0;
+ ds->proto = "net";
+ ds->rem = ds->buf;
+ } else {
+ if(*ds->buf != '/' && *ds->buf != '#'){
+ ds->netdir = nil;
+ ds->proto = ds->buf;
+ } else {
+ for(p2 = p; *p2 != '/' && p2 != ds->buf; p2--)
+ ;
+ if(p2 == ds->buf)
+ error(Ebadarg);
+ *p2++ = 0;
+ ds->netdir = ds->buf;
+ ds->proto = p2;
+ }
+ *p = 0;
+ ds->rem = p + 1;
+ }
+}
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -14,7 +14,6 @@
Qbootp,
Qndb,
Qiproute,
- Qiprouter,
Qipselftab,
Qlog,
@@ -43,11 +42,11 @@
Maskproto= (1<<Logproto)-1,
Shiftproto= Logtype + Logconv,
- Nfs= 32,
+ Nfs= 128,
};
-#define TYPE(x) ( ((u32)(x).path) & Masktype )
-#define CONV(x) ( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) ( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) ( ((ulong)(x).path) & Masktype )
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
static char network[] = "network";
@@ -58,8 +57,7 @@
extern void nullmediumlink(void);
extern void pktmediumlink(void);
-static long ndbwrite(Fs*, char*, ulong, int);
-extern void closeconv(Conv*);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
static int
ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
devdir(c, q, "stats", 0, network, 0444, dp);
return 1;
- }
+ }
return -1;
}
@@ -144,11 +142,10 @@
return -1;
case Qarp:
p = "arp";
+ prot = 0664;
break;
case Qbootp:
p = "bootp";
- if(bootp == nil)
- return 0;
break;
case Qndb:
p = "ndb";
@@ -157,14 +154,12 @@
break;
case Qiproute:
p = "iproute";
+ prot = 0664;
break;
case Qipselftab:
p = "ipselftab";
prot = 0444;
break;
- case Qiprouter:
- p = "iprouter";
- break;
case Qlog:
p = "log";
break;
@@ -188,7 +183,7 @@
case Qtopdir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -206,19 +201,18 @@
case Qndb:
case Qlog:
case Qiproute:
- case Qiprouter:
case Qipselftab:
return ip1gen(c, TYPE(c->qid), dp);
case Qprotodir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
if(s < f->p[PROTO(c->qid)]->ac) {
cv = f->p[PROTO(c->qid)]->conv[s];
- sprint(up->genbuf, "%d", s);
+ snprint(up->genbuf, sizeof up->genbuf, "%d", s);
mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
return 1;
@@ -262,45 +256,14 @@
fmtinstall('M', eipfmt);
}
-static Fs*
-ipgetfs(int dev)
-{
- extern void (*ipprotoinit[])(Fs*);
- Fs *f;
- int i;
-
- if(dev >= Nfs)
- return nil;
-
- qlock(&fslock);
- if(ipfs[dev] == nil){
- f = smalloc(sizeof(Fs));
- ip_init(f);
- arpinit(f);
- netloginit(f);
- for(i = 0; ipprotoinit[i]; i++)
- ipprotoinit[i](f);
- f->dev = dev;
- ipfs[dev] = f;
- }
- qunlock(&fslock);
-
- return ipfs[dev];
-}
-
IPaux*
newipaux(char *owner, char *tag)
{
IPaux *a;
- int n;
a = smalloc(sizeof(*a));
kstrdup(&a->owner, owner);
- memset(a->tag, ' ', sizeof(a->tag));
- n = strlen(tag);
- if(n > sizeof(a->tag))
- n = sizeof(a->tag);
- memmove(a->tag, tag, n);
+ strncpy(a->tag, tag, sizeof(a->tag));
return a;
}
@@ -310,13 +273,29 @@
ipattach(char* spec)
{
Chan *c;
- int dev;
+ ulong dev;
- dev = atoi(spec);
+ dev = strtoul(spec, nil, 10);
if(dev >= Nfs)
- error("bad specification");
+ error(Enodev);
- ipgetfs(dev);
+ qlock(&fslock);
+ if(ipfs[dev] == nil){
+ extern void (*ipprotoinit[])(Fs*);
+ Fs *f;
+ int i;
+
+ f = smalloc(sizeof(Fs));
+ ip_init(f);
+ arpinit(f);
+ netloginit(f);
+ for(i = 0; ipprotoinit[i]; i++)
+ ipprotoinit[i](f);
+ f->dev = dev;
+ ipfs[dev] = f;
+ }
+ qunlock(&fslock);
+
c = devattach('I', spec);
mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
c->dev = dev;
@@ -327,7 +306,7 @@
}
static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
{
IPaux *a = c->aux;
Walkqid* w;
@@ -338,8 +317,9 @@
return w;
}
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
{
return devstat(c, db, n, nil, 0, ipgen);
}
@@ -360,7 +340,7 @@
};
static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
{
Conv *cv, *nc;
Proto *p;
@@ -375,7 +355,7 @@
default:
break;
case Qndb:
- if(omode & (OWRITE|OTRUNC) && !iseve())
+ if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
error(Eperm);
if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
f->ndb[0] = 0;
@@ -383,10 +363,10 @@
case Qlog:
netlogopen(f);
break;
- case Qiprouter:
- iprouteropen(f);
- break;
case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve())
+ error(Eperm);
break;
case Qtopdir:
case Qprotodir:
@@ -412,13 +392,8 @@
case Qclone:
p = f->p[PROTO(c->qid)];
qlock(p);
- if(waserror()){
- qunlock(p);
- nexterror();
- }
cv = Fsprotoclone(p, ATTACHER(c));
qunlock(p);
- poperror();
if(cv == nil) {
error(Enodev);
break;
@@ -437,15 +412,12 @@
qunlock(p);
nexterror();
}
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
- }
- cv->inuse++;
- if(cv->inuse == 1){
+ if(++cv->inuse == 1){
kstrdup(&cv->owner, ATTACHER(c));
cv->perm = 0660;
}
@@ -455,24 +427,26 @@
break;
case Qlisten:
cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
-
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
}
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
if(cv->state != Announced)
error("not announced");
+ cv->inuse++;
+ qunlock(cv);
+ poperror();
if(waserror()){
closeconv(cv);
nexterror();
}
- qlock(cv);
- cv->inuse++;
- qunlock(cv);
nc = nil;
while(nc == nil) {
@@ -494,7 +468,6 @@
if(nc != nil){
cv->incall = nc->next;
mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
- kstrdup(&cv->owner, ATTACHER(c));
}
qunlock(cv);
@@ -511,13 +484,25 @@
return c;
}
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
{
- Dir *d;
+ error(Eperm);
+ return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+ error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+ Dir *dir;
Conv *cv;
Fs *f;
- Proto *p;
f = ipfs[c->dev];
switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
break;
}
- d = smalloc(sizeof(*d)+n);
+ dir = smalloc(sizeof(Dir)+n);
if(waserror()){
- free(d);
+ free(dir);
nexterror();
}
- n = convM2D(dp, n, d, (char*)&d[1]);
+ n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
if(n == 0)
error(Eshortstat);
- p = f->p[PROTO(c->qid)];
- cv = p->conv[CONV(c->qid)];
- if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
+ }
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
error(Eperm);
- if(!emptystr(d->uid))
- kstrdup(&cv->owner, d->uid);
- if(d->mode != ~0UL)
- cv->perm = d->mode & 0777;
+ if(!emptystr(dir->uid)){
+ if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+ error(Eperm);
+ kstrdup(&cv->owner, dir->uid);
+ }
+ if(dir->mode != ~0UL)
+ cv->perm = dir->mode & 0666;
+ qunlock(cv);
poperror();
- free(d);
+
+ free(dir);
+ poperror();
+
return n;
}
-extern void
+void
closeconv(Conv *cv)
{
Conv *nc;
@@ -564,7 +561,7 @@
}
/* close all incoming calls since no listen will ever happen */
- for(nc = cv->incall; nc; nc = cv->incall){
+ for(nc = cv->incall; nc != nil; nc = cv->incall){
cv->incall = nc->next;
closeconv(nc);
}
@@ -576,9 +573,9 @@
while((mp = cv->multi) != nil)
ipifcremmulti(cv, mp->ma, mp->ia);
- cv->r = nil;
- cv->rgen = 0;
- cv->p->close(cv);
+ if(cv->p->close != nil)
+ (*cv->p->close)(cv);
+
cv->state = Idle;
qunlock(cv);
}
@@ -596,10 +593,6 @@
if(c->flag & COPEN)
netlogclose(f);
break;
- case Qiprouter:
- if(c->flag & COPEN)
- iprouterclose(f);
- break;
case Qdata:
case Qctl:
case Qerr:
@@ -620,13 +613,13 @@
Statelen= 32*1024,
};
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
{
Conv *c;
Proto *x;
char *buf, *p;
- s32 rv;
+ long rv;
Fs *f;
ulong offset = off;
@@ -648,21 +641,22 @@
return readstr(offset, a, n, f->ndb);
case Qiproute:
return routeread(f, a, offset, n);
- case Qiprouter:
- return iprouterread(f, a, n);
case Qipselftab:
return ipselftabread(f, a, offset, n);
case Qlog:
return netlogread(f, a, offset, n);
case Qctl:
- sprint(up->genbuf, "%ud", CONV(ch->qid));
- return readstr(offset, p, n, up->genbuf);
+ buf = smalloc(16);
+ snprint(buf, 16, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
case Qremote:
buf = smalloc(Statelen);
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->remote == nil) {
- sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
} else {
(*x->remote)(c, buf, Statelen-2);
}
@@ -674,7 +668,7 @@
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->local == nil) {
- sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
} else {
(*x->local)(c, buf, Statelen-2);
}
@@ -711,7 +705,7 @@
}
static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
{
Conv *c;
Proto *x;
@@ -740,7 +734,7 @@
/*
* set a local port making sure the quad of raddr,rport,laddr,lport is unique
*/
-static char*
+char*
setluniqueport(Conv* c, int lport)
{
Proto *p;
@@ -771,51 +765,63 @@
}
/*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+ int x;
+
+ for(x = 0; x < p->nc && p->conv[x]; x++)
+ if(p->conv[x]->lport == lport)
+ return 1;
+ return 0;
+}
+
+/*
* pick a local port and set it
*/
-extern void
+char *
setlport(Conv* c)
{
Proto *p;
- ushort *pp;
- int x, found;
+ int i, port;
p = c->p;
- if(c->restricted)
- pp = &p->nextrport;
- else
- pp = &p->nextport;
qlock(p);
- for(;;(*pp)++){
+ if(c->restricted){
+ /* Restricted ports cycle between 600 and 1024. */
+ for(i=0; i<1024-600; i++){
+ if(p->nextrport >= 1024 || p->nextrport < 600)
+ p->nextrport = 600;
+ port = p->nextrport++;
+ if(!lportinuse(p, port))
+ goto chosen;
+ }
+ }else{
/*
- * Fsproto initialises p->nextport to 0 and the restricted
- * ports (p->nextrport) to 600.
- * Restricted ports must lie between 600 and 1024.
- * For the initial condition or if the unrestricted port number
- * has wrapped round, select a random port between 5000 and 1<<15
- * to start at.
+ * Unrestricted ports are chosen randomly
+ * between 2^15 and 2^16. There are at most
+ * 4*Nchan = 4096 ports in use at any given time,
+ * so even in the worst case, a random probe has a
+ * 1 - 4096/2^15 = 87% chance of success.
+ * If 64 successive probes fail, there is a bug somewhere
+ * (or a once in 10^58 event has happened, but that's
+ * less likely than a venti collision).
*/
- if(c->restricted){
- if(*pp >= 1024)
- *pp = 600;
+ for(i=0; i<64; i++){
+ port = (1<<15) + nrand(1<<15);
+ if(!lportinuse(p, port))
+ goto chosen;
}
- else while(*pp < 5000)
- *pp = nrand(1<<15);
-
- found = 0;
- for(x = 0; x < p->nc; x++){
- if(p->conv[x] == nil)
- break;
- if(p->conv[x]->lport == *pp){
- found = 1;
- break;
- }
- }
- if(found == 0)
- break;
}
- c->lport = (*pp)++;
qunlock(p);
+ return "no ports available";
+
+chosen:
+ c->lport = port;
+ qunlock(p);
+ return nil;
}
/*
@@ -822,7 +828,7 @@
* set a local address and port from a string of the form
* [address!]port[!r]
*/
-static char*
+char*
setladdrport(Conv* c, char* str, int announcing)
{
char *p;
@@ -830,8 +836,6 @@
ushort lport;
uchar addr[IPaddrlen];
- rv = nil;
-
/*
* ignore restricted part if it exists. it's
* meaningless on local ports.
@@ -854,8 +858,9 @@
if(strcmp(str, "*") == 0)
ipmove(c->laddr, IPnoaddr);
else {
- parseip(addr, str);
- if(ipforme(c->p->f, addr))
+ if(parseip(addr, str) == -1)
+ return Ebadip;
+ if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
ipmove(c->laddr, addr);
else
return "not a local IP address";
@@ -869,9 +874,13 @@
return setluniqueport(c, 0);
}
- lport = atoi(p);
+ str = p;
+ lport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
if(lport <= 0)
- setlport(c);
+ rv = setlport(c);
else
rv = setluniqueport(c, lport);
return rv;
@@ -886,13 +895,17 @@
if(p == nil)
return "malformed address";
*p++ = 0;
- parseip(c->raddr, str);
- c->rport = atoi(p);
- p = strchr(p, '!');
- if(p){
- if(strstr(p, "!r") != nil)
- c->restricted = 1;
- }
+ if(parseip(c->raddr, str) == -1)
+ return Ebadip;
+
+ str = p;
+ c->rport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
+ if(strstr(p, "!r") != nil)
+ c->restricted = 1;
+
return nil;
}
@@ -912,7 +925,9 @@
if(p != nil)
return p;
setladdr(c);
- setlport(c);
+ p = setlport(c);
+ if (p != nil)
+ return p;
break;
case 3:
p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
return p;
}
- if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- c->ipversion = V4;
- else
- c->ipversion = V6;
+ c->ipversion = convipvers(c);
return nil;
}
@@ -978,10 +988,11 @@
c->rport = 0;
switch(argc){
default:
- return "bad args to announce";
+ break;
case 2:
return setladdrport(c, argv[1], 1);
}
+ return "bad args to announce";
}
/*
@@ -1028,10 +1039,11 @@
{
switch(argc){
default:
- return "bad args to bind";
+ break;
case 2:
return setladdrport(c, argv[1], 0);
}
+ return "bad args to bind";
}
static void
@@ -1042,7 +1054,7 @@
if(x->bind == nil)
p = Fsstdbind(c, cb->f, cb->nf);
else
- p = x->bind(c, cb->f, cb->nf);
+ p = (*x->bind)(c, cb->f, cb->nf);
if(p != nil)
error(p);
}
@@ -1065,8 +1077,8 @@
c->ttl = atoi(cb->f[1]);
}
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
{
Conv *c;
Proto *x;
@@ -1075,6 +1087,7 @@
uchar ia[IPaddrlen], ma[IPaddrlen];
Fs *f;
char *a;
+ ulong offset = off;
a = v;
f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
netlogctl(f, a, n);
return n;
case Qndb:
- return ndbwrite(f, a, off, n);
+ return ndbwrite(f, a, offset, n);
+ break;
case Qctl:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
if(cb->nf == 2){
if(!ipismulticast(c->raddr))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcaddmulti(c, c->raddr, ia);
} else {
- parseip(ma, cb->f[2]);
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
if(!ipismulticast(ma))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
ipifcaddmulti(c, ma, ia);
}
} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
error("remmulti needs interface address");
if(!ipismulticast(c->raddr))
error("remmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcremmulti(c, c->raddr, ia);
} else if(x->ctl != nil) {
- p = x->ctl(c, cb->f, cb->nf);
+ p = (*x->ctl)(c, cb->f, cb->nf);
if(p != nil)
error(p);
} else
@@ -1160,13 +1177,12 @@
return n;
}
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
{
Conv *c;
Proto *x;
Fs *f;
- int n;
switch(TYPE(ch->qid)){
case Qdata:
@@ -1177,11 +1193,7 @@
if(c->wq == nil)
error(Eperm);
- if(bp->next)
- bp = concatblock(bp);
- n = BLEN(bp);
- qbwrite(c->wq, bp);
- return n;
+ return qbwrite(c->wq, bp);
default:
return devbwrite(ch, bp, offset);
}
@@ -1198,13 +1210,13 @@
ipwalk,
ipstat,
ipopen,
- devcreate,
+ ipcreate,
ipclose,
ipread,
ipbread,
ipwrite,
ipbwrite,
- devremove,
+ ipremove,
ipwstat,
};
@@ -1224,12 +1236,15 @@
p->qid.type = QTDIR;
p->qid.path = QID(f->np, 0, Qprotodir);
+ if(p->nc > Maskconv+1){
+ print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+ p->nc = Maskconv+1;
+ }
p->conv = malloc(sizeof(Conv*)*(p->nc+1));
if(p->conv == nil)
panic("Fsproto");
p->x = f->np;
- p->nextport = 0;
p->nextrport = 600;
f->p[f->np++] = p;
@@ -1262,21 +1277,33 @@
if(c == nil){
c = malloc(sizeof(Conv));
if(c == nil)
- error(Enomem);
- qlock(c);
+ return nil;
+ if(waserror()){
+ qfree(c->rq);
+ qfree(c->wq);
+ qfree(c->eq);
+ qfree(c->sq);
+ free(c->ptcl);
+ free(c);
+ return nil;
+ }
c->p = p;
c->x = pp - p->conv;
if(p->ptclsize != 0){
c->ptcl = malloc(p->ptclsize);
- if(c->ptcl == nil) {
- free(c);
+ if(c->ptcl == nil)
error(Enomem);
- }
}
- *pp = c;
- p->ac++;
c->eq = qopen(1024, Qmsg, 0, 0);
+ if(c->eq == nil)
+ error(Enomem);
(*p->create)(c);
+ if(c->rq == nil || c->wq == nil)
+ error(Enomem);
+ poperror();
+ qlock(c);
+ *pp = c;
+ p->ac++;
break;
}
if(canqlock(c)){
@@ -1291,8 +1318,11 @@
}
}
if(pp >= ep) {
- if(p->gc != nil && (*p->gc)(p))
- goto retry;
+ if(p->gc != nil){
+ print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+ if((*p->gc)(p))
+ goto retry;
+ }
return nil;
}
@@ -1307,8 +1337,9 @@
c->lport = 0;
c->rport = 0;
c->restricted = 0;
+ c->ignoreadvice = 0;
c->ttl = MAXTTL;
- c->tos = DFLTTOS;
+ c->tos = 0;
qreopen(c->rq);
qreopen(c->wq);
qreopen(c->eq);
@@ -1321,7 +1352,7 @@
Fsconnected(Conv* c, char* msg)
{
if(msg != nil && *msg != '\0')
- kstrcpy(c->cerr, msg, sizeof(c->cerr));
+ strncpy(c->cerr, msg, ERRMAX-1);
switch(c->state){
@@ -1368,12 +1399,19 @@
for(l = &c->incall; *l; l = &(*l)->next)
i++;
if(i >= Maxincall) {
+ static int beenhere;
+
qunlock(c);
+ if (!beenhere) {
+ beenhere = 1;
+ print("Fsnewcall: incall queue full (%d) on port %d\n",
+ i, c->lport);
+ }
return nil;
}
/* find a free conversation */
- nc = Fsprotoclone(c->p, network);
+ nc = Fsprotoclone(c->p, c->owner);
if(nc == nil) {
qunlock(c);
return nil;
@@ -1394,12 +1432,12 @@
return nc;
}
-static long
+long
ndbwrite(Fs *f, char *a, ulong off, int n)
{
if(off > strlen(f->ndb))
error(Eio);
- if(off+n >= sizeof(f->ndb)-1)
+ if(off+n >= sizeof(f->ndb))
error(Eio);
memmove(f->ndb+off, a, n);
f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
ulong
scalednconv(void)
{
- if(conf.npage*BY2PG >= 128*MB)
+ if(cpuserver && conf.npage*BY2PG >= 128*MB)
return Nchans*4;
return Nchans;
}
--- a/os/ip/esp.c
+++ b/os/ip/esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ * currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ * transport mode (host-to-host)
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,47 +14,79 @@
#include "../port/error.h"
#include "ip.h"
+#include "ipv6.h"
+#include <libsec.h>
-#include "libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by) ((by) * BI2BY)
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
typedef struct Esptail Esptail;
typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
-#define DPRINT if(0)print
+enum {
+ Encrypt,
+ Decrypt,
-enum
-{
- IP_ESPPROTO = 50,
- EsphdrSize = 28, // includes IP header
- IphdrSize = 20, // options have been striped
- EsptailSize = 2, // does not include pad or auth data
- UserhdrSize = 4, // user visable header size - if enabled
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8,
+ Esp6hdrlen = IP6HDR + 8,
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+
+ Desblk = BITS2BYTES(64),
+ Des3keysz = BITS2BYTES(192),
+
+ Aesblk = BITS2BYTES(128),
+ Aeskeysz = BITS2BYTES(128),
};
struct Esphdr
{
- /* ip header */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+ uchar payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ * enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ * orig IP hdrs | ESP hdr |
+ * enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar espproto; /* Protocol */
uchar espplen[2]; /* Header plus data length */
uchar espsrc[4]; /* Ip source */
uchar espdst[4]; /* Ip destination */
- /* esp header */
- uchar espspi[4]; /* Security parameter index */
- uchar espseq[4]; /* Sequence number */
+ Esphdr;
};
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+ IPV6HDR;
+ Esphdr;
+};
+
struct Esptail
{
uchar pad;
@@ -53,16 +93,28 @@
uchar nexthdr;
};
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+ ulong version;
+ ulong iphdrlen;
+ ulong hdrlen; /* iphdrlen + esp hdr len */
+ ulong spi;
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+};
+
/* header as seen by the user */
struct Userhdr
{
- uchar nexthdr; // next protocol
+ uchar nexthdr; /* next protocol */
uchar unused[3];
};
struct Esppriv
{
- ulong in;
+ uvlong in;
ulong inerrors;
};
@@ -72,77 +124,68 @@
struct Espcb
{
int incoming;
- int header; // user user level header
+ int header; /* user-level header */
ulong spi;
- ulong seq; // last seq sent
- ulong window; // for replay attacks
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+
char *espalg;
- void *espstate; // other state for esp
- int espivlen; // in bytes
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
int espblklen;
int (*cipher)(Espcb*, uchar *buf, int len);
+
char *ahalg;
- void *ahstate; // other state for esp
- int ahlen; // auth data length in bytes
+ void *ahstate; /* other state for esp */
+ int ahlen; /* auth data length in bytes */
int ahblklen;
int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+ DigestState *ds;
};
struct Algorithm
{
char *name;
- int keylen; // in bits
- void (*init)(Espcb*, char* name, uchar *key, int keylen);
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, unsigned keylen);
};
-
-enum {
- RC4forward = 10*1024*1024, // maximum skip forward
- RC4back = 100*1024, // maximum look back
-};
-
-struct Esprc4
-{
- ulong cseq; // current byte sequence number
- RC4state current;
-
- int ovalid; // old is valid
- ulong lgseq; // last good sequence
- ulong oseq; // old byte sequence number
- RC4state old;
-};
-
static Conv* convlookup(Proto *esp, ulong spi);
static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
static void espkick(void *x);
+static void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
static Algorithm espalg[] =
{
- "null", 0, nullespinit,
- "des_56_cbc", 64, desespinit,
- "rc4_128", 128, rc4espinit,
- nil, 0, nil,
+ "null", 0, nullespinit,
+ "des3_cbc", 192, des3espinit, /* new rfc2451, des-ede3 */
+ "aes_128_cbc", 128, aescbcespinit, /* new rfc3602 */
+ "aes_ctr", 128, aesctrespinit, /* new rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+ nil, 0, nil,
};
static Algorithm ahalg[] =
{
- "null", 0, nullahinit,
- "hmac_sha1_96", 128, shaahinit,
- "hmac_md5_96", 128, md5ahinit,
- nil, 0, nil,
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
};
static char*
espconnect(Conv *c, char **argv, int argc)
{
- char *p, *pp;
- char *e = nil;
+ char *p, *pp, *e = nil;
ulong spi;
Espcb *ecb = (Espcb*)c->ptcl;
@@ -157,7 +200,10 @@
break;
}
*p++ = 0;
- parseip(c->raddr, argv[1]);
+ if (parseip(c->raddr, argv[1]) == -1) {
+ e = Ebadip;
+ break;
+ }
findlocalip(c->p->f, c->laddr, c->raddr);
ecb->incoming = 0;
ecb->seq = 0;
@@ -215,26 +261,86 @@
ipmove(c->raddr, IPnoaddr);
ecb = (Espcb*)c->ptcl;
- free(ecb->espstate);
- free(ecb->ahstate);
+ secfree(ecb->espstate);
+ secfree(ecb->ahstate);
memset(ecb, 0, sizeof(Espcb));
}
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+ if (*bpp == nil || BLEN(*bpp) == 0) {
+ /* get enough to identify the IP version */
+ *bpp = pullupblock(*bpp, IP4HDR);
+ if(*bpp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return 0;
+ }
+ }
+ return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
static void
+getverslens(int version, Versdep *vp)
+{
+ vp->version = version;
+ switch(vp->version) {
+ case V4:
+ vp->iphdrlen = IP4HDR;
+ vp->hdrlen = Esp4hdrlen;
+ break;
+ case V6:
+ vp->iphdrlen = IP6HDR;
+ vp->hdrlen = Esp6hdrlen;
+ break;
+ default:
+ panic("esp: getverslens version %d wrong", version);
+ }
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+
+ switch(vp->version) {
+ case V4:
+ eh4 = (Esp4hdr*)pkt;
+ v4tov6(vp->raddr, eh4->espsrc);
+ v4tov6(vp->laddr, eh4->espdst);
+ vp->spi = nhgetl(eh4->espspi);
+ break;
+ case V6:
+ eh6 = (Esp6hdr*)pkt;
+ ipmove(vp->raddr, eh6->src);
+ ipmove(vp->laddr, eh6->dst);
+ vp->spi = nhgetl(eh6->espspi);
+ break;
+ default:
+ panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+ }
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
espkick(void *x)
{
+ int nexthdr, payload, pad, align;
+ uchar *auth;
+ Block *bp;
Conv *c = x;
- Esphdr *eh;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Espcb *ecb;
Esptail *et;
Userhdr *uh;
- Espcb *ecb;
- Block *bp;
- int nexthdr;
- int payload;
- int pad;
- int align;
- uchar *auth;
+ Versdep vers;
+ getverslens(convipvers(c), &vers);
bp = qget(c->wq);
if(bp == nil)
return;
@@ -244,7 +350,7 @@
if(ecb->header) {
/* make sure the message has a User header */
- bp = pullupblock(bp, UserhdrSize);
+ bp = pullupblock(bp, Userhdrlen);
if(bp == nil) {
qunlock(c);
return;
@@ -251,15 +357,16 @@
}
uh = (Userhdr*)bp->rp;
nexthdr = uh->nexthdr;
- bp->rp += UserhdrSize;
+ bp->rp += Userhdrlen;
} else {
- nexthdr = 0; // what should this be?
+ nexthdr = 0; /* what should this be? */
}
payload = BLEN(bp) + ecb->espivlen;
/* Make space to fit ip header */
- bp = padblock(bp, EsphdrSize + ecb->espivlen);
+ bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+ getpktspiaddrs(bp->rp, &vers);
align = 4;
if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
align = ecb->espblklen;
if(align % ecb->ahblklen != 0)
panic("espkick: ahblklen is important after all");
- pad = (align-1) - (payload + EsptailSize-1)%align;
+ pad = (align-1) - (payload + Esptaillen-1)%align;
/*
* Make space for tail
@@ -273,70 +380,88 @@
* this is done by calling padblock with a negative size
* Padblock does not change bp->wp!
*/
- bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
- bp->wp += pad+EsptailSize+ecb->ahlen;
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
- eh = (Esphdr *)(bp->rp);
- et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
- // fill in tail
+ /* fill in tail */
et->pad = pad;
et->nexthdr = nexthdr;
- ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
- auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+ /* encrypt the payload */
+ ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
- // fill in head
- eh->vihl = IP_VER4;
- hnputl(eh->espspi, ecb->spi);
- hnputl(eh->espseq, ++ecb->seq);
- v6tov4(eh->espsrc, c->laddr);
- v6tov4(eh->espdst, c->raddr);
- eh->espproto = IP_ESPPROTO;
- eh->frag[0] = 0;
- eh->frag[1] = 0;
+ /* fill in head; construct a new IP header and an ESP header */
+ if (vers.version == V4) {
+ eh4 = (Esp4hdr *)bp->rp;
+ eh4->vihl = IP_VER4;
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
- ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ } else {
+ eh6 = (Esp6hdr *)bp->rp;
+ eh6->vcf[0] = IP_VER6;
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ }
+
+ /* compute secure hash */
+ ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
qunlock(c);
- //print("esp: pass down: %uld\n", BLEN(bp));
- ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (vers.version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
}
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
void
espiput(Proto *esp, Ipifc*, Block *bp)
{
- Esphdr *eh;
- Esptail *et;
- Userhdr *uh;
+ int payload, nexthdr;
+ uchar *auth, *espspi;
Conv *c;
Espcb *ecb;
- uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Esptail *et;
Fs *f;
- uchar *auth;
- ulong spi;
- int payload, nexthdr;
+ Userhdr *uh;
+ Versdep vers;
f = esp->f;
- bp = pullupblock(bp, EsphdrSize+EsptailSize);
+ getverslens(pktipvers(f, &bp), &vers);
+
+ bp = pullupblock(bp, vers.hdrlen + Esptaillen);
if(bp == nil) {
netlog(f, Logesp, "esp: short packet\n");
return;
}
+ getpktspiaddrs(bp->rp, &vers);
- eh = (Esphdr*)(bp->rp);
- spi = nhgetl(eh->espspi);
- v4tov6(raddr, eh->espsrc);
- v4tov6(laddr, eh->espdst);
-
qlock(esp);
/* Look for a conversation structure for this port */
- c = convlookup(esp, spi);
+ c = convlookup(esp, vers.spi);
if(c == nil) {
qunlock(esp);
- netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
icmpnoconv(f, bp);
freeblist(bp);
return;
@@ -346,76 +471,83 @@
qunlock(esp);
ecb = c->ptcl;
- // too hard to do decryption/authentication on block lists
- if(bp->next)
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next != nil)
bp = concatblock(bp);
- if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+ if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
qunlock(c);
- netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- eh = (Esphdr*)(bp->rp);
auth = bp->wp - ecb->ahlen;
- if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+ espspi = vers.version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+
+ /* compute secure hash and authenticate */
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
- laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
- if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+ payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
qunlock(c);
- netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
- laddr, spi, payload, BLEN(bp));
+ netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+ vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
freeb(bp);
return;
}
- if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+ /* decrypt payload */
+ if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
- laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+ vers.raddr, vers.laddr, vers.spi, up->errstr);
freeb(bp);
return;
}
- payload -= EsptailSize;
- et = (Esptail*)(bp->rp + EsphdrSize + payload);
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload);
payload -= et->pad + ecb->espivlen;
nexthdr = et->nexthdr;
if(payload <= 0) {
qunlock(c);
- netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+ vers.raddr, vers.laddr, vers.spi);
freeb(bp);
return;
}
- // trim packet
- bp->rp += EsphdrSize + ecb->espivlen;
+ /* trim packet */
+ bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
bp->wp = bp->rp + payload;
if(ecb->header) {
- // assume UserhdrSize < EsphdrSize
- bp->rp -= UserhdrSize;
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ bp->rp -= Userhdrlen;
uh = (Userhdr*)bp->rp;
- memset(uh, 0, UserhdrSize);
+ memset(uh, 0, Userhdrlen);
uh->nexthdr = nexthdr;
}
+ /* ingress filtering here? */
+
if(qfull(c->rq)){
- netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+ vers.laddr, vers.spi);
freeblist(bp);
}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
- qpass(c->rq, bp);
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp); /* pass packet up the read queue */
}
qunlock(c);
@@ -440,19 +572,19 @@
return e;
}
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
void
espadvise(Proto *esp, Block *bp, char *msg)
{
- Esphdr *h;
Conv *c;
- ulong spi;
+ Versdep vers;
- h = (Esphdr*)(bp->rp);
+ getverslens(pktipvers(esp->f, &bp), &vers);
+ getpktspiaddrs(bp->rp, &vers);
- spi = nhgets(h->espspi);
qlock(esp);
- c = convlookup(esp, spi);
- if(c != nil) {
+ c = convlookup(esp, vers.spi);
+ if(c != nil && !c->ignoreadvice) {
qhangup(c->rq, msg);
qhangup(c->wq, msg);
}
@@ -466,7 +598,7 @@
Esppriv *upriv;
upriv = esp->priv;
- return snprint(buf, len, "%lud %lud\n",
+ return snprint(buf, len, "%llud %lud\n",
upriv->in,
upriv->inerrors);
}
@@ -520,10 +652,10 @@
setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
{
uchar *key;
- int i, nbyte, nchar;
- int c;
+ int c, nbyte, nchar;
+ uint i;
- if(n < 2)
+ if(n < 2 || n > 3)
return "bad format";
for(; alg->name; alg++)
if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
if(alg->name == nil)
return "unknown algorithm";
- if(n != 3)
- return "bad format";
nbyte = (alg->keylen + 7) >> 3;
- nchar = strlen(f[2]);
+ if (n == 2)
+ nchar = 0;
+ else
+ nchar = strlen(f[2]);
+ if(nchar != 2 * nbyte) /* TODO: maybe < is ok */
+ return "key not required length";
+ /* convert hex digits from ascii, in place */
for(i=0; i<nchar; i++) {
c = f[2][i];
if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
else if(c >= 'A' && c <= 'F')
f[2][i] -= 'A'-10;
else
- return "bad character in key";
+ return "non-hex character in key";
}
- key = smalloc(nbyte);
- for(i=0; i<nchar && i*2<nbyte; i++) {
+ /* collapse hex digits into complete bytes in reverse order in key */
+ key = secalloc(nbyte);
+ for(i = 0; i < nchar && i/2 < nbyte; i++) {
c = f[2][nchar-i-1];
if(i&1)
c <<= 4;
- key[i>>1] |= c;
+ key[i/2] |= c;
}
-
+ memset(f[2], 0, nchar);
alg->init(ecb, alg->name, key, alg->keylen);
- free(key);
+ secfree(key);
return nil;
}
+
+/*
+ * null encryption
+ */
+
static int
nullcipher(Espcb*, uchar*, int)
{
@@ -566,7 +708,7 @@
}
static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->espalg = name;
ecb->espblklen = 1;
@@ -581,7 +723,7 @@
}
static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->ahalg = name;
ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
ecb->auth = nullauth;
}
-void
+
+/*
+ * sha1
+ */
+
+static void
seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
DigestState *digest;
- uchar innerhash[SHA1dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = sha1(ipad, 64, nil, nil);
+ digest = sha1(ipad, Hmacblksz, nil, nil);
sha1(t, tlen, innerhash, digest);
- digest = sha1(opad, 64, nil, nil);
+ digest = sha1(opad, Hmacblksz, nil, nil);
sha1(innerhash, SHA1dlen, hash, digest);
}
@@ -615,11 +759,11 @@
static int
shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
{
- uchar hash[SHA1dlen];
int r;
+ uchar hash[SHA1dlen];
memset(hash, 0, SHA1dlen);
- seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -626,40 +770,162 @@
}
static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("shaahinit: bad keylen");
- klen >>= 8; // convert to bytes
+ klen /= BI2BY;
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = shaauth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aeskeysz], ivec[Aeskeysz];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aeskeysz);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aescbccipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aesblk], ivec[Aesblk];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aesblk);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aesctrcipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
DigestState *digest;
- uchar innerhash[MD5dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = md5(ipad, 64, nil, nil);
+ digest = md5(ipad, Hmacblksz, nil, nil);
md5(t, tlen, innerhash, digest);
- digest = md5(opad, 64, nil, nil);
+ digest = md5(opad, Hmacblksz, nil, nil);
md5(innerhash, MD5dlen, hash, digest);
}
@@ -670,7 +936,7 @@
int r;
memset(hash, 0, MD5dlen);
- seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -677,168 +943,102 @@
}
static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("md5ahinit: bad keylen");
- klen >>= 3; // convert to bytes
-
-
+ klen = BITS2BYTES(klen);
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = md5auth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
+
+/*
+ * des, single and triple
+ */
+
static int
descipher(Espcb *ecb, uchar *p, int n)
{
- uchar tmp[8];
- uchar *pp, *tp, *ip, *eip, *ep;
DESstate *ds = ecb->espstate;
- ep = p + n;
if(ecb->incoming) {
- memmove(ds->ivec, p, 8);
- p += 8;
- while(p < ep){
- memmove(tmp, p, 8);
- block_cipher(ds->expanded, p, 1);
- tp = tmp;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; ){
- *p++ ^= *ip;
- *ip++ = *tp++;
- }
- }
+ memmove(ds->ivec, p, Desblk);
+ desCBCdecrypt(p + Desblk, n - Desblk, ds);
} else {
- memmove(p, ds->ivec, 8);
- for(p += 8; p < ep; p += 8){
- pp = p;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; )
- *pp++ ^= *ip++;
- block_cipher(ds->expanded, p, 0);
- memmove(ds->ivec, p, 8);
- }
+ memmove(p, ds->ivec, Desblk);
+ desCBCencrypt(p + Desblk, n - Desblk, ds);
}
return 1;
}
-
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+ DES3state *ds = ecb->espstate;
+
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, Desblk);
+ des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+ } else {
+ memmove(p, ds->ivec, Desblk);
+ des3CBCencrypt(p + Desblk, n - Desblk, ds);
+ }
+ return 1;
+}
+
static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- uchar key[8];
- uchar ivec[8];
- int i;
-
- // bits to bytes
- n = (n+7)>>3;
- if(n > 8)
- n = 8;
+ uchar key[Desblk], ivec[Desblk];
+
+ n = BITS2BYTES(n);
+ if(n > Desblk)
+ n = Desblk;
memset(key, 0, sizeof(key));
memmove(key, k, n);
- for(i=0; i<8; i++)
- ivec[i] = nrand(256);
+ prng(ivec, Desblk);
ecb->espalg = name;
- ecb->espblklen = 8;
- ecb->espivlen = 8;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
+
ecb->cipher = descipher;
- ecb->espstate = smalloc(sizeof(DESstate));
+ ecb->espstate = secalloc(sizeof(DESstate));
setupDESstate(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- Esprc4 *esprc4;
- RC4state tmpstate;
- ulong seq;
- long d, dd;
+ uchar key[3][Desblk], ivec[Desblk];
- if(n < 4)
- return 0;
+ n = BITS2BYTES(n);
+ if(n > Des3keysz)
+ n = Des3keysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Desblk);
+ ecb->espalg = name;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
- esprc4 = ecb->espstate;
- if(ecb->incoming) {
- seq = nhgetl(p);
- p += 4;
- n -= 4;
- d = seq-esprc4->cseq;
- if(d == 0) {
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- if(esprc4->ovalid) {
- dd = esprc4->cseq - esprc4->lgseq;
- if(dd > RC4back)
- esprc4->ovalid = 0;
- }
- } else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
- // this link is hosed
- if(d > RC4forward) {
- strcpy(up->errstr, "rc4cipher: skipped too much");
- return 0;
- }
- esprc4->lgseq = seq;
- if(!esprc4->ovalid) {
- esprc4->ovalid = 1;
- esprc4->oseq = esprc4->cseq;
- memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
- }
- rc4skip(&esprc4->current, d);
- rc4(&esprc4->current, p, n);
- esprc4->cseq = seq+n;
- } else {
-print("reordered packet: %uld %ld\n", seq, d);
- dd = seq - esprc4->oseq;
- if(!esprc4->ovalid || -d > RC4back || dd < 0) {
- strcpy(up->errstr, "rc4cipher: too far back");
- return 0;
- }
- memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
- rc4skip(&tmpstate, dd);
- rc4(&tmpstate, p, n);
- return 1;
- }
-
- // move old state up
- if(esprc4->ovalid) {
- dd = esprc4->cseq - RC4back - esprc4->oseq;
- if(dd > 0) {
- rc4skip(&esprc4->old, dd);
- esprc4->oseq += dd;
- }
- }
- } else {
- hnputl(p, esprc4->cseq);
- p += 4;
- n -= 4;
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- }
- return 1;
+ ecb->cipher = des3cipher;
+ ecb->espstate = secalloc(sizeof(DES3state));
+ setupDES3state(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{
- Esprc4 *esprc4;
- // bits to bytes
- n = (n+7)>>3;
- esprc4 = smalloc(sizeof(Esprc4));
- memset(esprc4, 0, sizeof(Esprc4));
- setupRC4state(&esprc4->current, k, n);
- ecb->espalg = name;
- ecb->espblklen = 4;
- ecb->espivlen = 4;
- ecb->cipher = rc4cipher;
- ecb->espstate = esprc4;
-}
-
+/*
+ * interfacing to devip
+ */
void
espinit(Fs *fs)
{
--- a/os/ip/ethermedium.c
+++ b/os/ip/ethermedium.c
@@ -5,9 +5,9 @@
#include "fns.h"
#include "../port/error.h"
+#include "../port/netif.h"
#include "ip.h"
#include "ipv6.h"
-#include "kernel.h"
typedef struct Etherhdr Etherhdr;
struct Etherhdr
@@ -18,10 +18,10 @@
};
static uchar ipbroadcast[IPaddrlen] = {
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
};
static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
static void sendarp(Ipifc *ifc, Arpent *a);
-static void sendgarp(Ipifc *ifc, uchar*);
+static void sendndp(Ipifc *ifc, Arpent *a);
static int multicastea(uchar *ea, uchar *ip);
static void recvarpproc(void*);
-static void resolveaddr6(Ipifc *ifc, Arpent *a);
static void etherpref2addr(uchar *pref, uchar *ea);
Medium ethermedium =
@@ -53,8 +53,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -70,8 +69,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -94,9 +92,6 @@
*/
enum
{
- ETARP = 0x0806,
- ETIP4 = 0x0800,
- ETIP6 = 0x86DD,
ARPREQUEST = 1,
ARPREPLY = 2,
};
@@ -127,128 +122,92 @@
static void
etherbind(Ipifc *ifc, int argc, char **argv)
{
- Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
- char addr[Maxpath]; //char addr[2*KNAMELEN];
- char dir[Maxpath]; //char dir[2*KNAMELEN];
- char *buf;
- int fd, cfd, n;
- char *ptr;
+ char addr[Maxpath], dir[Maxpath];
Etherrock *er;
+ Chan *c;
+ int n;
if(argc < 2)
error(Ebadarg);
- mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
- buf = nil;
+ /*
+ * get mac address
+ */
+ snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+ c = namec(addr, Aopen, OREAD, 0);
if(waserror()){
- if(mchan4 != nil)
- cclose(mchan4);
- if(cchan4 != nil)
- cclose(cchan4);
- if(achan != nil)
- cclose(achan);
- if(mchan6 != nil)
- cclose(mchan6);
- if(cchan6 != nil)
- cclose(cchan6);
- if(buf != nil)
- free(buf);
- nexterror();
+ cclose(c);
+ nexterror();
}
+ n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+ if(n < 0)
+ error(Eio);
+ addr[n] = 0;
+ if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+ error("could not find mac address");
+ cclose(c);
+ poperror();
+ er = smalloc(sizeof(*er));
+ er->read4p = er->read6p = er->arpp = (void*)-1;
+ er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+ er->f = ifc->conv->p->f;
+
+ if(waserror()){
+ if(er->mchan4 != nil)
+ cclose(er->mchan4);
+ if(er->cchan4 != nil)
+ cclose(er->cchan4);
+ if(er->mchan6 != nil)
+ cclose(er->mchan6);
+ if(er->cchan6 != nil)
+ cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
+ free(er);
+ nexterror();
+ }
+
/*
- * open ip converstation
+ * open ipv4 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x800 failed: %s", up->env->errstr);
- mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x800", argv[2]); /* ETIP4 */
+ er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
/*
* make it non-blocking
*/
- devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
/*
- * get mac address and speed
- */
- snprint(addr, sizeof(addr), "%s/stats", dir);
- fd = kopen(addr, OREAD);
- if(fd < 0)
- errorf("can't open ether stats: %s", up->env->errstr);
-
- buf = smalloc(512);
- n = kread(fd, buf, 511);
- kclose(fd);
- if(n <= 0)
- error(Eio);
- buf[n] = 0;
-
- ptr = strstr(buf, "addr: ");
- if(!ptr)
- error(Eio);
- ptr += 6;
- parsemac(ifc->mac, ptr, 6);
-
- ptr = strstr(buf, "mbps: ");
- if(ptr){
- ptr += 6;
- ifc->mbps = atoi(ptr);
- } else
- ifc->mbps = 100;
-
- /*
- * open arp conversation
- */
- snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
- fd = kdial(addr, nil, nil, nil);
- if(fd < 0)
- errorf("dial 0x806 failed: %s", up->env->errstr);
- achan = commonfdtochan(fd, ORDWR, 0, 1);
- kclose(fd);
-
- /*
- * open ip conversation
+ * open ipv6 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x86DD failed: %s", up->env->errstr);
- mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); /* ETIP6 */
+ er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
/*
* make it non-blocking
*/
- devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
- er = smalloc(sizeof(*er));
- er->mchan4 = mchan4;
- er->cchan4 = cchan4;
- er->achan = achan;
- er->mchan6 = mchan6;
- er->cchan6 = cchan6;
- er->f = ifc->conv->p->f;
- ifc->arg = er;
-
- free(buf);
+ /*
+ * open arp conversation
+ */
+ snprint(addr, sizeof(addr), "%s!0x806", argv[2]); /* ETARP */
+ er->achan = chandial(addr, nil, nil, nil);
poperror();
- kproc("etherread4", etherread4, ifc, 0);
- kproc("recvarpproc", recvarpproc, ifc, 0);
- kproc("etherread6", etherread6, ifc, 0);
+ ifc->arg = er;
+
+ kproc("etherread4", etherread4, ifc);
+ kproc("etherread6", etherread6, ifc);
+ kproc("recvarpproc", recvarpproc, ifc);
}
/*
@@ -259,21 +218,35 @@
{
Etherrock *er = ifc->arg;
- if(er->read4p)
+ while(waserror())
+ ;
+
+ /* wait for readers to start */
+ while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->read4p != nil)
postnote(er->read4p, 1, "unbind", 0);
- if(er->read6p)
+ if(er->read6p != nil)
postnote(er->read6p, 1, "unbind", 0);
- if(er->arpp)
+ if(er->arpp != nil)
postnote(er->arpp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for readers to die */
- while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+ while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan4 != nil)
cclose(er->mchan4);
- if(er->achan != nil)
- cclose(er->achan);
if(er->cchan4 != nil)
cclose(er->cchan4);
if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
cclose(er->mchan6);
if(er->cchan6 != nil)
cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
free(er);
}
@@ -297,16 +272,16 @@
/* get mac address of destination */
a = arpget(er->f->arp, bp, version, ifc, ip, mac);
- if(a){
+ if(a != nil){
/* check for broadcast or multicast */
bp = multicastarp(er->f, a, ifc->m, mac);
- if(bp==nil){
+ if(bp == nil){
switch(version){
case V4:
sendarp(ifc, a);
break;
- case V6:
- resolveaddr6(ifc, a);
+ case V6:
+ sendndp(ifc, a);
break;
default:
panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
/* make it a single block with space for the ether header */
bp = padblock(bp, ifc->m->hsize);
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
ifc = a;
er = ifc->arg;
er->read4p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read4p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput4(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read4p = nil;
+ pexit("hangup", 1);
}
@@ -397,29 +369,28 @@
ifc = a;
er = ifc->arg;
er->read6p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read6p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput6(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read6p = nil;
+ pexit("hangup", 1);
}
static void
@@ -477,6 +448,7 @@
Block *bp;
Etherarp *e;
Etherrock *er = ifc->arg;
+ uchar targ[IPv4addrlen], src[IPv4addrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
return;
}
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+
/* remove all but the last message */
while((bp = a->hold) != nil){
if(bp == a->last)
@@ -492,18 +467,20 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
+ memmove(targ, a->ip+IPv4off, IPv4addrlen);
arprelease(er->f->arp, a);
+ if(!ipv4local(ifc, src, 0, targ))
+ return;
+
n = sizeof(Etherarp);
- if(n < a->type->mintu)
- n = a->type->mintu;
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
bp = allocb(n);
memset(bp->rp, 0, n);
e = (Etherarp*)bp->rp;
- memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
- ipv4local(ifc, e->spa);
+ memmove(e->tpa, targ, sizeof(e->tpa));
+ memmove(e->spa, src, sizeof(e->spa));
memmove(e->sha, ifc->mac, sizeof(e->sha));
memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("arp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
{
- int sflag;
Block *bp;
Etherrock *er = ifc->arg;
- uchar ipsrc[IPaddrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
- a->rtime = NOW + ReTransTimer;
- if(a->rxtsrem <= 0) {
- arprelease(er->f->arp, a);
- return;
- }
-
- a->rxtsrem--;
- arprelease(er->f->arp, a);
-
- if(sflag = ipv6anylocal(ifc, ipsrc))
- icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+ ndpsendsol(er->f, ifc, a); /* unlocks arp */
}
/*
@@ -569,10 +530,6 @@
Etherarp *e;
Etherrock *er = ifc->arg;
- /* don't arp for our initial non address */
- if(ipcmp(ip, IPnoaddr) == 0)
- return;
-
n = sizeof(Etherarp);
if(n < ifc->m->mintu)
n = ifc->m->mintu;
@@ -593,15 +550,13 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("garp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
recvarp(Ipifc *ifc)
{
- int n;
+ int n, forme;
Block *ebp, *rbp;
Etherarp *e, *r;
uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
Etherrock *er = ifc->arg;
ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
- if(ebp == nil) {
- print("arp: rcv: %r\n");
+ if(ebp == nil)
return;
- }
+ rlock(ifc);
+
e = (Etherarp*)ebp->rp;
switch(nhgets(e->op)) {
default:
@@ -620,9 +575,13 @@
break;
case ARPREPLY:
+ /* make sure not to enter multi/broadcast address */
+ if(e->sha[0] & 1)
+ break;
+
/* check for machine using my ip address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
print("arprep: 0x%E/0x%E also has ip addr %V\n",
e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
}
}
- /* make sure we're not entering broadcast addresses */
- if(ipcmp(ip, ipbroadcast) == 0 ||
- !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
- print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
- e->s, e->sha, e->spa);
- break;
- }
-
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+ /* refresh what we know about sender */
+ arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
break;
case ARPREQUEST:
+ /* don't reply to multi/broadcast addresses */
+ if(e->sha[0] & 1)
+ break;
+
/* don't answer arps till we know who we are */
- if(ifc->lifc == 0)
+ if(ifc->lifc == nil)
break;
/* check for machine using my ip or ether address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
- if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+ if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
/* print only once */
- print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+ print("arpreq: 0x%E also has ip addr %V\n",
+ e->sha, e->spa);
memmove(eprinted, e->spa, sizeof(e->spa));
}
+ break;
}
} else {
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
- print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+ print("arpreq: %V also has ether addr %E\n",
+ e->spa, e->sha);
break;
}
}
- /* refresh what we know about sender */
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
- /* answer only requests for our address or systems we're proxying for */
+ /*
+ * when request is for our address or systems we're proxying for,
+ * enter senders address into arp table and reply, otherwise just
+ * refresh the senders address.
+ */
v4tov6(ip, e->tpa);
- if(!iplocalonifc(ifc, ip))
- if(!ipproxyifc(er->f, ifc, ip))
+ forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+ if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
break;
n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
memmove(r->s, ifc->mac, sizeof(r->s));
rbp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
- if(n < 0)
- print("arp: write: %r\n");
+ runlock(ifc);
+ freeb(ebp);
+
+ devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+ return;
}
+
+ runlock(ifc);
freeb(ebp);
}
@@ -707,7 +671,7 @@
er->arpp = up;
if(waserror()){
- er->arpp = 0;
+ er->arpp = nil;
pexit("hangup", 1);
}
for(;;)
@@ -749,14 +713,9 @@
multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
{
/* is it broadcast? */
- switch(ipforme(f, a->ip)){
- case Runi:
- return nil;
- case Rbcast:
- memset(mac, 0xff, 6);
+ if(ipforme(f, a->ip) == Rbcast){
+ memset(mac, 0xff, medium->maclen);
return arpresolve(f->arp, a, medium, mac);
- default:
- break;
}
/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
}
-static void
+static void
etherpref2addr(uchar *pref, uchar *ea)
{
- pref[8] = ea[0] | 0x2;
- pref[9] = ea[1];
+ pref[8] = ea[0] ^ 0x2;
+ pref[9] = ea[1];
pref[10] = ea[2];
pref[11] = 0xFF;
pref[12] = 0xFE;
@@ -789,4 +748,41 @@
pref[13] = ea[3];
pref[14] = ea[4];
pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+ static char tdad[] = "dad6";
+ uchar a[IPaddrlen];
+
+ if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+ return;
+
+ if(isv4(ip)){
+ sendgarp(ifc, ip);
+ return;
+ }
+
+ if((lifc->type&Rv4) != 0)
+ return;
+
+ if(!lifc->tentative){
+ icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+ return;
+ }
+
+ if(ipcmp(lifc->local, ip) != 0)
+ return;
+
+ /* temporarily add route for duplicate address detection */
+ ipv62smcast(a, ip);
+ addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ if(waserror()){
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ nexterror();
+ }
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
}
--- a/os/ip/gre.c
+++ b/os/ip/gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,10 +10,7 @@
#include "ip.h"
-#define DPRINT if(0)print
-
-enum
-{
+enum {
GRE_IPONLY = 12, /* size of ip header */
GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
IP_GREPROTO = 47,
@@ -18,10 +18,33 @@
GRErxms = 200,
GREtickms = 100,
GREmaxxmit = 10,
+
+ K = 1024,
+ GREqlen = 256 * K,
+
+ GRE_cksum = 0x8000,
+ GRE_routing = 0x4000,
+ GRE_key = 0x2000,
+ GRE_seq = 0x1000,
+
+ Nring = 1 << 10, /* power of two, please */
+ Ringmask = Nring - 1,
+
+ GREctlraw = 0,
+ GREctlcooked,
+ GREctlretunnel,
+ GREctlreport,
+ GREctldlsuspend,
+ GREctlulsuspend,
+ GREctldlresume,
+ GREctlulresume,
+ GREctlforward,
+ GREctlulkey,
+ Ncmds,
};
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
/* ip header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
@@ -28,7 +51,7 @@
uchar len[2]; /* packet length (including headers) */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl;
uchar proto; /* Protocol */
uchar cksum[2]; /* checksum */
uchar src[4]; /* Ip source */
@@ -37,21 +60,115 @@
/* gre header */
uchar flags[2];
uchar eproto[2]; /* encapsulation protocol */
-} GREhdr;
+};
typedef struct GREpriv GREpriv;
-struct GREpriv
-{
- int raw; /* Raw GRE mode */
-
+struct GREpriv{
/* non-MIB stats */
- ulong csumerr; /* checksum errors */
- ulong lenerr; /* short packet */
+ uvlong lenerr; /* short packet */
};
+typedef struct Bring Bring;
+struct Bring{
+ Block *ring[Nring];
+ long produced;
+ long consumed;
+};
+
+typedef struct GREconv GREconv;
+struct GREconv{
+ int raw;
+
+ /* Retunnelling information. v4 only */
+ uchar north[4]; /* HA */
+ uchar south[4]; /* Base station */
+ uchar hoa[4]; /* Home address */
+ uchar coa[4]; /* Careof address */
+ ulong seq; /* Current sequence # */
+ int dlsusp; /* Downlink suspended? */
+ int ulsusp; /* Uplink suspended? */
+ ulong ulkey; /* GRE key */
+
+ QLock lock; /* Lock for rings */
+ Bring dlpending; /* Ring of pending packets */
+ Bring dlbuffered; /* Received while suspended */
+ Bring ulbuffered; /* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+ uchar *rp;
+ ulong seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+ char *cmd;
+ int argc;
+ char *(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw] = { "raw", 1, grectlraw, },
+[GREctlcooked] = { "cooked", 1, grectlcooked, },
+[GREctlretunnel]= { "retunnel", 5, grectlretunnel, },
+[GREctlreport] = { "report", 2, grectlreport, },
+[GREctldlsuspend]= { "dlsuspend", 1, grectldlsuspend,},
+[GREctlulsuspend]= { "ulsuspend", 1, grectlulsuspend,},
+[GREctldlresume]= { "dlresume", 1, grectldlresume, },
+[GREctlulresume]= { "ulresume", 1, grectlulresume, },
+[GREctlforward] = { "forward", 2, grectlforward, },
+[GREctlulkey] = { "ulkey", 2, grectlulkey, },
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)
+{
+ Block *bp;
+
+ if(r->consumed == r->produced)
+ return nil;
+
+ bp = r->ring[r->consumed & Ringmask];
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+ return bp;
+}
+
+static void
+addring(Bring *r, Block *bp)
+{
+ Block *tbp;
+
+ if(r->produced - r->consumed > Ringmask){
+ /* Full! */
+ tbp = r->ring[r->produced & Ringmask];
+ assert(tbp);
+ freeb(tbp);
+ r->consumed++;
+ }
+ r->ring[r->produced & Ringmask] = bp;
+ r->produced++;
+}
+
+static char *
greconnect(Conv *c, char **argv, int argc)
{
Proto *p;
@@ -91,7 +208,7 @@
static void
grecreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->rq = qopen(GREqlen, Qmsg, 0, c);
c->wq = qbypass(grekick, c);
}
@@ -98,44 +215,88 @@
static int
grestate(Conv *c, char *state, int n)
{
- USED(c);
- return snprint(state, n, "%s", "Datagram");
+ GREconv *grec;
+ char *ep, *p;
+
+ grec = c->ptcl;
+ p = state;
+ ep = p + n;
+ p = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+ "pending %uld %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+ c->inuse? "Open ": "Closed ",
+ grec->raw? "raw ": "",
+ grec->dlsusp? "DL suspended ": "",
+ grec->ulsusp? "UL suspended ": "",
+ grec->hoa, grec->north, grec->south, grec->seq,
+ grec->dlpending.consumed, grec->dlpending.produced,
+ grec->dlbuffered.consumed, grec->dlbuffered.produced,
+ grec->ulbuffered.consumed, grec->ulbuffered.produced,
+ grec->ulkey);
+ return p - state;
}
static char*
greannounce(Conv*, char**, int)
{
- return "pktifc does not support announce";
+ return "gre does not support announce";
}
static void
greclose(Conv *c)
{
- qclose(c->rq);
- qclose(c->wq);
- qclose(c->eq);
+ GREconv *grec;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ /* Make sure we don't forward any more packets */
+ memset(grec->hoa, 0, sizeof grec->hoa);
+ memset(grec->north, 0, sizeof grec->north);
+ memset(grec->south, 0, sizeof grec->south);
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->dlpending)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->dlbuffered)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->ulbuffered)) != nil)
+ freeb(bp);
+
+ grec->dlpending.produced = grec->dlpending.consumed = 0;
+ grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+ grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+ qunlock(&grec->lock);
+
+ grec->raw = 0;
+ grec->seq = 0;
+ grec->dlsusp = grec->ulsusp = 1;
+
+ qhangup(c->rq, sessend);
+ qhangup(c->wq, sessend);
+ qhangup(c->eq, sessend);
ipmove(c->laddr, IPnoaddr);
ipmove(c->raddr, IPnoaddr);
- c->lport = 0;
- c->rport = 0;
+ c->lport = c->rport = 0;
}
-int drop;
-
static void
grekick(void *x, Block *bp)
{
- Conv *c = x;
- GREhdr *ghp;
+ Conv *c;
+ GREconv *grec;
+ GREhdr *gre;
uchar laddr[IPaddrlen], raddr[IPaddrlen];
if(bp == nil)
return;
+ c = x;
+ grec = c->ptcl;
+
/* Make space to fit ip header (gre header already there) */
bp = padblock(bp, GRE_IPONLY);
- if(bp == nil)
- return;
/* make sure the message has a GRE header */
bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
if(bp == nil)
return;
- ghp = (GREhdr *)(bp->rp);
- ghp->vihl = IP_VER4;
+ gre = (GREhdr *)bp->rp;
+ gre->vihl = IP_VER4;
- if(!((GREpriv*)c->p->priv)->raw){
- v4tov6(raddr, ghp->dst);
+ if(grec->raw == 0){
+ v4tov6(raddr, gre->dst);
if(ipcmp(raddr, v4prefix) == 0)
- memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
- v4tov6(laddr, ghp->src);
+ memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, gre->src);
if(ipcmp(laddr, v4prefix) == 0){
if(ipcmp(c->laddr, IPnoaddr) == 0)
- findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
- memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ /* pick interface closest to dest */
+ findlocalip(c->p->f, c->laddr, raddr);
+ memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
}
- hnputs(ghp->eproto, c->rport);
+ hnputs(gre->eproto, c->rport);
}
- ghp->proto = IP_GREPROTO;
- ghp->frag[0] = 0;
- ghp->frag[1] = 0;
+ gre->proto = IP_GREPROTO;
+ gre->frag[0] = gre->frag[1] = 0;
+ grepdout++;
+ grebdout += BLEN(bp);
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
{
- int len;
- GREhdr *ghp;
- Conv *c, **p;
- ushort eproto;
+ Metablock *m;
+ GREconv *grec;
+ GREhdr *gre;
+ int hdrlen, suspended, extra;
+ ushort flags;
+ ulong seq;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1){
+ freeb(bp);
+ return;
+ }
+
+ /*
+ * We've received a packet with a GRE header and we need to
+ * re-adjust the packet header to strip all unwanted parts
+ * but leave room for only a sequence number.
+ */
+ grec = c->ptcl;
+ flags = nhgets(gre->flags);
+ hdrlen = 0;
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%V routing info present. Discarding packet", gre->src);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ /*
+ * The outgoing packet only has the sequence number set. Make room
+ * for the sequence number.
+ */
+ if(hdrlen != sizeof(ulong)){
+ extra = hdrlen - sizeof(ulong);
+ if(extra < 0 && bp->rp - bp->base < -extra){
+ print("gredownlink: cannot add sequence number\n");
+ freeb(bp);
+ return;
+ }
+ memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+ bp->rp += extra;
+ assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+ gre = (GREhdr *)bp->rp;
+ }
+ seq = grec->seq++;
+ hnputs(gre->flags, GRE_seq);
+ hnputl(bp->rp + sizeof(GREhdr), seq);
+
+ /*
+ * Keep rp and seq at the base. ipoput4 consumes rp for
+ * refragmentation.
+ */
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ m->rp = bp->rp;
+ m->seq = seq;
+
+ /*
+ * Here we make a decision what we're doing with the packet. We're
+ * doing this w/o holding a lock which means that later on in the
+ * process we may discover we've done the wrong thing. I don't want
+ * to call ipoput with the lock held.
+ */
+restart:
+ suspended = grec->dlsusp;
+ if(suspended){
+ if(!canqlock(&grec->lock)){
+ /*
+ * just give up. too bad, we lose a packet. this
+ * is just too hard and my brain already hurts.
+ */
+ freeb(bp);
+ return;
+ }
+
+ if(!grec->dlsusp){
+ /*
+ * suspend race. We thought we were suspended, but
+ * we really weren't.
+ */
+ qunlock(&grec->lock);
+ goto restart;
+ }
+
+ /* Undo the incorrect ref count addition */
+ addring(&grec->dlbuffered, bp);
+ qunlock(&grec->lock);
+ return;
+ }
+
+ /*
+ * When we get here, we're not suspended. Proceed to send the
+ * packet.
+ */
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+ grepdout++;
+ grebdout += BLEN(bp);
+
+ /*
+ * Now make sure we didn't do the wrong thing.
+ */
+ if(!canqlock(&grec->lock)){
+ freeb(bp); /* The packet just goes away */
+ return;
+ }
+
+ /* We did the right thing */
+ addring(&grec->dlpending, bp);
+ qunlock(&grec->lock);
+}
+
+static void
+greuplink(Conv *c, Block *bp)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ ushort flags;
+
+ gre = (GREhdr *)bp->rp;
+ if(gre->ttl == 1)
+ return;
+
+ grec = c->ptcl;
+ memmove(gre->src, grec->coa, sizeof gre->src);
+ memmove(gre->dst, grec->north, sizeof gre->dst);
+
+ /*
+ * Add a key, if needed.
+ */
+ if(grec->ulkey){
+ flags = nhgets(gre->flags);
+ if(flags & (GRE_cksum|GRE_routing)){
+ print("%V routing info present. Discarding packet\n",
+ gre->src);
+ freeb(bp);
+ return;
+ }
+
+ if((flags & GRE_key) == 0){
+ /* Make room for the key */
+ if(bp->rp - bp->base < sizeof(ulong)){
+ print("%V can't add key\n", gre->src);
+ freeb(bp);
+ return;
+ }
+
+ bp->rp -= 4;
+ memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+ gre = (GREhdr *)bp->rp;
+ hnputs(gre->flags, flags | GRE_key);
+ }
+
+ /* Add the key */
+ hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+ }
+
+ if(!canqlock(&grec->lock)){
+ freeb(bp);
+ return;
+ }
+
+ if(grec->ulsusp)
+ addring(&grec->ulbuffered, bp);
+ else{
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ grepuout++;
+ grebuout += BLEN(bp);
+ }
+ qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+ int len, hdrlen;
+ ushort eproto, flags;
uchar raddr[IPaddrlen];
+ Conv *c, **p;
+ GREconv *grec;
+ GREhdr *gre;
GREpriv *gpriv;
+ Ip4hdr *ip;
- gpriv = gre->priv;
- ghp = (GREhdr*)(bp->rp);
+ /*
+ * We don't want to deal with block lists. Ever. The problem is
+ * that when the block is forwarded, devether.c puts the block into
+ * a queue that also uses ->next. Just do not use ->next here!
+ */
+ if(bp->next != nil)
+ bp = pullupblock(bp, blocklen(bp));
- v4tov6(raddr, ghp->src);
- eproto = nhgets(ghp->eproto);
- qlock(gre);
+ gre = (GREhdr *)bp->rp;
+ if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+ freeb(bp);
+ return;
+ }
- /* Look for a conversation structure for this port and address */
- c = nil;
- for(p = gre->conv; *p; p++) {
+ v4tov6(raddr, gre->src);
+ eproto = nhgets(gre->eproto);
+ flags = nhgets(gre->flags);
+ hdrlen = sizeof(GREhdr);
+
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%I routing info present. Discarding packet\n", raddr);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ qlock(proto);
+
+ if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+ ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+ /*
+ * Look for a conversation structure for this port and address, or
+ * match the retunnel part, or match on the raw flag.
+ */
+ for(p = proto->conv; *p; p++) {
+ c = *p;
+
+ if(c->inuse == 0)
+ continue;
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+ grepdin++;
+ grebdin += BLEN(bp);
+ gredownlink(c, bp);
+ qunlock(proto);
+ return;
+ }
+
+ if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+ grepuin++;
+ grebuin += BLEN(bp);
+ greuplink(c, bp);
+ qunlock(proto);
+ return;
+ }
+ }
+ }
+
+
+ /*
+ * when we get here, none of the forwarding tunnels matched. now
+ * try to match on raw and conversational sessions.
+ */
+ for(c = nil, p = proto->conv; *p; p++) {
c = *p;
+
if(c->inuse == 0)
continue;
- if(c->rport == eproto &&
- (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(c->rport == eproto &&
+ (grec->raw || ipcmp(c->raddr, raddr) == 0))
break;
}
- if(*p == nil) {
- qunlock(gre);
- freeblist(bp);
+ qunlock(proto);
+
+ if(*p == nil){
+ freeb(bp);
return;
}
- qunlock(gre);
-
/*
* Trim the packet down to data size
*/
- len = nhgets(ghp->len) - GRE_IPONLY;
+ len = nhgets(gre->len) - GRE_IPONLY;
if(len < GRE_IPPLUSGRE){
- freeblist(bp);
+ freeb(bp);
return;
}
+
bp = trimblock(bp, GRE_IPONLY, len);
if(bp == nil){
+ gpriv = proto->priv;
gpriv->lenerr++;
return;
}
- /*
- * Can't delimit packet so pull it all into one block.
- */
- if(qlen(c->rq) > 64*1024)
- freeblist(bp);
- else{
- bp = concatblock(bp);
- if(bp == 0)
- panic("greiput");
- qpass(c->rq, bp);
- }
+ qpass(c->rq, bp);
}
int
@@ -234,29 +649,258 @@
GREpriv *gpriv;
gpriv = gre->priv;
+ return snprint(buf, len,
+ "gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+ grepdin, grepdout, grepuin, grepuout,
+ grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
- return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->raw = 1;
+ return nil;
}
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
{
- GREpriv *gpriv;
+ GREconv *grec;
- gpriv = c->p->priv;
- if(n == 1){
- if(strcmp(f[0], "raw") == 0){
- gpriv->raw = 1;
- return nil;
- }
- else if(strcmp(f[0], "cooked") == 0){
- gpriv->raw = 0;
- return nil;
- }
+ grec = c->ptcl;
+ grec->raw = 0;
+ return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+ uchar ipaddr[4];
+
+ grec = c->ptcl;
+ if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+ return "tunnel already set up";
+
+ v4parseip(ipaddr, argv[1]);
+ if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+ return "bad hoa";
+ memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+ v4parseip(ipaddr, argv[2]);
+ memmove(grec->north, ipaddr, sizeof grec->north);
+ v4parseip(ipaddr, argv[3]);
+ memmove(grec->south, ipaddr, sizeof grec->south);
+ v4parseip(ipaddr, argv[4]);
+ memmove(grec->coa, ipaddr, sizeof grec->coa);
+ grec->ulsusp = 1;
+ grec->dlsusp = 0;
+
+ return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+ ulong seq;
+ Block *bp;
+ Bring *r;
+ GREconv *grec;
+ Metablock *m;
+
+ grec = c->ptcl;
+ seq = strtoul(argv[1], nil, 0);
+
+ qlock(&grec->lock);
+ r = &grec->dlpending;
+ while(r->produced - r->consumed > 0){
+ bp = r->ring[r->consumed & Ringmask];
+
+ assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ if((long)(seq - m->seq) <= 0)
+ break;
+
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+
+ freeb(bp);
}
- return "unknown control request";
+ qunlock(&grec->lock);
+ return nil;
}
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->dlsusp)
+ return "already suspended";
+
+ grec->dlsusp = 1;
+ return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->ulsusp)
+ return "already suspended";
+
+ grec->ulsusp = 1;
+ return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ qunlock(&grec->lock);
+
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+
+ qlock(&grec->lock);
+ addring(&grec->dlpending, bp);
+ }
+ grec->dlsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ grec->ulsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+ Block *bp;
+ GREconv *grec;
+ GREhdr *gre;
+ Metablock *m;
+
+ grec = c->ptcl;
+
+ v4parseip(grec->south, argv[1]);
+ memmove(grec->north, grec->south, sizeof grec->north);
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+ grec->dlsusp = 0;
+ grec->ulsusp = 0;
+
+ while((bp = getring(&grec->dlpending)) != nil){
+
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ assert(m->rp >= bp->base && m->rp < bp->lim);
+
+ bp->rp = m->rp;
+
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->ulkey = strtoul(argv[1], nil, 0);
+ return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+ int i;
+
+ if(n < 1)
+ return "too few arguments";
+
+ for(i = 0; i < Ncmds; i++)
+ if(strcmp(f[0], grectls[i].cmd) == 0)
+ break;
+
+ if(i == Ncmds)
+ return "no such command";
+ if(grectls[i].argc != 0 && grectls[i].argc != n)
+ return "incorrect number of arguments";
+
+ return grectls[i].f(c, n, f);
+}
+
void
greinit(Fs *fs)
{
@@ -276,7 +920,7 @@
gre->stats = grestats;
gre->ipproto = IP_GREPROTO;
gre->nc = 64;
- gre->ptclsize = 0;
+ gre->ptclsize = sizeof(GREconv);
Fsproto(fs, gre);
}
--- a/os/ip/icmp.c
+++ b/os/ip/icmp.c
@@ -44,11 +44,6 @@
Maxtype = 18,
};
-enum
-{
- MinAdvise = 24, /* minimum needed for us to advise another protocol */
-};
-
char *icmpnames[Maxtype+1] =
{
[EchoReply] "EchoReply",
@@ -70,6 +65,8 @@
IP_ICMPPROTO = 1,
ICMP_IPSIZE = 20,
ICMP_HDRSIZE = 8,
+
+ MinAdvise = ICMP_IPSIZE+4, /* minimum needed for us to advise another protocol */
};
enum
@@ -113,7 +110,7 @@
c->wq = qbypass(icmpkick, c);
}
-extern char*
+char*
icmpconnect(Conv *c, char **argv, int argc)
{
char *e;
@@ -126,11 +123,11 @@
return nil;
}
-extern int
+int
icmpstate(Conv *c, char *state, int n)
{
USED(c);
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
"Datagram",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
);
}
-extern char*
+char*
icmpannounce(Conv *c, char **argv, int argc)
{
char *e;
@@ -150,7 +147,7 @@
return nil;
}
-extern void
+void
icmpclose(Conv *c)
{
qclose(c->rq);
@@ -169,8 +166,7 @@
if(bp == nil)
return;
-
- if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+ if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
freeblist(bp);
return;
}
@@ -190,21 +186,50 @@
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
{
+ uchar addr[IPaddrlen];
+ int i;
+
+ v4tov6(addr, ip4);
+ if(ipismulticast(addr))
+ return 0;
+ i = ipforme(f, addr);
+ return i == 0 || i == Runi;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+ uchar addr[IPaddrlen];
+
+ v4tov6(addr, ip4);
+ if(ipismulticast(addr))
+ return 0;
+ return ipforme(f, addr) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
Block *nbp;
Icmp *p, *np;
+ uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
+ if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+ return;
- netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+ ia, p->src, p->dst);
+
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
np = (Icmp *)nbp->rp;
np->vihl = IP_VER4;
+ memmove(np->src, ia, sizeof(np->src));
memmove(np->dst, p->src, sizeof(np->dst));
- v6tov4(np->src, ia);
memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
np->type = TimeExceed;
np->code = 0;
@@ -214,7 +239,6 @@
memset(np->cksum, 0, sizeof(np->cksum));
hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
}
static void
@@ -222,20 +246,10 @@
{
Block *nbp;
Icmp *p, *np;
- int i;
- uchar addr[IPaddrlen];
p = (Icmp *)bp->rp;
-
- /* only do this for unicast sources and destinations */
- v4tov6(addr, p->dst);
- i = ipforme(f, addr);
- if((i&Runi) == 0)
+ if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
return;
- v4tov6(addr, p->src);
- i = ipforme(f, addr);
- if(i != 0 && (i&Runi) == 0)
- return;
netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmpnoconv(Fs *f, Block *bp)
{
icmpunreachable(f, bp, 3, 0);
}
-extern void
+void
icmpcantfrag(Fs *f, Block *bp, int mtu)
{
icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
static void
goticmpkt(Proto *icmp, Block *bp)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
- v4tov6(dst, p->src);
+ v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
- if(ipcmp(s->raddr, dst) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
freeblist(bp);
}
static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
{
Icmp *q;
uchar ip[4];
q = (Icmp *)bp->rp;
+ if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+ return nil;
+
q->vihl = IP_VER4;
memmove(ip, q->src, sizeof(q->dst));
memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
[3] "port unreachable",
[4] "fragmentation needed and DF set",
[5] "source route failed",
+[6] "destination network unknown",
+[7] "destination host unknown",
+[8] "source host isolated",
+[9] "network administratively prohibited",
+[10] "host administratively prohibited",
+[11] "network unreachable for tos",
+[12] "host unreachable for tos",
+[13] "communication administratively prohibited",
+[14] "host precedence violation",
+[15] "precedence cutoff in effect",
};
static void
icmpiput(Proto *icmp, Ipifc*, Block *bp)
{
- int n, iplen;
+ int n;
Icmp *p;
Block *r;
Proto *pr;
@@ -332,12 +355,10 @@
Icmppriv *ipriv;
ipriv = icmp->priv;
-
ipriv->stats[InMsgs]++;
- p = (Icmp *)bp->rp;
- netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
- n = blocklen(bp);
+ bp = concatblock(bp);
+ n = BLEN(bp);
if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
ipriv->stats[InErrors]++;
ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
goto raise;
}
- iplen = nhgets(p->length);
- if(iplen > n || (iplen % 1)){
- ipriv->stats[LenErrs]++;
+ if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
ipriv->stats[InErrors]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto raise;
- }
- if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
- ipriv->stats[InErrors]++;
ipriv->stats[CsumErrs]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto raise;
}
+ p = (Icmp *)bp->rp;
+ netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+ (p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+ p->type, p->code);
if(p->type <= Maxtype)
ipriv->in[p->type]++;
switch(p->type) {
case EchoRequest:
- if (iplen < n)
- bp = trimblock(bp, 0, iplen);
- r = mkechoreply(bp);
+ r = mkechoreply(bp, icmp->f);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case Unreachable:
- if(p->code > 5)
- msg = unreachcode[1];
- else
+ if(p->code >= nelem(unreachcode)) {
+ snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+ p->src, p->dst, p->code);
+ msg = m2;
+ } else
msg = unreachcode[p->code];
+ Advise:
bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs]++;
goto raise;
}
p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
+ if((nhgets(p->frag) & IP_FO) == 0){
+ pr = Fsrcvpcolx(icmp->f, p->proto);
+ if(pr != nil && pr->advise != nil) {
+ (*pr->advise)(pr, bp, msg);
+ return;
+ }
}
-
bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
goticmpkt(icmp, bp);
break;
case TimeExceed:
if(p->code == 0){
- sprint(m2, "ttl exceeded at %V", p->src);
-
- bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
- ipriv->stats[LenErrs]++;
- goto raise;
- }
- p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
- return;
- }
- bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+ goto Advise;
}
-
goticmpkt(icmp, bp);
break;
default:
@@ -419,22 +428,25 @@
freeblist(bp);
}
-void
+static void
icmpadvise(Proto *icmp, Block *bp, char *msg)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, src) == 0)
if(ipcmp(s->raddr, dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -443,7 +455,7 @@
freeblist(bp);
}
-int
+static int
icmpstats(Proto *icmp, char *buf, int len)
{
Icmppriv *priv;
@@ -456,7 +468,7 @@
for(i = 0; i < Nstats; i++)
p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
for(i = 0; i <= Maxtype; i++){
- if(icmpnames[i])
+ if(icmpnames[i] != nil)
p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
else
p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
}
return p - buf;
}
-
-int
-icmpgc(Proto *icmp)
-{
- return natgc(icmp->ipproto);
-}
-
+
void
icmpinit(Fs *fs)
{
@@ -487,7 +493,7 @@
icmp->stats = icmpstats;
icmp->ctl = nil;
icmp->advise = icmpadvise;
- icmp->gc = icmpgc;
+ icmp->gc = nil;
icmp->ipproto = IP_ICMPPROTO;
icmp->nc = 128;
icmp->ptclsize = 0;
--- a/os/ip/icmp6.c
+++ b/os/ip/icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,41 +10,36 @@
#include "ip.h"
#include "ipv6.h"
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+ InMsgs6,
+ InErrors6,
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6,
+ IcmpCodeErrs6,
+ TargetErrs6,
+ OptlenErrs6,
+ AddrmxpErrs6,
+ RouterAddrErrs6,
-struct ICMPpkt {
- uchar type;
- uchar code;
- uchar cksum[2];
- uchar icmpid[2];
- uchar seq[2];
+ Nstats6,
};
-struct IPICMP {
- Ip6hdr;
- ICMPpkt;
+enum {
+ ICMP_USEAD6 = 40,
};
-struct NdiscC
-{
- IPICMP;
- uchar target[IPaddrlen];
+enum {
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
};
-struct Ndpkt
-{
- NdiscC;
- uchar otype;
- uchar olen; // length in units of 8 octets(incl type, code),
- // 1 for IEEE 802 addresses
- uchar lnaddr[6]; // link-layer address
-};
-
-enum {
- // ICMPv6 types
+enum {
+ /* ICMPv6 types */
EchoReply = 0,
UnreachableV6 = 1,
PacketTooBigV6 = 2,
@@ -69,6 +67,65 @@
Maxtype6 = 137,
};
+enum {
+ MinAdvise = IP6HDR+4, /* minimum needed for us to advise another protocol */
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding */
+#define ICMPHDR \
+ IPV6HDR; \
+ uchar type; \
+ uchar code; \
+ uchar cksum[2]; \
+ uchar icmpid[2]; \
+ uchar seq[2]
+
+struct IPICMP {
+ ICMPHDR;
+ uchar payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar otype;
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+ uchar payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+ ulong stats[Nstats6];
+
+ /* message counts */
+ ulong in[Maxtype6+1];
+ ulong out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+ uchar headers;
+} Icmpcb6;
+
char *icmpnames6[Maxtype6+1] =
{
[EchoReply] "EchoReply",
@@ -95,24 +152,6 @@
[RedirectV6] "RedirectV6",
};
-enum
-{
- InMsgs6,
- InErrors6,
- OutMsgs6,
- CsumErrs6,
- LenErrs6,
- HlenErrs6,
- HoplimErrs6,
- IcmpCodeErrs6,
- TargetErrs6,
- OptlenErrs6,
- AddrmxpErrs6,
- RouterAddrErrs6,
-
- Nstats6,
-};
-
static char *statnames6[Nstats6] =
{
[InMsgs6] "InMsgs",
@@ -129,49 +168,18 @@
[RouterAddrErrs6] "RouterAddrErrs",
};
-typedef struct Icmppriv6
-{
- ulong stats[Nstats6];
-
- /* message counts */
- ulong in[Maxtype6+1];
- ulong out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6
-{
- QLock;
- uchar headers;
-} Icmpcb6;
-
static char *unreachcode[] =
{
-[icmp6_no_route] "no route to destination",
-[icmp6_ad_prohib] "comm with destination administratively prohibited",
-[icmp6_unassigned] "icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach] "address unreachable",
-[icmp6_port_unreach] "port unreachable",
-[icmp6_unkn_code] "icmp unreachable: unknown code",
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code",
};
-enum {
- ICMP_USEAD6 = 40,
-};
-
-enum {
- Oflag = 1<<5,
- Sflag = 1<<6,
- Rflag = 1<<7,
-};
-
-enum {
- slladd = 1,
- tlladd = 2,
- prfinfo = 3,
- redhdr = 4,
- mtuopt = 5,
-};
-
static void icmpkick6(void *x, Block *bp);
static void
@@ -185,13 +193,14 @@
set_cksum(Block *bp)
{
IPICMP *p = (IPICMP *)(bp->rp);
+ int n = blocklen(bp);
- hnputl(p->vcf, 0); // borrow IP header as pseudoheader
- hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+ hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */
+ hnputs(p->ploadlen, n - IP6HDR);
p->proto = 0;
- p->ttl = ICMPv6; // ttl gets set later
+ p->ttl = ICMPv6; /* ttl gets set later */
hnputs(p->cksum, 0);
- hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ hnputs(p->cksum, ptclcsum(bp, 0, n));
p->proto = ICMPv6;
}
@@ -198,7 +207,8 @@
static Block *
newIPICMP(int packetlen)
{
- Block *nbp;
+ Block *nbp;
+
nbp = allocb(packetlen);
nbp->wp += packetlen;
memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
return nbp;
}
-void
+static void
icmpadvise6(Proto *icmp, Block *bp, char *msg)
{
- Conv **c, *s;
- IPICMP *p;
- ushort recid;
+ ushort recid;
+ Conv **c, *s;
+ IPICMP *p;
- p = (IPICMP *) bp->rp;
+ p = (IPICMP *)bp->rp;
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, p->src) == 0)
if(ipcmp(s->raddr, p->dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -230,9 +242,9 @@
static void
icmpkick6(void *x, Block *bp)
{
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
Conv *c = x;
IPICMP *p;
- uchar laddr[IPaddrlen], raddr[IPaddrlen];
Icmppriv6 *ipriv = c->p->priv;
Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
@@ -249,10 +261,10 @@
bp->rp += IPaddrlen;
ipmove(raddr, bp->rp);
bp->rp += IPaddrlen;
- bp = padblock(bp, sizeof(Ip6hdr));
+ bp = padblock(bp, IP6HDR);
}
- if(blocklen(bp) < sizeof(IPICMP)){
+ if(BLEN(bp) < IPICMPSZ){
freeblist(bp);
return;
}
@@ -268,23 +280,20 @@
set_cksum(bp);
p->vcf[0] = 0x06 << 4;
- if(p->type <= Maxtype6)
+ if(p->type <= Maxtype6)
ipriv->out[p->type]++;
ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-char*
+static char*
icmpctl6(Conv *c, char **argv, int argc)
{
Icmpcb6 *icb;
icb = (Icmpcb6*) c->ptcl;
-
- if(argc==1) {
- if(strcmp(argv[0], "headers")==0) {
- icb->headers = 6;
- return nil;
- }
+ if(argc==1 && strcmp(argv[0], "headers")==0) {
+ icb->headers = 6;
+ return nil;
}
return "unknown control request";
}
@@ -292,41 +301,39 @@
static void
goticmpkt6(Proto *icmp, Block *bp, int muxkey)
{
- Conv **c, *s;
- IPICMP *p = (IPICMP *)bp->rp;
- ushort recid;
- uchar *addr;
+ ushort recid;
+ uchar *addr;
+ Conv **c, *s;
+ IPICMP *p = (IPICMP *)bp->rp;
if(muxkey == 0) {
recid = nhgets(p->icmpid);
addr = p->src;
- }
- else {
+ } else {
recid = muxkey;
addr = p->dst;
}
-
- for(c = icmp->conv; *c; c++){
- s = *c;
- if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ for(c = icmp->conv; (s = *c) != nil; c++){
+ if(s->lport == recid)
+ if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
-
freeblist(bp);
}
static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
{
+ uchar addr[IPaddrlen];
IPICMP *p = (IPICMP *)(bp->rp);
- uchar addr[IPaddrlen];
+ if(isv6mcast(p->src))
+ return nil;
ipmove(addr, p->src);
- ipmove(p->src, p->dst);
+ if(!isv6mcast(p->dst))
+ ipmove(p->src, p->dst);
+ else if (!ipv6local(ifc, p->src, 0, addr))
+ return nil;
ipmove(p->dst, addr);
p->type = EchoReplyV6;
set_cksum(bp);
@@ -335,49 +342,43 @@
/*
* sends out an ICMPv6 neighbor solicitation
- * suni == SRC_UNSPEC or SRC_UNI,
+ * suni == SRC_UNSPEC or SRC_UNI,
* tuni == TARG_MULTI => multicast for address resolution,
* and tuni == TARG_UNI => neighbor reachability.
*/
-
-extern void
+void
icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
-
- nbp = newIPICMP(sizeof(Ndpkt));
+ nbp = newIPICMP(NDPKTSZ);
np = (Ndpkt*) nbp->rp;
+ if(suni == SRC_UNSPEC)
+ ipmove(np->src, v6Unspecified);
+ else
+ ipmove(np->src, src);
- if(suni == SRC_UNSPEC)
- memmove(np->src, v6Unspecified, IPaddrlen);
- else
- memmove(np->src, src, IPaddrlen);
-
if(tuni == TARG_UNI)
- memmove(np->dst, targ, IPaddrlen);
+ ipmove(np->dst, targ);
else
ipv62smcast(np->dst, targ);
np->type = NbrSolicit;
np->code = 0;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
if(suni != SRC_UNSPEC) {
- np->otype = SRC_LLADDRESS;
- np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ np->otype = SRC_LLADDR;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
- }
- else {
- int r = sizeof(Ndpkt)-sizeof(NdiscC);
- nbp->wp -= r;
- }
+ } else
+ nbp->wp -= NDPKTSZ - NDISCSZ;
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
/*
* sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
*/
-extern void
+void
icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
- nbp = newIPICMP(sizeof(Ndpkt));
- np = (Ndpkt*) nbp->rp;
+ nbp = newIPICMP(NDPKTSZ);
+ np = (Ndpkt*)nbp->rp;
- memmove(np->src, src, IPaddrlen);
- memmove(np->dst, dst, IPaddrlen);
+ ipmove(np->src, src);
+ ipmove(np->dst, dst);
np->type = NbrAdvert;
np->code = 0;
np->icmpid[0] = flags;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
- np->otype = TARGET_LLADDRESS;
- np->olen = 1;
+ np->otype = TARGET_LLADDR;
+ np->olen = 1;
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrAdvert]++;
- netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+ return;
- if(isv6mcast(p->src))
- goto clean;
+ netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
-
- rlock(ifc);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
- freeblist(nbp);
- if(free)
- goto clean;
- else
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = UnreachableV6;
np->code = code;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[UnreachableV6]++;
- if(free)
+ if(tome)
ipiput6(f, ifc, nbp);
- else {
+ else
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
- return;
- }
-
-clean:
- runlock(ifc);
- freeblist(bp);
}
-extern void
+void
icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
+ netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
+
nbp = newIPICMP(sz);
np = (IPICMP *) nbp->rp;
-
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = TimeExceedV6;
np->code = 0;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
- nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
+ netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ nbp = newIPICMP(sz);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = PacketTooBigV6;
np->code = 0;
hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
* RFC 2461, pages 39-40, pages 57-58.
*/
static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
- int sz, osz, unsp, n, ttl, iplen;
- int pktsz = BLEN(bp);
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *) packet;
- Ndpkt *np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+ int sz, osz, unsp, ttl;
+ int pktsz = BLEN(bp);
+ uchar *packet = bp->rp;
+ IPICMP *p = (IPICMP *) packet;
+ Ndpkt *np;
- USED(ifc);
- n = blocklen(bp);
- if(n < sizeof(IPICMP)) {
+ if(pktsz < IPICMPSZ) {
ipriv->stats[HlenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+ netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
goto err;
}
- iplen = nhgets(p->ploadlen);
- if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
- ipriv->stats[LenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto err;
- }
-
- // Rather than construct explicit pseudoheader, overwrite IPv6 header
+ /* Rather than construct explicit pseudoheader, overwrite IPv6 header */
if(p->proto != ICMPv6) {
- // This code assumes no extension headers!!!
+ /* This code assumes no extension headers!!! */
netlog(icmp->f, Logicmp, "icmp error: extension header\n");
goto err;
}
@@ -586,7 +553,7 @@
ttl = p->ttl;
p->ttl = p->proto;
p->proto = 0;
- if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+ if(ptclcsum(bp, 0, pktsz)) {
ipriv->stats[CsumErrs6]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto err;
@@ -595,19 +562,16 @@
p->ttl = ttl;
/* additional tests for some pkt types */
- if( (p->type == NbrSolicit) ||
- (p->type == NbrAdvert) ||
- (p->type == RouterAdvert) ||
- (p->type == RouterSolicit) ||
- (p->type == RedirectV6) ) {
-
+ if (p->type == NbrSolicit || p->type == NbrAdvert ||
+ p->type == RouterAdvert || p->type == RouterSolicit ||
+ p->type == RedirectV6) {
if(p->ttl != HOP_LIMIT) {
- ipriv->stats[HoplimErrs6]++;
- goto err;
+ ipriv->stats[HoplimErrs6]++;
+ goto err;
}
if(p->code != 0) {
- ipriv->stats[IcmpCodeErrs6]++;
- goto err;
+ ipriv->stats[IcmpCodeErrs6]++;
+ goto err;
}
switch (p->type) {
@@ -615,82 +579,78 @@
case NbrAdvert:
np = (Ndpkt*) p;
if(isv6mcast(np->target)) {
- ipriv->stats[TargetErrs6]++;
- goto err;
+ ipriv->stats[TargetErrs6]++;
+ goto err;
}
- if(optexsts(np) && (np->olen == 0)) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ if(optexsts(np) && np->olen == 0) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
-
- if(p->type == NbrSolicit) {
- if(ipcmp(np->src, v6Unspecified) == 0) {
- if(!issmcast(np->dst) || optexsts(np)) {
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
- }
+
+ if (p->type == NbrSolicit &&
+ ipcmp(np->src, v6Unspecified) == 0)
+ if(!issmcast(np->dst) || optexsts(np)) {
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
-
- if(p->type == NbrAdvert) {
- if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
+
+ if(p->type == NbrAdvert)
+ if(isv6mcast(np->dst) &&
+ (nhgets(np->icmpid) & Sflag)){
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
break;
-
+
case RouterAdvert:
- if(pktsz - sizeof(Ip6hdr) < 16) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 16) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
if(!islinklocal(p->src)) {
- ipriv->stats[RouterAddrErrs6]++;
- goto err;
+ ipriv->stats[RouterAddrErrs6]++;
+ goto err;
}
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
if(osz <= 0) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
- }
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
+ }
sz += 8*osz;
}
break;
-
+
case RouterSolicit:
- if(pktsz - sizeof(Ip6hdr) < 8) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 8) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
unsp = (ipcmp(p->src, v6Unspecified) == 0);
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
- if((osz <= 0) ||
- (unsp && (*(packet+sz) == slladd)) ) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
+ if(osz <= 0 ||
+ (unsp && packet[sz] == SRC_LLADDR)) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
sz += 8*osz;
}
break;
-
+
case RedirectV6:
- //to be filled in
+ /* to be filled in */
break;
-
+
default:
goto err;
}
}
-
return 1;
-
err:
- ipriv->stats[InErrors6]++;
+ ipriv->stats[InErrors6]++;
return 0;
}
@@ -700,169 +660,162 @@
Iplifc *lifc;
int t;
- rlock(ifc);
- if(ipproxyifc(f, ifc, target)) {
- runlock(ifc);
- return t_uniproxy;
- }
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, target) == 0) {
- t = (lifc->tentative) ? t_unitent : t_unirany;
- runlock(ifc);
- return t;
- }
- }
-
- runlock(ifc);
- return 0;
+ if((lifc = iplocalonifc(ifc, target)) != nil)
+ t = lifc->tentative? Tunitent: Tunirany;
+ else if(ipproxyifc(f, ifc, target))
+ t = Tuniproxy;
+ else
+ t = 0;
+ return t;
}
static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
{
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *)packet;
- Icmppriv6 *ipriv = icmp->priv;
- Block *r;
- Proto *pr;
- char *msg, m2[128];
- Ndpkt* np;
+ char *msg, m2[128];
uchar pktflags;
- uchar lsrc[IPaddrlen];
- int refresh = 1;
+ uchar ia[IPaddrlen];
+ Block *r;
+ IPICMP *p;
+ Icmppriv6 *ipriv = icmp->priv;
Iplifc *lifc;
+ Ndpkt* np;
+ Proto *pr;
- if(!valid(icmp, ipifc, bp, ipriv))
- goto raise;
+ bp = concatblock(bp);
+ p = (IPICMP*)bp->rp;
- if(p->type <= Maxtype6)
- ipriv->in[p->type]++;
- else
+ if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
goto raise;
+ ipriv->in[p->type]++;
+
switch(p->type) {
case EchoRequestV6:
- r = mkechoreply6(bp);
+ r = mkechoreply6(bp, ifc);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case UnreachableV6:
- if(p->code > 4)
- msg = unreachcode[icmp6_unkn_code];
+ if(p->code >= nelem(unreachcode))
+ msg = unreachcode[Icmp6_unknown];
else
msg = unreachcode[p->code];
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
+ Advise:
+ bp->rp += IPICMPSZ;
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs6]++;
goto raise;
}
p = (IPICMP *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
- }
- bp->rp -= sizeof(IPICMP);
- goticmpkt6(icmp, bp, 0);
- break;
-
- case TimeExceedV6:
- if(p->code == 0){
- sprint(m2, "ttl exceeded at %I", p->src);
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
- ipriv->stats[LenErrs6]++;
- goto raise;
+ /* get rid of fragment header if this is the first fragment */
+ if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+ Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+ if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */
+ p->proto = fh->nexthdr;
+ /* copy down payload over fragment header */
+ bp->rp += IP6HDR;
+ bp->wp -= IP6FHDR;
+ memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+ hnputs(p->ploadlen, BLEN(bp));
+ bp->rp -= IP6HDR;
}
- p = (IPICMP *)bp->rp;
+ }
+ if(p->proto != FH){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
+ (*pr->advise)(pr, bp, msg);
return;
}
- bp->rp -= sizeof(IPICMP);
}
+ bp->rp -= IPICMPSZ;
+ goticmpkt6(icmp, bp, 0);
+ break;
+ case TimeExceedV6:
+ if(p->code == 0){
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+ goto Advise;
+ }
+ if(p->code == 1){
+ snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+ goto Advise;
+ }
goticmpkt6(icmp, bp, 0);
break;
+ case PacketTooBigV6:
+ snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+ (ulong)nhgetl(p->icmpid), p->src);
+ goto Advise;
+
case RouterAdvert:
case RouterSolicit:
- /* using lsrc as a temp, munge hdr for goticmp6
- memmove(lsrc, p->src, IPaddrlen);
- memmove(p->src, p->dst, IPaddrlen);
- memmove(p->dst, lsrc, IPaddrlen); */
-
goticmpkt6(icmp, bp, p->type);
break;
case NbrSolicit:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
pktflags = 0;
- switch (targettype(icmp->f, ipifc, np->target)) {
- case t_unirany:
+ if(ifc->sendra6)
+ pktflags |= Rflag;
+ switch (targettype(icmp->f, ifc, np->target)) {
+ case Tunirany:
pktflags |= Oflag;
/* fall through */
- case t_uniproxy:
- if(ipcmp(np->src, v6Unspecified) != 0) {
- arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+ case Tuniproxy:
+ if(ipv6local(ifc, ia, 0, np->src)) {
+ if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+ break;
pktflags |= Sflag;
- }
- if(ipv6local(ipifc, lsrc)) {
- icmpna(icmp->f, lsrc,
- (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
- np->target, ipifc->mac, pktflags);
- }
- else
- freeblist(bp);
+ } else
+ ipmove(ia, np->target);
+ icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+ np->target, ifc->mac, pktflags);
break;
-
- case t_unitent:
- /* not clear what needs to be done. send up
- * an icmp mesg saying don't use this address? */
-
- default:
- freeblist(bp);
+ case Tunitent:
+ /*
+ * not clear what needs to be done. send up
+ * an icmp mesg saying don't use this address?
+ */
+ break;
}
-
+ freeblist(bp);
break;
case NbrAdvert:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
- /* if the target address matches one of the local interface
- * address and the local interface address has tentative bit set,
- * then insert into ARP table. this is so the duplication address
- * detection part of ipconfig can discover duplication through
- * the arp table
+ /*
+ * if the target address matches one of the local interface
+ * addresses and the local interface address has tentative bit
+ * set, insert into ARP table. this is so the duplicate address
+ * detection part of ipconfig can discover duplication through
+ * the arp table.
*/
- lifc = iplocalonifc(ipifc, np->target);
- if(lifc && lifc->tentative)
- refresh = 0;
- arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+ lifc = iplocalonifc(ifc, np->target);
+ if(lifc != nil && lifc->tentative)
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+ else if(ipv6local(ifc, ia, 0, np->target))
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
freeblist(bp);
break;
- case PacketTooBigV6:
-
default:
goticmpkt6(icmp, bp, 0);
break;
}
return;
-
raise:
freeblist(bp);
-
}
-int
+static int
icmpstats6(Proto *icmp6, char *buf, int len)
{
Icmppriv6 *priv;
@@ -874,23 +827,28 @@
e = p+len;
for(i = 0; i < Nstats6; i++)
p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
- for(i = 0; i <= Maxtype6; i++){
+ for(i = 0; i <= Maxtype6; i++)
if(icmpnames6[i])
- p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/* else
- p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
- }
+ p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+ priv->in[i], priv->out[i]);
return p - buf;
}
-// need to import from icmp.c
+/* import from icmp.c */
extern int icmpstate(Conv *c, char *state, int n);
extern char* icmpannounce(Conv *c, char **argv, int argc);
extern char* icmpconnect(Conv *c, char **argv, int argc);
extern void icmpclose(Conv *c);
+static void
+icmpclose6(Conv *c)
+{
+ Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+ icb->headers = 0;
+ icmpclose(c);
+}
+
void
icmp6init(Fs *fs)
{
@@ -902,7 +860,7 @@
icmp6->announce = icmpannounce;
icmp6->state = icmpstate;
icmp6->create = icmpcreate6;
- icmp6->close = icmpclose;
+ icmp6->close = icmpclose6;
icmp6->rcv = icmpiput6;
icmp6->stats = icmpstats6;
icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
Fsproto(fs, icmp6);
}
-
--- a/os/ip/igmp.c
+++ b/os/ip/igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -40,8 +44,12 @@
uchar unused;
uchar igmpcksum[2]; /* checksum of igmp portion */
uchar group[IPaddrlen]; /* multicast group */
+
+ uchar payload[];
};
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
/*
* lists for group reports
*/
@@ -49,7 +57,7 @@
struct IGMPrep
{
IGMPrep *next;
- Media *m;
+ Medium *m;
int ticks;
Multicast *multi;
};
@@ -76,19 +84,17 @@
} stats;
void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
{
IGMPpkt *p;
Block *bp;
bp = allocb(sizeof(IGMPpkt));
- if(bp == nil)
- return;
p = (IGMPpkt*)bp->wp;
p->vihl = IP_VER4;
- bp->wp += sizeof(IGMPpkt);
- memset(bp->rp, 0, sizeof(IGMPpkt));
- hnputl(p->src, Mediagetaddr(m));
+ bp->wp += IGMPPKTSZ;
+ memset(bp->rp, 0, IGMPPKTSZ);
+ hnputl(p->src, Mediumgetaddr(m));
hnputl(p->dst, Ipallsys);
p->vertype = (1<<4) | IGMPreport;
p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
}
void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
{
int n;
IGMPpkt *ghp;
@@ -206,7 +212,7 @@
if(rp != nil)
break; /* already reporting */
- mp = Mediacopymulti(m);
+ mp = Mediumcopymulti(m);
if(mp == nil)
break;
@@ -285,7 +291,7 @@
igmp.ptclsize = 0;
igmpreportfn = igmpsendreport;
- kproc("igmpproc", igmpproc, 0, 0);
+ kproc("igmpproc", igmpproc, 0);
Fsproto(fs, &igmp);
}
--- a/os/ip/il.c
+++ b/os/ip/il.c
@@ -189,7 +189,7 @@
{
Ipht ht;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
ulong csumerr; /* checksum errors */
ulong hlenerr; /* header length error */
@@ -208,7 +208,7 @@
void ilrcvmsg(Conv*, Block*);
-void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
void ilackq(Ilcb*, Block*);
void ilprocess(Conv*, Ilhdr*, Block*);
void ilpullup(Conv*);
@@ -251,6 +251,9 @@
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
+ if(c->ipversion != V4)
+ return "only IP version 4 supported";
+
return ilstart(c, IL_CONNECT, fast);
}
@@ -260,7 +263,7 @@
Ilcb *ic;
ic = (Ilcb*)(c->ptcl);
- return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
ilstates[ic->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -548,6 +551,9 @@
ih = (Ilhdr *)bp->rp;
plen = blocklen(bp);
+ if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+ goto raise; /* ignore non V4 packets */
+
if(plen < IL_IPSIZE+IL_HDRSIZE){
netlog(il->f, Logil, "il: hlenerr\n");
ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
else
st = iltype[ih->iltype];
ipriv->stats[CsumErrs]++;
- netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+ netlog(il->f, Logil, "il: cksum %ux %s, pkt(%ux id %ud ack %I/%d->%d)\n",
csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -595,7 +601,7 @@
else
st = iltype[ih->iltype];
ilreject(il->f, ih); /* no channel and not sync */
- netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+ netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -829,7 +835,7 @@
c = ic->conv;
id = nhgetl(h->ilid);
- netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+ netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
ic->rexmit, ic->timeout,
c->raddr, c->lport, c->rport);
@@ -852,7 +858,7 @@
ic = (Ilcb*)s->ptcl;
USED(ic);
- netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %d ack %ud %ud->%ud) ",
ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
ic->next, iltype[h->iltype], nhgetl(h->ilid),
nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
_ilprocess(s, h, bp);
- netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
}
void
@@ -917,17 +923,12 @@
bp->list = nil;
dlen = nhgets(oh->illen)-IL_HDRSIZE;
bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+
/*
* Upper levels don't know about multiple-block
* messages so copy all into one (yick).
*/
- bp = concatblock(bp);
- if(bp == 0)
- panic("ilpullup");
- bp = packblock(bp);
- if(bp == 0)
- panic("ilpullup2");
- qpass(s->rq, bp);
+ qpass(s->rq, packblock(concatblock(bp)));
}
qunlock(&ic->outo);
}
@@ -948,7 +949,7 @@
id = nhgetl(h->ilid);
/* Window checks */
if(id <= ic->recvd || id > ic->recvd+ic->window) {
- netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
freeblist(bp);
return;
@@ -983,7 +984,7 @@
qunlock(&ic->outo);
}
-void
+int
ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
{
Ilhdr *ih;
@@ -1034,7 +1035,7 @@
hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
if(ipc==nil)
- panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+ panic("ipc is nil caller is %#p", getcallerpc(&ipc));
if(ipc->p==nil)
panic("ipc->p is nil");
@@ -1042,7 +1043,7 @@
iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
nhgets(ih->ilsrc), nhgets(ih->ildst));
- ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+ return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
}
void
@@ -1145,6 +1146,8 @@
il = x;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Iltickms);
for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
qlock(&ipriv->apl);
if(ipriv->ackprocstarted == 0){
sprint(kpname, "#I%dilack", c->p->f->dev);
- kproc(kpname, ilackproc, c->p, 0);
+ kproc(kpname, ilackproc, c->p);
ipriv->ackprocstarted = 1;
}
qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
case IL_CONNECT:
ic->state = Ilsyncer;
iphtadd(&ipriv->ht, c);
- ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+ if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+ ilhangup(c, "no route");
break;
}
@@ -1332,6 +1336,8 @@
if(s->lport == psource)
if(ipcmp(s->laddr, source) == 0)
if(ipcmp(s->raddr, dest) == 0){
+ if(s->ignoreadvice)
+ break;
qunlock(il);
ic = (Ilcb*)s->ptcl;
switch(ic->state){
@@ -1380,12 +1386,6 @@
}
}
-int
-ilgc(Proto *il)
-{
- return natgc(il->ipproto);
-}
-
void
ilinit(Fs *f)
{
@@ -1406,7 +1406,7 @@
il->advise = iladvise;
il->stats = ilxstats;
il->inuse = ilinuse;
- il->gc = ilgc;
+ il->gc = nil;
il->ipproto = IP_ILPROTO;
il->nc = scalednconv();
il->ptclsize = sizeof(Ilcb);
--- /dev/null
+++ b/os/ip/inferno.c
@@ -1,0 +1,28 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+
+/*
+ * some hacks for commonality twixt inferno and plan9
+ */
+
+char*
+commonuser(void)
+{
+ return up->user;
+}
+
+char*
+commonerror(void)
+{
+ return up->errstr;
+}
+
+int
+bootpread(char*, ulong, int)
+{
+ return 0;
+}
--- a/os/ip/ip.c
+++ b/os/ip/ip.c
@@ -7,94 +7,6 @@
#include "ip.h"
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = 64*1024, /* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
-
- Nstats,
-};
-
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
-
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
-
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
static char *statnames[] =
{
[Forwarding] "Forwarding",
@@ -118,45 +30,11 @@
[FragCreates] "FragCreates",
};
-#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
+static Block* ip4reassemble(IP*, int, Block*);
+static void ipfragfree4(IP*, Fragment4*);
+static Fragment4* ipfragallo4(IP*);
-ushort ipcsum(uchar*);
-Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void ipfragfree4(IP*, Fragment4*);
-Fragment4* ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
- V6params *v6p;
-
- v6p = smalloc(sizeof(V6params));
-
- v6p->rp.mflag = 0; // default not managed
- v6p->rp.oflag = 0;
- v6p->rp.maxraint = 600000; // millisecs
- v6p->rp.minraint = 200000;
- v6p->rp.linkmtu = 0; // no mtu sent
- v6p->rp.reachtime = 0;
- v6p->rp.rxmitra = 0;
- v6p->rp.ttl = MAXTTL;
- v6p->rp.routerlt = 3*(v6p->rp.maxraint);
-
- v6p->hp.rxmithost = 1000; // v6 RETRANS_TIMER
-
- v6p->cdrouter = -1;
-
- f->v6p = v6p;
-
-}
-
-void
+static void
initfrag(IP *ip, int size)
{
Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
IP *ip;
ip = smalloc(sizeof(IP));
+ ip->stats[DefaultTTL] = MAXTTL;
initfrag(ip, 100);
f->ip = ip;
@@ -202,11 +81,11 @@
if(f->ip->iprouting==0)
f->ip->stats[Forwarding] = 2;
else
- f->ip->stats[Forwarding] = 1;
+ f->ip->stats[Forwarding] = 1;
}
int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
Ipifc *ifc;
uchar *gate;
@@ -213,66 +92,41 @@
ulong fragoff;
Block *xp, *nb;
Ip4hdr *eh, *feh;
- int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
- Route *r, *sr;
+ int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+ Route *r;
IP *ip;
int rv = 0;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip4hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)bp->rp;
+ assert(BLEN(bp) >= IP4HDR);
len = blocklen(bp);
-
- if(gating){
- chunk = nhgets(eh->length);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk < len)
- len = chunk;
- }
if(len >= IP_MAX){
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v4lookup(f, eh->dst, c);
- if(r == nil){
+ r = v4lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v4lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v4.gate;
- if(!gating)
- eh->vihl = IP_VER4|IP_HLEN4;
- eh->ttl = ttl;
- if(!gating)
- eh->tos = tos;
-
- if(!canrlock(ifc))
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
+ }
if(waserror()){
runlock(ifc);
nexterror();
@@ -280,17 +134,18 @@
if(ifc->m == nil)
goto raise;
- /* Output NAT */
- if(nato(bp, ifc, f) != 0)
- goto raise;
+ if(!gating){
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->tos = tos;
+ }
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- if(!gating)
- hnputs(eh->id, incref(&ip->id4));
hnputs(eh->length, len);
if(!gating){
+ hnputs(eh->id, incref(&ip->id4));
eh->frag[0] = 0;
eh->frag[1] = 0;
}
@@ -297,31 +152,31 @@
eh->cksum[0] = 0;
eh->cksum[1] = 0;
hnputs(eh->cksum, ipcsum(&eh->vihl));
- ifc->m->bwrite(ifc, bp, V4, gate);
+
+ ipifcoput(ifc, bp, V4, gate);
runlock(ifc);
poperror();
return 0;
}
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
if(eh->frag[0] & (IP_DF>>8)){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
icmpcantfrag(f, bp, medialen);
- netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
goto raise;
}
- seglen = (medialen - IP4HDR) & ~7;
+ hlen = (eh->vihl & 0xF)<<2;
+ seglen = (medialen - hlen) & ~7;
if(seglen < 8){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
goto raise;
}
- dlen = len - IP4HDR;
+ dlen = len - hlen;
xp = bp;
if(gating)
lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
else
lid = incref(&ip->id4);
- offset = IP4HDR;
- while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset = hlen;
+ while(offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
@@ -341,30 +196,30 @@
fragoff = 0;
dlen += fragoff;
for(; fragoff < dlen; fragoff += seglen) {
- nb = allocb(IP4HDR+seglen);
- feh = (Ip4hdr*)(nb->rp);
+ nb = allocb(hlen+seglen);
+ feh = (Ip4hdr*)nb->rp;
- memmove(nb->wp, eh, IP4HDR);
- nb->wp += IP4HDR;
+ memmove(nb->wp, eh, hlen);
+ nb->wp += hlen;
if((fragoff + seglen) >= dlen) {
seglen = dlen - fragoff;
hnputs(feh->frag, fragoff>>3);
}
- else
+ else
hnputs(feh->frag, (fragoff>>3)|IP_MF);
- hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->length, seglen + hlen);
hnputs(feh->id, lid);
/* Copy up the data area */
chunk = seglen;
while(chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -376,12 +231,13 @@
chunk -= blklen;
if(xp->rp == xp->wp)
xp = xp->next;
- }
+ }
feh->cksum[0] = 0;
feh->cksum[1] = 0;
hnputs(feh->cksum, ipcsum(&feh->vihl));
- ifc->m->bwrite(ifc, nb, V4, gate);
+
+ ipifcoput(ifc, nb, V4, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
void
ipiput4(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos, proto, olen;
+ int hl, len, hop, tos;
+ uchar v6dst[IPaddrlen];
+ ushort frag;
Ip4hdr *h;
Proto *p;
- ushort frag;
- int notforme;
- uchar *dp, v6dst[IPaddrlen];
IP *ip;
- Route *r;
- if(BLKIPVER(bp) != IP_VER4) {
+ if((bp->rp[0]&0xF0) != IP_VER4) {
ipiput6(f, ifc, bp);
return;
}
@@ -430,58 +283,45 @@
return;
}
- h = (Ip4hdr*)(bp->rp);
-
- /* Input NAT */
- nati(bp, ifc);
-
- /* dump anything that whose header doesn't checksum */
+ h = (Ip4hdr*)bp->rp;
+ hl = (h->vihl & 0xF)<<2;
+ if(hl < IP4HDR || hl > BLEN(bp)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+ goto drop;
+ }
if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: checksum error %V\n", h->src);
- freeblist(bp);
+ netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+ goto drop;
+ }
+ len = nhgets(h->length);
+ if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+ if(bp != nil)
+ goto drop;
return;
}
- v4tov6(v6dst, h->dst);
- notforme = ipforme(f, v6dst) == 0;
+ h = (Ip4hdr*)bp->rp;
- /* Check header length and version */
- if((h->vihl&0x0F) != IP_HLEN4) {
- hl = (h->vihl&0xF)<<2;
- if(hl < (IP_HLEN4<<2)) {
- ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
- freeblist(bp);
- return;
- }
- /* If this is not routed strip off the options */
- if(notforme == 0) {
- olen = nhgets(h->length);
- dp = bp->rp + (hl - (IP_HLEN4<<2));
- memmove(dp, h, IP_HLEN4<<2);
- bp->rp = dp;
- h = (Ip4hdr*)(bp->rp);
- h->vihl = (IP_VER4|IP_HLEN4);
- hnputs(h->length, olen-hl+(IP_HLEN4<<2));
- }
- }
-
/* route */
- if(notforme) {
- Conv conv;
+ v4tov6(v6dst, h->dst);
+ if(!ipforme(f, v6dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
- if(!ip->iprouting){
- freeb(bp);
- return;
- }
+ if(!ip->iprouting)
+ goto drop;
/* don't forward to source's network */
- conv.r = nil;
- r = v4lookup(f, h->dst, &conv);
- if(r == nil || r->ifc == ifc){
+ rh.r = nil;
+ r = v4lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
hop = h->ttl;
if(hop < 1) {
ip->stats[InHdrErrors]++;
- icmpttlexceeded(f, ifc->lifc->local, bp);
- freeblist(bp);
- return;
+ icmpttlexceeded(f, ifc, bp);
+ goto drop;
}
/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
- if(r->ifc->reassemble){
+ if(nifc->reassemble){
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
}
@@ -511,27 +346,30 @@
ip->stats[ForwDatagrams]++;
tos = h->tos;
hop = h->ttl;
- ipoput4(f, bp, 1, hop - 1, tos, &conv);
+ ipoput4(f, bp, 1, hop - 1, tos, &rh);
return;
}
+ /* If this is not routed strip off the options */
+ if(hl > IP4HDR) {
+ hl -= IP4HDR;
+ len -= hl;
+ bp->rp += hl;
+ memmove(bp->rp, h, IP4HDR);
+ h = (Ip4hdr*)bp->rp;
+ h->vihl = IP_VER4|IP_HLEN4;
+ hnputs(h->length, len);
+ }
+
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
- /* don't let any frag info go up the stack */
- h->frag[0] = 0;
- h->frag[1] = 0;
-
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
}
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -550,45 +389,43 @@
int i;
ip = f->ip;
- ip->stats[DefaultTTL] = MAXTTL;
-
p = buf;
e = p+len;
- for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+ for(i = 0; i < Nipstats; i++)
+ p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
return p - buf;
}
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
{
- int fend;
+ int ovlap, fragsize, len;
+ ulong src, dst;
ushort id;
+ Block *bl, **l, *prev;
Fragment4 *f, *fnext;
- ulong src, dst;
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Ipfrag *fp, *fq;
+ Ip4hdr *ih;
+ /*
+ * block lists are too hard, concatblock into a single block
+ */
+ bp = concatblock(bp);
+
+ ih = (Ip4hdr*)bp->rp;
src = nhgetl(ih->src);
dst = nhgetl(ih->dst);
id = nhgets(ih->id);
+ fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
- /*
- * block lists are too hard, pullupblock into a single block
- */
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip4hdr*)(bp->rp);
- }
-
qlock(&ip->fraglock4);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead4; f; f = fnext){
+ for(f = ip->flisthead4; f != nil; f = fnext){
fnext = f->next; /* because ipfragfree4 changes the list */
- if(f->src == src && f->dst == dst && f->id == id)
+ if(f->id == id && f->src == src && f->dst == dst)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
* and get rid of any fragments that might go
* with it.
*/
- if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+ if((offset & (IP_MF|IP_FO)) == 0) {
if(f != nil) {
- ipfragfree4(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree4(ip, f);
}
qunlock(&ip->fraglock4);
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset<<3;
- BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = (offset & IP_FO)<<3;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -627,8 +465,9 @@
f->blist = bp;
- qunlock(&ip->fraglock4);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock4);
+
return nil;
}
@@ -638,7 +477,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -645,15 +484,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock4);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -662,26 +502,26 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
if(ovlap <= 0)
break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
- (*l)->rp += ovlap;
+ if(ovlap < fq->flen) {
+ /* move up ip header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -689,35 +529,50 @@
* look for a complete packet. if we get to a fragment
* without IP_MF set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
- if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
- bl = f->blist;
- len = nhgets(BLKIP(bl)->length);
- bl->wp = bl->rp + len;
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += IP4HDR;
- bl->wp = bl->rp + fragsize;
- }
+ ih = (Ip4hdr*)bl->rp;
+ if(ih->frag[0]&(IP_MF>>8))
+ continue;
- bl = f->blist;
- f->blist = nil;
+ bl = f->blist;
+ len = BLEN(bl);
+
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
+
+ if(len >= IP_MAX){
ipfragfree4(ip, f);
- ih = BLKIP(bl);
- hnputs(ih->length, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock4);
- ip->stats[ReasmOKs]++;
- return bl;
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree4(ip, f);
+
+ ih = (Ip4hdr*)bl->rp;
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputs(ih->length, len);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock4);
+
+ return bl;
}
qunlock(&ip->fraglock4);
return nil;
@@ -726,20 +581,20 @@
/*
* ipfragfree4 - Free a list of fragments - assume hold fraglock4
*/
-void
+static void
ipfragfree4(IP *ip, Fragment4 *frag)
{
Fragment4 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- frag->src = 0;
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ frag->src = 0;
+ frag->dst = 0;
l = &ip->flisthead4;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -755,7 +610,7 @@
/*
* ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
*/
-Fragment4 *
+static Fragment4*
ipfragallo4(IP *ip)
{
Fragment4 *f;
@@ -762,7 +617,7 @@
while(ip->fragfree4 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead4; f->next; f = f->next)
+ for(f = ip->flisthead4; f->next != nil; f = f->next)
;
ipfragfree4(ip, f);
}
--- a/os/ip/ip.h
+++ b/os/ip/ip.h
@@ -1,35 +1,33 @@
typedef struct Conv Conv;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
typedef struct Fs Fs;
typedef union Hwaddr Hwaddr;
typedef struct IP IP;
typedef struct IPaux IPaux;
+typedef struct Ip4hdr Ip4hdr;
+typedef struct Ipfrag Ipfrag;
typedef struct Ipself Ipself;
typedef struct Ipselftab Ipselftab;
typedef struct Iplink Iplink;
typedef struct Iplifc Iplifc;
typedef struct Ipmulti Ipmulti;
-typedef struct IProuter IProuter;
typedef struct Ipifc Ipifc;
typedef struct Iphash Iphash;
typedef struct Ipht Ipht;
typedef struct Netlog Netlog;
-typedef struct Ifclog Ifclog;
typedef struct Medium Medium;
typedef struct Proto Proto;
typedef struct Arpent Arpent;
typedef struct Arp Arp;
typedef struct Route Route;
+typedef struct Routehint Routehint;
typedef struct Routerparams Routerparams;
typedef struct Hostparams Hostparams;
-typedef struct V6router V6router;
-typedef struct V6params V6params;
+typedef struct v6params v6params;
-typedef struct Ip4hdr Ip4hdr;
-typedef struct Nat Nat;
-
#pragma incomplete Arp
-#pragma incomplete Ifclog
#pragma incomplete Ipself
#pragma incomplete Ipselftab
#pragma incomplete IP
@@ -39,10 +37,9 @@
{
Addrlen= 64,
Maxproto= 20,
- Nhash= 64,
- Maxincall= 5,
- Nchans= 16383,
- MAClen= 16, /* longest mac address */
+ Maxincall= 10,
+ Nchans= 1024,
+ MAClen= 8, /* longest mac address */
MAXTTL= 255,
DFLTTOS= 0,
@@ -57,6 +54,12 @@
V6= 6,
IP_VER4= 0x40,
IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP_FO= 0x1fff, /* v4: Fragment offset */
+ IP4HDR= IP_HLEN4<<2, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
/* 2^Lroot trees in the root table */
Lroot= 10,
@@ -73,6 +76,79 @@
Connected= 4,
};
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nipstats,
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+ uchar payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+ uvlong stats[Nipstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
/* on the wire packet header */
struct Ip4hdr
{
@@ -86,9 +162,14 @@
uchar cksum[2]; /* Header checksum */
uchar src[4]; /* IP source */
uchar dst[4]; /* IP destination */
- uchar data[1]; /* start of data */
};
+struct Routehint
+{
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
/*
* one per conversation directory
*/
@@ -100,9 +181,9 @@
Proto* p;
int restricted; /* remote port is restricted */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
uint ttl; /* max time to live */
uint tos; /* type of service */
- int ignoreadvice; /* don't terminate connection on icmp errors */
uchar ipversion;
uchar laddr[IPaddrlen]; /* local IP address */
@@ -139,8 +220,7 @@
void* ptcl; /* protocol specific stuff */
- Route *r; /* last route used */
- ulong rgen; /* routetable generation for *r */
+ Routehint;
};
struct Medium
@@ -161,18 +241,8 @@
/* process packets written to 'data' */
void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
- /* routes for router boards */
- void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
- void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
- void (*flushroutes)(Ipifc *ifc);
-
- /* for routing multicast groups */
- void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
- void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
/* address resolution */
- void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
- void (*areg)(Ipifc*, uchar*); /* register */
+ void (*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
/* v6 address generation */
void (*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
uchar mask[IPaddrlen];
uchar remote[IPaddrlen];
uchar net[IPaddrlen];
+ uchar type; /* route type */
uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
uchar onlink; /* =1 => onlink, =0 offlink. */
uchar autoflag; /* v6 autonomous flag */
- long validlt; /* v6 valid lifetime */
- long preflt; /* v6 preferred lifetime */
- long origint; /* time when addr was added */
+ ulong validlt; /* v6 valid lifetime */
+ ulong preflt; /* v6 preferred lifetime */
+ ulong origint; /* time when addr was added */
Iplink *link; /* addresses linked to this lifc */
Iplifc *next;
};
@@ -203,25 +274,25 @@
Ipself *self;
Iplifc *lifc;
Iplink *selflink; /* next link for this local address */
- Iplink *lifclink; /* next link for this ifc */
- ulong expire;
+ Iplink *lifclink; /* next link for this lifc */
Iplink *next; /* free list */
+ ulong expire;
int ref;
};
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
/* default values, one per stack */
struct Routerparams {
- int mflag;
- int oflag;
- int maxraint;
- int minraint;
- int linkmtu;
- int reachtime;
- int rxmitra;
- int ttl;
- int routerlt;
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
};
struct Hostparams {
@@ -231,22 +302,18 @@
struct Ipifc
{
RWlock;
-
+
Conv *conv; /* link to its conversation structure */
char dev[64]; /* device we're attached to */
Medium *m; /* Media pointer */
int maxtu; /* Maximum transfer unit */
int mintu; /* Minumum tranfer unit */
- int mbps; /* megabits per second */
void *arg; /* medium specific */
- int reassemble; /* reassemble IP packets before forwarding */
- /* these are used so that we can unbind on the fly */
- Lock idlock;
+ uchar reflect; /* allow forwarded packets to go out the same interface */
+ uchar reassemble; /* reassemble IP packets before forwarding to this interface */
+
uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
- int ref; /* number of proc's using this ipifc */
- Rendez wait; /* where unbinder waits for ref == 0 */
- int unbinding;
uchar mac[MAClen]; /* MAC address */
@@ -255,10 +322,16 @@
ulong in, out; /* message statistics */
ulong inerr, outerr; /* ... */
- uchar sendra6; /* == 1 => send router advs on this ifc */
- uchar recvra6; /* == 1 => recv router advs on this ifc */
- Routerparams rp; /* router parameters as in RFC 2461, pp.40--43.
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
used only if node is router */
+
+ int speed; /* link speed in bits per second */
+ int delay; /* burst delay in ms */
+ int burst; /* burst delay in bytes */
+ int load; /* bytes in flight */
+ ulong ticks;
};
/*
@@ -330,20 +403,11 @@
int nc; /* number of conversations */
int ac;
Qid qid; /* qid for protocol directory */
- ushort nextport;
ushort nextrport;
void *priv;
};
-/*
- * Stream for sending packets to user level
- */
-struct IProuter {
- QLock;
- int opens;
- Queue *q;
-};
/*
* one per IP protocol stack
@@ -362,8 +426,7 @@
IP *ip;
Ipselftab *self;
Arp *arp;
- V6params *v6p;
- IProuter iprouter;
+ v6params *v6p;
Route *v4root[1<<Lroot]; /* v4 routing forest */
Route *v6root[1<<Lroot]; /* v6 routing forest */
@@ -370,7 +433,6 @@
Route *queue; /* used as temp when reinjecting routes */
Netlog *alog;
- Ifclog *ilog;
char ndb[1024]; /* an ndb entry for this interface */
int ndbvers;
@@ -377,23 +439,10 @@
long ndbmtime;
};
-/* one per default router known to host */
-struct V6router {
- uchar inuse;
- Ipifc *ifc;
- int ifcid;
- uchar routeraddr[IPaddrlen];
- long ltorigin;
- Routerparams rp;
-};
-
-struct V6params
+struct v6params
{
Routerparams rp; /* v6 params, one copy per node now */
Hostparams hp;
- V6router v6rlist[3]; /* max 3 default routers, currently */
- int cdrouter; /* uses only v6rlist[cdrouter] if */
- /* cdrouter >= 0. */
};
@@ -410,8 +459,7 @@
char* Fsstdbind(Conv*, char**, int);
ulong scalednconv(void);
void closeconv(Conv*);
-
-/*
+/*
* logging
*/
enum
@@ -434,7 +482,6 @@
Logrudpmsg= 1<<16,
Logesp= 1<<17,
Logtcpwin= 1<<18,
- Lognat= 1<<19,
};
void netloginit(Fs*);
@@ -449,17 +496,17 @@
void ifclogopen(Fs*, Chan*);
void ifclogclose(Fs*, Chan*);
+#pragma varargck argpos netlog 3
+
/*
* iproute.c
*/
typedef struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
typedef struct V4route V4route;
typedef struct V6route V6route;
enum
{
-
/* type bits */
Rv4= (1<<0), /* this is a version 4 route */
Rifc= (1<<1), /* this route is a directly connected interface */
@@ -468,27 +515,18 @@
Rbcast= (1<<4), /* a broadcast self address */
Rmulti= (1<<5), /* a multicast self address */
Rproxy= (1<<6), /* this route should be proxied */
+ Rsrc= (1<<7), /* source specific route */
};
-struct Routewalk
-{
- int o;
- int h;
- char* p;
- char* e;
- void* state;
- void (*walk)(Route*, Routewalk*);
-};
-
struct RouteTree
{
- Route* right;
- Route* left;
- Route* mid;
+ Route *mid;
+ Route *left;
+ Route *right;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->id */
uchar depth;
uchar type;
- uchar ifcid; /* must match ifc->id */
- Ipifc *ifc;
char tag[4];
int ref;
};
@@ -497,6 +535,10 @@
{
ulong address;
ulong endaddress;
+
+ ulong source;
+ ulong endsource;
+
uchar gate[IPv4addrlen];
};
@@ -504,6 +546,10 @@
{
ulong address[IPllen];
ulong endaddress[IPllen];
+
+ ulong source[IPllen];
+ ulong endsource[IPllen];
+
uchar gate[IPaddrlen];
};
@@ -516,17 +562,16 @@
V4route v4;
};
};
-extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route* v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v4source(Fs *f, uchar *a, uchar *s);
+extern Route* v6source(Fs *f, uchar *a, uchar *s);
extern long routeread(Fs *f, char*, ulong, int);
extern long routewrite(Fs *f, Chan*, char*, int);
-extern void routetype(int, char*);
-extern void ipwalkroutes(Fs*, Routewalk*);
-extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void routetype(int type, char p[8]);
/*
* devip.c
@@ -543,7 +588,6 @@
};
extern IPaux* newipaux(char*, char*);
-extern void setlport(Conv*);
/*
* arp.c
@@ -552,18 +596,16 @@
{
uchar ip[IPaddrlen];
uchar mac[MAClen];
- Medium *type; /* media type */
- Arpent* hash;
- Block* hold;
- Block* last;
- uint ctime; /* time entry was created or refreshed */
- uint utime; /* time entry was last used */
- uchar state;
+ Arpent *hash;
Arpent *nextrxt; /* re-transmit chain */
- uint rtime; /* time for next retransmission */
- uchar rxtsrem;
+ Block *hold;
+ Block *last;
Ipifc *ifc;
uchar ifcid; /* must match ifc->id */
+ uchar state;
+ uchar rxtsrem; /* re-transmissions remaining */
+ ulong ctime; /* time entry was created or refreshed */
+ ulong utime; /* time entry was last used */
};
extern void arpinit(Fs*);
@@ -572,15 +614,17 @@
extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
extern void arprelease(Arp*, Arpent *a);
extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void ndpsendsol(Fs*, Ipifc*, Arpent*);
/*
* ipaux.c
*/
-extern int myetheraddr(uchar*, char*);
-extern ulong parseip(uchar*, char*);
-extern ulong parseipmask(uchar*, char*);
+extern int parseether(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*, int);
+extern vlong parseipandmask(uchar*, uchar*, char*, char*);
extern char* v4parseip(uchar*, char*);
extern void maskip(uchar *from, uchar *mask, uchar *to);
extern int parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
extern void v4tov6(uchar *v6, uchar *v4);
extern int v6tov4(uchar *v4, uchar *v6);
extern int eipfmt(Fmt*);
+extern int convipvers(Conv *c);
#define ipmove(x, y) memmove(x, y, IPaddrlen)
#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
-
-#define ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
extern uchar IPv4bcast[IPaddrlen];
extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
extern Medium ethermedium;
extern Medium nullmedium;
extern Medium pktmedium;
-extern Medium tripmedium;
/*
* ipifc.c
@@ -619,33 +660,24 @@
*/
extern Medium* ipfindmedium(char *name);
extern void addipmedium(Medium *med);
+extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
extern int ipforme(Fs*, uchar *addr);
-extern int iptentative(Fs*, uchar *addr);
-extern int ipisbm(uchar *);
-extern int ipismulticast(uchar *);
-extern Ipifc* findipifc(Fs*, uchar *remote, int type);
-extern void findprimaryip(Fs*, uchar*);
+extern int ipismulticast(uchar *ip);
+extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc* findipifcstr(Fs *f, char *s);
extern void findlocalip(Fs*, uchar *local, uchar *remote);
-extern int ipv4local(Ipifc *ifc, uchar *addr);
-extern int ipv6local(Ipifc *ifc, uchar *addr);
-extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc* ipremoteonifc(Ipifc *ifc, uchar *ip);
extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int ipismulticast(uchar *ip);
-extern int ipisbooting(void);
-extern int ipifccheckin(Ipifc *ifc, Medium *med);
-extern void ipifccheckout(Ipifc *ifc);
-extern int ipifcgrab(Ipifc *ifc);
-extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void ipifcremroute(Fs*, int, uchar*, uchar*);
extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
extern long ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void ipsendra6(Fs *f, int on);
-
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char* ipifcremove6(Ipifc *ifc, char**argv, int argc);
/*
* ip.c
*/
@@ -652,37 +684,26 @@
extern void iprouting(Fs*, int);
extern void icmpnoconv(Fs*, Block*);
extern void icmpcantfrag(Fs*, Block*, int);
-extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern void icmpttlexceeded(Fs*, Ipifc*, Block*);
extern ushort ipcsum(uchar*);
extern void ipiput4(Fs*, Ipifc*, Block*);
extern void ipiput6(Fs*, Ipifc*, Block*);
-extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int ipoput6(Fs*, Block*, int, int, int, Routehint*);
extern int ipstats(Fs*, char*, int);
extern ushort ptclbsum(uchar*, int);
extern ushort ptclcsum(Block*, int, int);
extern void ip_init(Fs*);
-extern void update_mtucache(uchar*, ulong);
-extern ulong restrict_mtu(uchar*, ulong);
+extern void ip_init_6(Fs*);
/*
* bootp.c
*/
-char* (*bootp)(Ipifc*);
-int (*bootpread)(char*, ulong, int);
+extern int bootpread(char*, ulong, int);
/*
- * iprouter.c
- */
-void useriprouter(Fs*, Ipifc*, Block*);
-void iprouteropen(Fs*);
-void iprouterclose(Fs*);
-long iprouterread(Fs*, void*, int);
-
-/*
* resolving inferno/plan9 differences
*/
-Chan* commonfdtochan(int, int, int, int);
char* commonuser(void);
char* commonerror(void);
@@ -695,15 +716,3 @@
* global to all of the stack
*/
extern void (*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int nato(Block*, Ipifc*, Fs*);
-extern void nati(Block*, Ipifc*);
-extern int natgc(uchar);
-
-extern int addnataddr(uchar*, uchar*, Iplifc*);
-extern int removenataddr(uchar*, uchar*, Iplifc*);
-extern void shownataddr(void);
-extern void flushnataddr(void);
--- a/os/ip/ipaux.c
+++ b/os/ip/ipaux.c
@@ -5,49 +5,8 @@
#include "fns.h"
#include "../port/error.h"
#include "ip.h"
-#include "ipv6.h"
+#include "ipv6.h"
-/*
- * well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- * prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0, 0, 0, 0
-};
-
-
char *v6hdrtypes[Maxhdrtype] =
{
[HBH] "HopbyHop",
@@ -54,7 +13,7 @@
[ICMP] "ICMP",
[IGMP] "IGMP",
[GGP] "GGP",
- [IPINIP] "IP",
+ [IPINIP] "IP",
[ST] "ST",
[TCP] "TCP",
[UDP] "UDP",
@@ -87,6 +46,7 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+
uchar v6linklocal[IPaddrlen] = {
0xfe, 0x80, 0, 0,
0, 0, 0, 0,
@@ -99,26 +59,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6llpreflen = 8; // link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
- 0xfe, 0xc0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6slpreflen = 6; // site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
- 0x08, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
uchar v6multicast[IPaddrlen] = {
0xff, 0, 0, 0,
0, 0, 0, 0,
@@ -131,7 +73,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6mcpreflen = 1; // multicast prefix length
+int v6mcpreflen = 1; /* multicast prefix length */
+
uchar v6allnodesN[IPaddrlen] = {
0xff, 0x01, 0, 0,
0, 0, 0, 0,
@@ -138,6 +81,12 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+uchar v6allroutersN[IPaddrlen] = {
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
uchar v6allnodesNmask[IPaddrlen] = {
0xff, 0xff, 0, 0,
0, 0, 0, 0,
@@ -144,7 +93,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6aNpreflen = 2; // all nodes (N) prefix
+int v6aNpreflen = 2; /* all nodes (N) prefix */
+
uchar v6allnodesL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -151,19 +101,6 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
-uchar v6allnodesLmask[IPaddrlen] = {
- 0xff, 0xff, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6aLpreflen = 2; // all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
- 0xff, 0x01, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0x02
-};
uchar v6allroutersL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -170,12 +107,14 @@
0, 0, 0, 0,
0, 0, 0, 0x02
};
-uchar v6allroutersS[IPaddrlen] = {
- 0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, 0x02
+ 0, 0, 0, 0
};
+int v6aLpreflen = 2; /* all nodes (L) prefix */
+
uchar v6solicitednode[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -190,9 +129,6 @@
};
int v6snpreflen = 13;
-
-
-
ushort
ptclcsum(Block *bp, int offset, int len)
{
@@ -215,7 +151,7 @@
if(bp->next == nil) {
if(blocklen < len)
len = blocklen;
- return ~ptclbsum(addr, len) & 0xffff;
+ return ptclbsum(addr, len) ^ 0xffff;
}
losum = 0;
@@ -247,7 +183,7 @@
while((csum = losum>>16) != 0)
losum = csum + (losum & 0xffff);
- return ~losum & 0xffff;
+ return losum ^ 0xffff;
}
enum
@@ -255,306 +191,9 @@
Isprefix= 16,
};
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
- char buf[5*8];
- static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
- static char *ifmt = "%d.%d.%d.%d";
- uchar *p, ip[16];
- ulong *lp;
- ushort s;
- int i, j, n, eln, eli;
-
- switch(f->r) {
- case 'E': /* Ethernet address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
- case 'I': /* Ip address */
- p = va_arg(f->args, uchar*);
-common:
- if(memcmp(p, v4prefix, 12) == 0)
- return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
- /* find longest elision */
- eln = eli = -1;
- for(i = 0; i < 16; i += 2){
- for(j = i; j < 16; j += 2)
- if(p[j] != 0 || p[j+1] != 0)
- break;
- if(j > i && j - i > eln){
- eli = i;
- eln = j - i;
- }
- }
-
- /* print with possible elision */
- n = 0;
- for(i = 0; i < 16; i += 2){
- if(i == eli){
- n += sprint(buf+n, "::");
- i += eln;
- if(i >= 16)
- break;
- } else if(i != 0)
- n += sprint(buf+n, ":");
- s = (p[i]<<8) + p[i+1];
- n += sprint(buf+n, "%ux", s);
- }
- return fmtstrcpy(f, buf);
-
- case 'i': /* v6 address as 4 longs */
- lp = va_arg(f->args, ulong*);
- for(i = 0; i < 4; i++)
- hnputl(ip+4*i, *lp++);
- p = ip;
- goto common;
-
- case 'V': /* v4 ip address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
- case 'M': /* ip mask */
- p = va_arg(f->args, uchar*);
-
- /* look for a prefix mask */
- for(i = 0; i < 16; i++)
- if(p[i] != 0xff)
- break;
- if(i < 16){
- if((prefixvals[p[i]] & Isprefix) == 0)
- goto common;
- for(j = i+1; j < 16; j++)
- if(p[j] != 0)
- goto common;
- n = 8*i + (prefixvals[p[i]] & ~Isprefix);
- } else
- n = 8*16;
-
- /* got one, use /xx format */
- return fmtprint(f, "/%d", n);
- }
- return fmtstrcpy(f, "(eipfmt)");
-}
-
#define CLASS(p) ((*(uchar*)(p))>>6)
-extern char*
-v4parseip(uchar *to, char *from)
-{
- int i;
- char *p;
-
- p = from;
- for(i = 0; i < 4 && *p; i++){
- to[i] = strtoul(p, &p, 0);
- if(*p == '.')
- p++;
- }
- switch(CLASS(to)){
- case 0: /* class A - 1 uchar net */
- case 1:
- if(i == 3){
- to[3] = to[2];
- to[2] = to[1];
- to[1] = 0;
- } else if(i == 2){
- to[3] = to[1];
- to[1] = 0;
- }
- break;
- case 2: /* class B - 2 uchar net */
- if(i == 3){
- to[3] = to[2];
- to[2] = 0;
- }
- break;
- }
- return p;
-}
-
-int
-isv4(uchar *ip)
-{
- return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- * the following routines are unrolled with no memset's to speed
- * up the usual case
- */
void
-v4tov6(uchar *v6, uchar *v4)
-{
- v6[0] = 0;
- v6[1] = 0;
- v6[2] = 0;
- v6[3] = 0;
- v6[4] = 0;
- v6[5] = 0;
- v6[6] = 0;
- v6[7] = 0;
- v6[8] = 0;
- v6[9] = 0;
- v6[10] = 0xff;
- v6[11] = 0xff;
- v6[12] = v4[0];
- v6[13] = v4[1];
- v6[14] = v4[2];
- v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
- if(v6[0] == 0
- && v6[1] == 0
- && v6[2] == 0
- && v6[3] == 0
- && v6[4] == 0
- && v6[5] == 0
- && v6[6] == 0
- && v6[7] == 0
- && v6[8] == 0
- && v6[9] == 0
- && v6[10] == 0xff
- && v6[11] == 0xff)
- {
- v4[0] = v6[12];
- v4[1] = v6[13];
- v4[2] = v6[14];
- v4[3] = v6[15];
- return 0;
- } else {
- memset(v4, 0, 4);
- return -1;
- }
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
- int i, elipsis = 0, v4 = 1;
- ulong x;
- char *p, *op;
-
- memset(to, 0, IPaddrlen);
- p = from;
- for(i = 0; i < 16 && *p; i+=2){
- op = p;
- x = strtoul(p, &p, 16);
- if(*p == '.' || (*p == 0 && i == 0)){
- p = v4parseip(to+i, op);
- i += 4;
- break;
- } else {
- to[i] = x>>8;
- to[i+1] = x;
- }
- if(*p == ':'){
- v4 = 0;
- if(*++p == ':'){
- elipsis = i+2;
- p++;
- }
- }
- }
- if(i < 16){
- memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
- memset(&to[elipsis], 0, 16-i);
- }
- if(v4){
- to[10] = to[11] = 0xff;
- return nhgetl(to+12);
- } else
- return 6;
-}
-
-/*
- * hack to allow ip v4 masks to be entered in the old
- * style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
- ulong x;
- int i;
- uchar *p;
-
- if(*from == '/'){
- /* as a number of prefix bits */
- i = atoi(from+1);
- if(i < 0)
- i = 0;
- if(i > 128)
- i = 128;
- memset(to, 0, IPaddrlen);
- for(p = to; i >= 8; i -= 8)
- *p++ = 0xff;
- if(i > 0)
- *p = ~((1<<(8-i))-1);
- x = nhgetl(to+IPv4off);
- } else {
- /* as a straight bit mask */
- x = parseip(to, from);
- if(memcmp(to, v4prefix, IPv4off) == 0)
- memset(to, 0xff, IPv4off);
- }
- return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
- int i;
-
- for(i = 0; i < IPaddrlen; i++)
- to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
- if(isv4(ip))
- return classmask[ip[IPv4off]>>6];
- else {
- if(ipcmp(ip, v6loopback) == 0)
- return IPallbits;
- else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
- return v6linklocalmask;
- else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
- return v6sitelocalmask;
- else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
- return v6solicitednodemask;
- else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
- return v6multicastmask;
- return IPallbits;
- }
-}
-
-void
ipv62smcast(uchar *smcast, uchar *a)
{
assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
ulong
iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
{
- return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+ return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
}
void
@@ -678,7 +317,7 @@
return c;
}
}
-
+
/* match local address and port */
hv = iphash(IPnoaddr, 0, da, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
return c;
}
}
-
+
/* match just port */
hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
return c;
}
}
-
+
/* match local address */
hv = iphash(IPnoaddr, 0, da, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
return c;
}
}
-
+
/* look for something that matches anything */
hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
}
unlock(ht);
return nil;
+}
+
+int
+convipvers(Conv *c)
+{
+ if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0)
+ return V4;
+ else
+ return V6;
}
--- a/os/ip/ipifc.c
+++ b/os/ip/ipifc.c
@@ -11,17 +11,14 @@
#define DPRINT if(0)print
enum {
- Maxmedia = 32,
- Nself = Maxmedia*5,
- NHASH = (1<<6),
- NCACHE = 256,
- QMAX = 64*1024-1,
+ Maxmedia = 32,
+ Nself = Maxmedia*5,
+ NHASH = 1<<6,
+ NCACHE = 256,
+ QMAX = 192*1024-1,
};
-Medium *media[Maxmedia] =
-{
- 0
-};
+Medium *media[Maxmedia] = { 0 };
/*
* cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
struct Ipself
{
uchar a[IPaddrlen];
- Ipself *hnext; /* next address in the hash table */
+ Ipself *next; /* next address in the hash table */
Iplink *link; /* binding twixt Ipself and Ipifc */
ulong expire;
uchar type; /* type of address */
- int ref;
- Ipself *next; /* free list */
};
struct Ipselftab
@@ -64,11 +59,47 @@
static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char* ipifcremlifc(Ipifc*, Iplifc*);
+static void ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char* ipifcremlifc(Ipifc*, Iplifc**);
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum {
+ unknownv6, /* UGH */
+ unspecifiedv6,
+ linklocalv6,
+ globalv6,
+};
+
+static int /* classify addr as unknownv6, linklocalv6 or globalv6; unspecifiedv6 is never returned here */
+v6addrtype(uchar *addr)
+{
+ if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0) /* v4 address, or equal to IPnoaddr */
+ return unknownv6;
+ else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+ isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop) /* multicast: low nibble of addr[1] (presumably the scope) at most Link_local_scop */
+ return linklocalv6;
+ else
+ return globalv6;
+}
+
+static int /* number of leading bits a and b have in common, comparing at most n bytes */
+comprefixlen(uchar *a, uchar *b, int n)
+{
+ int i, c;
+
+ for(i = 0; i < n; i++){
+ if((c = a[i] ^ b[i]) == 0) /* byte i is identical, keep going */
+ continue;
+ for(i <<= 3; (c & 0x80) == 0; i++) /* i becomes a bit count; add the matching high bits of the first differing byte */
+ c <<= 1;
+ return i;
+ }
+ return i << 3; /* no byte differed: result is 8 bits per byte compared */
+}
+
/*
* link in a new medium
*/
@@ -121,7 +152,7 @@
wlock(ifc);
if(ifc->m != nil){
wunlock(ifc);
- return "interface already bound";
+ return Ebound;
}
if(waserror()){
wunlock(ifc);
@@ -142,18 +173,14 @@
ifc->m = m;
ifc->mintu = ifc->m->mintu;
ifc->maxtu = ifc->m->maxtu;
+ ifc->delay = 40;
+ ifc->speed = 0;
if(ifc->m->unbindonclose == 0)
ifc->conv->inuse++;
- ifc->rp.mflag = 0; // default not managed
- ifc->rp.oflag = 0;
- ifc->rp.maxraint = 600000; // millisecs
- ifc->rp.minraint = 200000;
- ifc->rp.linkmtu = 0; // no mtu sent
- ifc->rp.reachtime = 0;
- ifc->rp.rxmitra = 0;
- ifc->rp.ttl = MAXTTL;
- ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+ /* default router parameters */
+ ifc->rp = c->p->f->v6p->rp;
+
/* any ancillary structures (like routes) no longer pertain */
ifc->ifcid++;
@@ -170,29 +197,44 @@
/*
* detach a device from an interface, close the interface
- * called with ifc->conv closed
*/
static char*
ipifcunbind(Ipifc *ifc)
{
- char *err;
+ Medium *m;
- if(waserror()){
+ wlock(ifc);
+ m = ifc->m;
+ if(m == nil){
wunlock(ifc);
- nexterror();
+ return Eunbound;
}
- wlock(ifc);
- /* dissociate routes */
- if(ifc->m != nil && ifc->m->unbindonclose == 0)
- ifc->conv->inuse--;
- ifc->ifcid++;
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
/* disassociate device */
- if(ifc->m != nil && ifc->m->unbind)
- (*ifc->m->unbind)(ifc);
+ if(m->unbind != nil){
+ extern Medium nullmedium;
+
+ /*
+ * unbind() might unlock the ifc, so change the medium
+ * to the nullmedium to prevent packets from getting
+ * sent while the medium is shutting down.
+ */
+ ifc->m = &nullmedium;
+
+ if(!waserror()){
+ (*m->unbind)(ifc);
+ poperror();
+ }
+ }
+
memset(ifc->dev, 0, sizeof(ifc->dev));
ifc->arg = nil;
+
+ ifc->reflect = 0;
ifc->reassemble = 0;
/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
qclose(ifc->conv->wq);
qclose(ifc->conv->sq);
- /* disassociate logical interfaces */
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
-
+ /* dissociate routes */
+ ifc->ifcid++;
+ if(m->unbindonclose == 0)
+ ifc->conv->inuse--;
ifc->m = nil;
wunlock(ifc);
- poperror();
+
return nil;
}
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
-
static int
ipifcstate(Conv *c, char *state, int n)
{
@@ -228,19 +266,18 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
m = snprint(state, n, sfixedformat,
ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
- ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+ ifc->speed, ifc->delay);
rlock(ifc);
- for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
- m += snprint(state+m, n - m, slineformat,
- lifc->local, lifc->mask, lifc->remote,
- lifc->validlt, lifc->preflt);
+ for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat, lifc->local,
+ lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
if(ifc->lifc == nil)
m += snprint(state+m, n - m, "\n");
runlock(ifc);
@@ -256,13 +293,11 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
- m = 0;
-
rlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m = 0;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
- for(link = lifc->link; link; link = link->lifclink)
+ for(link = lifc->link; link != nil; link = link->lifclink)
m += snprint(state+m, n - m, " %-40.40I", link->self->a);
m += snprint(state+m, n - m, "\n");
}
@@ -279,6 +314,59 @@
return ifc->m != nil;
}
+static void /* recompute ifc->burst from ifc->delay, ifc->speed and ifc->maxtu */
+ipifcadjustburst(Ipifc *ifc)
+{
+ int burst;
+
+ burst = ((vlong)ifc->delay * ifc->speed) / 8000; /* ms * bits/s / 8000 = bytes; vlong avoids overflow in the product */
+ if(burst < ifc->maxtu) /* floor the result at maxtu */
+ burst = ifc->maxtu;
+ ifc->burst = burst;
+}
+
+static void /* store delay in ifc->delay, clamped to 0..1000, then recompute the burst */
+ipifcsetdelay(Ipifc *ifc, int delay)
+{
+ if(delay < 0) /* negative values become 0 */
+ delay = 0;
+ else if(delay > 1000) /* values above 1000 become 1000 */
+ delay = 1000;
+ ifc->delay = delay;
+ ipifcadjustburst(ifc);
+}
+
+static void /* store speed in ifc->speed (negative becomes 0), clear ifc->load, then recompute the burst */
+ipifcsetspeed(Ipifc *ifc, int speed)
+{
+ if(speed < 0)
+ speed = 0;
+ ifc->speed = speed; /* ipifcoput skips its load check while speed is 0 */
+ ifc->load = 0; /* restart the load accounting from zero */
+ ipifcadjustburst(ifc);
+}
+
+void /* hand bp to the medium's bwrite; when ifc->speed is set, free bp instead if ifc->load exceeds ifc->burst */
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip)
+{
+ if(ifc->speed){ /* load check only runs with a non-zero speed */
+ ulong now = MACHP(0)->ticks;
+ int dt = TK2MS(now - ifc->ticks); /* presumably ms elapsed since the previous call - confirm TK2MS units */
+ ifc->ticks = now;
+ ifc->load -= ((vlong)dt * ifc->speed) / 8000; /* subtract dt * speed / 8000: bytes drained if dt is in ms */
+ if(ifc->load < 0 || dt < 0 || dt > 1000) /* drained below zero, or dt outside 0..1000: reset the load */
+ ifc->load = 0;
+ else if(ifc->load > ifc->burst){ /* still above the burst limit: drop the packet */
+ freeblist(bp);
+ return;
+ }
+ }
+ bp = concatblock(bp);
+ ifc->load += BLEN(bp); /* counted even when speed is 0 */
+ ifc->m->bwrite(ifc, bp, version, ip); /* ifc->m is not checked for nil here */
+}
+
+
/*
* called when a process writes to an interface's 'data'
*/
@@ -294,18 +382,15 @@
return;
ifc = (Ipifc*)c->ptcl;
- if(!canrlock(ifc)){
- freeb(bp);
- return;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil || ifc->m->pktin == nil)
- freeb(bp);
- else
+ if(ifc->m != nil && ifc->m->pktin != nil)
(*ifc->m->pktin)(c->p->f, ifc, bp);
+ else
+ freeb(bp);
runlock(ifc);
poperror();
}
@@ -319,27 +404,26 @@
Ipifc *ifc;
c->rq = qopen(QMAX, 0, 0, 0);
- c->sq = qopen(2*QMAX, 0, 0, 0);
c->wq = qopen(QMAX, Qkick, ipifckick, c);
+ c->sq = qopen(QMAX, 0, 0, 0);
+ if(c->rq == nil || c->wq == nil || c->sq == nil)
+ error(Enomem);
ifc = (Ipifc*)c->ptcl;
ifc->conv = c;
- ifc->unbinding = 0;
ifc->m = nil;
+ ifc->reflect = 0;
ifc->reassemble = 0;
}
/*
* called after last close of ipifc data or ctl
- * called with c locked, we must unlock
*/
static void
ipifcclose(Conv *c)
{
- Ipifc *ifc;
- Medium *m;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
+ Medium *m = ifc->m;
- ifc = (Ipifc*)c->ptcl;
- m = ifc->m;
if(m != nil && m->unbindonclose)
ipifcunbind(ifc);
}
@@ -347,19 +431,17 @@
/*
* change an interface's mtu
*/
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
{
- int mtu;
+ Medium *m = ifc->m;
- if(argc < 2)
+ if(m == nil)
+ return Eunbound;
+ if(mtu < m->mintu || mtu > m->maxtu)
return Ebadarg;
- if(ifc->m == nil)
- return Ebadarg;
- mtu = strtoul(argv[1], 0, 0);
- if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
- return Ebadarg;
ifc->maxtu = mtu;
+ ipifcadjustburst(ifc);
return nil;
}
@@ -374,13 +456,8 @@
Iplifc *lifc, **l;
int i, type, mtu;
Fs *f;
- int sendnbrdisc = 0;
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- f = ifc->conv->p->f;
-
+ mtu = 0;
type = Rifc;
memset(ip, 0, IPaddrlen);
memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
/* fall through */
case 5:
mtu = strtoul(argv[4], 0, 0);
- if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
- ifc->maxtu = mtu;
/* fall through */
case 4:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
- parseip(rem, argv[3]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
maskip(rem, mask, net);
break;
case 3:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
maskip(ip, mask, rem);
maskip(rem, mask, net);
break;
case 2:
- parseip(ip, argv[1]);
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
memmove(mask, defmask(ip), IPaddrlen);
maskip(ip, mask, rem);
maskip(rem, mask, net);
@@ -415,26 +490,55 @@
break;
default:
return Ebadarg;
- break;
}
- if(isv4(ip))
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+ type |= Rv4;
tentative = 0;
+ }
+
wlock(ifc);
+ if(ifc->m == nil){
+ wunlock(ifc);
+ return Eunbound;
+ }
+ f = ifc->conv->p->f;
+ if(waserror()){
+ wunlock(ifc);
+ return up->errstr;
+ }
+ if(mtu > 0)
+ ipifcsetmtu(ifc, mtu);
+
/* ignore if this is already a local address for this ifc */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, ip) == 0) {
- if(lifc->tentative != tentative)
- lifc->tentative = tentative;
- if(lifcp != nil) {
- lifc->onlink = lifcp->onlink;
- lifc->autoflag = lifcp->autoflag;
- lifc->validlt = lifcp->validlt;
- lifc->preflt = lifcp->preflt;
- lifc->origint = lifcp->origint;
+ if((lifc = iplocalonifc(ifc, ip)) != nil){
+ if(lifcp != nil) {
+ if(!lifc->onlink && lifcp->onlink){
+ lifc->onlink = 1;
+ addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
}
- goto out;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
}
+ if(lifc->tentative != tentative){
+ lifc->tentative = tentative;
+ goto done;
+ }
+ wunlock(ifc);
+ poperror();
+ return nil;
}
/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
ipmove(lifc->mask, mask);
ipmove(lifc->remote, rem);
ipmove(lifc->net, net);
+ lifc->type = type;
lifc->tentative = tentative;
if(lifcp != nil) {
lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
lifc->validlt = lifcp->validlt;
lifc->preflt = lifcp->preflt;
lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0UL;
+ lifc->origint = NOW / 1000;
}
- else { // default values
- lifc->onlink = 1;
- lifc->autoflag = 1;
- lifc->validlt = 0xffffffff;
- lifc->preflt = 0xffffffff;
- lifc->origint = NOW / 10^3;
- }
lifc->next = nil;
- for(l = &ifc->lifc; *l; l = &(*l)->next)
+ for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
;
*l = lifc;
- /* check for point-to-point interface */
- if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
- if(ipcmp(mask, IPallbits) == 0)
- type |= Rptpt;
+ /* add route for this logical interface */
+ if(lifc->onlink){
+ addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+ }
- /* add local routes */
- if(isv4(ip))
- v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
- else
- v6addroute(f, tifc, rem, mask, rem, type);
-
addselfcache(f, ifc, lifc, ip, Runi);
- if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
- ipifcregisterproxy(f, ifc, rem);
- goto out;
+ /* register proxy */
+ if(type & Rptpt){
+ if(type & Rproxy)
+ ipifcregisterproxy(f, ifc, rem, 1);
+ goto done;
}
- if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ if(type & Rv4) {
/* add subnet directed broadcast address to the self cache */
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) & mask[i];
addselfcache(f, ifc, lifc, bcast, Rbcast);
-
+
addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
- }
- else {
+ } else {
if(ipcmp(ip, v6loopback) == 0) {
/* add node-local mcast address */
addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
/* add route for all node multicast */
- v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+ addroute(f, v6allnodesN, v6allnodesNmask,
+ ip, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
}
/* add all nodes multicast address */
addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-
+
/* add route for all nodes multicast */
- v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-
+ addroute(f, v6allnodesL, v6allnodesLmask,
+ ip, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
+
/* add solicited-node multicast address */
ipv62smcast(bcast, ip);
addselfcache(f, ifc, lifc, bcast, Rmulti);
-
- sendnbrdisc = 1;
}
- /* register the address on this network for address resolution */
- if(isv4(ip) && ifc->m->areg != nil)
- (*ifc->m->areg)(ifc, ip);
-
-out:
+done:
wunlock(ifc);
- if(tentative && sendnbrdisc)
- icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+
+ rlock(ifc);
+ ipifcregisteraddr(f, ifc, lifc, ip);
+ runlock(ifc);
+
return nil;
}
/*
* remove a logical interface from an ifc
- * always called with ifc wlock'd
+ * called with ifc wlock'd
*/
static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
{
- Iplifc **l;
- Fs *f;
+ Iplifc *lifc = *l;
+ Fs *f = ifc->conv->p->f;
- f = ifc->conv->p->f;
-
- /*
- * find address on this interface and remove from chain.
- * for pt to pt we actually specify the remote address as the
- * addresss to remove.
- */
- for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
- ;
- if(*l == nil)
+ if(lifc == nil)
return "address not on this interface";
*l = lifc->next;
/* disassociate any addresses */
- while(lifc->link)
+ while(lifc->link != nil)
remselfcache(f, ifc, lifc, lifc->link->self->a);
/* remove the route for this logical interface */
- if(isv4(lifc->local))
- v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
- else {
- v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(lifc->onlink){
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(lifc->local) != linklocalv6)
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
+ }
+
+ /* unregister proxy */
+ if(lifc->type & Rptpt){
+ if(lifc->type & Rproxy)
+ ipifcregisterproxy(f, ifc, lifc->remote, 0);
+ goto done;
+ }
+
+ /* remove route for all nodes multicast */
+ if((lifc->type & Rv4) == 0){
if(ipcmp(lifc->local, v6loopback) == 0)
- /* remove route for all node multicast */
- v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
- else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
- /* remove route for all link multicast */
- v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ remroute(f, v6allnodesN, v6allnodesNmask,
+ lifc->local, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
+
+ remroute(f, v6allnodesL, v6allnodesLmask,
+ lifc->local, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
}
+done:
free(lifc);
return nil;
-
}
/*
* remove an address from an interface.
- * called with c locked
*/
char*
ipifcrem(Ipifc *ifc, char **argv, int argc)
{
- uchar ip[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar rem[IPaddrlen];
- Iplifc *lifc;
- char *rv;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ Iplifc *lifc, **l;
+ char *err;
if(argc < 3)
return Ebadarg;
-
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
if(argc < 4)
maskip(ip, mask, rem);
- else
- parseip(rem, argv[3]);
+ else if(parseip(rem, argv[3]) == -1)
+ return Ebadip;
- wlock(ifc);
-
/*
* find address on this interface and remove from chain.
* for pt to pt we actually specify the remote address as the
* addresss to remove.
*/
+ wlock(ifc);
+ l = &ifc->lifc;
for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
- if (memcmp(ip, lifc->local, IPaddrlen) == 0
- && memcmp(mask, lifc->mask, IPaddrlen) == 0
- && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ if(ipcmp(ip, lifc->local) == 0
+ && ipcmp(mask, lifc->mask) == 0
+ && ipcmp(rem, lifc->remote) == 0)
break;
+ l = &lifc->next;
}
-
- rv = ipifcremlifc(ifc, lifc);
+ err = ipifcremlifc(ifc, l);
wunlock(ifc);
- return rv;
+ return err;
}
/*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->addroute != nil)
- m->addroute(ifc, vers, addr, mask, gate, type);
- }
- }
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->remroute != nil)
- m->remroute(ifc, vers, addr, mask);
- }
- }
-}
-
-/*
* associate an address with the interface. This wipes out any previous
* addresses. This is a macro that means, remove all the old interfaces
* and add a new one.
@@ -679,170 +740,89 @@
static char*
ipifcconnect(Conv* c, char **argv, int argc)
{
+ Ipifc *ifc = (Ipifc*)c->ptcl;
char *err;
- Ipifc *ifc;
- ifc = (Ipifc*)c->ptcl;
-
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
wlock(ifc);
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
wunlock(ifc);
- poperror();
err = ipifcadd(ifc, argv, argc, 0, nil);
- if(err)
+ if(err != nil)
return err;
Fsconnected(c, nil);
-
return nil;
}
char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
{
- int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+ int i, argsleft;
+ uchar sendra, recvra;
+ Routerparams rp;
- argsleft = argc - 1;
i = 1;
-
- if(argsleft % 2 != 0)
+ argsleft = argc - 1;
+ if((argsleft % 2) != 0)
return Ebadarg;
+ sendra = ifc->sendra6;
+ recvra = ifc->recvra6;
+ rp = ifc->rp;
+
while (argsleft > 1) {
- if(strcmp(argv[i],"recvra")==0)
- ifc->recvra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"sendra")==0)
- ifc->sendra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"mflag")==0)
- ifc->rp.mflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"oflag")==0)
- ifc->rp.oflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"maxraint")==0)
- ifc->rp.maxraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"minraint")==0)
- ifc->rp.minraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"linkmtu")==0)
- ifc->rp.linkmtu = atoi(argv[i+1]);
- else if(strcmp(argv[i],"reachtime")==0)
- ifc->rp.reachtime = atoi(argv[i+1]);
- else if(strcmp(argv[i],"rxmitra")==0)
- ifc->rp.rxmitra = atoi(argv[i+1]);
- else if(strcmp(argv[i],"ttl")==0)
- ifc->rp.ttl = atoi(argv[i+1]);
- else if(strcmp(argv[i],"routerlt")==0)
- ifc->rp.routerlt = atoi(argv[i+1]);
+ if(strcmp(argv[i], "recvra") == 0)
+ recvra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "sendra") == 0)
+ sendra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "mflag") == 0)
+ rp.mflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "oflag") == 0)
+ rp.oflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "maxraint") == 0)
+ rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "minraint") == 0)
+ rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "linkmtu") == 0)
+ rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "reachtime") == 0)
+ rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "rxmitra") == 0)
+ rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "ttl") == 0)
+ rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "routerlt") == 0)
+ rp.routerlt = atoi(argv[i+1]);
else
- return Ebadarg;
+ return Ebadarg;
argsleft -= 2;
i += 2;
}
- // consistency check
- if(ifc->rp.maxraint < ifc->rp.minraint) {
- ifc->rp.maxraint = vmax;
- ifc->rp.minraint = vmin;
+ /* consistency check */
+ if(rp.maxraint < rp.minraint)
return Ebadarg;
- }
- return nil;
-}
+ ifc->rp = rp;
+ ifc->sendra6 = sendra;
+ ifc->recvra6 = recvra;
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->sendra6 = (i!=0);
return nil;
}
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->recvra6 = (i!=0);
- return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
- uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
- Iplifc *lifc;
-
- if(argc == 2){
- if((strcmp(argv[1], "show") == 0)){
- shownataddr();
- return nil;
- }else if((strcmp(argv[1], "flush") == 0)){
- flushnataddr();
- return nil;
- }else
- return Ebadarg;
- }
-
- if(argc != 5)
- return Ebadarg;
-
- if (parseip(src, argv[2]) == -1)
- return Ebadip;
-
- if (parseipmask(mask, argv[3]) == -1)
- return Ebadip;
-
- if (parseip(dst, argv[4]) == -1)
- return Ebadip;
-
- if((lifc=iplocalonifc(ifc, dst)) == nil)
- return Ebadip;
-
- if(strcmp(argv[1], "add") == 0){
- if(addnataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else if(strcmp(argv[1], "remove") == 0){
- if(removenataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else
- return Ebadarg;
-
- return nil;
-}
-
/*
* non-standard control messages.
- * called with c locked.
*/
static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
{
- Ipifc *ifc;
- int i;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
- ifc = (Ipifc*)c->ptcl;
if(strcmp(argv[0], "add") == 0)
return ipifcadd(ifc, argv, argc, 0, nil);
- else if(strcmp(argv[0], "bootp") == 0)
- return bootp(ifc);
else if(strcmp(argv[0], "try") == 0)
return ipifcadd(ifc, argv, argc, 1, nil);
else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
return ipifcrem(ifc, argv, argc);
else if(strcmp(argv[0], "unbind") == 0)
return ipifcunbind(ifc);
- else if(strcmp(argv[0], "joinmulti") == 0)
- return ipifcjoinmulti(ifc, argv, argc);
- else if(strcmp(argv[0], "leavemulti") == 0)
- return ipifcleavemulti(ifc, argv, argc);
else if(strcmp(argv[0], "mtu") == 0)
- return ipifcsetmtu(ifc, argv, argc);
- else if(strcmp(argv[0], "reassemble") == 0){
- ifc->reassemble = 1;
+ return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+ else if(strcmp(argv[0], "speed") == 0){
+ ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
return nil;
}
+ else if(strcmp(argv[0], "delay") == 0){
+ ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
else if(strcmp(argv[0], "iprouting") == 0){
- i = 1;
- if(argc > 1)
- i = atoi(argv[1]);
- iprouting(c->p->f, i);
+ iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
return nil;
}
- else if(strcmp(argv[0], "addpref6") == 0)
- return ipifcaddpref6(ifc, argv, argc);
- else if(strcmp(argv[0], "setpar6") == 0)
- return ipifcsetpar6(ifc, argv, argc);
- else if(strcmp(argv[0], "sendra6") == 0)
- return ipifcsendra6(ifc, argv, argc);
- else if(strcmp(argv[0], "recvra6") == 0)
- return ipifcrecvra6(ifc, argv, argc);
- else if(strcmp(argv[0], "nat") == 0)
- return ipifcnat(ifc, argv, argc);
+ else if(strcmp(argv[0], "reflect") == 0){
+ ifc->reflect = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "reassemble") == 0){
+ ifc->reassemble = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "add6") == 0)
+ return ipifcadd6(ifc, argv, argc);
+ else if(strcmp(argv[0], "remove6") == 0)
+ return ipifcremove6(ifc, argv, argc);
+ else if(strcmp(argv[0], "ra6") == 0)
+ return ipifcra6(ifc, argv, argc);
return "unsupported ctl";
}
+int
ipifcstats(Proto *ipifc, char *buf, int len)
{
return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
ipifc->nc = Maxmedia;
ipifc->ptclsize = sizeof(Ipifc);
- f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
/*
* add to self routing cache
- * called with c locked
*/
static void
addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
{
- Ipself *p;
Iplink *lp;
+ Ipself *p;
int h;
+ type |= (lifc->type & Rv4);
qlock(f->self);
+ if(waserror()){
+ qunlock(f->self);
+ nexterror();
+ }
/* see if the address already exists */
h = hashipa(a);
- for(p = f->self->hash[h]; p; p = p->next)
- if(memcmp(a, p->a, IPaddrlen) == 0)
+ for(p = f->self->hash[h]; p != nil; p = p->next)
+ if(ipcmp(a, p->a) == 0)
break;
/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
}
/* look for a link for this lifc */
- for(lp = p->link; lp; lp = lp->selflink)
+ for(lp = p->link; lp != nil; lp = lp->selflink)
if(lp->lifc == lifc)
break;
@@ -962,18 +948,19 @@
lifc->link = lp;
/* add to routing table */
- if(isv4(a))
- v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
- else
- v6addroute(f, tifc, a, IPallbits, a, type);
+ addroute(f, a, IPallbits,
+ lifc->local,
+ ((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, type, ifc, tifc);
if((type & Rmulti) && ifc->m->addmulti != nil)
(*ifc->m->addmulti)(ifc, a, lifc->local);
- } else {
+ } else
lp->ref++;
- }
qunlock(f->self);
+ poperror();
}
/*
@@ -992,8 +979,8 @@
ulong now = NOW;
l = &freeiplink;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1000,10 +987,11 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
+
static void
ipselffree(Ipself *p)
{
@@ -1011,8 +999,8 @@
ulong now = NOW;
l = &freeipself;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1019,7 +1007,7 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
@@ -1027,7 +1015,6 @@
/*
* Decrement reference for this address on this link.
* Unlink from selftab if this is the last ref.
- * called with c locked
*/
static void
remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
/* find the unique selftab entry */
l = &f->self->hash[hashipa(a)];
- for(p = *l; p; p = *l){
+ for(p = *l; p != nil; p = *l){
if(ipcmp(p->a, a) == 0)
break;
l = &p->next;
@@ -1053,7 +1040,7 @@
* that matches the selftab entry
*/
l_lifc = &lifc->link;
- for(link = *l_lifc; link; link = *l_lifc){
+ for(link = *l_lifc; link != nil; link = *l_lifc){
if(link->self == p)
break;
l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
* the one we just found
*/
l_self = &p->link;
- for(link = *l_self; link; link = *l_self){
- if(link == *(l_lifc))
+ for(link = *l_self; link != nil; link = *l_self){
+ if(link == *l_lifc)
break;
l_self = &link->selflink;
}
@@ -1079,9 +1066,20 @@
if(--(link->ref) != 0)
goto out;
- if((p->type & Rmulti) && ifc->m->remmulti != nil)
- (*ifc->m->remmulti)(ifc, a, lifc->local);
+ /* remove from routing table */
+ remroute(f, a, IPallbits,
+ lifc->local,
+ ((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, p->type, ifc, tifc);
+ if((p->type & Rmulti) && ifc->m->remmulti != nil){
+ if(!waserror()){
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+ poperror();
+ }
+ }
+
/* ref == 0, remove from both chains and free the link */
*l_lifc = link->lifclink;
*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
if(p->link != nil)
goto out;
- /* remove from routing table */
- if(isv4(a))
- v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
- else
- v6delroute(f, a, IPallbits, 1);
-
+ /* if null address, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
/* no more links, remove from hash and free */
*l = p->next;
ipselffree(p);
- /* if IPnoaddr, forget */
- if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
- f->self->acceptall = 0;
-
out:
qunlock(f->self);
}
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
- Nstformat= 41,
-};
-
long
ipselftabread(Fs *f, char *cp, ulong offset, int n)
{
@@ -1124,14 +1110,14 @@
m = 0;
off = offset;
- qlock(f->self);
for(i = 0; i < NHASH && m < n; i++){
for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
nifc = 0;
- for(link = p->link; link; link = link->selflink)
+ for(link = p->link; link != nil; link = link->selflink)
nifc++;
routetype(p->type, state);
- m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+ p->a, nifc, state);
if(off > 0){
off -= m;
m = 0;
@@ -1138,30 +1124,15 @@
}
}
}
- qunlock(f->self);
return m;
}
-int
-iptentative(Fs *f, uchar *addr)
-{
- Ipself *p;
-
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
- if(ipcmp(addr, p->a) == 0) {
- return p->link->lifc->tentative;
- }
- }
- return 0;
-}
-
/*
* returns
* 0 - no match
* Runi
* Rbcast
- * Rmcast
+ * Rmulti
*/
int
ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
{
Ipself *p;
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
+ for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
if(ipcmp(addr, p->a) == 0)
- return p->type;
- }
+ return p->type & (Runi|Rbcast|Rmulti);
/* hack to say accept anything */
if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
* return nil.
*/
Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
{
+ uchar gnet[IPaddrlen];
+ int spec, xspec;
Ipifc *ifc, *x;
Iplifc *lifc;
- Conv **cp, **e;
- uchar gnet[IPaddrlen];
- uchar xmask[IPaddrlen];
+ Conv **cp;
- x = nil; memset(xmask, 0, IPaddrlen);
-
- /* find most specific match */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
-
+ x = nil;
+ xspec = 0;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!canrlock(ifc))
+ continue;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(type & Runi){
+ if(ipcmp(remote, lifc->local) == 0){
+ Found:
+ runlock(ifc);
+ return ifc;
+ }
+ } else if(type & (Rbcast|Rmulti)) {
+ if(ipcmp(local, lifc->local) == 0)
+ goto Found;
+ }
maskip(remote, lifc->mask, gnet);
if(ipcmp(gnet, lifc->net) == 0){
- if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+ spec = comprefixlen(remote, lifc->local, IPaddrlen);
+ if(spec > xspec){
x = ifc;
- ipmove(xmask, lifc->mask);
+ xspec = spec;
}
}
}
+ runlock(ifc);
}
- if(x != nil)
- return x;
+ return x;
+}
- /* for now for broadcast and multicast, just use first interface */
- if(type & (Rbcast|Rmulti)){
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
- ifc = (Ipifc*)(*cp)->ptcl;
- if(ifc->lifc != nil)
- return ifc;
- }
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+ uchar ip[IPaddrlen];
+ Conv *c;
+ char *p;
+ long x;
+
+ x = strtol(s, &p, 10);
+ if(p > s && *p == '\0'){
+ if(x < 0)
+ return nil;
+ if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+ return (Ipifc*)c->ptcl;
}
-
+ if(parseip(ip, s) != -1)
+ return findipifc(f, ip, ip, Runi);
return nil;
}
-enum {
- unknownv6,
- multicastv6,
- unspecifiedv6,
- linklocalv6,
- sitelocalv6,
- globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
- if(isv6global(addr))
- return globalv6;
- if(islinklocal(addr))
- return linklocalv6;
- if(isv6mcast(addr))
- return multicastv6;
- if(issitelocal(addr))
- return sitelocalv6;
- return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ * find "best" (global > link local > unspecified)
+ * local address; address must be current.
+ */
static void
findprimaryipv6(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
- Iplifc *lifc;
+ ulong now = NOW/1000;
int atype, atypel;
+ Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
ipmove(local, v6Unspecified);
atype = unspecifiedv6;
- /* find "best" (global > sitelocal > link local > unspecified)
- * local address; address must be current */
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
atypel = v6addrtype(lifc->local);
if(atypel > atype)
- if(v6addrcurr(lifc)) {
+ if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
ipmove(local, lifc->local);
atype = atypel;
- if(atype == globalv6)
+ if(atype == globalv6){
+ runlock(ifc);
return;
+ }
}
}
+ runlock(ifc);
}
}
/*
- * returns first ip address configured
+ * returns first v4 address configured
*/
static void
findprimaryipv4(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
/* find first ifc local address */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- if((lifc = ifc->lifc) != nil){
- ipmove(local, lifc->local);
- return;
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) != 0){
+ ipmove(local, lifc->local);
+ runlock(ifc);
+ return;
+ }
}
+ runlock(ifc);
}
+ ipmove(local, IPnoaddr);
}
/*
- * find the local address 'closest' to the remote system, copy it to
- * local and return the ifc for that address
+ * ipv4local, ipv6local:
+ * return a local address associated with an interface close to remote.
+ * prefixlen is the number of leading bits in the local address that
+ * have to match an interface address to be considered. this is used
+ * by source specific routes to filter on the source address.
+ * return non-zero on success or zero when no address was found.
+ *
+ * for ipv4local, all addresses are 4 byte format.
*/
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
- Ipifc *ifc;
Iplifc *lifc;
- Route *r;
- uchar gate[IPaddrlen];
- uchar gnet[IPaddrlen];
- int version;
- int atype = unspecifiedv6, atypel = unknownv6;
+ int a, b;
- USED(atype);
- USED(atypel);
- qlock(f->ipifc);
- r = v6lookup(f, remote, nil);
- version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-
- if(r != nil){
- ifc = r->ifc;
- if(r->type & Rv4)
- v4tov6(gate, r->v4.gate);
- else {
- ipmove(gate, r->v6.gate);
- ipmove(local, v6Unspecified);
- }
+ b = -1;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+ continue;
- /* find ifc address closest to the gateway to use */
- switch(version) {
- case V4:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0){
- ipmove(local, lifc->local);
- goto out;
- }
- }
- break;
- case V6:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- atypel = v6addrtype(lifc->local);
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0)
- if(atypel > atype)
- if(v6addrcurr(lifc)) {
- ipmove(local, lifc->local);
- atype = atypel;
- if(atype == globalv6)
- break;
- }
- }
- if(atype > unspecifiedv6)
- goto out;
- break;
- default:
- panic("findlocalip: version %d", version);
+ if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+ continue;
+
+ a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+ if(a > b){
+ b = a;
+ memmove(local, lifc->local+IPv4off, IPv4addrlen);
}
}
-
- switch(version){
- case V4:
- findprimaryipv4(f, local);
- break;
- case V6:
- findprimaryipv6(f, local);
- break;
- default:
- panic("findlocalip2: version %d", version);
- }
-
-out:
- qunlock(f->ipifc);
+ return b >= 0;
}
-/*
- * return first v4 address associated with an interface
- */
int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
+ struct {
+ int atype;
+ int deprecated;
+ int comprefixlen;
+ } a, b;
+ int atype;
+ ulong now;
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(isv4(lifc->local)){
- memmove(addr, lifc->local+IPv4off, IPv4addrlen);
- return 1;
- }
+ if(isv4(remote)){
+ memmove(local, v4prefix, IPv4off);
+ if((prefixlen -= IPv4off*8) < 0)
+ prefixlen = 0;
+ return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
}
- return 0;
-}
-/*
- * return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
- Iplifc *lifc;
+ atype = v6addrtype(remote);
+ b.atype = unknownv6;
+ b.deprecated = 1;
+ b.comprefixlen = 0;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local) && !(lifc->tentative)){
- ipmove(addr, lifc->local);
- return 1;
+ now = NOW/1000;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(lifc->tentative)
+ continue;
+
+ if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+ continue;
+
+ a.atype = v6addrtype(lifc->local);
+ a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+ a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+ /* prefer appropriate scope */
+ if(a.atype != b.atype){
+ if(a.atype > b.atype && b.atype < atype ||
+ a.atype < b.atype && b.atype > atype)
+ goto Good;
+ continue;
}
+ /* prefer non-deprecated addresses */
+ if(a.deprecated != b.deprecated){
+ if(b.deprecated)
+ goto Good;
+ continue;
+ }
+ /* prefer longer common prefix */
+ if(a.comprefixlen != b.comprefixlen){
+ if(a.comprefixlen > b.comprefixlen)
+ goto Good;
+ continue;
+ }
+ continue;
+ Good:
+ b = a;
+ ipmove(local, lifc->local);
}
- return 0;
+
+ return b.atype >= atype;
}
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ * find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
{
- Iplifc *lifc;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local)){
- ipmove(addr, lifc->local);
- return SRC_UNI;
- }
+ if(isv4(remote)) {
+ memmove(local, v4prefix, IPv4off);
+ if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+ findprimaryipv4(f, local);
+ } else {
+ if(v6source(f, remote, local) == nil)
+ findprimaryipv6(f, local);
}
- return SRC_UNSPEC;
}
/*
@@ -1444,13 +1396,28 @@
{
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
if(ipcmp(ip, lifc->local) == 0)
return lifc;
+
return nil;
}
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+ uchar net[IPaddrlen];
+ Iplifc *lifc;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0)
+ return lifc;
+ }
+ return nil;
+}
+
+
/*
* See if we're proxying for this address on this interface
*/
@@ -1458,24 +1425,13 @@
ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
{
Route *r;
- uchar net[IPaddrlen];
- Iplifc *lifc;
/* see if this is a direct connected pt to pt address */
- r = v6lookup(f, ip, nil);
- if(r == nil)
+ r = v6lookup(f, ip, ip, nil);
+ if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
return 0;
- if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
- return 0;
- /* see if this is on the right interface */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0)
- return 1;
- }
-
- return 0;
+ return ipremoteonifc(ifc, ip) != nil;
}
/*
@@ -1487,73 +1443,53 @@
if(isv4(ip)){
if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
}
+ else if(ip[0] == 0xff)
+ return V6;
return 0;
}
-int
-ipisbm(uchar *ip)
-{
- if(isv4(ip)){
- if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
- return V4;
- if(ipcmp(ip, IPv4bcast) == 0)
- return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
- }
- return 0;
-}
-
-
/*
- * add a multicast address to an interface, called with c locked
+ * add a multicast address to an interface.
*/
void
ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
{
- Ipifc *ifc;
- Iplifc *lifc;
- Conv **p;
Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ if(isv4(ma) != isv4(ia))
+ error("incompatible multicast/interface ip address");
+
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
return; /* it's already there */
- multi = *l = smalloc(sizeof(*multi));
- ipmove(multi->ma, ma);
- ipmove(multi->ia, ia);
- multi->next = nil;
-
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
- ifc = (Ipifc*)(*p)->ptcl;
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
if(waserror()){
- wunlock(ifc);
+ runlock(ifc);
nexterror();
}
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
- addselfcache(f, ifc, lifc, ma, Rmulti);
- wunlock(ifc);
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ runlock(ifc);
poperror();
}
+
+ multi = smalloc(sizeof(*multi));
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+ *l = multi;
}
/*
- * remove a multicast address from an interface, called with c locked
+ * remove a multicast address from an interface.
*/
void
ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
{
Ipmulti *multi, **l;
Iplifc *lifc;
- Conv **p;
Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
break;
multi = *l;
@@ -1576,161 +1508,101 @@
return; /* we don't have it open */
*l = multi->next;
+ multi->next = nil;
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
-
- ifc = (Ipifc*)(*p)->ptcl;
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
+ if(!waserror()){
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
remselfcache(f, ifc, lifc, ma);
- wunlock(ifc);
- poperror();
+ poperror();
+ }
+ runlock(ifc);
}
-
free(multi);
}
-/*
- * make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
{
- USED(ifc, argv, argc);
- return nil;
+ if(waserror()){
+ print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+ return;
+ }
+ if(ifc->m != nil && ifc->m->areg != nil)
+ (*ifc->m->areg)(f, ifc, lifc, ip);
+ poperror();
}
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
- USED(ifc, argv, argc);
- return nil;
-}
-
static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
{
- Conv **cp, **e;
- Ipifc *nifc;
+ uchar a[IPaddrlen];
Iplifc *lifc;
- Medium *m;
- uchar net[IPaddrlen];
+ Ipifc *nifc;
+ Conv **cp;
- /* register the address on any network that will proxy for us */
- e = &f->ipifc->conv[f->ipifc->nc];
+ /* register the address on any interface that will proxy for the ip */
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc || !canrlock(nifc))
+ continue;
- if(!isv4(ip)) { // V6
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->addmulti == nil) {
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
- ipv62smcast(net, ip);
- addselfcache(f, nifc, lifc, net, Rmulti);
- arpenter(f, V6, ip, nifc->mac, 6, 0);
- //(*m->addmulti)(nifc, net, ip);
- break;
- }
- }
+ if(nifc->m == nil
+ || (lifc = ipremoteonifc(nifc, ip)) == nil
+ || (lifc->type & Rptpt) != 0
+ || waserror()){
runlock(nifc);
+ continue;
}
- return;
- }
- else { // V4
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->areg == nil){
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0){
- (*m->areg)(nifc, ip);
- break;
- }
- }
- runlock(nifc);
+ if((lifc->type & Rv4) == 0){
+ /* add solicited-node multicast addr */
+ ipv62smcast(a, ip);
+ if(add)
+ addselfcache(f, nifc, lifc, a, Rmulti);
+ else
+ remselfcache(f, nifc, lifc, a);
}
+ if(add)
+ ipifcregisteraddr(f, nifc, lifc, ip);
+ runlock(nifc);
+ poperror();
}
}
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
- Route *r;
-
- r = v6lookup(f, v6Unspecified, nil);
- if(r!=nil)
- if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated
- return; // by all other means take
- // precedence over router annc
-
- v6delroute(f, v6Unspecified, v6Unspecified, 1);
- v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
- Ngates = 3,
-};
-
char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
{
- uchar onlink = 1;
- uchar autoflag = 1;
- long validlt = 0xffffffff;
- long preflt = 0xffffffff;
- long origint = NOW / 10^3;
- uchar prefix[IPaddrlen];
- int plen = 64;
- Iplifc *lifc;
- char addr[40], preflen[6];
- char *params[3];
+ int plen = 64;
+ char addr[40], preflen[6];
+ char *params[3];
+ uchar prefix[IPaddrlen];
+ Iplifc lifc;
+ Medium *m;
+ lifc.onlink = 1;
+ lifc.autoflag = 1;
+ lifc.validlt = lifc.preflt = ~0UL;
+ lifc.origint = NOW / 1000;
+
switch(argc) {
case 7:
- preflt = atoi(argv[6]);
+ lifc.preflt = strtoul(argv[6], 0, 10);
/* fall through */
case 6:
- validlt = atoi(argv[5]);
+ lifc.validlt = strtoul(argv[5], 0, 10);
/* fall through */
case 5:
- autoflag = atoi(argv[4]);
+ lifc.autoflag = atoi(argv[4]) != 0;
/* fall through */
case 4:
- onlink = atoi(argv[3]);
+ lifc.onlink = atoi(argv[3]) != 0;
/* fall through */
case 3:
plen = atoi(argv[2]);
+ /* fall through */
case 2:
break;
default:
@@ -1737,25 +1609,16 @@
return Ebadarg;
}
- if((parseip(prefix, argv[1])!=6) ||
- (validlt < preflt) ||
- (plen < 0) || (plen > 64) ||
- (islinklocal(prefix))
- )
+ if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+ plen > 64 || islinklocal(prefix))
return Ebadarg;
- lifc = smalloc(sizeof(Iplifc));
- lifc->onlink = (onlink!=0);
- lifc->autoflag = (autoflag!=0);
- lifc->validlt = validlt;
- lifc->preflt = preflt;
- lifc->origint = origint;
+ /* issue "add" ctl msg for v6 link-local addr and prefix len */
+ m = ifc->m;
+ if(m == nil || m->pref2addr == nil)
+ return Eunbound;
+ (*m->pref2addr)(prefix, ifc->mac); /* mac → v6 link-local addr */
- if(ifc->m->pref2addr!=nil)
- ifc->m->pref2addr(prefix, ifc->mac);
- else
- return Ebadarg;
-
sprint(addr, "%I", prefix);
sprint(preflen, "/%d", plen);
params[0] = "add";
@@ -1762,6 +1625,28 @@
params[1] = addr;
params[2] = preflen;
- return ipifcadd(ifc, params, 3, 0, lifc);
+ return ipifcadd(ifc, params, 3, 0, &lifc);
}
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+ Iplifc *lifc, **l;
+ ulong now;
+
+ if(argc != 1)
+ return Ebadarg;
+
+ wlock(ifc);
+ now = NOW/1000;
+ for(l = &ifc->lifc; (lifc = *l) != nil;) {
+ if((lifc->type & Rv4) == 0)
+ if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+ if(ipifcremlifc(ifc, l) == nil)
+ continue;
+ l = &lifc->next;
+ }
+ wunlock(ifc);
+
+ return nil;
+}
--- a/os/ip/ipmux.c
+++ b/os/ip/ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,30 +9,14 @@
#include "../port/error.h"
#include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
typedef struct Ipmuxrock Ipmuxrock;
typedef struct Ipmux Ipmux;
-typedef struct Ip6hdr Ip6hdr;
enum
{
- IPHDR = 20, /* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
- uchar vcf[4]; /* version, class label, and flow label */
- uchar ploadlen[2]; /* payload length */
- uchar proto; /* next header, i.e. proto */
- uchar ttl; /* hop limit, i.e. ttl */
- uchar src[16]; /* IP source */
- uchar dst[16]; /* IP destination */
-};
-
-
-enum
-{
+ Tver,
Tproto,
Tdata,
Tiph,
@@ -36,28 +23,8 @@
Tdst,
Tsrc,
Tifc,
-
- Cother = 0,
- Cbyte, /* single byte */
- Cmbyte, /* single byte with mask */
- Cshort, /* single short */
- Cmshort, /* single short with mask */
- Clong, /* single long */
- Cmlong, /* single long with mask */
- Cifc,
- Cmifc,
};
-char *ftname[] =
-{
-[Tproto] "proto",
-[Tdata] "data",
-[Tiph] "iph",
-[Tdst] "dst",
-[Tsrc] "src",
-[Tifc] "ifc",
-};
-
/*
* a node in the decision tree
*/
@@ -66,16 +33,12 @@
Ipmux *yes;
Ipmux *no;
uchar type; /* type of field(Txxxx) */
- uchar ctype; /* tupe of comparison(Cxxxx) */
uchar len; /* length in bytes of item to compare */
uchar n; /* number of items val points to */
- short off; /* offset of comparison */
- short eoff; /* end offset of comparison */
- uchar skiphdr; /* should offset start after ipheader */
+ int off; /* offset of comparison */
uchar *val;
uchar *mask;
uchar *e; /* val+n*len*/
-
int ref; /* so we can garbage collect */
Conv *conv;
};
@@ -90,6 +53,7 @@
static int ipmuxsprint(Ipmux*, int, char*, int);
static void ipmuxkick(void *x);
+static void ipmuxfree(Ipmux *f);
static char*
skipwhite(char *p)
@@ -122,27 +86,33 @@
Ipmux *f;
p = skipwhite(p);
- if(strncmp(p, "dst", 3) == 0){
+ if(strncmp(p, "ver", 3) == 0){
+ type = Tver;
+ off = 0;
+ len = 1;
+ p += 3;
+ }
+ else if(strncmp(p, "dst", 3) == 0){
type = Tdst;
- off = offsetof(Ip4hdr, dst[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, dst[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "src", 3) == 0){
type = Tsrc;
- off = offsetof(Ip4hdr, src[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, src[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "ifc", 3) == 0){
type = Tifc;
- off = -IPv4addrlen;
- len = IPv4addrlen;
+ off = -IPaddrlen;
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "proto", 5) == 0){
type = Tproto;
- off = offsetof(Ip4hdr, proto);
+ off = offsetof(Ip6hdr, proto);
len = 1;
p += 5;
}
@@ -160,7 +130,7 @@
return nil;
p++;
off = strtoul(p, &p, 0);
- if(off < 0 || off > (64-IPHDR))
+ if(off < 0)
return nil;
p = skipwhite(p);
if(*p != ':')
@@ -189,11 +159,6 @@
f->mask = nil;
f->n = 1;
f->ref = 1;
- if(type == Tdata)
- f->skiphdr = 1;
- else
- f->skiphdr = 0;
-
return f;
}
@@ -229,7 +194,7 @@
static Ipmux*
parsemux(char *p)
{
- int n, nomask;
+ int n;
Ipmux *f;
char *val;
char *mask;
@@ -247,7 +212,7 @@
goto parseerror;
/* parse mask */
- mask = follows(val, '&');
+ mask = follows(p, '&');
if(mask != nil){
switch(f->type){
case Tsrc:
@@ -254,7 +219,7 @@
case Tdst:
case Tifc:
f->mask = smalloc(f->len);
- v4parseip(f->mask, mask);
+ parseipmask(f->mask, mask, 0);
break;
case Tdata:
case Tiph:
@@ -264,15 +229,13 @@
default:
goto parseerror;
}
- nomask = 0;
- } else {
- nomask = 1;
+ } else if(f->type == Tver){
f->mask = smalloc(f->len);
- memset(f->mask, 0xff, f->len);
+ f->mask[0] = 0xF0;
}
/* parse vals */
- f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ f->n = getfields(val, vals, nelem(vals), 1, "|");
if(f->n == 0)
goto parseerror;
f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
v = f->val;
for(n = 0; n < f->n; n++){
switch(f->type){
+ case Tver:
+ if(f->n != 1)
+ goto parseerror;
+ if(strcmp(vals[n], "6") == 0)
+ *v = IP_VER6;
+ else if(strcmp(vals[n], "4") == 0)
+ *v = IP_VER4;
+ else
+ goto parseerror;
+ break;
case Tsrc:
case Tdst:
case Tifc:
- v4parseip(v, vals[n]);
+ if(parseip(v, vals[n]) == -1)
+ goto parseerror;
break;
case Tproto:
case Tdata:
@@ -292,34 +266,11 @@
}
v += f->len;
}
-
- f->eoff = f->off + f->len;
f->e = f->val + f->n*f->len;
- f->ctype = Cother;
- if(f->n == 1){
- switch(f->len){
- case 1:
- f->ctype = nomask ? Cbyte : Cmbyte;
- break;
- case 2:
- f->ctype = nomask ? Cshort : Cmshort;
- break;
- case 4:
- if(f->type == Tifc)
- f->ctype = nomask ? Cifc : Cmifc;
- else
- f->ctype = nomask ? Clong : Cmlong;
- break;
- }
- }
return f;
parseerror:
- if(f->mask)
- free(f->mask);
- if(f->val)
- free(f->val);
- free(f);
+ ipmuxfree(f);
return nil;
}
@@ -342,8 +293,7 @@
return n;
/* compare offsets, call earlier ones more specific */
- n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) -
- (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+ n = a->off - b->off;
if(n != 0)
return n;
@@ -413,6 +363,10 @@
*nf = *f;
nf->no = ipmuxcopy(f->no);
nf->yes = ipmuxcopy(f->yes);
+ if(f->mask != nil){
+ nf->mask = smalloc(f->len);
+ memmove(nf->mask, f->mask, f->len);
+ }
nf->val = smalloc(f->n*f->len);
nf->e = nf->val + f->len*f->n;
memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
static void
ipmuxfree(Ipmux *f)
{
- if(f->val != nil)
- free(f->val);
+ if(f == nil)
+ return;
+ free(f->val);
+ free(f->mask);
free(f);
}
@@ -432,10 +388,8 @@
{
if(f == nil)
return;
- if(f->no != nil)
- ipmuxfree(f->no);
- if(f->yes != nil)
- ipmuxfree(f->yes);
+ ipmuxfree(f->no);
+ ipmuxfree(f->yes);
ipmuxfree(f);
}
@@ -510,6 +464,8 @@
return ipmuxremove(&ft->no, f);
}
+ ipmuxremove(&ft->no, f->no);
+
/* we found a match */
if(--(ft->ref) == 0){
/*
@@ -531,8 +487,55 @@
}
/*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+ int i, n;
+
+ if(f == nil)
+ return nil;
+
+ switch(f->type){
+ case Tproto:
+ f->off = offsetof(Ip4hdr, proto);
+ break;
+ case Tdst:
+ f->off = offsetof(Ip4hdr, dst[0]);
+ if(0){
+ case Tsrc:
+ f->off = offsetof(Ip4hdr, src[0]);
+ }
+ if(f->len != IPaddrlen)
+ break;
+ n = 0;
+ for(i = 0; i < f->n; i++){
+ if(isv4(f->val + i*IPaddrlen)){
+ memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+ n++;
+ }
+ }
+ if(n == 0){
+ ipmuxtreefree(f);
+ return nil;
+ }
+ f->n = n;
+ f->len = IPv4addrlen;
+ if(f->mask != nil)
+ memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+ }
+ f->e = f->val + f->n*f->len;
+
+ f->yes = ipmuxconv4(f->yes);
+ f->no = ipmuxconv4(f->no);
+
+ return f;
+}
+
+/*
* connection request is a semi separated list of filters
- * e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ * e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
*
* there's no protection against overlapping specs.
*/
@@ -568,6 +571,18 @@
return Ebadarg;
mux->conv = c;
+ if(chain->type != Tver) {
+ char ver6[] = "ver=6";
+ mux = parsemux(ver6);
+ mux->yes = chain;
+ mux->no = ipmuxcopy(chain);
+ chain = mux;
+ }
+ if(*chain->val == IP_VER4)
+ chain->yes = ipmuxconv4(chain->yes);
+ else
+ chain->no = ipmuxconv4(chain->no);
+
/* save a copy of the chain so we can later remove it */
mux = ipmuxcopy(chain);
r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
Block *bp;
bp = qget(c->wq);
- if(bp == nil)
- return;
- else {
+ if(bp != nil) {
Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
- if((ih4->vihl)&0xF0 != 0x60)
+
+ if((ih4->vihl & 0xF0) != IP_VER6)
ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
- else {
- Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
- ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
- }
+ else
+ ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
}
}
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+ int i;
+
+ if(m == nil)
+ return memcmp(v, c, n) != 0;
+
+ for(i = 0; i < n; i++)
+ if((v[i] & m[i]) != c[i])
+ return 1;
+ return 0;
+}
+
static void
ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
{
- int len, hl;
Fs *f = p->f;
- uchar *m, *h, *v, *e, *ve, *hp;
Conv *c;
+ Iplifc *lifc;
Ipmux *mux;
- Ip4hdr *ip;
+ uchar *v;
+ Ip4hdr *ip4;
Ip6hdr *ip6;
+ int off, hl;
- ip = (Ip4hdr*)bp->rp;
- hl = (ip->vihl&0x0F)<<2;
+ ip4 = (Ip4hdr*)bp->rp;
+ if((ip4->vihl & 0xF0) == IP_VER4) {
+ hl = (ip4->vihl&0x0F)<<2;
+ ip6 = nil;
+ } else {
+ hl = IP6HDR;
+ ip6 = (Ip6hdr*)ip4;
+ }
if(p->priv == nil)
goto nomatch;
- h = bp->rp;
- len = BLEN(bp);
+ c = nil;
+ lifc = nil;
- /* run the v4 filter */
+ /* run the filter */
rlock(f);
- c = nil;
mux = f->ipmux->priv;
while(mux != nil){
- if(mux->eoff > len){
- mux = mux->no;
- continue;
- }
- hp = h + mux->off + ((int)mux->skiphdr)*hl;
- switch(mux->ctype){
- case Cbyte:
- if(*mux->val == *hp)
- goto yes;
+ switch(mux->type){
+ case Tifc:
+ if(mux->len != IPaddrlen)
+ goto no;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+ for(v = mux->val; v < mux->e; v += IPaddrlen)
+ if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+ goto yes;
+ goto no;
+ case Tdata:
+ off = hl;
break;
- case Cmbyte:
- if((*hp & *mux->mask) == *mux->val)
- goto yes;
- break;
- case Cshort:
- if(*((ushort*)mux->val) == *(ushort*)hp)
- goto yes;
- break;
- case Cmshort:
- if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
- goto yes;
- break;
- case Clong:
- if(*((ulong*)mux->val) == *(ulong*)hp)
- goto yes;
- break;
- case Cmlong:
- if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
- case Cifc:
- if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
- goto yes;
- break;
- case Cmifc:
- if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
default:
- v = mux->val;
- for(e = mux->e; v < e; v = ve){
- m = mux->mask;
- hp = h + mux->off;
- for(ve = v + mux->len; v < ve; v++){
- if((*hp++ & *m++) != *v)
- break;
- }
- if(v == ve)
- goto yes;
- }
+ off = 0;
+ break;
}
+ off += mux->off;
+ if(off < 0 || off + mux->len > BLEN(bp))
+ goto no;
+ for(v = mux->val; v < mux->e; v += mux->len)
+ if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+ goto yes;
+no:
mux = mux->no;
continue;
yes:
@@ -743,28 +747,24 @@
if(c != nil){
/* tack on interface address */
bp = padblock(bp, IPaddrlen);
- ipmove(bp->rp, ifc->lifc->local);
- bp = concatblock(bp);
- if(bp != nil)
- if(qpass(c->rq, bp) < 0)
- print("Q");
+ if(lifc == nil)
+ lifc = ifc->lifc;
+ ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+ qpass(c->rq, concatblock(bp));
return;
}
nomatch:
/* doesn't match any filter, hand it to the specific protocol handler */
- ip = (Ip4hdr*)bp->rp;
- if((ip->vihl&0xF0)==0x40) {
- p = f->t2p[ip->proto];
- } else {
- ip6 = (Ip6hdr*)bp->rp;
+ if(ip6 != nil)
p = f->t2p[ip6->proto];
- }
- if(p && p->rcv)
- (*p->rcv)(p, ifc, bp);
else
- freeblist(bp);
- return;
+ p = f->t2p[ip4->proto];
+ if(p != nil && p->rcv != nil){
+ (*p->rcv)(p, ifc, bp);
+ return;
+ }
+ freeblist(bp);
}
static int
@@ -780,11 +780,14 @@
n += snprint(buf+n, len-n, "\n");
return n;
}
- n += snprint(buf+n, len-n, "h[%d:%d]&",
- mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])),
- mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
- for(i = 0; i < mux->len; i++)
- n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "%s[%d:%d]",
+ mux->type == Tdata ? "data": "iph",
+ mux->off, mux->off+mux->len-1);
+ if(mux->mask != nil){
+ n += snprint(buf+n, len-n, "&");
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ }
n += snprint(buf+n, len-n, "=");
v = mux->val;
for(j = 0; j < mux->n; j++){
--- a/os/ip/iproute.c
+++ b/os/ip/iproute.c
@@ -12,10 +12,10 @@
static void calcd(Route*);
/* these are used for all instances of IP */
-Route* v4freelist;
-Route* v6freelist;
-RWlock routelock;
-ulong v4routegeneration, v6routegeneration;
+static Route* v4freelist;
+static Route* v6freelist;
+static RWlock routelock;
+static ulong v4routegeneration, v6routegeneration;
static void
freeroute(Route *r)
@@ -22,6 +22,7 @@
{
Route **l;
+ r->ref = 0;
r->left = nil;
r->right = nil;
if(r->type & Rv4)
@@ -35,9 +36,8 @@
static Route*
allocroute(int type)
{
- Route *r;
+ Route *r, **l;
int n;
- Route **l;
if(type & Rv4){
n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
return;
l = allocroute(r->type);
+ l->left = r;
l->mid = *q;
*q = l;
- l->left = r;
}
/*
@@ -99,11 +99,11 @@
*/
enum
{
- Rpreceeds,
- Rfollows,
- Requals,
- Rcontains,
- Rcontained,
+ Rpreceeds, /* a left of b */
+ Rfollows, /* a right of b */
+ Requals, /* a equals b */
+ Rcontains, /* a contains b */
+ Roverlaps, /* a overlaps b */
};
static int
@@ -112,44 +112,88 @@
if(a->type & Rv4){
if(a->v4.endaddress < b->v4.address)
return Rpreceeds;
-
if(a->v4.address > b->v4.endaddress)
return Rfollows;
-
if(a->v4.address <= b->v4.address
&& a->v4.endaddress >= b->v4.endaddress){
if(a->v4.address == b->v4.address
- && a->v4.endaddress == b->v4.endaddress)
- return Requals;
+ && a->v4.endaddress == b->v4.endaddress){
+ if(a->v4.source <= b->v4.source
+ && a->v4.endsource >= b->v4.endsource){
+ if(a->v4.source == b->v4.source
+ && a->v4.endsource == b->v4.endsource)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
- return Rcontained;
+ return Roverlaps;
}
if(lcmp(a->v6.endaddress, b->v6.address) < 0)
return Rpreceeds;
-
if(lcmp(a->v6.address, b->v6.endaddress) > 0)
return Rfollows;
-
if(lcmp(a->v6.address, b->v6.address) <= 0
&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
if(lcmp(a->v6.address, b->v6.address) == 0
- && lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
- return Requals;
+ && lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+ if(lcmp(a->v6.source, b->v6.source) <= 0
+ && lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+ if(lcmp(a->v6.source, b->v6.source) == 0
+ && lcmp(a->v6.endsource, b->v6.endsource) == 0)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
+ return Roverlaps;
+}
- return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+ if(a == b)
+ return 1;
+
+ if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+ return 0;
+
+ if(a->type & Rv4){
+ if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+ && memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+ return 0;
+ } else {
+ if(ipcmp(a->v6.gate, IPnoaddr) != 0
+ && ipcmp(a->v6.gate, b->v6.gate) != 0)
+ return 0;
+ }
+
+ if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+ return 0;
+
+ if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+ return 0;
+
+ return 1;
}
static void
copygate(Route *old, Route *new)
{
+ old->type = new->type;
+ old->ifc = new->ifc;
+ old->ifcid = new->ifcid;
if(new->type & Rv4)
memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
else
- memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+ ipmove(old->v6.gate, new->v6.gate);
+ strncpy(old->tag, new->tag, sizeof(new->tag));
}
/*
@@ -162,12 +206,12 @@
l = p->left;
r = p->right;
- p->left = 0;
- p->right = 0;
+ p->left = nil;
+ p->right = nil;
addnode(f, root, p);
- if(l)
+ if(l != nil)
walkadd(f, root, l);
- if(r)
+ if(r != nil)
walkadd(f, root, r);
}
@@ -180,16 +224,16 @@
Route *q;
int d;
- if(p) {
+ if(p != nil) {
d = 0;
q = p->left;
- if(q)
+ if(q != nil)
d = q->depth;
q = p->right;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
q = p->mid;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
p->depth = d+1;
}
@@ -210,8 +254,8 @@
* rotate tree node
*/
p = *cur;
- dl = 0; if(l = p->left) dl = l->depth;
- dr = 0; if(r = p->right) dr = r->depth;
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
if(dl > dr+1) {
p->left = l->right;
@@ -239,7 +283,7 @@
Route *p;
p = *cur;
- if(p == 0) {
+ if(p == nil) {
*cur = new;
new->depth = 1;
return;
@@ -269,15 +313,13 @@
* supercede the old entry if the old one isn't
* a local interface.
*/
- if((p->type & Rifc) == 0){
- p->type = new->type;
- p->ifcid = -1;
+ if((p->type & Rifc) == 0)
copygate(p, new);
- } else if(new->type & Rifc)
+ else if(new->type & Rifc)
p->ref++;
freeroute(new);
break;
- case Rcontained:
+ case Roverlaps:
addnode(f, &p->mid, new);
break;
}
@@ -285,241 +327,316 @@
balancetree(cur);
}
-#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ * find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
{
Route *p;
- ulong sa;
- ulong m;
- ulong ea;
- int h, eh;
- m = nhgetl(mask);
- sa = nhgetl(a) & m;
- ea = sa | ~m;
+ for(;;){
+ p = *cur;
+ if(p == nil)
+ return nil;
+ switch(rangecompare(r, p)){
+ case Rcontains:
+ return nil;
+ case Rpreceeds:
+ cur = &p->left;
+ break;
+ case Rfollows:
+ cur = &p->right;
+ break;
+ case Roverlaps:
+ cur = &p->mid;
+ break;
+ case Requals:
+ if((p->type & Rifc) == 0 && !matchroute(r, p))
+ return nil;
+ return cur;
+ }
+ }
+}
- eh = V4H(ea);
- for(h=V4H(sa); h<=eh; h++) {
- p = allocroute(Rv4 | type);
- p->v4.address = sa;
- p->v4.endaddress = ea;
- memmove(p->v4.gate, gate, sizeof(p->v4.gate));
- memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+ Route *x;
- wlock(&routelock);
- addnode(f, &f->v4root[h], p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- wunlock(&routelock);
+ if(r == nil)
+ return nil;
+
+ if((x = looknodetag(r->mid, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->left, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->right, tag)) != nil)
+ return x;
+
+ if((r->type & Rifc) == 0){
+ if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+ return r;
}
- v4routegeneration++;
- ipifcaddroute(f, Rv4, a, mask, gate, type);
+ return nil;
}
-#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
+#define V6H(a) (((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
{
- Route *p;
- ulong sa[IPllen], ea[IPllen];
- ulong x, y;
- int h, eh;
+ Route **h, **e, *p;
- /*
- if(ISDFLT(a, mask, tag))
- f->v6p->cdrouter = -1;
- */
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+ for(; h <= e; h++) {
+ p = allocroute(r->type);
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- sa[h] = x & y;
- ea[h] = x | ~y;
- }
+ p->ifc = r->ifc;
+ p->ifcid = r->ifcid;
- eh = V6H(ea);
- for(h = V6H(sa); h <= eh; h++) {
- p = allocroute(type);
- memmove(p->v6.address, sa, IPaddrlen);
- memmove(p->v6.endaddress, ea, IPaddrlen);
- memmove(p->v6.gate, gate, IPaddrlen);
- memmove(p->tag, tag, sizeof(p->tag));
+ if(r->type & Rv4)
+ memmove(&p->v4, &r->v4, sizeof(r->v4));
+ else
+ memmove(&p->v6, &r->v6, sizeof(r->v6));
- wlock(&routelock);
- addnode(f, &f->v6root[h], p);
- while(p = f->queue) {
+ memmove(p->tag, r->tag, sizeof(r->tag));
+
+ addnode(f, h, p);
+ while((p = f->queue) != nil) {
f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
+ walkadd(f, h, p->left);
freeroute(p);
}
- wunlock(&routelock);
}
- v6routegeneration++;
- ipifcaddroute(f, 0, a, mask, gate, type);
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
{
- Route *p;
+ Route **h, **e, **l, *p;
- for(;;){
- p = *cur;
- if(p == 0)
- return 0;
-
- switch(rangecompare(r, p)){
- case Rcontains:
- return 0;
- case Rpreceeds:
- cur = &p->left;
- break;
- case Rfollows:
- cur = &p->right;
- break;
- case Rcontained:
- cur = &p->mid;
- break;
- case Requals:
- return cur;
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+
+ for(; h <= e; h++) {
+ if((l = looknode(h, r)) == nil)
+ continue;
+ p = *l;
+ if(--(p->ref) != 0)
+ continue;
+ *l = nil;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, h, p->left);
+ freeroute(p);
}
}
+
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong m;
+ ulong x, y;
+ Route r;
+ int h;
- m = nhgetl(mask);
- rt.v4.address = nhgetl(a) & m;
- rt.v4.endaddress = rt.v4.address | ~m;
- rt.type = Rv4;
+ memset(&r, 0, sizeof(r));
- eh = V4H(rt.v4.endaddress);
- for(h=V4H(rt.v4.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v4root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- }
+ r.type = type;
+
+ if(type & Rv4){
+ x = nhgetl(a+IPv4off);
+ y = nhgetl(mask+IPv4off);
+ r.v4.address = x & y;
+ r.v4.endaddress = x | ~y;
+
+ x = nhgetl(s+IPv4off);
+ y = nhgetl(smask+IPv4off);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v4.source = x & y;
+ r.v4.endsource = x | ~y;
+
+ memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+ } else {
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ r.v6.address[h] = x & y;
+ r.v6.endaddress[h] = x | ~y;
+
+ x = nhgetl(s+4*h);
+ y = nhgetl(smask+4*h);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v6.source[h] = x & y;
+ r.v6.endsource[h] = x | ~y;
}
- if(dolock)
- wunlock(&routelock);
+
+ memmove(r.v6.gate, gate, IPaddrlen);
}
- v4routegeneration++;
- ipifcremroute(f, Rv4, a, mask);
+ if(ifc != nil){
+ r.ifc = ifc;
+ r.ifcid = ifc->ifcid;
+ }
+
+ if(tag != nil)
+ strncpy(r.tag, tag, sizeof(r.tag));
+
+ return r;
}
void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong x, y;
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routeadd(f, &r);
+ wunlock(&routelock);
+}
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- rt.v6.address[h] = x & y;
- rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routerem(f, &r);
+ wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+ uchar local[IPaddrlen], gate[IPaddrlen];
+ Ipifc *ifc;
+ int i;
+
+ ifc = r->ifc;
+ if(ifc != nil && ifc->ifcid == r->ifcid)
+ return ifc;
+
+ if(r->type & Rsrc) {
+ if(r->type & Rv4) {
+ hnputl(local+IPv4off, r->v4.source);
+ memmove(local, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(local+4*i, r->v6.source[i]);
+ }
+ } else {
+ ipmove(local, IPnoaddr);
}
- rt.type = 0;
- eh = V6H(rt.v6.endaddress);
- for(h=V6H(rt.v6.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v6root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
- freeroute(p);
- }
- }
+ if(r->type & Rifc) {
+ if(r->type & Rv4) {
+ hnputl(gate+IPv4off, r->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(gate+4*i, r->v6.address[i]);
}
- if(dolock)
- wunlock(&routelock);
+ } else {
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else
+ ipmove(gate, r->v6.gate);
}
- v6routegeneration++;
- ipifcremroute(f, 0, a, mask);
+ if((ifc = findipifc(f, local, gate, r->type)) == nil)
+ return nil;
+
+ r->ifc = ifc;
+ r->ifcid = ifc->ifcid;
+ return ifc;
}
+/*
+ * v4lookup, v6lookup:
+ * lookup a route to destination address a from source address s
+ * and return the route. returns nil if no route was found.
+ * an optional Routehint can be passed in rh to cache the lookup.
+ *
+ * for v4lookup, addresses are in 4 byte format.
+ */
Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
+ ulong la, ls;
Route *p, *q;
- ulong la;
- uchar gate[IPaddrlen];
Ipifc *ifc;
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v4routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
la = nhgetl(a);
+ ls = nhgetl(s);
q = nil;
- for(p=f->v4root[V4H(la)]; p;)
- if(la >= p->v4.address) {
- if(la <= p->v4.endaddress) {
- q = p;
- p = p->mid;
- } else
- p = p->right;
- } else
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
p = p->left;
-
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- hnputl(gate+IPv4off, q->v4.address);
- memmove(gate, v4prefix, IPv4off);
- } else
- v4tov6(gate, q->v4.gate);
- ifc = findipifc(f, gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ if(p->type & Rsrc){
+ if(ls < p->v4.source){
+ p = p->mid;
+ continue;
+ }
+ if(ls > p->v4.endsource){
+ p = p->mid;
+ continue;
+ }
+ }
+ q = p;
+ p = p->mid;
}
- if(c != nil){
- c->r = q;
- c->rgen = v4routegeneration;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v4routegeneration;
}
return q;
@@ -526,29 +643,35 @@
}
Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
- Route *p, *q;
- ulong la[IPllen];
- int h;
+ ulong la[IPllen], ls[IPllen];
ulong x, y;
- uchar gate[IPaddrlen];
+ Route *p, *q;
Ipifc *ifc;
+ int h;
- if(memcmp(a, v4prefix, IPv4off) == 0){
- q = v4lookup(f, a+IPv4off, c);
- if(q != nil)
- return q;
+ if(isv4(s)){
+ if(isv4(a))
+ return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+ return nil;
}
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v6routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
- for(h = 0; h < IPllen; h++)
+ for(h = 0; h < IPllen; h++){
la[h] = nhgetl(a+4*h);
+ ls[h] = nhgetl(s+4*h);
+ }
- q = 0;
- for(p=f->v6root[V6H(la)]; p;){
+ q = nil;
+ for(p = f->v6root[V6H(la)]; p != nil;){
for(h = 0; h < IPllen; h++){
x = la[h];
y = p->v6.address[h];
@@ -571,42 +694,202 @@
}
break;
}
+ if(p->type & Rsrc){
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.source[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.endsource[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ }
q = p;
p = p->mid;
next: ;
}
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- for(h = 0; h < IPllen; h++)
- hnputl(gate+4*h, q->v6.address[h]);
- ifc = findipifc(f, gate, q->type);
- } else
- ifc = findipifc(f, q->v6.gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v6routegeneration;
}
- if(c != nil){
- c->r = q;
- c->rgen = v6routegeneration;
- }
return q;
}
+/*
+ * v4source, v6source:
+ * look up a route to destination address a and also find
+ * a suitable source address s on the outgoing interface.
+ * return the route on success or nil when no route
+ * was found.
+ *
+ * for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPv4addrlen]; /* candidate source address for the node being examined */
+ int splen; /* prefix length handed to ipv4local(); stays 0 unless the node has Rsrc */
+ ulong x, la; /* la: destination a as a ulong for the range comparisons below */
+ Route *p, *q; /* p: walk cursor; q: last accepted node, which is the return value */
+ Ipifc *ifc;
+ /* walk the tree rooted at f->v4root[V4H(la)] while holding routelock for reading */
+ q = nil;
+ la = nhgetl(a);
+ rlock(&routelock);
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){ /* destination lies below this node's range */
+ p = p->left;
+ continue;
+ }
+ if(la > p->v4.endaddress){ /* destination lies above this node's range */
+ p = p->right;
+ continue;
+ }
+ splen = 0; /* from here on: address <= la <= endaddress */
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1) /* count the leading bits on which source and endsource agree */
+ splen++;
+ hnputl(src, p->v4.source); /* NOTE(review): src is only filled here; without Rsrc it reaches ipv4local() unset with splen 0 - presumably ipv4local() then only writes it; confirm */
+ }
+ if((ifc = routefindipifc(p, f)) == nil /* skip the node when routefindipifc() yields nil or ipv4local() yields 0 */
+ || !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){ /* last argument: the gate when Rv4 is the only one of these four bits set, else a */
+ p = p->mid;
+ continue;
+ }
+ memmove(s, src, IPv4addrlen); /* s is written only for accepted nodes */
+ q = p; /* a node accepted later along mid replaces this one */
+ p = p->mid;
+ }
+ runlock(&routelock);
+ return q; /* nil when no node was accepted; s is then left untouched */
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPaddrlen]; /* candidate source address for the node being examined */
+ int splen, h; /* splen: prefix length handed to ipv6local(); h: word index */
+ ulong x, y, la[IPllen]; /* la: destination a as IPllen ulongs, compared word by word */
+ Route *p, *q; /* p: walk cursor; q: last accepted node, which is the return value */
+ Ipifc *ifc;
+ /* walk the tree rooted at f->v6root[V6H(la)] while holding routelock for reading */
+ q = nil;
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+ rlock(&routelock);
+ for(p = f->v6root[V6H(la)]; p != nil;){
+ for(h = 0; h < IPllen; h++){ /* la < address (first differing word decides): go left */
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){ /* la > endaddress (first differing word decides): go right */
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ splen = 0; /* from here on: address <= la <= endaddress */
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(h = 0; h < IPllen; h++){
+ hnputl(src+4*h, p->v6.source[h]); /* NOTE(review): the break below leaves later words of src uncopied; presumably ipv6local() only reads the first splen bits - confirm */
+ if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){ /* first word in which source and endsource differ */
+ for(; x & 0x80000000UL; x <<= 1) /* add that word's leading agreeing bits */
+ splen++;
+ break;
+ }
+ splen += 32; /* source and endsource agree on this whole word */
+ }
+ }
+ if((ifc = routefindipifc(p, f)) == nil /* skip the node when routefindipifc() yields nil or ipv6local() yields 0 */
+ || !ipv6local(ifc, src, splen, a)){ /* the destination a is always passed here, never the gate */
+ p = p->mid;
+ continue;
+ }
+ ipmove(s, src); /* s is written only for accepted nodes */
+ q = p; /* a node accepted later along mid replaces this one */
+ p = p->mid;
+next: ; /* target of the left/right moves above, which skip the source checks */
+ }
+ runlock(&routelock);
+ return q; /* nil when no node was accepted; s is then left untouched */
+}
+
+static int
+parseroutetype(char *p)
+{ /* parse a NUL-terminated type string ('4' or '6', then any of i u b m p) into R* flag bits; returns -1 on a malformed string */
+ int type = 0; /* accumulated flag bits */
+ switch(*p++){ /* first character selects the address family */
+ default: return -1;
+ case '4': type |= Rv4; /* intentional fallthrough into the '6' case */
+ case '6': break; /* '6' sets no bit */
+ }
+ for(;;) switch(*p++){ /* each break below leaves the switch only; the for loop then reads the next character */
+ default:
+ return -1; /* unknown flag letter */
+ case 'i':
+ if(((type ^= Rifc) & Rifc) != Rifc) return -1; /* XOR toggles the bit; a repeated letter clears it and is rejected */
+ break;
+ case 'u':
+ if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1; /* rejected if repeated or combined with 'b' or 'm' */
+ break;
+ case 'b':
+ if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1; /* rejected if repeated or combined with 'u' or 'm' */
+ break;
+ case 'm':
+ if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1; /* rejected if repeated or combined with 'u' or 'b' */
+ break;
+ case 'p':
+ if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1; /* rejected if repeated */
+ break;
+ case '\0':
+ return type; /* end of string: hand back the accumulated bits */
+ }
+}
+
void
-routetype(int type, char *p)
+routetype(int type, char p[8])
{
- memset(p, ' ', 4);
- p[4] = 0;
if(type & Rv4)
*p++ = '4';
else
*p++ = '6';
+
if(type & Rifc)
*p++ = 'i';
+
if(type & Runi)
*p++ = 'u';
else if(type & Rbcast)
@@ -613,14 +896,14 @@
*p++ = 'b';
else if(type & Rmulti)
*p++ = 'm';
+
if(type & Rptpt)
- *p = 'p';
+ *p++ = 'p';
+ *p = 0;
}
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
{
int i;
@@ -627,8 +910,16 @@
if(r->type & Rv4){
memmove(addr, v4prefix, IPv4off);
hnputl(addr+IPv4off, r->v4.address);
+
memset(mask, 0xff, IPv4off);
hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+ memmove(src, v4prefix, IPv4off);
+ hnputl(src+IPv4off, r->v4.source);
+
+ memset(smask, 0xff, IPv4off);
+ hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
memmove(gate, v4prefix, IPv4off);
memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
} else {
@@ -635,162 +926,186 @@
for(i = 0; i < IPllen; i++){
hnputl(addr + 4*i, r->v6.address[i]);
hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ hnputl(src + 4*i, r->v6.source[i]);
+ hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
}
memmove(gate, r->v6.gate, IPaddrlen);
}
+}
- routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+ uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+ char type[8], ifbuf[4], *iname;
- if(r->ifc)
- *nifc = r->ifc->conv->x;
+ convroute(r, addr, mask, src, smask, gate);
+ routetype(r->type, type);
+ if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+ snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
else
- *nifc = -1;
+ iname = "-";
+ return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+ addr, mask, gate, type, r->tag, iname, src, smask);
}
-/*
- * this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
{
- int nifc, n;
- char t[5], *iname, ifbuf[5];
- uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
- char *p;
+ int o;
+ int h;
+ char* p;
+ char* e;
+};
- convroute(r, addr, mask, gate, t, &nifc);
- iname = "-";
- if(nifc != -1) {
- iname = ifbuf;
- snprint(ifbuf, sizeof ifbuf, "%d", nifc);
- }
- p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+ int n = seprintroute(rw->p, rw->e, r) - rw->p;
if(rw->o < 0){
- n = p - rw->p;
if(n > -rw->o){
- memmove(rw->p, rw->p-rw->o, n+rw->o);
- rw->p = p + rw->o;
+ memmove(rw->p, rw->p - rw->o, n + rw->o);
+ rw->p += n + rw->o;
}
rw->o += n;
} else
- rw->p = p;
+ rw->p += n;
+ return rw->p < rw->e;
}
-/*
- * recurse descending tree, applying the function in Routewalk
- */
static int
rr(Route *r, Routewalk *rw)
{
int h;
- if(rw->e <= rw->p)
- return 0;
if(r == nil)
return 1;
-
if(rr(r->left, rw) == 0)
return 0;
-
if(r->type & Rv4)
h = V4H(r->v4.address);
else
h = V6H(r->v6.address);
-
- if(h == rw->h)
- rw->walk(r, rw);
-
+ if(h == rw->h){
+ if(rr1(rw, r) == 0)
+ return 0;
+ }
if(rr(r->mid, rw) == 0)
return 0;
-
return rr(r->right, rw);
}
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
{
+ Routewalk rw[1];
+
+ rw->p = p;
+ rw->e = p+n;
+ rw->o = -offset;
+ if(rw->o > 0)
+ return 0;
+
rlock(&routelock);
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
if(rr(f->v4root[rw->h], rw) == 0)
break;
}
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
if(rr(f->v6root[rw->h], rw) == 0)
break;
}
runlock(&routelock);
-}
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
- Routewalk rw;
-
- rw.p = p;
- rw.e = p+n;
- rw.o = -offset;
- rw.walk = sprintroute;
-
- ipwalkroutes(f, &rw);
-
- return rw.p - p;
+ return rw->p - p;
}
/*
- * this code is not in routeflush to reduce stack size
+ * 4 add addr mask gate
+ * 5 add addr mask gate ifc
+ * 6 add addr mask gate src smask
+ * 7 add addr mask gate ifc src smask
+ * 8 add addr mask gate tag ifc src smask
+ * 9 add addr mask gate type tag ifc src smask
+ * 3 remove addr mask
+ * 4 remove addr mask gate
+ * 5 remove addr mask src smask
+ * 6 remove addr mask gate src smask
+ * 7 remove addr mask gate ifc src smask
+ * 8 remove addr mask gate tag ifc src smask
+ * 9 remove addr mask gate type tag ifc src smask
*/
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
{
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
+ uchar addr[IPaddrlen], mask[IPaddrlen];
+ uchar src[IPaddrlen], smask[IPaddrlen];
uchar gate[IPaddrlen];
- char t[5];
- int nifc;
+ Ipifc *ifc;
+ char *tag;
+ int type;
- convroute(r, addr, mask, gate, t, &nifc);
- if(r->type & Rv4)
- v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
- else
- v6delroute(f, addr, mask, dolock);
-}
+ type = 0;
+ tag = nil;
+ ifc = nil;
+ ipmove(gate, IPnoaddr);
+ ipmove(src, IPnoaddr);
+ ipmove(smask, IPnoaddr);
-/*
- * recurse until one route is deleted
- * returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
- if(r == nil)
- return 0;
- if(routeflush(f, r->mid, tag))
- return 1;
- if(routeflush(f, r->left, tag))
- return 1;
- if(routeflush(f, r->right, tag))
- return 1;
- if((r->type & Rifc) == 0){
- if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
- delroute(f, r, 0);
- return 1;
- }
+ if(argc < 3)
+ error(Ebadctl);
+ if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+ error(Ebadip);
+
+ if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+ if(argc < 4)
+ error(Ebadctl);
+ if(parseip(gate, argv[3]) == -1)
+ error(Ebadip);
}
- return 0;
+ if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+ if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+ error(Ebadip);
+ }
+ if(argc == 5 && strcmp(argv[0], "add") == 0)
+ ifc = findipifcstr(f, argv[4]);
+ if(argc > 6)
+ ifc = findipifcstr(f, argv[argc-3]);
+ if(argc > 7)
+ tag = argv[argc-4];
+ if(argc > 8){
+ if((type = parseroutetype(argv[argc-5])) < 0)
+ error(Ebadctl);
+ } else {
+ if(isv4(addr))
+ type |= Rv4;
+ }
+ if(argc > 9)
+ error(Ebadctl);
+
+ if(type & Rv4){
+ if(!isv4(addr))
+ error(Ebadip);
+ if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+ error(Ebadip);
+ if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+ error(Ebadip);
+ } else {
+ if(isv4(addr))
+ error(Ebadip);
+ }
+
+ return mkroute(addr, mask, src, smask, gate, type, ifc, tag);
}
long
routewrite(Fs *f, Chan *c, char *p, int n)
{
- int h, changed;
- char *tag;
Cmdbuf *cb;
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar gate[IPaddrlen];
- IPaux *a, *na;
+ IPaux *a;
+ Route *x, r;
cb = parsecmd(p, n);
if(waserror()){
@@ -797,54 +1112,44 @@
free(cb);
nexterror();
}
-
+ if(cb->nf < 1)
+ error("short control request");
if(strcmp(cb->f[0], "flush") == 0){
- tag = cb->f[1];
+ char *tag = cb->nf < 2 ? nil : cb->f[1];
+ int h;
+
+ wlock(&routelock);
for(h = 0; h < nelem(f->v4root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v4root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v4root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+ routerem(f, &r);
}
for(h = 0; h < nelem(f->v6root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v6root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v6root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+ routerem(f, &r);
}
- } else if(strcmp(cb->f[0], "remove") == 0){
- if(cb->nf < 3)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
- else
- v6delroute(f, addr, mask, 1);
- } else if(strcmp(cb->f[0], "add") == 0){
- if(cb->nf < 4)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- parseip(gate, cb->f[3]);
- tag = "none";
- if(c != nil){
+ wunlock(&routelock);
+ } else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+ r = parseroute(f, cb->f, cb->nf);
+ if(*r.tag == 0){
a = c->aux;
- tag = a->tag;
+ strncpy(r.tag, a->tag, sizeof(r.tag));
}
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+ wlock(&routelock);
+ if(strcmp(cb->f[0], "add") == 0)
+ routeadd(f, &r);
else
- v6addroute(f, tag, addr, mask, gate, 0);
+ routerem(f, &r);
+ wunlock(&routelock);
} else if(strcmp(cb->f[0], "tag") == 0) {
if(cb->nf < 2)
error(Ebadarg);
-
a = c->aux;
- na = newipaux(a->owner, cb->f[1]);
- c->aux = na;
+ c->aux = newipaux(a->owner, cb->f[1]);
free(a);
- }
+ } else
+ error(Ebadctl);
poperror();
free(cb);
--- a/os/ip/ipv6.c
+++ b/os/ip/ipv6.c
@@ -8,250 +8,127 @@
#include "ip.h"
#include "ipv6.h"
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = (32*1024), /* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
-
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void ipfragfree6(IP*, Fragment6*);
-Fragment6* ipfragallo6(IP*);
+static Block* ip6reassemble(IP*, int, Block*);
+static Fragment6* ipfragallo6(IP*);
+static void ipfragfree6(IP*, Fragment6*);
+static Block* procopts(Block *bp);
static Block* procxtns(IP *ip, Block *bp, int doreasm);
-int unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block* procopts(Block *bp);
+static int unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
+ v6params *v6p;
- Nstats,
-};
+ v6p = smalloc(sizeof(v6params));
-static char *statnames[] =
-{
-[Forwarding] "Forwarding",
-[DefaultTTL] "DefaultTTL",
-[InReceives] "InReceives",
-[InHdrErrors] "InHdrErrors",
-[InAddrErrors] "InAddrErrors",
-[ForwDatagrams] "ForwDatagrams",
-[InUnknownProtos] "InUnknownProtos",
-[InDiscards] "InDiscards",
-[InDelivers] "InDelivers",
-[OutRequests] "OutRequests",
-[OutDiscards] "OutDiscards",
-[OutNoRoutes] "OutNoRoutes",
-[ReasmTimeout] "ReasmTimeout",
-[ReasmReqds] "ReasmReqds",
-[ReasmOKs] "ReasmOKs",
-[ReasmFails] "ReasmFails",
-[FragOKs] "FragOKs",
-[FragFails] "FragFails",
-[FragCreates] "FragCreates",
-};
+ v6p->rp.mflag = 0; /* default not managed */
+ v6p->rp.oflag = 0;
+ v6p->rp.maxraint = 600000; /* millisecs */
+ v6p->rp.minraint = 200000;
+ v6p->rp.linkmtu = 0; /* no mtu sent */
+ v6p->rp.reachtime = 0;
+ v6p->rp.rxmitra = 0;
+ v6p->rp.ttl = MAXTTL;
+ v6p->rp.routerlt = (3 * v6p->rp.maxraint) / 1000;
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
+ v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
+ f->v6p = v6p;
+}
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
- int tentative;
- Ipifc *ifc;
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+ int morefrags, blklen, rv = 0;
uchar *gate, nexthdr;
- Ip6hdr *eh;
- int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
- Route *r, *sr;
- Fraghdr6 fraghdr;
Block *xp, *nb;
+ Fraghdr6 fraghdr;
IP *ip;
- int rv = 0;
+ Ip6hdr *eh;
+ Ipifc *ifc;
+ Route *r;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip6hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)bp->rp;
+ assert(BLEN(bp) >= IP6HDR);
len = blocklen(bp);
-
- tentative = iptentative(f, eh->src);
- if(tentative){
- netlog(f, Logip, "reject tx of packet with tentative src address\n");
- goto free;
- }
-
- if(gating){
- chunk = nhgets(eh->ploadlen);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk + IPV6HDR_LEN < len)
- len = chunk + IPV6HDR_LEN;
- }
-
if(len >= IP_MAX){
-// print("len > IP_MAX, free\n");
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v6lookup(f, eh->dst, c);
- if(r == nil){
-// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ r = v6lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v6lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v6.gate;
- if(!gating)
- eh->vcf[0] = IP_VER6;
- eh->ttl = ttl;
- if(!gating) {
- eh->vcf[0] |= (tos >> 4);
- eh->vcf[1] = (tos << 4);
- }
-
- if(!canrlock(ifc)) {
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
}
-
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil) {
+ if(ifc->m == nil)
goto raise;
+
+ if(!gating){
+ eh->vcf[0] = IP_VER6;
+ eh->vcf[0] |= tos >> 4;
+ eh->vcf[1] = tos << 4;
}
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- hnputs(eh->ploadlen, len-IPV6HDR_LEN);
- ifc->m->bwrite(ifc, bp, V6, gate);
+ hnputs(eh->ploadlen, len - IP6HDR);
+ ipifcoput(ifc, bp, V6, gate);
runlock(ifc);
poperror();
return 0;
}
- if(gating)
- if(ifc->reassemble <= 0) {
-
- /* v6 intermediate nodes are not supposed to fragment pkts;
- we fragment if ifc->reassemble is turned on; an exception
- needed for nat.
+ if(gating && !ifc->reassemble) {
+ /*
+ * v6 intermediate nodes are not supposed to fragment pkts;
+ * we fragment if ifc->reassemble is turned on; an exception
+ * needed for nat.
*/
-
ip->stats[OutDiscards]++;
icmppkttoobig6(f, ifc, bp);
- netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
goto raise;
}
-
+
/* start v6 fragmentation */
- uflen = unfraglen(bp, &nexthdr, 1);
+ uflen = unfraglen(bp, &nexthdr, 1, 0);
+ if(uflen < IP6HDR || nexthdr == FH) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+ goto raise;
+ }
if(uflen > medialen) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
goto raise;
}
@@ -260,7 +137,7 @@
if(seglen < 8) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
goto raise;
}
@@ -271,13 +148,13 @@
xp = bp;
offset = uflen;
- while (xp != nil && offset && offset >= BLEN(xp)) {
+ while (offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
xp->rp += offset;
- fragoff = 0;
+ fragoff = 0;
morefrags = 1;
for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
memmove(nb->wp, eh, uflen);
nb->wp += uflen;
- hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+ hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
fraghdr.offsetRM[1] |= morefrags;
memmove(nb->wp, &fraghdr, IP6FHDR);
nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
/* Copy data */
chunk = seglen;
while (chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -316,10 +193,9 @@
xp->rp += blklen;
chunk -= blklen;
if(xp->rp == xp->wp)
- xp = xp->next;
+ xp = xp->next;
}
-
- ifc->m->bwrite(ifc, nb, V6, gate);
+ ipifcoput(ifc, nb, V6, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
runlock(ifc);
poperror();
free:
- freeblist(bp);
+ freeblist(bp);
return rv;
}
@@ -335,16 +211,10 @@
void
ipiput6(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos;
- uchar proto;
+ int hl, len, hop, tos;
+ IP *ip;
Ip6hdr *h;
Proto *p;
- int notforme;
- int tentative;
- uchar v6dst[IPaddrlen];
- IP *ip;
- Route *r, *sr;
ip = f->ip;
ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
return;
}
- h = (Ip6hdr *)(bp->rp);
-
- memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
- notforme = ipforme(f, v6dst) == 0;
- tentative = iptentative(f, v6dst);
-
- if(tentative && (h->proto != ICMPv6)) {
- print("tentative addr, drop\n");
- freeblist(bp);
- return;
- }
-
/* Check header version */
- if(BLKIPVER(bp) != IP_VER6) {
+ h = (Ip6hdr*)bp->rp;
+ if((h->vcf[0] & 0xF0) != IP_VER6) {
ip->stats[InHdrErrors]++;
netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
- freeblist(bp);
+ goto drop;
+ }
+ len = IP6HDR + nhgets(h->ploadlen);
+ if((bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
return;
}
+ h = (Ip6hdr*)bp->rp;
/* route */
- if(notforme) {
- if(!ip->iprouting){
- freeb(bp);
- return;
+ if(!ipforme(f, h->dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
+
+ if(!ip->iprouting)
+ goto drop;
+
+ /* don't forward to link-local destinations */
+ if(islinklocal(h->dst) ||
+ (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+ ip->stats[OutDiscards]++;
+ goto drop;
}
+
/* don't forward to source's network */
- sr = v6lookup(f, h->src, nil);
- r = v6lookup(f, h->dst, nil);
-
- if(r == nil || sr == r){
+ rh.r = nil;
+ r = v6lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
if(hop < 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded6(f, ifc, bp);
- freeblist(bp);
- return;
+ goto drop;
}
/* process headers & reassemble if the interface expects it */
- bp = procxtns(ip, bp, r->ifc->reassemble);
-
+ bp = procxtns(ip, bp, nifc->reassemble);
if(bp == nil)
return;
ip->stats[ForwDatagrams]++;
- h = (Ip6hdr *) (bp->rp);
- tos = IPV6CLASS(h);
+ h = (Ip6hdr*)bp->rp;
+ tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
hop = h->ttl;
- ipoput6(f, bp, 1, hop-1, tos, nil);
+ ipoput6(f, bp, 1, hop-1, tos, &rh);
return;
}
/* reassemble & process headers if needed */
bp = procxtns(ip, bp, 1);
-
if(bp == nil)
return;
- h = (Ip6hdr *) (bp->rp);
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ h = (Ip6hdr*)bp->rp;
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -447,20 +318,20 @@
/*
* ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
*/
-void
+static void
ipfragfree6(IP *ip, Fragment6 *frag)
{
Fragment6 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- memset(frag->src, 0, IPaddrlen);
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ memset(frag->src, 0, IPaddrlen);
+ memset(frag->dst, 0, IPaddrlen);
l = &ip->flisthead6;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -470,13 +341,12 @@
frag->next = ip->fragfree6;
ip->fragfree6 = frag;
-
}
/*
* ipfragallo6 - copied from ipfragalloc4
*/
-Fragment6*
+static Fragment6*
ipfragallo6(IP *ip)
{
Fragment6 *f;
@@ -483,7 +353,7 @@
while(ip->fragfree6 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead6; f->next; f = f->next)
+ for(f = ip->flisthead6; f->next != nil; f = f->next)
;
ipfragfree6(ip, f);
}
@@ -497,108 +367,109 @@
}
static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
- int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
uchar proto;
- Ip6hdr *h;
+ int offset;
- h = (Ip6hdr *) (bp->rp);
- offset = unfraglen(bp, &proto, 0);
-
- if((proto == FH) && (doreasm != 0)) {
- bp = ip6reassemble(ip, offset, bp, h);
- if(bp == nil)
- return nil;
- offset = unfraglen(bp, &proto, 0);
+ offset = unfraglen(bp, &proto, 0, doreasm);
+ if(offset >= IP6HDR && proto == FH && doreasm) {
+ bp = ip6reassemble(ip, offset, bp);
+ if(bp == nil)
+ return nil;
+ offset = unfraglen(bp, &proto, 0, 0);
+ if(proto == FH)
+ offset = -1;
}
-
- if(proto == DOH || offset > IP6HDR)
+ if(offset < IP6HDR){
+ ip->stats[InHdrErrors]++;
+ ip->stats[InDiscards]++;
+ freeblist(bp);
+ return nil;
+ }
+ if(proto == DOH || offset > IP6HDR)
bp = procopts(bp);
-
return bp;
}
-
-/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- * field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
*/
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
{
- uchar *p, *q;
- int ufl, hs;
+ uchar *e, *p, *q;
+ e = bp->wp;
p = bp->rp;
- q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
*nexthdr = *q;
- ufl = IP6HDR;
- p += ufl;
-
- for(;;) {
- if(*nexthdr == HBH || *nexthdr == RH) {
- *nexthdr = *p;
- hs = ((int)*(p+1) + 1) * 8;
- ufl += hs;
- q = p;
- p += hs;
- }
- else
- break;
+ p += IP6HDR;
+ while(*nexthdr == HBH || *nexthdr == RH){
+ if(p+2 > e)
+ return -1;
+ q = p;
+ *nexthdr = *q;
+ p += ((int)p[1] + 1) * 8;
}
-
- if(*nexthdr == FH)
- *q = *p;
-
- if(setfh)
+ if(p > e)
+ return -1;
+ if(*nexthdr == FH){
+ if(p+IP6FHDR > e || *p == FH)
+ return -1;
+ if(popfh)
+ *q = *p;
+ } else if(setfh)
*q = FH;
-
- return ufl;
+ return p - bp->rp;
}
-Block*
+static Block*
procopts(Block *bp)
{
return bp;
}
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
{
-
- int fend, offset;
+ int offset, ovlap, fragsize, len;
+ uchar src[IPaddrlen], dst[IPaddrlen];
uint id;
- Fragment6 *f, *fnext;
+ Block *bl, **l, *prev;
Fraghdr6 *fraghdr;
- uchar src[IPaddrlen], dst[IPaddrlen];
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Fragment6 *f, *fnext;
+ Ipfrag *fp, *fq;
+ Ip6hdr* ih;
- fraghdr = (Fraghdr6 *) (bp->rp + uflen);
- memmove(src, ih->src, IPaddrlen);
- memmove(dst, ih->dst, IPaddrlen);
- id = nhgetl(fraghdr->id);
- offset = nhgets(fraghdr->offsetRM) & ~7;
-
/*
- * block lists are too hard, pullupblock into a single block
+ * block lists are too hard, concatblock into a single block
*/
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip6hdr *)(bp->rp);
- }
+ bp = concatblock(bp);
+ ih = (Ip6hdr*)bp->rp;
+ fraghdr = (Fraghdr6*)(bp->rp + uflen);
+ id = nhgetl(fraghdr->id);
+ offset = nhgets(fraghdr->offsetRM);
+ fragsize = BLEN(bp) - uflen - IP6FHDR;
+ memmove(src, ih->src, IPaddrlen);
+ memmove(dst, ih->dst, IPaddrlen);
+
qlock(&ip->fraglock6);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead6; f; f = fnext){
+ for(f = ip->flisthead6; f != nil; f = fnext){
fnext = f->next;
- if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+ if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
}
}
-
/*
* if this isn't a fragmented packet, accept it
* and get rid of any fragments that might go
* with it.
*/
- if(nhgets(fraghdr->offsetRM)==0) { // first frag is also the last
+ if((offset & ~6) == 0) { /* 1st frag is also last */
if(f != nil) {
- ipfragfree6(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree6(ip, f);
}
qunlock(&ip->fraglock6);
+
+ /* get rid of frag header */
+ memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+ bp->rp += IP6FHDR;
+ ih = (Ip6hdr*)bp->rp;
+ hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset;
- BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = offset & ~7;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -638,8 +516,9 @@
f->blist = bp;
- qunlock(&ip->fraglock6);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock6);
+
return nil;
}
@@ -649,7 +528,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -656,15 +535,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock6);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -673,29 +553,27 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
-
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
-
- if(ovlap <= 0)
- break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, uflen);
- (*l)->rp += ovlap;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
+ if(ovlap <= 0)
break;
+ if(ovlap < fq->flen) {
+ /* move up ip and frag header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
+ break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -703,45 +581,55 @@
* look for a complete packet. if we get to a fragment
* with the trailing bit of fraghdr->offsetRM[1] set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
-
- fraghdr = (Fraghdr6 *) (bl->rp + uflen);
- if((fraghdr->offsetRM[1] & 1) == 0) {
- bl = f->blist;
+ fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+ if(fraghdr->offsetRM[1] & 1)
+ continue;
- /* get rid of frag header in first fragment */
+ bl = f->blist;
+ fq = (Ipfrag*)bl->base;
- memmove(bl->rp + IP6FHDR, bl->rp, uflen);
- bl->rp += IP6FHDR;
- len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
- bl->wp = bl->rp + len + IP6HDR;
+ /* get rid of frag header in first fragment */
+ memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+ bl->rp += IP6FHDR;
+ len = BLEN(bl);
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += uflen + IP6FHDR;
- bl->wp = bl->rp + fragsize;
- }
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
- bl = f->blist;
- f->blist = nil;
+ if(len >= IP_MAX){
ipfragfree6(ip, f);
- ih = (Ip6hdr*)(bl->rp);
- hnputs(ih->ploadlen, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock6);
- ip->stats[ReasmOKs]++;
- return bl;
+
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree6(ip, f);
+
+ ih = (Ip6hdr*)bl->rp;
+ hnputs(ih->ploadlen, len-IP6HDR);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock6);
+
+ return bl;
}
qunlock(&ip->fraglock6);
return nil;
}
-
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
#define isv6mcast(addr) ((addr)[0] == 0xff)
#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
-typedef struct Ip6hdr Ip6hdr;
-typedef struct Opthdr Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6 Fraghdr6;
-
-struct Ip6hdr {
- uchar vcf[4]; // version:4, traffic class:8, flow label:20
- uchar ploadlen[2]; // payload length: packet length - 40
- uchar proto; // next header type
- uchar ttl; // hop limit
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
- uchar nexthdr;
- uchar len;
-};
-
-struct Routinghdr {
- uchar nexthdr;
- uchar len;
- uchar rtetype;
- uchar segrem;
-};
-
-struct Fraghdr6 {
- uchar nexthdr;
- uchar res;
- uchar offsetRM[2]; // Offset, Res, M flag
- uchar id[4];
-};
-
-
-enum { /* Header Types */
- HBH = 0, //?
+enum { /* Header Types */
+ HBH = 0, /* hop-by-hop multicast routing protocol */
ICMP = 1,
IGMP = 2,
GGP = 3,
@@ -72,89 +50,113 @@
Maxhdrtype = 256,
};
-
enum {
- // multicast flgs and scop
+ /* multicast flags and scopes */
- well_known_flg = 0,
- transient_flg = 1,
+// Well_known_flg = 0,
+// Transient_flg = 1,
- node_local_scop = 1,
- link_local_scop = 2,
- site_local_scop = 5,
- org_local_scop = 8,
- global_scop = 14,
+// Interface_local_scop = 1,
+ Link_local_scop = 2,
+// Site_local_scop = 5,
+// Org_local_scop = 8,
+ Global_scop = 14,
- // various prefix lengths
+ /* various prefix lengths */
+ SOLN_PREF_LEN = 13,
- SOLN_PREF_LEN = 13,
+ /* icmpv6 unreachability codes */
+ Icmp6_no_route = 0,
+ Icmp6_ad_prohib = 1,
+ Icmp6_out_src_scope = 2,
+ Icmp6_adr_unreach = 3,
+ Icmp6_port_unreach = 4,
+ Icmp6_gress_src_fail = 5,
+ Icmp6_rej_route = 6,
+ Icmp6_unknown = 7, /* our own invention for internal use */
- // icmpv6 unreach codes
- icmp6_no_route = 0,
- icmp6_ad_prohib = 1,
- icmp6_unassigned = 2,
- icmp6_adr_unreach = 3,
- icmp6_port_unreach = 4,
- icmp6_unkn_code = 5,
+ /* various flags & constants */
+ v6MINTU = 1280,
+ HOP_LIMIT = 255,
+ IP6HDR = 40, /* sizeof(Ip6hdr) = 8 + 2*16 */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
- // various flags & constants
+ /* option types */
- v6MINTU = 1280,
- HOP_LIMIT = 255,
- ETHERHDR_LEN = 14,
- IPV6HDR_LEN = 40,
- IPV4HDR_LEN = 20,
+ /* neighbour discovery */
+ SRC_LLADDR = 1,
+ TARGET_LLADDR = 2,
+ PREFIX_INFO = 3,
+ REDIR_HEADER = 4,
+ MTU_OPTION = 5,
+ /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+ V6nd_home = 8,
+ V6nd_srcaddrs = 9, /* rfc3122 */
+ V6nd_ip = 17,
+ /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+ V6nd_rdns = 25,
+ /* plan 9 extensions */
+ V6nd_9fs = 250,
+ V6nd_9auth = 251,
- // option types
+ SRC_UNSPEC = 0,
+ SRC_UNI = 1,
+ TARG_UNI = 2,
+ TARG_MULTI = 3,
- SRC_LLADDRESS = 1,
- TARGET_LLADDRESS = 2,
- PREFIX_INFO = 3,
- REDIR_HEADER = 4,
- MTU_OPTION = 5,
+ Tunitent = 1,
+ Tuniproxy = 2,
+ Tunirany = 3,
- SRC_UNSPEC = 0,
- SRC_UNI = 1,
- TARG_UNI = 2,
- TARG_MULTI = 3,
+ /* Node constants */
+ MAX_MULTICAST_SOLICIT = 3,
+ RETRANS_TIMER = 1000,
+};
- t_unitent = 1,
- t_uniproxy = 2,
- t_unirany = 3,
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
- // Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */\
+ uchar ploadlen[2]; /* payload length: packet length - 40 */ \
+ uchar proto; /* next header type */ \
+ uchar ttl; /* hop limit */ \
+ uchar src[IPaddrlen]; \
+ uchar dst[IPaddrlen]
- MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000,
- MAX_INITIAL_RTR_ADVERTISEMENTS = 3,
- MAX_FINAL_RTR_ADVERTISEMENTS = 3,
- MIN_DELAY_BETWEEN_RAS = 3000,
- MAX_RA_DELAY_TIME = 500,
+struct Ip6hdr {
+ IPV6HDR;
+ uchar payload[];
+};
- // Host constants
+struct Opthdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+};
- MAX_RTR_SOLICITATION_DELAY = 1000,
- RTR_SOLICITATION_INTERVAL = 4000,
- MAX_RTR_SOLICITATIONS = 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
- // Node constants
-
- MAX_MULTICAST_SOLICIT = 3,
- MAX_UNICAST_SOLICIT = 3,
- MAX_ANYCAST_DELAY_TIME = 1000,
- MAX_NEIGHBOR_ADVERTISEMENT = 3,
- REACHABLE_TIME = 30000,
- RETRANS_TIMER = 1000,
- DELAY_FIRST_PROBE_TIME = 5000,
-
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4];
};
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
extern uchar v6allnodesN[IPaddrlen];
extern uchar v6allnodesL[IPaddrlen];
extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
extern uchar v6allroutersL[IPaddrlen];
extern uchar v6allnodesNmask[IPaddrlen];
extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
extern uchar v6solicitednode[IPaddrlen];
extern uchar v6solicitednodemask[IPaddrlen];
extern uchar v6Unspecified[IPaddrlen];
extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
extern uchar v6linklocal[IPaddrlen];
extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
extern uchar v6multicast[IPaddrlen];
extern uchar v6multicastmask[IPaddrlen];
extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
extern int v6mcpreflen;
extern int v6snpreflen;
extern int v6aNpreflen;
@@ -184,3 +179,10 @@
extern int v6aLpreflen;
extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
--- a/os/ip/loopbackmedium.c
+++ b/os/ip/loopbackmedium.c
@@ -28,13 +28,12 @@
LB *lb;
lb = smalloc(sizeof(*lb));
+ lb->readp = (void*)-1;
lb->f = ifc->conv->p->f;
- /* TO DO: make queue size a function of kernel memory */
- lb->q = qopen(128*1024, Qmsg, nil, nil);
+ lb->q = qopen(1024*1024, Qmsg, nil, nil);
ifc->arg = lb;
- ifc->mbps = 1000;
- kproc("loopbackread", loopbackread, ifc, 0);
+ kproc("loopbackread", loopbackread, ifc);
}
@@ -43,13 +42,29 @@
{
LB *lb = ifc->arg;
- if(lb->readp)
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(lb->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(lb->readp != nil)
postnote(lb->readp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for reader to die */
- while(lb->readp != 0)
+ while(lb->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
/* clean up */
qfree(lb->q);
free(lb);
@@ -76,23 +91,14 @@
ifc = a;
lb = ifc->arg;
lb->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- lb->readp = 0;
- pexit("hangup", 1);
- }
- for(;;){
- bp = qbread(lb->q, Maxtu);
- if(bp == nil)
- continue;
- ifc->in++;
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(!waserror())
+ while((bp = qbread(lb->q, Maxtu)) != nil){
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
+ ifc->in++;
if(ifc->lifc == nil)
freeb(bp);
else
@@ -100,6 +106,8 @@
runlock(ifc);
poperror();
}
+ lb->readp = nil;
+ pexit("hangup", 1);
}
Medium loopbackmedium =
--- a/os/ip/netdevmedium.c
+++ b/os/ip/netdevmedium.c
@@ -49,12 +49,13 @@
mchan = namec(argv[2], Aopen, ORDWR, 0);
er = smalloc(sizeof(*er));
+ er->readp = (void*)-1;
er->mchan = mchan;
er->f = ifc->conv->p->f;
ifc->arg = er;
- kproc("netdevread", netdevread, ifc, 0);
+ kproc("netdevread", netdevread, ifc);
}
/*
@@ -65,13 +66,29 @@
{
Netdevrock *er = ifc->arg;
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(er->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
if(er->readp != nil)
postnote(er->readp, 1, "unbind", 0);
- /* wait for readers to die */
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
+ /* wait for reader to die */
while(er->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan != nil)
cclose(er->mchan);
@@ -86,8 +103,6 @@
{
Netdevrock *er = ifc->arg;
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
@@ -104,34 +119,22 @@
Ipifc *ifc;
Block *bp;
Netdevrock *er;
- char *argv[1];
ifc = a;
er = ifc->arg;
er->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->readp = nil;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
if(bp == nil){
- /*
- * get here if mchan is a pipe and other side hangs up
- * clean up this interface & get out
-ZZZ is this a good idea?
- */
poperror();
- er->readp = nil;
- argv[0] = "unbind";
- if(!waserror())
+ if(!waserror()){
+ static char *argv[] = { "unbind" };
ifc->conv->p->ctl(ifc->conv, argv, 1);
- pexit("hangup", 1);
+ }
+ break;
}
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
@@ -144,6 +147,8 @@
runlock(ifc);
poperror();
}
+ er->readp = nil;
+ pexit("hangup", 1);
}
void
--- a/os/ip/netlog.c
+++ b/os/ip/netlog.c
@@ -7,7 +7,7 @@
#include "../ip/ip.h"
enum {
- Nlog = 4*1024,
+ Nlog = 16*1024,
};
/*
@@ -39,12 +39,12 @@
{ "ppp", Logppp, },
{ "ip", Logip, },
{ "fs", Logfs, },
- { "tcp", Logtcp, },
{ "il", Logil, },
+ { "tcp", Logtcp, },
{ "icmp", Logicmp, },
{ "udp", Logudp, },
{ "compress", Logcompress, },
- { "ilmsg", Logil|Logilmsg, },
+ { "logilmsg", Logilmsg, },
{ "gre", Loggre, },
{ "tcpwin", Logtcp|Logtcpwin, },
{ "tcprxmt", Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
nexterror();
}
if(f->alog->opens == 0){
- if(f->alog->buf == nil)
+ if(f->alog->buf == nil){
f->alog->buf = malloc(Nlog);
+ if(f->alog->buf == nil)
+ error(Enomem);
+ }
f->alog->rptr = f->alog->buf;
f->alog->end = f->alog->buf + Nlog;
}
@@ -202,6 +205,7 @@
else
f->alog->iponlyset = 1;
free(cb);
+ poperror();
return;
default:
@@ -227,7 +231,7 @@
void
netlog(Fs *f, int mask, char *fmt, ...)
{
- char buf[128], *t, *fp;
+ char buf[256], *t, *fp;
int i, n;
va_list arg;
--- a/os/ip/nullmedium.c
+++ b/os/ip/nullmedium.c
@@ -19,8 +19,9 @@
}
static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
{
+ freeb(bp);
error("nullbwrite");
}
--- a/os/ip/pktmedium.c
+++ b/os/ip/pktmedium.c
@@ -16,10 +16,10 @@
Medium pktmedium =
{
.name= "pkt",
-.hsize= 14,
-.mintu= 40,
+.hsize= 0,
+.mintu= 0,
.maxtu= 4*1024,
-.maclen= 6,
+.maclen= 0,
.bind= pktbind,
.unbind= pktunbind,
.bwrite= pktbwrite,
@@ -28,12 +28,13 @@
};
/*
- * called to bind an IP ifc to an ethernet device
+ * called to bind an IP ifc to a packet device
* called with ifc wlock'd
*/
static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
{
+ USED(argc, argv);
}
/*
@@ -51,7 +52,6 @@
pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
{
/* enqueue onto the conversation's rq */
- bp = concatblock(bp);
if(ifc->conv->snoopers.ref > 0)
qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
qpass(ifc->conv->rq, bp);
--- a/os/ip/rudp.c
+++ b/os/ip/rudp.c
@@ -1,4 +1,5 @@
/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
* This protocol is compatible with UDP's packet format.
* It could be done over UDP if need be.
*/
@@ -25,20 +26,17 @@
enum
{
- UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
UDP_IPHDR = 8, /* ip header */
IP_UDPPROTO = 254,
- UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
- UDP_USEAD4 = 12,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
Rudprxms = 200,
Rudptickms = 50,
Rudpmaxxmit = 10,
Maxunacked = 100,
-
};
#define Hangupgen 0xffffffff /* used only in hangup messages */
@@ -205,7 +203,7 @@
qlock(&rpriv->apl);
if(rpriv->ackprocstarted == 0){
sprint(kpname, "#I%drudpack", rudp->f->dev);
- kproc(kpname, relackproc, rudp, 0);
+ kproc(kpname, relackproc, rudp);
rpriv->ackprocstarted = 1;
}
qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
qlock(ucb);
for(r = ucb->r; r; r = r->next)
m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+ m += snprint(state+m, n-m, "\n");
qunlock(ucb);
return m;
}
@@ -281,7 +280,7 @@
/* force out any delayed acks */
ucb = (Rudpcb*)c->ptcl;
qlock(ucb);
- for(r = ucb->r; r; r = r->next){
+ for(r = ucb->r; r != nil; r = r->next){
if(r->acksent != r->rcvseq)
relsendack(c, r, 0);
}
@@ -374,27 +373,10 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
-
- bp->rp += 4; /* Igonore local port */
- break;
default:
ipmove(raddr, c->raddr);
ipmove(laddr, c->laddr);
rport = c->rport;
-
break;
}
@@ -402,9 +384,6 @@
/* Make space to fit rudp & ip header */
bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
- if(bp == nil)
- return;
-
uh = (Udphdr *)(bp->rp);
uh->vihl = IP_VER4;
@@ -417,7 +396,6 @@
uh->frag[1] = 0;
hnputs(uh->udpplen, ptcllen);
switch(ucb->headers){
- case 6:
case 7:
v6tov4(uh->udpdst, raddr);
hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.rudpNoPorts++;
qunlock(rudp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
default:
/* connection oriented rudp */
if(ipcmp(c->raddr, IPnoaddr) == 0){
- /* save the src address in the conversation */
+ /* reply with the same ip address (if not broadcast) */
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
+ ipmove(c->laddr, laddr);
ipmove(c->raddr, raddr);
c->rport = rport;
-
- /* reply with the same ip address (if not broadcast) */
- if(ipforme(f, laddr) == Runi)
- ipmove(c->laddr, laddr);
- else
- v4tov6(c->laddr, ifc->lifc->local);
}
break;
}
- if(bp->next)
- bp = concatblock(bp);
if(qfull(c->rq)) {
- netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
+ } else {
+ qpass(c->rq, concatblock(bp));
}
- else
- qpass(c->rq, bp);
-
qunlock(ucb);
}
@@ -629,16 +594,14 @@
if(n < 1)
return rudpunknown;
- if(strcmp(f[0], "headers++4") == 0){
- ucb->headers = 7;
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 6;
- return nil;
} else if(strcmp(f[0], "hangup") == 0){
if(n < 3)
return "bad syntax";
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ return Ebadip;
x = atoi(f[2]);
qlock(ucb);
relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
qunlock(ucb);
return nil;
} else if(strcmp(f[0], "randdrop") == 0){
- x = 10; /* default is 10% */
+ x = 10; /* default is 10% */
if(n > 1)
x = atoi(f[1]);
if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
pdest = nhgets(h->udpdport);
/* Look for a connection */
- for(p = rudp->conv; *p; p++) {
- s = *p;
+ for(p = rudp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -701,12 +665,6 @@
upriv->orders);
}
-int
-rudpgc(Proto *rudp)
-{
- return natgc(rudp->ipproto);
-}
-
void
rudpinit(Fs *fs)
{
@@ -725,9 +683,8 @@
rudp->rcv = rudpiput;
rudp->advise = rudpadvise;
rudp->stats = rudpstats;
- rudp->gc = rudpgc;
rudp->ipproto = IP_UDPPROTO;
- rudp->nc = 16;
+ rudp->nc = 32;
rudp->ptclsize = sizeof(Rudpcb);
Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
rudp = (Proto *)a;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Rudptickms);
@@ -989,8 +948,6 @@
Fs *f;
bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
- if(bp == nil)
- return;
bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
f = c->p->f;
uh = (Udphdr *)(bp->rp);
--- a/os/ip/tcp.c
+++ b/os/ip/tcp.c
@@ -41,13 +41,13 @@
EOLOPT = 0,
NOOPOPT = 1,
MSSOPT = 2,
- MSS_LENGTH = 4, /* Mean segment size */
+ MSS_LENGTH = 4, /* Maximum segment size */
WSOPT = 3,
WS_LENGTH = 3, /* Bits to scale window size by */
MSL2 = 10,
MSPTICK = 50, /* Milliseconds per timer tick */
- DEF_MSS = 1460, /* Default mean segment */
- DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+ DEF_MSS = 1460, /* Default maximum segment */
+ DEF_MSS6 = 1220, /* Default maximum segment (min) for v6 */
DEF_RTT = 500, /* Default round trip */
DEF_KAT = 120000, /* Default time (ms) between keep alives */
TCP_LISTEN = 0, /* Listen connection */
@@ -81,7 +81,13 @@
NLHT = 256, /* hash table size, must be a power of 2 */
LHTMASK = NLHT-1,
- HaveWS = 1<<8,
+ /*
+ * window is 64kb · 2ⁿ
+ * these factors determine the ultimate bandwidth-delay product.
+ * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+ */
+ Maxqscale = 4, /* maximum queuing scale */
+ Defadvscale = 4, /* default advertisement */
};
/* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
ulong seq;
ulong ack;
uchar flags;
- ushort ws; /* window scale option (if not zero) */
- ulong wnd;
+ uchar update;
+ ushort ws; /* window scale option */
+ ulong wnd; /* prescaled window*/
ushort urg;
ushort mss; /* max segment size option (if not zero) */
ushort len; /* size of data */
@@ -205,44 +212,53 @@
ulong wnd; /* Tcp send window */
ulong urg; /* Urgent data pointer */
ulong wl2;
- int scale; /* how much to right shift window in xmitted packets */
+ uint scale; /* how much to right shift window in xmitted packets */
/* to implement tahoe and reno TCP */
ulong dupacks; /* number of duplicate acks rcvd */
+ ulong partialack;
int recovery; /* loss recovery flag */
- ulong rxt; /* right window marker for recovery */
+ int retransmit; /* retransmit 1 packet @ una flag */
+ int rto;
+ ulong rxt; /* right window marker for recovery "recover" rfc3782 */
} snd;
struct {
ulong nxt; /* Receive pointer to next uchar slot */
ulong wnd; /* Receive window incoming */
+ ulong wsnt; /* Last wptr sent. important to track for large bdp */
+ ulong wptr;
ulong urg; /* Urgent pointer */
+ ulong ackptr; /* last acked sequence */
int blocked;
- int una; /* unacked data segs */
- int scale; /* how much to left shift window in rcved packets */
+ uint scale; /* how much to left shift window in rcv'd packets */
} rcv;
ulong iss; /* Initial sequence number */
- int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
ulong cwind; /* Congestion window */
- int scale; /* desired snd.scale */
- ushort ssthresh; /* Slow start threshold */
+ ulong abcbytes; /* appropriate byte counting rfc 3465 */
+ uint scale; /* desired snd.scale */
+ ulong ssthresh; /* Slow start threshold */
int resent; /* Bytes just resent */
int irs; /* Initial received squence */
- ushort mss; /* Mean segment size */
+ ushort mss; /* Maximum segment size */
int rerecv; /* Overlap of data rerecevived */
- ulong window; /* Recevive window */
+ ulong window; /* Our receive window (queue) */
+ uint qscale; /* Log2 of our receive window (queue) */
uchar backoff; /* Exponential backoff counter */
int backedoff; /* ms we've backed off for rexmits */
uchar flags; /* State flags */
Reseq *reseq; /* Resequencing queue */
+ int nreseq;
+ int reseqlen;
Tcptimer timer; /* Activity timer */
Tcptimer acktimer; /* Acknowledge timer */
Tcptimer rtt_timer; /* Round trip timer */
Tcptimer katimer; /* keep alive timer */
ulong rttseq; /* Round trip sequence */
- int srtt; /* Shortened round trip */
+ int srtt; /* Smoothed round trip */
int mdev; /* Mean deviation of round trip */
int kacounter; /* count down for keep alive */
uint sndsyntime; /* time syn sent */
ulong time; /* time Finwait2 or Syn_received was sent */
+ ulong timeuna; /* snd.una when time was set */
int nochecksum; /* non-zero means don't send checksums */
int flgcnt; /* number of flags in the sequence (FIN,SEQ) */
@@ -285,11 +301,11 @@
};
int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
-ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
enum {
/* MIB stats */
MaxConn,
+ Mss,
ActiveOpens,
PassiveOpens,
EstabResets,
@@ -297,6 +313,7 @@
InSegs,
OutSegs,
RetransSegs,
+ RetransSegsSent,
RetransTimeouts,
InErrs,
OutRsts,
@@ -305,14 +322,27 @@
CsumErrs,
HlenErrs,
LenErrs,
+ Resequenced,
OutOfOrder,
+ ReseqBytelim,
+ ReseqPktlim,
+ Delayack,
+ Wopenack,
+ Recovery,
+ RecoveryDone,
+ RecoveryRTO,
+ RecoveryNoSeq,
+ RecoveryCwind,
+ RecoveryPA,
+
Nstats
};
-static char *statnames[] =
+static char *statnames[Nstats] =
{
[MaxConn] "MaxConn",
+[Mss] "MaxSegment",
[ActiveOpens] "ActiveOpens",
[PassiveOpens] "PassiveOpens",
[EstabResets] "EstabResets",
@@ -320,6 +350,7 @@
[InSegs] "InSegs",
[OutSegs] "OutSegs",
[RetransSegs] "RetransSegs",
+[RetransSegsSent] "RetransSegsSent",
[RetransTimeouts] "RetransTimeouts",
[InErrs] "InErrs",
[OutRsts] "OutRsts",
@@ -327,6 +358,19 @@
[HlenErrs] "HlenErrs",
[LenErrs] "LenErrs",
[OutOfOrder] "OutOfOrder",
+[Resequenced] "Resequenced",
+[ReseqBytelim] "ReseqBytelim",
+[ReseqPktlim] "ReseqPktlim",
+[Delayack] "Delayack",
+[Wopenack] "Wopenack",
+
+[Recovery] "Recovery",
+[RecoveryDone] "RecoveryDone",
+[RecoveryRTO] "RecoveryRTO",
+
+[RecoveryNoSeq] "RecoveryNoSeq",
+[RecoveryCwind] "RecoveryCwind",
+[RecoveryPA] "RecoveryPA",
};
typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
QLock apl;
int ackprocstarted;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
};
/*
@@ -356,34 +400,34 @@
* of DoS attack.
*
* To avoid stateless Conv hogs, we pick a sequence number at random. If
- * it that number gets acked by the other end, we shut down the connection.
- * Look for tcpporthogedefense in the code.
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
*/
int tcpporthogdefense = 0;
-int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void localclose(Conv*, char*);
-void procsyn(Conv*, Tcp*);
-void tcpiput(Proto*, Ipifc*, Block*);
-void tcpoutput(Conv*);
-int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void tcpstart(Conv*, int);
-void tcptimeout(void*);
-void tcpsndsyn(Conv*, Tcpctl*);
-void tcprcvwin(Conv*);
-void tcpacktimer(void*);
-void tcpkeepalive(void*);
-void tcpsetkacounter(Tcpctl*);
-void tcprxmit(Conv*);
-void tcpsettimer(Tcpctl*);
-void tcpsynackrtt(Conv*);
-void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static int addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static int dumpreseq(Tcpctl*);
+static void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static void limborexmit(Proto*);
+static void localclose(Conv*, char*);
+static void procsyn(Conv*, Tcp*);
+static void tcpacktimer(void*);
+static void tcpiput(Proto*, Ipifc*, Block*);
+static void tcpkeepalive(void*);
+static void tcpoutput(Conv*);
+static void tcprcvwin(Conv*);
+static void tcprxmit(Conv*);
+static void tcpsetkacounter(Tcpctl*);
+static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static void tcpsettimer(Tcpctl*);
+static void tcpsndsyn(Conv*, Tcpctl*);
+static void tcpstart(Conv*, int);
+static void tcpsynackrtt(Conv*);
+static void tcptimeout(void*);
+static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
tcpsetstate(Conv *s, uchar newstate)
{
Tcpctl *tcb;
@@ -403,11 +447,6 @@
if(newstate == Established)
tpriv->stats[CurrEstab]++;
- /**
- print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
- tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
- **/
-
switch(newstate) {
case Closed:
qclose(s->rq);
@@ -430,7 +469,12 @@
tcpconnect(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
@@ -447,12 +491,14 @@
s = (Tcpctl*)(c->ptcl);
return snprint(state, n,
- "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
tcpstates[s->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
- s->srtt, s->mdev,
- s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+ s->nreseq, s->reseqlen,
+ s->srtt, s->mdev, s->ssthresh,
+ s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+ s->qscale,
s->timer.start, s->timer.count, s->rerecv,
s->katimer.start, s->katimer.count);
}
@@ -470,7 +516,12 @@
tcpannounce(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdannounce(c, argv, argc);
if(e != nil)
return e;
@@ -524,7 +575,7 @@
}
}
-void
+static void
tcpkick(void *x)
{
Conv *s = x;
@@ -546,7 +597,6 @@
/*
* Push data
*/
- tcprcvwin(s);
tcpoutput(s);
break;
default:
@@ -558,7 +608,9 @@
poperror();
}
-void
+static int seq_lt(ulong, ulong);
+
+static void
tcprcvwin(Conv *s) /* Call with tcb locked */
{
int w;
@@ -568,12 +620,20 @@
w = tcb->window - qlen(s->rq);
if(w < 0)
w = 0;
- tcb->rcv.wnd = w;
- if(w == 0)
+ /* RFC 1122 § 4.2.2.16 do not move right edge of window left */
+ if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+ w = tcb->rcv.wptr - tcb->rcv.nxt;
+ if(w != tcb->rcv.wnd)
+ if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
tcb->rcv.blocked = 1;
+ netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+ tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+ }
+ tcb->rcv.wnd = w;
+ tcb->rcv.wptr = tcb->rcv.nxt + w;
}
-void
+static void
tcpacktimer(void *v)
{
Tcpctl *tcb;
@@ -589,7 +649,6 @@
qlock(s);
if(tcb->state != Closed){
tcb->flags |= FORCE;
- tcprcvwin(s);
tcpoutput(s);
}
qunlock(s);
@@ -597,10 +656,52 @@
}
static void
+tcpcongestion(Tcpctl *tcb)
+{
+ ulong inflight;
+
+ inflight = tcb->snd.nxt - tcb->snd.una;
+ if(inflight > tcb->cwind)
+ inflight = tcb->cwind;
+ tcb->ssthresh = inflight / 2;
+ if(tcb->ssthresh < 2*tcb->mss)
+ tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+ L = 2, /* aggressive slow start; legal values ∈ [1, 2] */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+ uint limit;
+
+ tcb->abcbytes += acked;
+ if(tcb->cwind < tcb->ssthresh){
+ /* slow start */
+ if(tcb->snd.rto)
+ limit = 1*tcb->mss;
+ else
+ limit = L*tcb->mss;
+ tcb->cwind += MIN(tcb->abcbytes, limit);
+ tcb->abcbytes = 0;
+ }
+ else{
+ tcb->snd.rto = 0;
+ /* avoidance */
+ if(tcb->abcbytes >= tcb->cwind){
+ tcb->abcbytes -= tcb->cwind;
+ tcb->cwind += tcb->mss;
+ }
+ }
+}
+
+static void
tcpcreate(Conv *c)
{
c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
- c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+ c->wq = qopen(QMAX, Qkick, tcpkick, c);
}
static void
@@ -608,7 +709,7 @@
{
if(newstate != TcptimerON){
if(t->state == TcptimerON){
- // unchain
+ /* unchain */
if(priv->timers == t){
priv->timers = t->next;
if(t->prev != nil)
@@ -622,7 +723,7 @@
}
} else {
if(t->state != TcptimerON){
- // chain
+ /* chain */
if(t->prev != nil || t->next != nil)
panic("timerstate2");
t->prev = nil;
@@ -635,7 +736,7 @@
t->state = newstate;
}
-void
+static void
tcpackproc(void *a)
{
Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
tcp = a;
priv = tcp->priv;
+ while(waserror())
+ ;
+
for(;;) {
tsleep(&up->sleep, return0, 0, MSPTICK);
@@ -681,7 +785,7 @@
}
}
-void
+static void
tcpgo(Tcppriv *priv, Tcptimer *t)
{
if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
qunlock(&priv->tl);
}
-void
+static void
tcphalt(Tcppriv *priv, Tcptimer *t)
{
if(t == nil)
@@ -704,17 +808,16 @@
qunlock(&priv->tl);
}
-int
+static int
backoff(int n)
{
return 1 << n;
}
-void
+static void
localclose(Conv *s, char *reason) /* called with tcb locked */
{
Tcpctl *tcb;
- Reseq *rp,*rp1;
Tcppriv *tpriv;
tpriv = s->p->priv;
@@ -728,12 +831,7 @@
tcphalt(tpriv, &tcb->katimer);
/* Flush reassembly queue; nothing more can arrive */
- for(rp = tcb->reseq; rp != nil; rp = rp1) {
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
+ dumpreseq(tcb);
if(tcb->state == Syn_sent)
Fsconnected(s, reason);
@@ -747,45 +845,46 @@
}
/* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
{
Ipifc *ifc;
int mtu;
- ifc = findipifc(tcp->f, addr, 0);
- switch(version){
- default:
- case V4:
- mtu = DEF_MSS;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
- break;
- case V6:
- mtu = DEF_MSS6;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
- break;
- }
- if(ifc != nil){
- if(ifc->mbps > 100)
- *scale = HaveWS | 3;
- else if(ifc->mbps > 10)
- *scale = HaveWS | 1;
- else
- *scale = HaveWS | 0;
- } else
- *scale = HaveWS | 0;
+ /*
+ * set the ws. it doesn't commit us to anything.
+ * ws is the ultimate limit to the bandwidth-delay product.
+ */
+ *scale = Defadvscale;
- return mtu;
+ /*
+ * currently we do not implement path MTU discovery
+ * so use interface MTU *only* if directly reachable
+ * or when we use V4 which allows routers to fragment.
+ * otherwise, we use the default MSS which assumes a
+ * safe minimum MTU of 1280 bytes for V6.
+ */
+ if(r != nil && (ifc = r->ifc) != nil){
+ mtu = ifc->maxtu - ifc->m->hsize;
+ if(version == V4)
+ return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+ mtu -= TCP6_PKT + TCP6_HDRSIZE;
+ if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+ return mtu;
+ }
+ if(version == V6)
+ return DEF_MSS6;
+ else
+ return DEF_MSS;
}
-void
+static void
inittcpctl(Conv *s, int mode)
{
Tcpctl *tcb;
Tcp4hdr* h4;
Tcp6hdr* h6;
+ Tcppriv *tpriv;
int mss;
tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
memset(tcb, 0, sizeof(Tcpctl));
- tcb->ssthresh = 65535;
+ tcb->ssthresh = QMAX; /* reset by tcpsetscale() */
tcb->srtt = tcp_irtt<<LOGAGAIN;
tcb->mdev = 0;
@@ -841,19 +940,18 @@
}
tcb->mss = tcb->cwind = mss;
+ tcb->abcbytes = 0;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
/* default is no window scaling */
- tcb->window = QMAX;
- tcb->rcv.wnd = QMAX;
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- qsetlimit(s->rq, QMAX);
+ tcpsetscale(s, tcb, 0, 0);
}
/*
* called with s qlocked
*/
-void
+static void
tcpstart(Conv *s, int mode)
{
Tcpctl *tcb;
@@ -865,8 +963,8 @@
if(tpriv->ackprocstarted == 0){
qlock(&tpriv->apl);
if(tpriv->ackprocstarted == 0){
- sprint(kpname, "#I%dtcpack", s->p->f->dev);
- kproc(kpname, tcpackproc, s->p, 0);
+ snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+ kproc(kpname, tcpackproc, s->p);
tpriv->ackprocstarted = 1;
}
qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
}
static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
{
- static char buf[128];
+ char *p;
- sprint(buf, "%d", flag>>10); /* Head len */
+ p = seprint(buf, e, "%d", flag>>10); /* Head len */
if(flag & URG)
- strcat(buf, " URG");
+ p = seprint(p, e, " URG");
if(flag & ACK)
- strcat(buf, " ACK");
+ p = seprint(p, e, " ACK");
if(flag & PSH)
- strcat(buf, " PSH");
+ p = seprint(p, e, " PSH");
if(flag & RST)
- strcat(buf, " RST");
+ p = seprint(p, e, " RST");
if(flag & SYN)
- strcat(buf, " SYN");
+ p = seprint(p, e, " SYN");
if(flag & FIN)
- strcat(buf, " FIN");
-
+ p = seprint(p, e, " FIN");
+ USED(p);
return buf;
}
-Block *
+static Block*
htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -940,14 +1038,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP6_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP6_PKT;
}
@@ -1000,7 +1094,7 @@
return data;
}
-Block *
+static Block*
htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -1013,7 +1107,7 @@
if(tcph->flags & SYN){
if(tcph->mss)
hdrlen += MSS_LENGTH;
- if(tcph->ws)
+ if(1)
hdrlen += WS_LENGTH;
optpad = hdrlen & 3;
if(optpad)
@@ -1024,14 +1118,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP4_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP4_PKT;
}
@@ -1055,7 +1145,8 @@
hnputs(opt, tcph->mss);
opt += 2;
}
- if(tcph->ws != 0){
+ /* always offer. rfc1323 §2.2 */
+ if(1){
*opt++ = WSOPT;
*opt++ = WS_LENGTH;
*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
return data;
}
-int
+static int
ntohtcp6(Tcp *tcph, Block **bpp)
{
Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->ploadlen) - hdrlen;
*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1136,7 +1228,7 @@
return hdrlen;
}
-int
+static int
ntohtcp4(Tcp *tcph, Block **bpp)
{
Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1200,16 +1293,19 @@
}
/*
- * For outgiing calls, generate an initial sequence
+ * For outgoing calls, generate an initial sequence
* number and put a SYN on the send queue
*/
-void
+static void
tcpsndsyn(Conv *s, Tcpctl *tcb)
{
+ Tcppriv *tpriv;
+
tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
tcb->rttseq = tcb->iss;
tcb->snd.wl2 = tcb->iss;
tcb->snd.una = tcb->iss;
+ tcb->snd.rxt = tcb->iss;
tcb->snd.ptr = tcb->rttseq;
tcb->snd.nxt = tcb->rttseq;
tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
tcb->sndsyntime = NOW;
/* set desired mss and scale */
- tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+ tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
}
void
@@ -1229,7 +1327,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
- netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
tpriv = tcp->priv;
@@ -1307,7 +1405,7 @@
* send a reset to the remote side and close the conversation
* called with s qlocked
*/
-char*
+static char*
tcphangup(Conv *s)
{
Tcp seg;
@@ -1322,7 +1420,7 @@
memset(&seg, 0, sizeof seg);
seg.flags = RST | ACK;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
seg.seq = tcb->snd.ptr;
seg.wnd = 0;
seg.urg = 0;
@@ -1353,7 +1451,7 @@
/*
* (re)send a SYN ACK
*/
-int
+static int
sndsynack(Proto *tcp, Limbo *lp)
{
Block *hbp;
@@ -1360,7 +1458,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
Tcp seg;
- int scale;
+ uint scale;
/* make pseudo header */
switch(lp->version) {
@@ -1388,11 +1486,12 @@
panic("sndrst: version %d", lp->version);
}
+ memset(&seg, 0, sizeof seg);
seg.seq = lp->iss;
seg.ack = lp->irs+1;
seg.flags = SYN|ACK;
seg.urg = 0;
- seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+ seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
seg.wnd = QMAX;
/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
}
}
+static void
+initialwindow(Tcpctl *tcb)
+{
+ /* RFC 3390 initial window */
+ if(tcb->mss < 1095)
+ tcb->cwind = 4*tcb->mss;
+ else if(tcb->mss < 2190)
+ tcb->cwind = 4380;
+ else
+ tcb->cwind = 2*tcb->mss;
+}
+
/*
* come here when we finally get an ACK to our SYN-ACK.
* lookup call in limbo. if found, create a new conversation
@@ -1596,7 +1707,7 @@
/* find a call in limbo */
h = hashipa(src, segp->source);
for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
- netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+ netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
src, segp->source, lp->raddr, lp->rport,
dst, segp->dest, lp->laddr, lp->lport,
version, lp->version
@@ -1611,7 +1722,7 @@
/* we're assuming no data with the initial SYN */
if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
- netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+ netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
segp->seq, lp->irs+1, segp->ack, lp->iss+1);
lp = nil;
} else {
@@ -1641,6 +1752,8 @@
tcb->irs = lp->irs;
tcb->rcv.nxt = tcb->irs+1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
tcb->snd.una = tcb->iss+1;
tcb->snd.ptr = tcb->iss+1;
tcb->snd.nxt = tcb->iss+1;
+ tcb->snd.rxt = tcb->iss+1;
tcb->flgcnt = 0;
tcb->flags |= SYNACK;
+ /* set desired mss and scale */
+ tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
/* our sending max segment size cannot be bigger than what he asked for */
if(lp->mss != 0 && lp->mss < tcb->mss)
tcb->mss = lp->mss;
+ tpriv->stats[Mss] = tcb->mss;
/* window scaling */
tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
- /* the congestion window always starts out as a single segment */
+ /* congestion window */
tcb->snd.wnd = segp->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
/* set initial round trip time */
tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
return new;
}
-int
+static int
seq_within(ulong x, ulong low, ulong high)
{
if(low <= high){
@@ -1714,25 +1832,25 @@
return 0;
}
-int
+static int
seq_lt(ulong x, ulong y)
{
return (int)(x-y) < 0;
}
-int
+static int
seq_le(ulong x, ulong y)
{
return (int)(x-y) <= 0;
}
-int
+static int
seq_gt(ulong x, ulong y)
{
return (int)(x-y) > 0;
}
-int
+static int
seq_ge(ulong x, ulong y)
{
return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
* use the time between the first SYN and it's ack as the
* initial round trip time
*/
-void
+static void
tcpsynackrtt(Conv *s)
{
Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
tcphalt(tpriv, &tcb->rtt_timer);
}
-void
+static void
update(Conv *s, Tcp *seg)
{
int rtt, delta;
Tcpctl *tcb;
ulong acked;
- ulong expand;
Tcppriv *tpriv;
+ if(seg->update)
+ return;
+ seg->update = 1;
+
tpriv = s->p->priv;
tcb = (Tcpctl*)s->ptcl;
- /* if everything has been acked, force output(?) */
- if(seq_gt(seg->ack, tcb->snd.nxt)) {
- tcb->flags |= FORCE;
- return;
+ /* catch zero-window updates, update window & recover */
+ if(tcb->snd.wnd == 0 && seg->wnd > 0)
+ if(seq_lt(seg->ack, tcb->snd.ptr)){
+ netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+ seg->ack, tcb->snd.una, tcb->snd.ptr, seg->wnd);
+ tcb->snd.wnd = seg->wnd;
+ goto recovery;
}
- /* added by Dong Lin for fast retransmission */
- if(seg->ack == tcb->snd.una
- && tcb->snd.una != tcb->snd.nxt
- && seg->len == 0
- && seg->wnd == tcb->snd.wnd) {
-
- /* this is a pure ack w/o window update */
- netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
- tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
- if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
- /*
- * tahoe tcp rxt the packet, half sshthresh,
- * and set cwnd to one packet
- */
+ /* newreno fast retransmit */
+ if(seg->ack == tcb->snd.una)
+ if(tcb->snd.una != tcb->snd.nxt)
+ if(++tcb->snd.dupacks == 3){
+recovery:
+ if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
+ }else if(seq_le(tcb->snd.rxt, seg->ack)){
+ tpriv->stats[Recovery]++;
+ tcb->abcbytes = 0;
tcb->snd.recovery = 1;
+ tcb->snd.partialack = 0;
tcb->snd.rxt = tcb->snd.nxt;
- netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcpcongestion(tcb);
+ tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+ netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+ tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
tcprxmit(s);
- } else {
- /* do reno tcp here. */
+ }else{
+ tpriv->stats[RecoveryNoSeq]++;
+ netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+ tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+ /* do not enter fast retransmit */
+ /* do not change ssthresh */
}
+ }else if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
}
/*
@@ -1807,6 +1938,9 @@
*/
if(seq_gt(seg->ack, tcb->snd.wl2)
|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ /* clear dupack if we advance wl2 */
+ if(tcb->snd.wl2 != seg->ack)
+ tcb->snd.dupacks = 0;
tcb->snd.wnd = seg->wnd;
tcb->snd.wl2 = seg->ack;
}
@@ -1816,22 +1950,11 @@
* don't let us hangup if sending into a closed window and
* we're still getting acks
*/
- if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
tcb->backedoff = MAXBACKMS/4;
- }
return;
}
- /*
- * any positive ack turns off fast rxt,
- * (should we do new-reno on partial acks?)
- */
- if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
- tcb->snd.dupacks = 0;
- tcb->snd.recovery = 0;
- } else
- netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
/* Compute the new send window size */
acked = seg->ack - tcb->snd.una;
@@ -1843,24 +1966,41 @@
goto done;
}
- /* slow start as long as we're not recovering from lost packets */
- if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
- if(tcb->cwind < tcb->ssthresh) {
- expand = tcb->mss;
- if(acked < expand)
- expand = acked;
+ /*
+ * congestion control
+ */
+ if(tcb->snd.recovery){
+ if(seq_ge(seg->ack, tcb->snd.rxt)){
+ /* recovery finished; deflate window */
+ tpriv->stats[RecoveryDone]++;
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+ if(tcb->ssthresh < tcb->cwind)
+ tcb->cwind = tcb->ssthresh;
+ netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+ tcb->cwind, tcb->ssthresh);
+ } else {
+ /* partial ack; we lost more than one segment */
+ tpriv->stats[RecoveryPA]++;
+ if(tcb->cwind > acked)
+ tcb->cwind -= acked;
+ else{
+ netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+ tcb->cwind = tcb->mss;
+ }
+ netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+ acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+ if(acked >= tcb->mss)
+ tcb->cwind += tcb->mss;
+ tcb->snd.partialack++;
}
- else
- expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+ } else
+ tcpabcincr(tcb, acked);
- if(tcb->cwind + expand < tcb->cwind)
- expand = tcb->snd.wnd - tcb->cwind;
- if(tcb->cwind + expand > tcb->snd.wnd)
- expand = tcb->snd.wnd - tcb->cwind;
- tcb->cwind += expand;
- }
-
/* Adjust the timers according to the round trip time */
+ /* todo: fix sloppy treatment of overflow cases here. */
if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
tcphalt(tpriv, &tcb->rtt_timer);
if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
done:
if(qdiscard(s->wq, acked) < acked)
tcb->flgcnt--;
-
tcb->snd.una = seg->ack;
+
+ /* newreno fast recovery */
+ if(tcb->snd.recovery)
+ tcprxmit(s);
+
if(seq_gt(seg->ack, tcb->snd.urg))
tcb->snd.urg = seg->ack;
- if(tcb->snd.una != tcb->snd.nxt)
- tcpgo(tpriv, &tcb->timer);
+ if(tcb->snd.una != tcb->snd.nxt){
+ /* “impatient” variant */
+ if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
+ tcpgo(tpriv, &tcb->timer);
+ }
+ }
else
tcphalt(tpriv, &tcb->timer);
@@ -1904,12 +2054,13 @@
if(seq_lt(tcb->snd.ptr, tcb->snd.una))
tcb->snd.ptr = tcb->snd.una;
- tcb->flags &= ~RETRAN;
+ if(!tcb->snd.recovery)
+ tcb->flags &= ~RETRAN;
tcb->backoff = 0;
tcb->backedoff = 0;
}
-void
+static void
tcpiput(Proto *tcp, Ipifc*, Block *bp)
{
Tcp seg;
@@ -1917,7 +2068,7 @@
Tcp6hdr *h6;
int hdrlen;
Tcpctl *tcb;
- ushort length;
+ ushort length, csum;
uchar source[IPaddrlen], dest[IPaddrlen];
Conv *s;
Fs *f;
@@ -1980,10 +2131,12 @@
h6->ttl = proto;
hnputl(h6->vcf, length);
if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
- ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp proto cksum\n");
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
freeblist(bp);
return;
}
@@ -1995,7 +2148,7 @@
if(hdrlen < 0){
tpriv->stats[HlenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp hdr len\n");
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
return;
}
@@ -2005,7 +2158,7 @@
if(bp == nil){
tpriv->stats[LenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
return;
}
}
@@ -2016,7 +2169,8 @@
/* Look for a matching conversation */
s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
if(s == nil){
- netlog(f, Logtcp, "iphtlook failed");
+ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+ source, seg.source, dest, seg.dest);
reset:
qunlock(tcp);
sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
}
/* Cut the data to fit the receive window */
+ tcprcvwin(s);
if(tcptrim(tcb, &seg, &bp, &length) == -1) {
- netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+ netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n",
+ seg.seq, seg.seq + length - 1,
+ tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
update(s, &seg);
if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
if(seg.seq != tcb->rcv.nxt)
if(length != 0 || (seg.flags & (SYN|FIN))) {
update(s, &seg);
- if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
- tcb->flags |= FORCE;
+ tcb->flags |= FORCE; /* force duplicate ack; RFC 5681 §3.2 */
goto output;
}
+ if(tcb->nreseq > 0)
+ tcb->flags |= FORCE; /* filled hole in sequence space; RFC 5681 §3.2 */
+
/*
* keep looping till we've processed this packet plus any
* adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
goto raise;
}
case Time_wait:
- tcb->flags |= FORCE;
+ if(seg.flags & FIN)
+ tcb->flags |= FORCE;
if(tcb->timer.state != TcptimerON)
tcpgo(tpriv, &tcb->timer);
}
@@ -2272,34 +2434,12 @@
* receive queue
*/
if(bp) {
- bp = packblock(bp);
- if(bp == nil)
- panic("tcp packblock");
- qpassnolim(s->rq, bp);
+ qpassnolim(s->rq, packblock(bp));
bp = nil;
-
- /*
- * Force an ack every 2 data messages. This is
- * a hack for rob to make his home system run
- * faster.
- *
- * this also keeps the standard TCP congestion
- * control working since it needs an ack every
- * 2 max segs worth. This is not quite that,
- * but under a real stream is equivalent since
- * every packet has a max seg in it.
- */
- if(++(tcb->rcv.una) >= 2)
- tcb->flags |= FORCE;
}
tcb->rcv.nxt += length;
/*
- * update our rcv window
- */
- tcprcvwin(s);
-
- /*
* turn on the acktimer if there's something
* to ack
*/
@@ -2373,8 +2513,11 @@
getreseq(tcb, &seg, &bp, &length);
- if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ tcprcvwin(s);
+ if(tcptrim(tcb, &seg, &bp, &length) == 0){
+ tcb->flags |= FORCE;
break;
+ }
}
}
output:
@@ -2394,15 +2537,15 @@
* the lock to ipoput the packet so some care has to be
* taken by callers.
*/
-void
+static void
tcpoutput(Conv *s)
{
Tcp seg;
- int msgs;
+ uint msgs;
Tcpctl *tcb;
Block *hbp, *bp;
- int sndcnt, n;
- ulong ssize, dsize, usable, sent;
+ int sndcnt;
+ ulong ssize, dsize, sent;
Fs *f;
Tcppriv *tpriv;
uchar version;
@@ -2411,9 +2554,26 @@
tpriv = s->p->priv;
version = s->ipversion;
- for(msgs = 0; msgs < 100; msgs++) {
- tcb = (Tcpctl*)s->ptcl;
+ tcb = (Tcpctl*)s->ptcl;
+ /* force ack every 2*mss */
+ if((tcb->flags & FORCE) == 0)
+ if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+ tpriv->stats[Delayack]++;
+ tcb->flags |= FORCE;
+ }
+
+ /* force ack if window opening */
+ if(0)
+ if((tcb->flags & FORCE) == 0){
+ tcprcvwin(s);
+ if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+ tpriv->stats[Wopenack]++;
+ tcb->flags |= FORCE;
+ }
+ }
+
+ for(msgs = 0; msgs < 100; msgs++) {
switch(tcb->state) {
case Listen:
case Closed:
@@ -2421,7 +2581,12 @@
return;
}
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
/* force an ack when a window has opened up */
+ tcprcvwin(s);
if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
tcb->rcv.blocked = 0;
tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
sndcnt = qlen(s->wq)+tcb->flgcnt;
sent = tcb->snd.ptr - tcb->snd.una;
-
- /* Don't send anything else until our SYN has been acked */
- if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
- break;
-
- /* Compute usable segment based on offered window and limit
- * window probes to one
- */
+ ssize = sndcnt;
if(tcb->snd.wnd == 0){
- if(sent != 0) {
- if((tcb->flags&FORCE) == 0)
- break;
-// tcb->snd.ptr = tcb->snd.una;
+ /* zero window probe */
+ if(sent > 0)
+ if(!(tcb->flags & FORCE))
+ break; /* already probing, rto re-probes */
+ if(ssize < sent)
+ ssize = 0;
+ else{
+ ssize -= sent;
+ if(ssize > 0)
+ ssize = 1;
}
- usable = 1;
+ } else {
+ /* calculate usable segment size */
+ if(ssize > tcb->cwind)
+ ssize = tcb->cwind;
+ if(ssize > tcb->snd.wnd)
+ ssize = tcb->snd.wnd;
+
+ if(ssize < sent)
+ ssize = 0;
+ else {
+ ssize -= sent;
+ if(ssize > tcb->mss)
+ ssize = tcb->mss;
+ }
}
- else {
- usable = tcb->cwind;
- if(tcb->snd.wnd < usable)
- usable = tcb->snd.wnd;
- usable -= sent;
- }
- ssize = sndcnt-sent;
- if(ssize && usable < 2)
- netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
- tcb->snd.wnd, tcb->cwind);
- if(usable < ssize)
- ssize = usable;
- if(tcb->mss < ssize)
- ssize = tcb->mss;
+
dsize = ssize;
seg.urg = 0;
- if(ssize == 0)
- if((tcb->flags&FORCE) == 0)
- break;
+ if(!(tcb->flags & FORCE)){
+ if(ssize == 0)
+ break;
+ if(ssize < tcb->mss)
+ if(tcb->snd.nxt == tcb->snd.ptr)
+ if(sent > TCPREXMTTHRESH*tcb->mss)
+ break;
+ }
tcb->flags &= ~FORCE;
- tcprcvwin(s);
/* By default we will generate an ack */
tcphalt(tpriv, &tcb->acktimer);
- tcb->rcv.una = 0;
seg.source = s->lport;
seg.dest = s->rport;
seg.flags = ACK;
seg.mss = 0;
seg.ws = 0;
+ seg.update = 0;
switch(tcb->state){
case Syn_sent:
seg.flags = 0;
@@ -2516,20 +2684,9 @@
}
}
- if(sent+dsize == sndcnt)
+ if(sent+dsize == sndcnt && dsize)
seg.flags |= PSH;
- /* keep track of balance of resent data */
- if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
- n = tcb->snd.nxt - tcb->snd.ptr;
- if(ssize < n)
- n = ssize;
- tcb->resent += n;
- netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
- s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
- tpriv->stats[RetransSegs]++;
- }
-
tcb->snd.ptr += ssize;
/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
* expect acknowledges
*/
if(ssize != 0){
- if(tcb->timer.state != TcptimerON)
+ if(tcb->timer.state != TcptimerON){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
tcpgo(tpriv, &tcb->timer);
+ }
/* If round trip timer isn't running, start it.
* measure the longest packet only in case the
* transmission time dominates RTT
*/
+ if(tcb->snd.retransmit == 0)
if(tcb->rtt_timer.state != TcptimerON)
if(ssize == tcb->mss) {
tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
}
tpriv->stats[OutSegs]++;
+ if(tcb->snd.retransmit)
+ tpriv->stats[RetransSegsSent]++;
+ tcb->rcv.ackptr = seg.ack;
+ tcb->rcv.wsnt = tcb->rcv.wptr;
/* put off the next keep alive */
tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
default:
panic("tcpoutput2: version %d", version);
}
- if((msgs%4) == 1){
+ if((msgs%4) == 3){
qunlock(s);
- sched();
qlock(s);
}
}
@@ -2611,7 +2775,7 @@
/*
* the BSD convention (hack?) for keep alives. resend last uchar acked.
*/
-void
+static void
tcpsendka(Conv *s)
{
Tcp seg;
@@ -2621,6 +2785,7 @@
tcb = (Tcpctl*)s->ptcl;
dbp = nil;
+ memset(&seg, 0, sizeof seg);
seg.urg = 0;
seg.source = s->lport;
seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
else
seg.seq = tcb->snd.una-1;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
+ tcprcvwin(s);
seg.wnd = tcb->rcv.wnd;
if(tcb->state == Finwait2){
seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
/*
* set connection to time out after 12 minutes
*/
-void
+static void
tcpsetkacounter(Tcpctl *tcb)
{
tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
* if we've timed out, close the connection
* otherwise, send a keepalive and restart the timer
*/
-void
+static void
tcpkeepalive(void *v)
{
Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
/*
* start keepalive timer
*/
-char*
+static char*
tcpstartka(Conv *s, char **f, int n)
{
Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
/*
* turn checksums on/off
*/
-char*
+static char*
tcpsetchecksum(Conv *s, char **f, int)
{
Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
return nil;
}
-void
+/*
+ * retransmit (at most) one segment at snd.una.
+ * preserve cwind & snd.ptr
+ */
+static void
tcprxmit(Conv *s)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
+ ulong tcwind, tptr;
tcb = (Tcpctl*)s->ptcl;
-
tcb->flags |= RETRAN|FORCE;
- tcb->snd.ptr = tcb->snd.una;
- /*
- * We should be halving the slow start threshhold (down to one
- * mss) but leaving it at mss seems to work well enough
- */
- tcb->ssthresh = tcb->mss;
-
- /*
- * pull window down to a single packet
- */
+ tptr = tcb->snd.ptr;
+ tcwind = tcb->cwind;
+ tcb->snd.ptr = tcb->snd.una;
tcb->cwind = tcb->mss;
+ tcb->snd.retransmit = 1;
tcpoutput(s);
+ tcb->snd.retransmit = 0;
+ tcb->cwind = tcwind;
+ tcb->snd.ptr = tptr;
+
+ tpriv = s->p->priv;
+ tpriv->stats[RetransSegs]++;
}
-void
+/*
+ * todo: RFC 4138 F-RTO
+ */
+static void
tcptimeout(void *arg)
{
Conv *s;
@@ -2792,11 +2966,29 @@
localclose(s, Etimedout);
break;
}
- netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+ netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+ tcb->srtt, tcb->mdev, NOW-tcb->time,
+ tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+ tcpstates[s->state]);
tcpsettimer(tcb);
+ if(tcb->snd.rto == 0)
+ tcpcongestion(tcb);
tcprxmit(s);
+ tcb->snd.ptr = tcb->snd.una;
+ tcb->cwind = tcb->mss;
+ tcb->snd.rto = 1;
tpriv->stats[RetransTimeouts]++;
- tcb->snd.dupacks = 0;
+
+ if(tcb->snd.recovery){
+ tcb->snd.dupacks = 0; /* reno rto */
+ tcb->snd.recovery = 0;
+ tpriv->stats[RecoveryRTO]++;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcpwin,
+ "rto recovery rxt @%lud\n", tcb->snd.nxt);
+ }
+
+ tcb->abcbytes = 0;
break;
case Time_wait:
localclose(s, nil);
@@ -2808,7 +3000,7 @@
poperror();
}
-int
+static int
inwindow(Tcpctl *tcb, int seq)
{
return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
/*
* set up state for a received SYN (or SYN ACK) packet
*/
-void
+static void
procsyn(Conv *s, Tcp *seg)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
tcb = (Tcpctl*)s->ptcl;
tcb->flags |= FORCE;
tcb->rcv.nxt = seg->seq + 1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->irs = seg->seq;
/* our sending max segment size cannot be bigger than what he asked for */
- if(seg->mss != 0 && seg->mss < tcb->mss)
+ if(seg->mss != 0 && seg->mss < tcb->mss) {
tcb->mss = seg->mss;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
+ }
- /* the congestion window always starts out as a single segment */
+ /* if the server does not support ws option, disable window scaling */
+ if(seg->ws == 0){
+ tcb->scale = 0;
+ tcb->snd.scale = 0;
+ }
+
tcb->snd.wnd = seg->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
}
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
{
- Reseq *rp, *rp1;
- int i, rqlen, qmax;
+ Reseq *r, *next;
+ for(r = tcb->reseq; r != nil; r = next){
+ next = r->next;
+ freeblist(r->bp);
+ free(r);
+ }
+ tcb->reseq = nil;
+ tcb->nreseq = 0;
+ tcb->reseqlen = 0;
+ return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+ char *s;
+
+ for(; r != nil; r = r->next){
+ s = nil;
+ if(r->next == nil && r->seg.seq != n)
+ s = "hole/end";
+ else if(r->next == nil)
+ s = "end";
+ else if(r->seg.seq != n)
+ s = "hole";
+ if(s != nil)
+ netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+ n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+ n = r->seg.seq + r->seg.len;
+ }
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+ Reseq *rp, **rr;
+ int qmax;
+
rp = malloc(sizeof(Reseq));
if(rp == nil){
- freeblist(bp); /* bp always consumed by add_reseq */
+ freeblist(bp); /* bp always consumed by addreseq */
return 0;
}
@@ -2854,56 +3093,39 @@
rp->bp = bp;
rp->length = length;
- /* Place on reassembly list sorting by starting seq number */
- rp1 = tcb->reseq;
- if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
- rp->next = rp1;
- tcb->reseq = rp;
- if(rp->next != nil)
- tpriv->stats[OutOfOrder]++;
- return 0;
- }
+ tcb->reseqlen += length;
+ tcb->nreseq++;
- rqlen = 0;
- for(i = 0;; i++) {
- rqlen += rp1->length;
- if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
- rp->next = rp1->next;
- rp1->next = rp;
+ /* Place on reassembly list sorting by starting seq number */
+ for(rr = &tcb->reseq;; rr = &(*rr)->next)
+ if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+ rp->next = *rr;
+ *rr = rp;
+ tpriv->stats[Resequenced]++;
if(rp->next != nil)
tpriv->stats[OutOfOrder]++;
break;
}
- rp1 = rp1->next;
- }
- qmax = QMAX<<tcb->rcv.scale;
- if(rqlen > qmax){
- print("resequence queue > window: %d > %d\n", rqlen, qmax);
- i = 0;
- for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
- print("%#lux %#lux %#ux\n", rp1->seg.seq,
- rp1->seg.ack, rp1->seg.flags);
- if(i++ > 10){
- print("...\n");
- break;
- }
- }
- // delete entire reassembly queue; wait for retransmit.
- // - should we be smarter and only delete the tail?
- for(rp = tcb->reseq; rp != nil; rp = rp1){
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
-
- return -1;
+ qmax = tcb->window;
+ if(tcb->reseqlen > qmax){
+ netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqBytelim]++;
+ return dumpreseq(tcb);
}
+ qmax = tcb->window / tcb->mss; /* ~190 for qscale==2, 390 for qscale=3 */
+ if(tcb->nreseq > qmax){
+ netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqPktlim]++;
+ return dumpreseq(tcb);
+ }
+
return 0;
}
-void
+static void
getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
Reseq *rp;
@@ -2918,10 +3140,13 @@
*bp = rp->bp;
*length = rp->length;
+ tcb->nreseq--;
+ tcb->reseqlen -= rp->length;
+
free(rp);
}
-int
+static int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
ushort len;
@@ -2992,7 +3217,7 @@
return 0;
}
-void
+static void
tcpadvise(Proto *tcp, Block *bp, char *msg)
{
Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
v4tov6(source, h4->tcpsrc);
psource = nhgets(h4->tcpsport);
pdest = nhgets(h4->tcpdport);
- }
- else {
+ } else {
ipmove(dest, h6->tcpdst);
ipmove(source, h6->tcpsrc);
psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
/* Look for a connection */
qlock(tcp);
- for(p = tcp->conv; *p; p++) {
- s = *p;
+ for(p = tcp->conv; (s = *p) != nil; p++) {
tcb = (Tcpctl*)s->ptcl;
if(s->rport == pdest)
if(s->lport == psource)
@@ -3029,6 +3252,8 @@
if(tcb->state != Closed)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qlock(s);
qunlock(tcp);
switch(tcb->state){
@@ -3058,9 +3283,11 @@
}
/* called with c qlocked */
-char*
+static char*
tcpctl(Conv* c, char** f, int n)
{
+ if(n == 1 && strcmp(f[0], "close") == 0)
+ return tcpclose(c), nil;
if(n == 1 && strcmp(f[0], "hangup") == 0)
return tcphangup(c);
if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
return "unknown control request";
}
-int
+static int
tcpstats(Proto *tcp, char *buf, int len)
{
Tcppriv *priv;
@@ -3083,7 +3310,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -3096,7 +3323,7 @@
* of questionable validity so we try to use them only when we're
* up against the wall.
*/
-int
+static int
tcpgc(Proto *tcp)
{
Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
Tcpctl *tcb;
- n = natgc(tcp->ipproto);
+ n = 0;
ep = &tcp->conv[tcp->nc];
for(pp = tcp->conv; pp < ep; pp++) {
c = *pp;
@@ -3116,13 +3343,13 @@
switch(tcb->state){
case Syn_received:
if(NOW - tcb->time > 5000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
case Finwait2:
if(NOW - tcb->time > 5*60*1000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
@@ -3132,7 +3359,7 @@
return n;
}
-void
+static void
tcpsettimer(Tcpctl *tcb)
{
int x;
@@ -3141,9 +3368,9 @@
x = backoff(tcb->backoff) *
(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
- /* bounded twixt 1/2 and 64 seconds */
- if(x < 500/MSPTICK)
- x = 500/MSPTICK;
+ /* bounded twixt 0.3 and 64 seconds */
+ if(x < 300/MSPTICK)
+ x = 300/MSPTICK;
else if(x > (64000/MSPTICK))
x = 64000/MSPTICK;
tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
Fsproto(fs, tcp);
}
-void
+static void
tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
{
- if(rcvscale){
- tcb->rcv.scale = rcvscale & 0xff;
- tcb->snd.scale = sndscale & 0xff;
- tcb->window = QMAX<<tcb->snd.scale;
- qsetlimit(s->rq, tcb->window);
- } else {
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- tcb->window = QMAX;
- qsetlimit(s->rq, tcb->window);
- }
+ /*
+ * guess at reasonable queue sizes. there's no current way
+ * to know how many nic receive buffers we can safely tie up in the
+ * tcp stack, and we don't adjust our queues to maximize throughput
+ * and minimize bufferbloat. n.b. the offer (rcvscale) needs to be
+ * respected, but we still control our own buffer commitment by
+ * keeping a seperate qscale.
+ */
+ tcb->rcv.scale = rcvscale & 0xff;
+ tcb->snd.scale = sndscale & 0xff;
+ tcb->qscale = rcvscale & 0xff;
+ if(rcvscale > Maxqscale)
+ tcb->qscale = Maxqscale;
+
+ if(rcvscale != tcb->rcv.scale)
+ netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+ tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+ tcb->window = QMAX<<tcb->qscale;
+ tcb->ssthresh = tcb->window;
+
+ /*
+ * it's important to set wq large enough to cover the full
+ * bandwidth-delay product. it's possible to be in loss
+ * recovery with a big window, and we need to keep sending
+ * into the inflated window. the difference can be huge
+ * for even modest (70ms) ping times.
+ */
+ qsetlimit(s->rq, QMAX<<tcb->qscale);
+ qsetlimit(s->wq, QMAX<<tcb->qscale);
+ tcprcvwin(s);
}
--- a/os/ip/udp.c
+++ b/os/ip/udp.c
@@ -24,7 +24,6 @@
IP_UDPPROTO = 17,
UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
Udprxms = 200,
Udptickms = 100,
@@ -40,7 +39,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar udpproto; /* Protocol */
uchar udpplen[2]; /* Header plus data length */
uchar udpsrc[IPv4addrlen]; /* Ip source */
@@ -73,10 +72,10 @@
typedef struct Udpstats Udpstats;
struct Udpstats
{
- ulong udpInDatagrams;
+ uvlong udpInDatagrams;
ulong udpNoPorts;
ulong udpInErrors;
- ulong udpOutDatagrams;
+ uvlong udpOutDatagrams;
};
typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
typedef struct Udpcb Udpcb;
struct Udpcb
{
- QLock;
uchar headers;
};
@@ -125,7 +123,7 @@
static int
udpstate(Conv *c, char *state, int n)
{
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
c->inuse ? "Open" : "Closed",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
static void
udpcreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->rq = qopen(512*1024, Qmsg, 0, 0);
c->wq = qbypass(udpkick, c);
}
@@ -175,8 +173,6 @@
ucb = (Udpcb*)c->ptcl;
ucb->headers = 0;
-
- qunlock(c);
}
void
@@ -192,12 +188,13 @@
Udppriv *upriv;
Fs *f;
int version;
- Conv *rc;
+ Routehint *rh;
+ ushort csum;
upriv = c->p->priv;
f = c->p->f;
- netlog(c->p->f, Logudp, "udp: kick\n");
+// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */
if(bp == nil)
return;
@@ -219,21 +216,6 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
- bp->rp += 2+2; /* Ignore local port */
- break;
default:
rport = 0;
break;
@@ -240,18 +222,12 @@
}
if(ucb->headers) {
- if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
- ipcmp(laddr, IPnoaddr) == 0)
+ if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
version = V4;
else
version = V6;
} else {
- if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- version = V4;
- else
- version = V6;
+ version = convipvers(c);
}
dlen = blocklen(bp);
@@ -260,9 +236,6 @@
switch(version){
case V4:
bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
uh4 = (Udp4hdr *)(bp->rp);
ptcllen = dlen + UDP_UDPHDR_SZ;
uh4->Unused = 0;
@@ -274,7 +247,7 @@
v6tov4(uh4->udpdst, raddr);
hnputs(uh4->udpdport, rport);
v6tov4(uh4->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
v6tov4(uh4->udpdst, c->raddr);
hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
v6tov4(uh4->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh4->udpsport, c->lport);
hnputs(uh4->udplen, ptcllen);
uh4->udpcksum[0] = 0;
uh4->udpcksum[1] = 0;
- hnputs(uh4->udpcksum,
- ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh4->udpcksum, csum);
uh4->vihl = IP_VER4;
- ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput4(f, bp, 0, c->ttl, c->tos, rh);
break;
case V6:
+ /*
+ * using the v6 ip header to create pseudo header
+ * first then reset it to the normal ip header
+ */
bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
- // using the v6 ip header to create pseudo header
- // first then reset it to the normal ip header
uh6 = (Udp6hdr *)(bp->rp);
memset(uh6, 0, 8);
ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
ipmove(uh6->udpdst, raddr);
hnputs(uh6->udpdport, rport);
ipmove(uh6->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
ipmove(uh6->udpdst, c->raddr);
hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
ipmove(uh6->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh6->udpsport, c->lport);
hnputs(uh6->udplen, ptcllen);
uh6->udpcksum[0] = 0;
uh6->udpcksum[1] = 0;
- hnputs(uh6->udpcksum,
- ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh6->udpcksum, csum);
memset(uh6, 0, 8);
uh6->viclfl[0] = IP_VER6;
hnputs(uh6->len, ptcllen);
uh6->nextheader = IP_UDPPROTO;
- ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput6(f, bp, 0, c->ttl, c->tos, rh);
break;
default:
@@ -360,10 +336,8 @@
uh4 = (Udp4hdr*)(bp->rp);
version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
- /*
- * Put back pseudo header for checksum
- * (remember old values for icmpnoconv())
- */
+ /* Put back pseudo header for checksum
+ * (remember old values for icmpnoconv()) */
switch(version) {
case V4:
ottl = uh4->Unused;
@@ -423,7 +397,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.udpNoPorts++;
qunlock(udp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
icmpnoconv(f, bp);
break;
case V6:
- icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+ icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
break;
default:
panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
if(c->state == Announced){
if(ucb->headers == 0){
/* create a new conversation */
- if(ipforme(f, laddr) != Runi) {
- switch(version){
- case V4:
- v4tov6(laddr, ifc->lifc->local);
- break;
- case V6:
- ipmove(laddr, ifc->lifc->local);
- break;
- default:
- panic("udpiput3: version %d", version);
- }
- }
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
c = Fsnewcall(c, raddr, rport, laddr, lport, version);
if(c == nil){
qunlock(udp);
@@ -507,33 +471,21 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
}
- if(bp->next)
- bp = concatblock(bp);
-
if(qfull(c->rq)){
- qunlock(c);
- netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
- return;
+ } else {
+ qpass(c->rq, concatblock(bp));
}
-
- qpass(c->rq, bp);
qunlock(c);
}
@@ -545,11 +497,13 @@
ucb = (Udpcb*)c->ptcl;
if(n == 1){
- if(strcmp(f[0], "oldheaders") == 0){
- ucb->headers = 6;
+ if(strcmp(f[0], "hangup") == 0){
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 7;
+ }
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
}
}
@@ -564,34 +518,25 @@
uchar source[IPaddrlen], dest[IPaddrlen];
ushort psource, pdest;
Conv *s, **p;
- int version;
h4 = (Udp4hdr*)(bp->rp);
- version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+ h6 = (Udp6hdr*)(bp->rp);
- switch(version) {
- case V4:
+ if((h4->vihl&0xF0)==IP_VER4) {
v4tov6(dest, h4->udpdst);
v4tov6(source, h4->udpsrc);
psource = nhgets(h4->udpsport);
pdest = nhgets(h4->udpdport);
- break;
- case V6:
- h6 = (Udp6hdr*)(bp->rp);
+ } else {
ipmove(dest, h6->udpdst);
ipmove(source, h6->udpsrc);
psource = nhgets(h6->udpsport);
pdest = nhgets(h6->udpdport);
- break;
- default:
- panic("udpadvise: version %d", version);
- return; /* to avoid a warning */
}
/* Look for a connection */
qlock(udp);
- for(p = udp->conv; *p; p++) {
- s = *p;
+ for(p = udp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
Udppriv *upriv;
upriv = udp->priv;
- return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+ return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+ "InErrors: %lud\nOutDatagrams: %llud\n",
upriv->ustats.udpInDatagrams,
upriv->ustats.udpNoPorts,
upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
upriv->ustats.udpOutDatagrams);
}
-int
-udpgc(Proto *udp)
-{
- return natgc(udp->ipproto);
-}
-
void
udpinit(Fs *fs)
{
@@ -647,7 +587,6 @@
udp->rcv = udpiput;
udp->advise = udpadvise;
udp->stats = udpstats;
- udp->gc = udpgc;
udp->ipproto = IP_UDPPROTO;
udp->nc = Nchans;
udp->ptclsize = sizeof(Udpcb);
diff -u a/os/ip//arp.c b/os/ip//arp.c
--- a/os/ip//arp.c
+++ b/os/ip//arp.c
@@ -47,7 +47,8 @@
#define haship(s) ((s)[IPaddrlen-1]%NHASH)
-extern int ReTransTimer = RETRANS_TIMER;
+int ReTransTimer = RETRANS_TIMER;
+
static void rxmitproc(void *v);
void
@@ -57,145 +58,121 @@
f->arp->f = f;
f->arp->rxmt = nil;
f->arp->dropf = f->arp->dropl = nil;
- kproc("rxmitproc", rxmitproc, f->arp, 0);
+ kproc("rxmitproc", rxmitproc, f->arp);
}
-/*
- * create a new arp entry for an ip address.
- */
-static Arpent*
-newarp6(Arp *arp, uchar *ip, Ipifc *ifc, int addrxt)
+static void
+freeblistchain(Block *bp)
{
- uint t;
- Block *next, *xp;
- Arpent *a, *e, *f, **l;
- Medium *m = ifc->m;
- int empty;
+ Block *next;
- /* find oldest entry */
- e = &arp->cache[NCACHE];
- a = arp->cache;
- t = a->utime;
- for(f = a; f < e; f++){
- if(f->utime < t){
- t = f->utime;
- a = f;
- }
+ while(bp != nil){
+ next = bp->list;
+ freeblist(bp);
+ bp = next;
}
+}
- /* dump waiting packets */
- xp = a->hold;
- a->hold = nil;
+/* take out of re-transmit chain */
+static Arpent**
+rxmtunchain(Arp *arp, Arpent *a)
+{
+ Arpent **l;
- if(isv4(a->ip)){
- while(xp){
- next = xp->list;
- freeblist(xp);
- xp = next;
+ for(l = &arp->rxmt; *l != nil; l = &((*l)->nextrxt)){
+ if(*l == a){
+ *l = a->nextrxt;
+ break;
}
}
- else { // queue icmp unreachable for rxmitproc later on, w/o arp lock
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
+ a->nextrxt = nil;
+ return l;
+}
- for(next = xp->list; next; next = next->list)
- xp = next;
- arp->dropl = xp;
- wakeup(&arp->rxmtq);
- }
- }
+static void
+cleanarpent(Arp *arp, Arpent *a)
+{
+ Arpent **l;
+ Block *bp;
/* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
+ for(l = &arp->hash[haship(a->ip)]; *l != nil; l = &((*l)->hash)){
+ if(*l == a){
*l = a->hash;
break;
}
- l = &f->hash;
}
+ a->hash = nil;
- /* insert into new chain */
- l = &arp->hash[haship(ip)];
- a->hash = *l;
- *l = a;
+ /* dump waiting packets */
+ bp = a->hold;
+ a->hold = nil;
+ if(isv4(a->ip))
+ freeblistchain(bp);
+ else {
+ rxmtunchain(arp, a);
- memmove(a->ip, ip, sizeof(a->ip));
- a->utime = NOW;
- a->ctime = 0;
- a->type = m;
+ /* queue icmp unreachable for rxmitproc later on, w/o arp lock */
+ if(bp != nil){
+ if(arp->dropf == nil)
+ arp->dropf = bp;
+ else
+ arp->dropl->list = bp;
+ arp->dropl = a->last;
- a->rtime = NOW + ReTransTimer;
- a->rxtsrem = MAX_MULTICAST_SOLICIT;
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
-
- /* put to the end of re-transmit chain; addrxt is 0 when isv4(a->ip) */
- if(!ipismulticast(a->ip) && addrxt){
- l = &arp->rxmt;
- empty = (*l==nil);
-
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
+ if(bp == arp->dropf)
+ wakeup(&arp->rxmtq);
}
- for(f = *l; f; f = f->nextrxt){
- l = &f->nextrxt;
- }
- *l = a;
- if(empty)
- wakeup(&arp->rxmtq);
}
+ a->last = nil;
- a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
- return a;
-}
+ a->state = 0;
+ a->rxtsrem = 0;
-/* called with arp qlocked */
+ a->utime = 0;
+ a->ctime = 0;
-void
-cleanarpent(Arp *arp, Arpent *a)
+ memset(a->ip, 0, sizeof(a->ip));
+ memset(a->mac, 0, sizeof(a->mac));
+}
+
+/*
+ * create a new arp entry for an ip address on ifc.
+ */
+static Arpent*
+newarpent(Arp *arp, uchar *ip, Ipifc *ifc)
{
- Arpent *f, **l;
+ Arpent *a, *e, *f, **l;
+ ulong t;
- a->utime = 0;
- a->ctime = 0;
- a->type = 0;
- a->state = 0;
-
- /* take out of current chain */
- l = &arp->hash[haship(a->ip)];
- for(f = *l; f; f = f->hash){
- if(f == a){
- *l = a->hash;
- break;
+ /* find oldest entry */
+ e = &arp->cache[NCACHE];
+ a = arp->cache;
+ t = a->utime;
+ for(f = a; f < e; f++){
+ if(f->utime < t){
+ t = f->utime;
+ a = f;
}
- l = &f->hash;
}
+ cleanarpent(arp, a);
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
+ ipmove(a->ip, ip);
+ a->ifc = ifc;
+ a->ifcid = ifc->ifcid;
+
+ /* insert into new chain */
+ l = &arp->hash[haship(ip)];
+ a->hash = *l;
+ *l = a;
+
+ return a;
}
+
/*
* fill in the media address if we have it. Otherwise return an
* Arpent that represents the state of the address resolution FSM
@@ -207,7 +184,6 @@
{
int hash;
Arpent *a;
- Medium *type = ifc->m;
uchar v6ip[IPaddrlen];
if(version == V4){
@@ -217,30 +193,28 @@
qlock(arp);
hash = haship(ip);
- for(a = arp->hash[hash]; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0)
- if(type == a->type)
+ for(a = arp->hash[hash]; a != nil; a = a->hash){
+ if(a->ifc == ifc && a->ifcid == ifc->ifcid && ipcmp(ip, a->ip) == 0)
break;
}
-
if(a == nil){
- a = newarp6(arp, ip, ifc, (version != V4));
+ a = newarpent(arp, ip, ifc);
a->state = AWAIT;
}
a->utime = NOW;
if(a->state == AWAIT){
if(bp != nil){
- if(a->hold)
- a->last->list = bp;
- else
+ bp->list = nil;
+ if(a->hold == nil)
a->hold = bp;
+ else
+ a->last->list = bp;
a->last = bp;
- bp->list = nil;
}
return a; /* return with arp qlocked */
}
- memmove(mac, a->mac, a->type->maclen);
+ memmove(mac, a->mac, ifc->m->maclen);
/* remove old entries */
if(NOW - a->ctime > 15*60*1000)
@@ -269,132 +243,82 @@
arpresolve(Arp *arp, Arpent *a, Medium *type, uchar *mac)
{
Block *bp;
- Arpent *f, **l;
- if(!isv4(a->ip)){
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
memmove(a->mac, mac, type->maclen);
- a->type = type;
+ if(a->state == AWAIT && !isv4(a->ip)){
+ rxmtunchain(arp, a);
+ a->rxtsrem = 0;
+ }
a->state = AOK;
- a->utime = NOW;
+ a->ctime = a->utime = NOW;
bp = a->hold;
- a->hold = nil;
+ a->hold = a->last = nil;
qunlock(arp);
return bp;
}
-void
-arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, int refresh)
+int
+arpenter(Fs *fs, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh)
{
- Arp *arp;
- Route *r;
- Arpent *a, *f, **l;
- Ipifc *ifc;
- Medium *type;
- Block *bp, *next;
uchar v6ip[IPaddrlen];
+ Block *bp, *next;
+ Arpent *a;
+ Route *r;
+ Arp *arp;
- arp = fs->arp;
+ if(ifc->m == nil || ifc->m->maclen != n || ifc->m->maclen == 0)
+ return -1;
- if(n != 6){
-// print("arp: len = %d\n", n);
- return;
- }
-
switch(version){
case V4:
- r = v4lookup(fs, ip, nil);
+ r = v4lookup(fs, ip, ia, nil);
v4tov6(v6ip, ip);
ip = v6ip;
break;
case V6:
- r = v6lookup(fs, ip, nil);
+ r = v6lookup(fs, ip, ia, nil);
break;
default:
panic("arpenter: version %d", version);
- return; /* to supress warnings */
+ return -1; /* to supress warnings */
}
- if(r == nil){
-// print("arp: no route for entry\n");
- return;
- }
+ if(r == nil || r->ifc != ifc || (r->type & (Rbcast|Rmulti)) != 0)
+ return -1;
- ifc = r->ifc;
- type = ifc->m;
-
+ arp = fs->arp;
qlock(arp);
- for(a = arp->hash[haship(ip)]; a; a = a->hash){
- if(a->type != type || (a->state != AWAIT && a->state != AOK))
+ for(a = arp->hash[haship(ip)]; a != nil; a = a->hash){
+ if(a->ifc != ifc || a->ifcid != ifc->ifcid)
continue;
-
if(ipcmp(a->ip, ip) == 0){
- a->state = AOK;
- memmove(a->mac, mac, type->maclen);
-
- if(version == V6){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(f = *l; f; f = f->nextrxt){
- if(f == a){
- *l = a->nextrxt;
- break;
- }
- l = &f->nextrxt;
- }
- }
-
- a->ifc = ifc;
- a->ifcid = ifc->ifcid;
- bp = a->hold;
- a->hold = nil;
if(version == V4)
ip += IPv4off;
- a->utime = NOW;
- a->ctime = a->utime;
- qunlock(arp);
-
- while(bp){
+ bp = arpresolve(arp, a, ifc->m, mac); /* unlocks arp */
+ for(; bp != nil; bp = next){
next = bp->list;
- if(ifc != nil){
- if(waserror()){
- runlock(ifc);
- nexterror();
- }
- rlock(ifc);
- if(ifc->m != nil)
- ifc->m->bwrite(ifc, bp, version, ip);
- else
- freeb(bp);
- runlock(ifc);
- poperror();
- } else
- freeb(bp);
- bp = next;
+ bp->list = nil;
+ if(waserror()){
+ freeblistchain(next);
+ break;
+ }
+ ipifcoput(ifc, bp, version, ip);
+ poperror();
}
- return;
+ return 1;
}
}
if(refresh == 0){
- a = newarp6(arp, ip, ifc, 0);
+ a = newarpent(arp, ip, ifc);
a->state = AOK;
- a->type = type;
- a->ctime = NOW;
- memmove(a->mac, mac, type->maclen);
+ a->ctime = a->utime = NOW;
+ memmove(a->mac, mac, n);
}
-
qunlock(arp);
+
+ return refresh == 0;
}
int
@@ -401,13 +325,12 @@
arpwrite(Fs *fs, char *s, int len)
{
int n;
- Route *r;
Arp *arp;
- Block *bp;
- Arpent *a, *fl, **l;
+ Arpent *a, *x;
Medium *m;
- char *f[4], buf[256];
- uchar ip[IPaddrlen], mac[MAClen];
+ Ipifc *ifc;
+ char *f[5], buf[256];
+ uchar ip[IPaddrlen], ia[IPaddrlen], mac[MAClen];
arp = fs->arp;
@@ -420,7 +343,7 @@
if(len > 0 && buf[len-1] == '\n')
buf[len-1] = 0;
- n = getfields(buf, f, 4, 1, " ");
+ n = getfields(buf, f, nelem(f), 1, " ");
if(strcmp(f[0], "flush") == 0){
qlock(arp);
for(a = arp->cache; a < &arp->cache[NCACHE]; a++){
@@ -427,19 +350,20 @@
memset(a->ip, 0, sizeof(a->ip));
memset(a->mac, 0, sizeof(a->mac));
a->hash = nil;
+ a->nextrxt = nil;
+ a->ifc = nil;
+ a->ifcid = 0;
a->state = 0;
+ a->rxtsrem = 0;
+ a->ctime = 0;
a->utime = 0;
- while(a->hold != nil){
- bp = a->hold->list;
- freeblist(a->hold);
- a->hold = bp;
- }
+ freeblistchain(a->hold);
+ a->hold = a->last = nil;
}
memset(arp->hash, 0, sizeof(arp->hash));
-// clear all pkts on these lists (rxmt, dropf/l)
+ freeblistchain(arp->dropf);
+ arp->dropf = arp->dropl = nil;
arp->rxmt = nil;
- arp->dropf = nil;
- arp->dropl = nil;
qunlock(arp);
} else if(strcmp(f[0], "add") == 0){
switch(n){
@@ -446,64 +370,53 @@
default:
error(Ebadarg);
case 3:
- parseip(ip, f[1]);
- if(isv4(ip))
- r = v4lookup(fs, ip+IPv4off, nil);
- else
- r = v6lookup(fs, ip, nil);
- if(r == nil)
- error("Destination unreachable");
- m = r->ifc->m;
- n = parsemac(mac, f[2], m->maclen);
+ if(parseip(ip, f[1]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[2], sizeof(mac))) <= 0)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
case 4:
m = ipfindmedium(f[1]);
- if(m == nil)
+ if(m == nil || m->maclen == 0)
error(Ebadarp);
- parseip(ip, f[2]);
- n = parsemac(mac, f[3], m->maclen);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ findlocalip(fs, ia, ip);
break;
+ case 5:
+ m = ipfindmedium(f[1]);
+ if(m == nil || m->maclen == 0)
+ error(Ebadarp);
+ if(parseip(ip, f[2]) == -1)
+ error(Ebadip);
+ if((n = parsemac(mac, f[3], sizeof(mac))) != m->maclen)
+ error(Ebadarp);
+ if(parseip(ia, f[4]) == -1)
+ error(Ebadip);
+ break;
}
-
- if(m->ares == nil)
- error(Ebadarp);
-
- m->ares(fs, V6, ip, mac, n, 0);
+ if((ifc = findipifc(fs, ia, ia, Runi)) == nil)
+ error("no interface");
+ rlock(ifc);
+ if(!ipv6local(ifc, ia, 0, ip) || arpenter(fs, V6, ip, mac, n, ia, ifc, 0) < 0){
+ runlock(ifc);
+ error("destination unreachable");
+ }
+ runlock(ifc);
} else if(strcmp(f[0], "del") == 0){
- if(n != 2)
+ if (n != 2)
error(Ebadarg);
-
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ error(Ebadip);
qlock(arp);
-
- l = &arp->hash[haship(ip)];
- for(a = *l; a; a = a->hash){
- if(memcmp(ip, a->ip, sizeof(a->ip)) == 0){
- *l = a->hash;
- break;
- }
- l = &a->hash;
+ for(a = arp->hash[haship(ip)]; a != nil; a = x){
+ x = a->hash;
+ if(ipcmp(ip, a->ip) == 0)
+ cleanarpent(arp, a);
}
-
- if(a){
- /* take out of re-transmit chain */
- l = &arp->rxmt;
- for(fl = *l; fl; fl = fl->nextrxt){
- if(fl == a){
- *l = a->nextrxt;
- break;
- }
- l = &fl->nextrxt;
- }
-
- a->nextrxt = nil;
- a->hash = nil;
- a->hold = nil;
- a->last = nil;
- a->ifc = nil;
- memset(a->ip, 0, sizeof(a->ip));
- memset(a->mac, 0, sizeof(a->mac));
- }
qunlock(arp);
} else
error(Ebadarp);
@@ -511,13 +424,6 @@
return len;
}
-enum
-{
- Alinelen= 90,
-};
-
-char *aformat = "%-6.6s %-8.8s %-40.40I %-32.32s\n";
-
static void
convmac(char *p, uchar *mac, int n)
{
@@ -526,136 +432,136 @@
}
int
-arpread(Arp *arp, char *p, ulong offset, int len)
+arpread(Arp *arp, char *s, ulong offset, int len)
{
+ char mac[2*MAClen+1], *state, *mname, *p;
+ uchar ip[IPaddrlen], ia[IPaddrlen];
+ Ipifc *ifc;
Arpent *a;
- int n;
- char mac[2*MAClen+1];
+ long n, o;
- if(offset % Alinelen)
- return 0;
-
- offset = offset/Alinelen;
- len = len/Alinelen;
-
- n = 0;
+ p = s;
+ o = -offset;
for(a = arp->cache; len > 0 && a < &arp->cache[NCACHE]; a++){
- if(a->state == 0)
+ if(a->state == 0 || (ifc = a->ifc) == nil)
continue;
- if(offset > 0){
- offset--;
+
+ rlock(ifc);
+ qlock(arp);
+ state = arpstate[a->state];
+ ipmove(ip, a->ip);
+ if(ifc->m == nil || a->ifcid != ifc->ifcid || !ipv6local(ifc, ia, 0, ip)){
+ qunlock(arp);
+ runlock(ifc);
continue;
}
- len--;
- qlock(arp);
- convmac(mac, a->mac, a->type->maclen);
- n += sprint(p+n, aformat, a->type->name, arpstate[a->state], a->ip, mac);
+ mname = ifc->m->name;
+ convmac(mac, a->mac, ifc->m->maclen);
qunlock(arp);
+ runlock(ifc);
+
+ n = snprint(up->genbuf, sizeof up->genbuf,
+ "%-6.6s %-4.4s %-40.40I %-16.16s %I\n",
+ mname, state, ip, mac, ia);
+ o += n;
+ if(o <= 0)
+ continue;
+ if(n > len)
+ break;
+ memmove(p, up->genbuf, n);
+ len -= n;
+ p += n;
}
- return n;
+ return p - s;
}
-extern int
-rxmitsols(Arp *arp)
+void
+ndpsendsol(Fs *f, Ipifc *ifc, Arpent *a)
{
- uint sflag;
- Block *next, *xp;
- Arpent *a, *b, **l;
- Fs *f;
- uchar ipsrc[IPaddrlen];
- Ipifc *ifc = nil;
- long nrxt;
+ uchar targ[IPaddrlen], src[IPaddrlen];
+ Arpent **l;
- qlock(arp);
- f = arp->f;
+ a->ctime = NOW;
+ if(a->rxtsrem == 0)
+ a->rxtsrem = MAX_MULTICAST_SOLICIT;
+ else
+ a->rxtsrem--;
- a = arp->rxmt;
- if(a==nil){
- nrxt = 0;
- goto dodrops; //return nrxt;
- }
- nrxt = a->rtime - NOW;
- if(nrxt > 3*ReTransTimer/4)
- goto dodrops; //return nrxt;
+ /* put on end of re-transmit chain */
+ for(l = rxmtunchain(f->arp, a); *l != nil; l = &(*l)->nextrxt)
+ ;
+ *l = a;
- for(; a; a = a->nextrxt){
- ifc = a->ifc;
- assert(ifc != nil);
- if((a->rxtsrem <= 0) || !(canrlock(ifc)) || (a->ifcid != ifc->ifcid)){
- xp = a->hold;
- a->hold = nil;
+ if(l == &f->arp->rxmt)
+ wakeup(&f->arp->rxmtq);
- if(xp){
- if(arp->dropl == nil)
- arp->dropf = xp;
- else
- arp->dropl->list = xp;
- }
+ /* try to use source address of original packet */
+ ipmove(targ, a->ip);
+ if(a->last != nil){
+ ipmove(src, ((Ip6hdr*)a->last->rp)->src);
+ arprelease(f->arp, a);
- cleanarpent(arp, a);
- }
- else
- break;
+ if(iplocalonifc(ifc, src) != nil || ipproxyifc(f, ifc, src))
+ goto send;
+ } else {
+ arprelease(f->arp, a);
}
- if(a == nil)
- goto dodrops;
+ if(!ipv6local(ifc, src, 0, targ))
+ return;
+send:
+ if(!waserror()){
+ icmpns(f, src, SRC_UNI, targ, TARG_MULTI, ifc->mac);
+ poperror();
+ }
+}
+static void
+rxmitsols(Arp *arp)
+{
+ Block *next, *bp;
+ Arpent *a;
+ Ipifc *ifc;
+ Route *r;
- qunlock(arp); /* for icmpns */
- if((sflag = ipv6anylocal(ifc, ipsrc)) != SRC_UNSPEC)
- icmpns(f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
-
- runlock(ifc);
- qlock(arp);
-
- /* put to the end of re-transmit chain */
- l = &arp->rxmt;
- for(b = *l; b; b = b->nextrxt){
- if(b == a){
- *l = a->nextrxt;
- break;
+ qlock(arp);
+ while((a = arp->rxmt) != nil && NOW - a->ctime > 3*ReTransTimer/4){
+ if(a->rxtsrem > 0 && (ifc = a->ifc) != nil && canrlock(ifc)){
+ if(a->ifcid == ifc->ifcid){
+ ndpsendsol(arp->f, ifc, a); /* unlocks arp */
+ runlock(ifc);
+ qlock(arp);
+ continue;
+ }
+ runlock(ifc);
}
- l = &b->nextrxt;
+ cleanarpent(arp, a);
}
- for(b = *l; b; b = b->nextrxt){
- l = &b->nextrxt;
- }
- *l = a;
- a->rxtsrem--;
- a->nextrxt = nil;
- a->rtime = NOW + ReTransTimer;
-
- a = arp->rxmt;
- if(a==nil)
- nrxt = 0;
- else
- nrxt = a->rtime - NOW;
-
-dodrops:
- xp = arp->dropf;
- arp->dropf = nil;
- arp->dropl = nil;
+ bp = arp->dropf;
+ arp->dropf = arp->dropl = nil;
qunlock(arp);
- for(; xp; xp = next){
- next = xp->list;
- icmphostunr(f, ifc, xp, icmp6_adr_unreach, 1);
+ for(; bp != nil; bp = next){
+ next = bp->list;
+ bp->list = nil;
+ r = v6lookup(arp->f, ((Ip6hdr*)bp->rp)->src, ((Ip6hdr*)bp->rp)->dst, nil);
+ if(r != nil && (ifc = r->ifc) != nil && canrlock(ifc)){
+ if(!waserror()){
+ icmphostunr6(arp->f, ifc, bp, Icmp6_adr_unreach, (r->type & Runi) != 0);
+ poperror();
+ }
+ runlock(ifc);
+ }
+ freeblist(bp);
}
-
- return nrxt;
-
}
static int
rxready(void *v)
{
- Arp *arp = (Arp *) v;
- int x;
+ Arp *arp = (Arp *)v;
- x = ((arp->rxmt != nil) || (arp->dropf != nil));
-
- return x;
+ return arp->rxmt != nil || arp->dropf != nil;
}
static void
@@ -662,20 +568,15 @@
rxmitproc(void *v)
{
Arp *arp = v;
- long wakeupat;
arp->rxmitp = up;
- //print("arp rxmitproc started\n");
if(waserror()){
- arp->rxmitp = 0;
+ arp->rxmitp = nil;
pexit("hangup", 1);
}
for(;;){
- wakeupat = rxmitsols(arp);
- if(wakeupat == 0)
- sleep(&arp->rxmtq, rxready, v);
- else if(wakeupat > ReTransTimer/4)
- tsleep(&arp->rxmtq, return0, 0, wakeupat);
+ sleep(&arp->rxmtq, rxready, v);
+ rxmitsols(arp);
+ tsleep(&arp->rxmtq, return0, nil, ReTransTimer/4);
}
}
-
diff -u a/os/ip//devip.c b/os/ip//devip.c
--- a/os/ip//devip.c
+++ b/os/ip//devip.c
@@ -14,7 +14,6 @@
Qbootp,
Qndb,
Qiproute,
- Qiprouter,
Qipselftab,
Qlog,
@@ -43,11 +42,11 @@
Maskproto= (1<<Logproto)-1,
Shiftproto= Logtype + Logconv,
- Nfs= 32,
+ Nfs= 128,
};
-#define TYPE(x) ( ((u32)(x).path) & Masktype )
-#define CONV(x) ( (((u32)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) ( (((u32)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) ( ((ulong)(x).path) & Masktype )
+#define CONV(x) ( (((ulong)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) ( (((ulong)(x).path) >> Shiftproto) & Maskproto )
#define QID(p, c, y) ( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
static char network[] = "network";
@@ -58,8 +57,7 @@
extern void nullmediumlink(void);
extern void pktmediumlink(void);
-static long ndbwrite(Fs*, char*, ulong, int);
-extern void closeconv(Conv*);
+ long ndbwrite(Fs *f, char *a, ulong off, int n);
static int
ip3gen(Chan *c, int i, Dir *dp)
@@ -121,7 +119,7 @@
mkqid(&q, QID(PROTO(c->qid), 0, Qstats), 0, QTFILE);
devdir(c, q, "stats", 0, network, 0444, dp);
return 1;
- }
+ }
return -1;
}
@@ -144,11 +142,10 @@
return -1;
case Qarp:
p = "arp";
+ prot = 0664;
break;
case Qbootp:
p = "bootp";
- if(bootp == nil)
- return 0;
break;
case Qndb:
p = "ndb";
@@ -157,14 +154,12 @@
break;
case Qiproute:
p = "iproute";
+ prot = 0664;
break;
case Qipselftab:
p = "ipselftab";
prot = 0444;
break;
- case Qiprouter:
- p = "iprouter";
- break;
case Qlog:
p = "log";
break;
@@ -188,7 +183,7 @@
case Qtopdir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
@@ -206,19 +201,18 @@
case Qndb:
case Qlog:
case Qiproute:
- case Qiprouter:
case Qipselftab:
return ip1gen(c, TYPE(c->qid), dp);
case Qprotodir:
if(s == DEVDOTDOT){
mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
- sprint(up->genbuf, "#I%ud", c->dev);
+ snprint(up->genbuf, sizeof up->genbuf, "#I%lud", c->dev);
devdir(c, q, up->genbuf, 0, network, 0555, dp);
return 1;
}
if(s < f->p[PROTO(c->qid)]->ac) {
cv = f->p[PROTO(c->qid)]->conv[s];
- sprint(up->genbuf, "%d", s);
+ snprint(up->genbuf, sizeof up->genbuf, "%d", s);
mkqid(&q, QID(PROTO(c->qid), s, Qconvdir), 0, QTDIR);
devdir(c, q, up->genbuf, 0, cv->owner, 0555, dp);
return 1;
@@ -262,45 +256,14 @@
fmtinstall('M', eipfmt);
}
-static Fs*
-ipgetfs(int dev)
-{
- extern void (*ipprotoinit[])(Fs*);
- Fs *f;
- int i;
-
- if(dev >= Nfs)
- return nil;
-
- qlock(&fslock);
- if(ipfs[dev] == nil){
- f = smalloc(sizeof(Fs));
- ip_init(f);
- arpinit(f);
- netloginit(f);
- for(i = 0; ipprotoinit[i]; i++)
- ipprotoinit[i](f);
- f->dev = dev;
- ipfs[dev] = f;
- }
- qunlock(&fslock);
-
- return ipfs[dev];
-}
-
IPaux*
newipaux(char *owner, char *tag)
{
IPaux *a;
- int n;
a = smalloc(sizeof(*a));
kstrdup(&a->owner, owner);
- memset(a->tag, ' ', sizeof(a->tag));
- n = strlen(tag);
- if(n > sizeof(a->tag))
- n = sizeof(a->tag);
- memmove(a->tag, tag, n);
+ strncpy(a->tag, tag, sizeof(a->tag));
return a;
}
@@ -310,13 +273,29 @@
ipattach(char* spec)
{
Chan *c;
- int dev;
+ ulong dev;
- dev = atoi(spec);
+ dev = strtoul(spec, nil, 10);
if(dev >= Nfs)
- error("bad specification");
+ error(Enodev);
- ipgetfs(dev);
+ qlock(&fslock);
+ if(ipfs[dev] == nil){
+ extern void (*ipprotoinit[])(Fs*);
+ Fs *f;
+ int i;
+
+ f = smalloc(sizeof(Fs));
+ ip_init(f);
+ arpinit(f);
+ netloginit(f);
+ for(i = 0; ipprotoinit[i]; i++)
+ ipprotoinit[i](f);
+ f->dev = dev;
+ ipfs[dev] = f;
+ }
+ qunlock(&fslock);
+
c = devattach('I', spec);
mkqid(&c->qid, QID(0, 0, Qtopdir), 0, QTDIR);
c->dev = dev;
@@ -327,7 +306,7 @@
}
static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
+ipwalk(Chan* c, Chan *nc, char **name, int nname)
{
IPaux *a = c->aux;
Walkqid* w;
@@ -338,8 +317,9 @@
return w;
}
-static s32
-ipstat(Chan* c, uchar* db, s32 n)
+
+static int
+ipstat(Chan* c, uchar* db, int n)
{
return devstat(c, db, n, nil, 0, ipgen);
}
@@ -360,7 +340,7 @@
};
static Chan*
-ipopen(Chan* c, u32 omode)
+ipopen(Chan* c, int omode)
{
Conv *cv, *nc;
Proto *p;
@@ -375,7 +355,7 @@
default:
break;
case Qndb:
- if(omode & (OWRITE|OTRUNC) && !iseve())
+ if((omode & (OWRITE|OTRUNC)) != 0 && !iseve())
error(Eperm);
if((omode & (OWRITE|OTRUNC)) == (OWRITE|OTRUNC))
f->ndb[0] = 0;
@@ -383,10 +363,10 @@
case Qlog:
netlogopen(f);
break;
- case Qiprouter:
- iprouteropen(f);
- break;
case Qiproute:
+ case Qarp:
+ if(omode != OREAD && !iseve())
+ error(Eperm);
break;
case Qtopdir:
case Qprotodir:
@@ -412,13 +392,8 @@
case Qclone:
p = f->p[PROTO(c->qid)];
qlock(p);
- if(waserror()){
- qunlock(p);
- nexterror();
- }
cv = Fsprotoclone(p, ATTACHER(c));
qunlock(p);
- poperror();
if(cv == nil) {
error(Enodev);
break;
@@ -437,15 +412,12 @@
qunlock(p);
nexterror();
}
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
- }
- cv->inuse++;
- if(cv->inuse == 1){
+ if(++cv->inuse == 1){
kstrdup(&cv->owner, ATTACHER(c));
cv->perm = 0660;
}
@@ -455,24 +427,26 @@
break;
case Qlisten:
cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
- if((perm & (cv->perm>>6)) != perm) {
- if(strcmp(ATTACHER(c), cv->owner) != 0)
- error(Eperm);
- if((perm & cv->perm) != perm)
- error(Eperm);
-
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
}
+ if(strcmp(ATTACHER(c), cv->owner) == 0)
+ perm <<= 6;
+ if((perm & cv->perm) != perm && !iseve())
+ error(Eperm);
if(cv->state != Announced)
error("not announced");
+ cv->inuse++;
+ qunlock(cv);
+ poperror();
if(waserror()){
closeconv(cv);
nexterror();
}
- qlock(cv);
- cv->inuse++;
- qunlock(cv);
nc = nil;
while(nc == nil) {
@@ -494,7 +468,6 @@
if(nc != nil){
cv->incall = nc->next;
mkqid(&c->qid, QID(PROTO(c->qid), nc->x, Qctl), 0, QTFILE);
- kstrdup(&cv->owner, ATTACHER(c));
}
qunlock(cv);
@@ -511,13 +484,25 @@
return c;
}
-static s32
-ipwstat(Chan *c, uchar *dp, s32 n)
+static Chan*
+ipcreate(Chan*, char*, int, ulong)
{
- Dir *d;
+ error(Eperm);
+ return 0;
+}
+
+static void
+ipremove(Chan*)
+{
+ error(Eperm);
+}
+
+static int
+ipwstat(Chan *c, uchar *dp, int n)
+{
+ Dir *dir;
Conv *cv;
Fs *f;
- Proto *p;
f = ipfs[c->dev];
switch(TYPE(c->qid)) {
@@ -529,28 +514,40 @@
break;
}
- d = smalloc(sizeof(*d)+n);
+ dir = smalloc(sizeof(Dir)+n);
if(waserror()){
- free(d);
+ free(dir);
nexterror();
}
- n = convM2D(dp, n, d, (char*)&d[1]);
+ n = convM2D(dp, n, &dir[0], (char*)&dir[1]);
if(n == 0)
error(Eshortstat);
- p = f->p[PROTO(c->qid)];
- cv = p->conv[CONV(c->qid)];
- if(!iseve() && strcmp(ATTACHER(c), cv->owner) != 0)
+
+ cv = f->p[PROTO(c->qid)]->conv[CONV(c->qid)];
+ qlock(cv);
+ if(waserror()){
+ qunlock(cv);
+ nexterror();
+ }
+ if(strcmp(ATTACHER(c), cv->owner) != 0 && !iseve())
error(Eperm);
- if(!emptystr(d->uid))
- kstrdup(&cv->owner, d->uid);
- if(d->mode != ~0UL)
- cv->perm = d->mode & 0777;
+ if(!emptystr(dir->uid)){
+ if(strcmp(dir->uid, commonuser()) != 0 && !iseve())
+ error(Eperm);
+ kstrdup(&cv->owner, dir->uid);
+ }
+ if(dir->mode != ~0UL)
+ cv->perm = dir->mode & 0666;
+ qunlock(cv);
poperror();
- free(d);
+
+ free(dir);
+ poperror();
+
return n;
}
-extern void
+void
closeconv(Conv *cv)
{
Conv *nc;
@@ -564,7 +561,7 @@
}
/* close all incoming calls since no listen will ever happen */
- for(nc = cv->incall; nc; nc = cv->incall){
+ for(nc = cv->incall; nc != nil; nc = cv->incall){
cv->incall = nc->next;
closeconv(nc);
}
@@ -576,9 +573,9 @@
while((mp = cv->multi) != nil)
ipifcremmulti(cv, mp->ma, mp->ia);
- cv->r = nil;
- cv->rgen = 0;
- cv->p->close(cv);
+ if(cv->p->close != nil)
+ (*cv->p->close)(cv);
+
cv->state = Idle;
qunlock(cv);
}
@@ -596,10 +593,6 @@
if(c->flag & COPEN)
netlogclose(f);
break;
- case Qiprouter:
- if(c->flag & COPEN)
- iprouterclose(f);
- break;
case Qdata:
case Qctl:
case Qerr:
@@ -620,13 +613,13 @@
Statelen= 32*1024,
};
-static s32
-ipread(Chan *ch, void *a, s32 n, s64 off)
+static long
+ipread(Chan *ch, void *a, long n, vlong off)
{
Conv *c;
Proto *x;
char *buf, *p;
- s32 rv;
+ long rv;
Fs *f;
ulong offset = off;
@@ -648,21 +641,22 @@
return readstr(offset, a, n, f->ndb);
case Qiproute:
return routeread(f, a, offset, n);
- case Qiprouter:
- return iprouterread(f, a, n);
case Qipselftab:
return ipselftabread(f, a, offset, n);
case Qlog:
return netlogread(f, a, offset, n);
case Qctl:
- sprint(up->genbuf, "%ud", CONV(ch->qid));
- return readstr(offset, p, n, up->genbuf);
+ buf = smalloc(16);
+ snprint(buf, 16, "%lud", CONV(ch->qid));
+ rv = readstr(offset, p, n, buf);
+ free(buf);
+ return rv;
case Qremote:
buf = smalloc(Statelen);
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->remote == nil) {
- sprint(buf, "%I!%d\n", c->raddr, c->rport);
+ snprint(buf, Statelen, "%I!%d\n", c->raddr, c->rport);
} else {
(*x->remote)(c, buf, Statelen-2);
}
@@ -674,7 +668,7 @@
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
if(x->local == nil) {
- sprint(buf, "%I!%d\n", c->laddr, c->lport);
+ snprint(buf, Statelen, "%I!%d\n", c->laddr, c->lport);
} else {
(*x->local)(c, buf, Statelen-2);
}
@@ -711,7 +705,7 @@
}
static Block*
-ipbread(Chan* ch, s32 n, u32 offset)
+ipbread(Chan* ch, long n, ulong offset)
{
Conv *c;
Proto *x;
@@ -740,7 +734,7 @@
/*
* set a local port making sure the quad of raddr,rport,laddr,lport is unique
*/
-static char*
+char*
setluniqueport(Conv* c, int lport)
{
Proto *p;
@@ -771,51 +765,63 @@
}
/*
+ * is lport in use by anyone?
+ */
+static int
+lportinuse(Proto *p, ushort lport)
+{
+ int x;
+
+ for(x = 0; x < p->nc && p->conv[x]; x++)
+ if(p->conv[x]->lport == lport)
+ return 1;
+ return 0;
+}
+
+/*
* pick a local port and set it
*/
-extern void
+char *
setlport(Conv* c)
{
Proto *p;
- ushort *pp;
- int x, found;
+ int i, port;
p = c->p;
- if(c->restricted)
- pp = &p->nextrport;
- else
- pp = &p->nextport;
qlock(p);
- for(;;(*pp)++){
+ if(c->restricted){
+ /* Restricted ports cycle between 600 and 1024. */
+ for(i=0; i<1024-600; i++){
+ if(p->nextrport >= 1024 || p->nextrport < 600)
+ p->nextrport = 600;
+ port = p->nextrport++;
+ if(!lportinuse(p, port))
+ goto chosen;
+ }
+ }else{
/*
- * Fsproto initialises p->nextport to 0 and the restricted
- * ports (p->nextrport) to 600.
- * Restricted ports must lie between 600 and 1024.
- * For the initial condition or if the unrestricted port number
- * has wrapped round, select a random port between 5000 and 1<<15
- * to start at.
+ * Unrestricted ports are chosen randomly
+ * between 2^15 and 2^16. There are at most
+ * 4*Nchan = 4096 ports in use at any given time,
+ * so even in the worst case, a random probe has a
+ * 1 - 4096/2^15 = 87% chance of success.
+ * If 64 successive probes fail, there is a bug somewhere
+ * (or a once in 10^58 event has happened, but that's
+ * less likely than a venti collision).
*/
- if(c->restricted){
- if(*pp >= 1024)
- *pp = 600;
+ for(i=0; i<64; i++){
+ port = (1<<15) + nrand(1<<15);
+ if(!lportinuse(p, port))
+ goto chosen;
}
- else while(*pp < 5000)
- *pp = nrand(1<<15);
-
- found = 0;
- for(x = 0; x < p->nc; x++){
- if(p->conv[x] == nil)
- break;
- if(p->conv[x]->lport == *pp){
- found = 1;
- break;
- }
- }
- if(found == 0)
- break;
}
- c->lport = (*pp)++;
qunlock(p);
+ return "no ports available";
+
+chosen:
+ c->lport = port;
+ qunlock(p);
+ return nil;
}
/*
@@ -822,7 +828,7 @@
* set a local address and port from a string of the form
* [address!]port[!r]
*/
-static char*
+char*
setladdrport(Conv* c, char* str, int announcing)
{
char *p;
@@ -830,8 +836,6 @@
ushort lport;
uchar addr[IPaddrlen];
- rv = nil;
-
/*
* ignore restricted part if it exists. it's
* meaningless on local ports.
@@ -854,8 +858,9 @@
if(strcmp(str, "*") == 0)
ipmove(c->laddr, IPnoaddr);
else {
- parseip(addr, str);
- if(ipforme(c->p->f, addr))
+ if(parseip(addr, str) == -1)
+ return Ebadip;
+ if(ipforme(c->p->f, addr) != 0 || ipismulticast(addr))
ipmove(c->laddr, addr);
else
return "not a local IP address";
@@ -869,9 +874,13 @@
return setluniqueport(c, 0);
}
- lport = atoi(p);
+ str = p;
+ lport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
if(lport <= 0)
- setlport(c);
+ rv = setlport(c);
else
rv = setluniqueport(c, lport);
return rv;
@@ -886,13 +895,17 @@
if(p == nil)
return "malformed address";
*p++ = 0;
- parseip(c->raddr, str);
- c->rport = atoi(p);
- p = strchr(p, '!');
- if(p){
- if(strstr(p, "!r") != nil)
- c->restricted = 1;
- }
+ if(parseip(c->raddr, str) == -1)
+ return Ebadip;
+
+ str = p;
+ c->rport = strtol(str, &p, 10);
+ if(p <= str || strchr("!", *p) == nil)
+ return "bad numeric port";
+
+ if(strstr(p, "!r") != nil)
+ c->restricted = 1;
+
return nil;
}
@@ -912,7 +925,9 @@
if(p != nil)
return p;
setladdr(c);
- setlport(c);
+ p = setlport(c);
+ if (p != nil)
+ return p;
break;
case 3:
p = setraddrport(c, argv[1]);
@@ -923,12 +938,7 @@
return p;
}
- if((memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- c->ipversion = V4;
- else
- c->ipversion = V6;
+ c->ipversion = convipvers(c);
return nil;
}
@@ -978,10 +988,11 @@
c->rport = 0;
switch(argc){
default:
- return "bad args to announce";
+ break;
case 2:
return setladdrport(c, argv[1], 1);
}
+ return "bad args to announce";
}
/*
@@ -1028,10 +1039,11 @@
{
switch(argc){
default:
- return "bad args to bind";
+ break;
case 2:
return setladdrport(c, argv[1], 0);
}
+ return "bad args to bind";
}
static void
@@ -1042,7 +1054,7 @@
if(x->bind == nil)
p = Fsstdbind(c, cb->f, cb->nf);
else
- p = x->bind(c, cb->f, cb->nf);
+ p = (*x->bind)(c, cb->f, cb->nf);
if(p != nil)
error(p);
}
@@ -1065,8 +1077,8 @@
c->ttl = atoi(cb->f[1]);
}
-static s32
-ipwrite(Chan* ch, void *v, s32 n, s64 off)
+static long
+ipwrite(Chan* ch, void *v, long n, vlong off)
{
Conv *c;
Proto *x;
@@ -1075,6 +1087,7 @@
uchar ia[IPaddrlen], ma[IPaddrlen];
Fs *f;
char *a;
+ ulong offset = off;
a = v;
f = ipfs[ch->dev];
@@ -1099,7 +1112,8 @@
netlogctl(f, a, n);
return n;
case Qndb:
- return ndbwrite(f, a, off, n);
+ return ndbwrite(f, a, offset, n);
+ break;
case Qctl:
x = f->p[PROTO(ch->qid)];
c = x->conv[CONV(ch->qid)];
@@ -1131,13 +1145,15 @@
if(cb->nf == 2){
if(!ipismulticast(c->raddr))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcaddmulti(c, c->raddr, ia);
} else {
- parseip(ma, cb->f[2]);
+ if (parseip(ia, cb->f[1]) == -1 ||
+ parseip(ma, cb->f[2]) == -1)
+ error(Ebadip);
if(!ipismulticast(ma))
error("addmulti for a non multicast address");
- parseip(ia, cb->f[1]);
ipifcaddmulti(c, ma, ia);
}
} else if(strcmp(cb->f[0], "remmulti") == 0){
@@ -1145,10 +1161,11 @@
error("remmulti needs interface address");
if(!ipismulticast(c->raddr))
error("remmulti for a non multicast address");
- parseip(ia, cb->f[1]);
+ if (parseip(ia, cb->f[1]) == -1)
+ error(Ebadip);
ipifcremmulti(c, c->raddr, ia);
} else if(x->ctl != nil) {
- p = x->ctl(c, cb->f, cb->nf);
+ p = (*x->ctl)(c, cb->f, cb->nf);
if(p != nil)
error(p);
} else
@@ -1160,13 +1177,12 @@
return n;
}
-static s32
-ipbwrite(Chan* ch, Block* bp, u32 offset)
+static long
+ipbwrite(Chan* ch, Block* bp, ulong offset)
{
Conv *c;
Proto *x;
Fs *f;
- int n;
switch(TYPE(ch->qid)){
case Qdata:
@@ -1177,11 +1193,7 @@
if(c->wq == nil)
error(Eperm);
- if(bp->next)
- bp = concatblock(bp);
- n = BLEN(bp);
- qbwrite(c->wq, bp);
- return n;
+ return qbwrite(c->wq, bp);
default:
return devbwrite(ch, bp, offset);
}
@@ -1198,13 +1210,13 @@
ipwalk,
ipstat,
ipopen,
- devcreate,
+ ipcreate,
ipclose,
ipread,
ipbread,
ipwrite,
ipbwrite,
- devremove,
+ ipremove,
ipwstat,
};
@@ -1224,12 +1236,15 @@
p->qid.type = QTDIR;
p->qid.path = QID(f->np, 0, Qprotodir);
+ if(p->nc > Maskconv+1){
+ print("Fsproto: %s nc %d > %d\n", p->name, p->nc, Maskconv+1);
+ p->nc = Maskconv+1;
+ }
p->conv = malloc(sizeof(Conv*)*(p->nc+1));
if(p->conv == nil)
panic("Fsproto");
p->x = f->np;
- p->nextport = 0;
p->nextrport = 600;
f->p[f->np++] = p;
@@ -1262,21 +1277,33 @@
if(c == nil){
c = malloc(sizeof(Conv));
if(c == nil)
- error(Enomem);
- qlock(c);
+ return nil;
+ if(waserror()){
+ qfree(c->rq);
+ qfree(c->wq);
+ qfree(c->eq);
+ qfree(c->sq);
+ free(c->ptcl);
+ free(c);
+ return nil;
+ }
c->p = p;
c->x = pp - p->conv;
if(p->ptclsize != 0){
c->ptcl = malloc(p->ptclsize);
- if(c->ptcl == nil) {
- free(c);
+ if(c->ptcl == nil)
error(Enomem);
- }
}
- *pp = c;
- p->ac++;
c->eq = qopen(1024, Qmsg, 0, 0);
+ if(c->eq == nil)
+ error(Enomem);
(*p->create)(c);
+ if(c->rq == nil || c->wq == nil)
+ error(Enomem);
+ poperror();
+ qlock(c);
+ *pp = c;
+ p->ac++;
break;
}
if(canqlock(c)){
@@ -1291,8 +1318,11 @@
}
}
if(pp >= ep) {
- if(p->gc != nil && (*p->gc)(p))
- goto retry;
+ if(p->gc != nil){
+ print("Fsprotoclone: garbage collecting %s Convs\n", p->name);
+ if((*p->gc)(p))
+ goto retry;
+ }
return nil;
}
@@ -1307,8 +1337,9 @@
c->lport = 0;
c->rport = 0;
c->restricted = 0;
+ c->ignoreadvice = 0;
c->ttl = MAXTTL;
- c->tos = DFLTTOS;
+ c->tos = 0;
qreopen(c->rq);
qreopen(c->wq);
qreopen(c->eq);
@@ -1321,7 +1352,7 @@
Fsconnected(Conv* c, char* msg)
{
if(msg != nil && *msg != '\0')
- kstrcpy(c->cerr, msg, sizeof(c->cerr));
+ strncpy(c->cerr, msg, ERRMAX-1);
switch(c->state){
@@ -1368,12 +1399,19 @@
for(l = &c->incall; *l; l = &(*l)->next)
i++;
if(i >= Maxincall) {
+ static int beenhere;
+
qunlock(c);
+ if (!beenhere) {
+ beenhere = 1;
+ print("Fsnewcall: incall queue full (%d) on port %d\n",
+ i, c->lport);
+ }
return nil;
}
/* find a free conversation */
- nc = Fsprotoclone(c->p, network);
+ nc = Fsprotoclone(c->p, c->owner);
if(nc == nil) {
qunlock(c);
return nil;
@@ -1394,12 +1432,12 @@
return nc;
}
-static long
+long
ndbwrite(Fs *f, char *a, ulong off, int n)
{
if(off > strlen(f->ndb))
error(Eio);
- if(off+n >= sizeof(f->ndb)-1)
+ if(off+n >= sizeof(f->ndb))
error(Eio);
memmove(f->ndb+off, a, n);
f->ndb[off+n] = 0;
@@ -1411,7 +1449,7 @@
ulong
scalednconv(void)
{
- if(conf.npage*BY2PG >= 128*MB)
+ if(cpuserver && conf.npage*BY2PG >= 128*MB)
return Nchans*4;
return Nchans;
}
diff -u a/os/ip//esp.c b/os/ip//esp.c
--- a/os/ip//esp.c
+++ b/os/ip//esp.c
@@ -1,3 +1,11 @@
+/*
+ * Encapsulating Security Payload for IPsec for IPv4, rfc1827.
+ * extended to IPv6.
+ * rfc2104 defines hmac computation.
+ * currently only implements tunnel mode.
+ * TODO: verify aes algorithms;
+ * transport mode (host-to-host)
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,47 +14,79 @@
#include "../port/error.h"
#include "ip.h"
+#include "ipv6.h"
+#include <libsec.h>
-#include "libsec.h"
+#define BITS2BYTES(bi) (((bi) + BI2BY - 1) / BI2BY)
+#define BYTES2BITS(by) ((by) * BI2BY)
+typedef struct Algorithm Algorithm;
+typedef struct Esp4hdr Esp4hdr;
+typedef struct Esp6hdr Esp6hdr;
+typedef struct Espcb Espcb;
typedef struct Esphdr Esphdr;
+typedef struct Esppriv Esppriv;
typedef struct Esptail Esptail;
typedef struct Userhdr Userhdr;
-typedef struct Esppriv Esppriv;
-typedef struct Espcb Espcb;
-typedef struct Algorithm Algorithm;
-typedef struct Esprc4 Esprc4;
-#define DPRINT if(0)print
+enum {
+ Encrypt,
+ Decrypt,
-enum
-{
- IP_ESPPROTO = 50,
- EsphdrSize = 28, // includes IP header
- IphdrSize = 20, // options have been striped
- EsptailSize = 2, // does not include pad or auth data
- UserhdrSize = 4, // user visable header size - if enabled
+ IP_ESPPROTO = 50, /* IP v4 and v6 protocol number */
+ Esp4hdrlen = IP4HDR + 8,
+ Esp6hdrlen = IP6HDR + 8,
+
+ Esptaillen = 2, /* does not include pad or auth data */
+ Userhdrlen = 4, /* user-visible header size - if enabled */
+
+ Desblk = BITS2BYTES(64),
+ Des3keysz = BITS2BYTES(192),
+
+ Aesblk = BITS2BYTES(128),
+ Aeskeysz = BITS2BYTES(128),
};
struct Esphdr
{
- /* ip header */
+ uchar espspi[4]; /* Security parameter index */
+ uchar espseq[4]; /* Sequence number */
+ uchar payload[];
+};
+
+/*
+ * tunnel-mode (network-to-network, etc.) layout is:
+ * new IP hdrs | ESP hdr |
+ * enc { orig IP hdrs | TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ *
+ * transport-mode (host-to-host) layout would be:
+ * orig IP hdrs | ESP hdr |
+ * enc { TCP/UDP hdr | user data | ESP trailer } | ESP ICV
+ */
+struct Esp4hdr
+{
+ /* ipv4 header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar espproto; /* Protocol */
uchar espplen[2]; /* Header plus data length */
uchar espsrc[4]; /* Ip source */
uchar espdst[4]; /* Ip destination */
- /* esp header */
- uchar espspi[4]; /* Security parameter index */
- uchar espseq[4]; /* Sequence number */
+ Esphdr;
};
+/* tunnel-mode layout */
+struct Esp6hdr
+{
+ IPV6HDR;
+ Esphdr;
+};
+
struct Esptail
{
uchar pad;
@@ -53,16 +93,28 @@
uchar nexthdr;
};
+/* IP-version-dependent data */
+typedef struct Versdep Versdep;
+struct Versdep
+{
+ ulong version;
+ ulong iphdrlen;
+ ulong hdrlen; /* iphdrlen + esp hdr len */
+ ulong spi;
+ uchar laddr[IPaddrlen];
+ uchar raddr[IPaddrlen];
+};
+
/* header as seen by the user */
struct Userhdr
{
- uchar nexthdr; // next protocol
+ uchar nexthdr; /* next protocol */
uchar unused[3];
};
struct Esppriv
{
- ulong in;
+ uvlong in;
ulong inerrors;
};
@@ -72,77 +124,68 @@
struct Espcb
{
int incoming;
- int header; // user user level header
+ int header; /* user-level header */
ulong spi;
- ulong seq; // last seq sent
- ulong window; // for replay attacks
+ ulong seq; /* last seq sent */
+ ulong window; /* for replay attacks */
+
char *espalg;
- void *espstate; // other state for esp
- int espivlen; // in bytes
+ void *espstate; /* other state for esp */
+ int espivlen; /* in bytes */
int espblklen;
int (*cipher)(Espcb*, uchar *buf, int len);
+
char *ahalg;
- void *ahstate; // other state for esp
- int ahlen; // auth data length in bytes
+ void *ahstate; /* other state for esp */
+ int ahlen; /* auth data length in bytes */
int ahblklen;
int (*auth)(Espcb*, uchar *buf, int len, uchar *hash);
+ DigestState *ds;
};
struct Algorithm
{
char *name;
- int keylen; // in bits
- void (*init)(Espcb*, char* name, uchar *key, int keylen);
+ int keylen; /* in bits */
+ void (*init)(Espcb*, char* name, uchar *key, unsigned keylen);
};
-
-enum {
- RC4forward = 10*1024*1024, // maximum skip forward
- RC4back = 100*1024, // maximum look back
-};
-
-struct Esprc4
-{
- ulong cseq; // current byte sequence number
- RC4state current;
-
- int ovalid; // old is valid
- ulong lgseq; // last good sequence
- ulong oseq; // old byte sequence number
- RC4state old;
-};
-
static Conv* convlookup(Proto *esp, ulong spi);
static char *setalg(Espcb *ecb, char **f, int n, Algorithm *alg);
-static void nullespinit(Espcb*, char*, uchar *key, int keylen);
-static void nullahinit(Espcb*, char*, uchar *key, int keylen);
-static void shaahinit(Espcb*, char*, uchar *key, int keylen);
-static void md5ahinit(Espcb*, char*, uchar *key, int keylen);
-static void desespinit(Espcb *ecb, char *name, uchar *k, int n);
-static void rc4espinit(Espcb *ecb, char *name, uchar *k, int n);
static void espkick(void *x);
+static void nullespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void des3espinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aescbcespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void aesctrespinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void desespinit(Espcb *ecb, char *name, uchar *k, unsigned n);
+
+static void nullahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void shaahinit(Espcb*, char*, uchar *key, unsigned keylen);
+static void md5ahinit(Espcb*, char*, uchar *key, unsigned keylen);
+
static Algorithm espalg[] =
{
- "null", 0, nullespinit,
- "des_56_cbc", 64, desespinit,
- "rc4_128", 128, rc4espinit,
- nil, 0, nil,
+ "null", 0, nullespinit,
+ "des3_cbc", 192, des3espinit, /* new rfc2451, des-ede3 */
+ "aes_128_cbc", 128, aescbcespinit, /* new rfc3602 */
+ "aes_ctr", 128, aesctrespinit, /* new rfc3686 */
+ "des_56_cbc", 64, desespinit, /* rfc2405, deprecated */
+ nil, 0, nil,
};
static Algorithm ahalg[] =
{
- "null", 0, nullahinit,
- "hmac_sha1_96", 128, shaahinit,
- "hmac_md5_96", 128, md5ahinit,
- nil, 0, nil,
+ "null", 0, nullahinit,
+ "hmac_sha1_96", 128, shaahinit, /* rfc2404 */
+ "hmac_md5_96", 128, md5ahinit, /* rfc2403 */
+ nil, 0, nil,
};
static char*
espconnect(Conv *c, char **argv, int argc)
{
- char *p, *pp;
- char *e = nil;
+ char *p, *pp, *e = nil;
ulong spi;
Espcb *ecb = (Espcb*)c->ptcl;
@@ -157,7 +200,10 @@
break;
}
*p++ = 0;
- parseip(c->raddr, argv[1]);
+ if (parseip(c->raddr, argv[1]) == -1) {
+ e = Ebadip;
+ break;
+ }
findlocalip(c->p->f, c->laddr, c->raddr);
ecb->incoming = 0;
ecb->seq = 0;
@@ -215,26 +261,86 @@
ipmove(c->raddr, IPnoaddr);
ecb = (Espcb*)c->ptcl;
- free(ecb->espstate);
- free(ecb->ahstate);
+ secfree(ecb->espstate);
+ secfree(ecb->ahstate);
memset(ecb, 0, sizeof(Espcb));
}
+static int
+pktipvers(Fs *f, Block **bpp)
+{
+ if (*bpp == nil || BLEN(*bpp) == 0) {
+ /* get enough to identify the IP version */
+ *bpp = pullupblock(*bpp, IP4HDR);
+ if(*bpp == nil) {
+ netlog(f, Logesp, "esp: short packet\n");
+ return 0;
+ }
+ }
+ return (((Esp4hdr*)(*bpp)->rp)->vihl & 0xf0) == IP_VER4? V4: V6;
+}
+
static void
+getverslens(int version, Versdep *vp)
+{
+ vp->version = version;
+ switch(vp->version) {
+ case V4:
+ vp->iphdrlen = IP4HDR;
+ vp->hdrlen = Esp4hdrlen;
+ break;
+ case V6:
+ vp->iphdrlen = IP6HDR;
+ vp->hdrlen = Esp6hdrlen;
+ break;
+ default:
+ panic("esp: getverslens version %d wrong", version);
+ }
+}
+
+static void
+getpktspiaddrs(uchar *pkt, Versdep *vp)
+{
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+
+ switch(vp->version) {
+ case V4:
+ eh4 = (Esp4hdr*)pkt;
+ v4tov6(vp->raddr, eh4->espsrc);
+ v4tov6(vp->laddr, eh4->espdst);
+ vp->spi = nhgetl(eh4->espspi);
+ break;
+ case V6:
+ eh6 = (Esp6hdr*)pkt;
+ ipmove(vp->raddr, eh6->src);
+ ipmove(vp->laddr, eh6->dst);
+ vp->spi = nhgetl(eh6->espspi);
+ break;
+ default:
+ panic("esp: getpktspiaddrs vp->version %ld wrong", vp->version);
+ }
+}
+
+/*
+ * encapsulate next IP packet on x's write queue in IP/ESP packet
+ * and initiate output of the result.
+ */
+static void
espkick(void *x)
{
+ int nexthdr, payload, pad, align;
+ uchar *auth;
+ Block *bp;
Conv *c = x;
- Esphdr *eh;
+ Esp4hdr *eh4;
+ Esp6hdr *eh6;
+ Espcb *ecb;
Esptail *et;
Userhdr *uh;
- Espcb *ecb;
- Block *bp;
- int nexthdr;
- int payload;
- int pad;
- int align;
- uchar *auth;
+ Versdep vers;
+ getverslens(convipvers(c), &vers);
bp = qget(c->wq);
if(bp == nil)
return;
@@ -244,7 +350,7 @@
if(ecb->header) {
/* make sure the message has a User header */
- bp = pullupblock(bp, UserhdrSize);
+ bp = pullupblock(bp, Userhdrlen);
if(bp == nil) {
qunlock(c);
return;
@@ -251,15 +357,16 @@
}
uh = (Userhdr*)bp->rp;
nexthdr = uh->nexthdr;
- bp->rp += UserhdrSize;
+ bp->rp += Userhdrlen;
} else {
- nexthdr = 0; // what should this be?
+ nexthdr = 0; /* what should this be? */
}
payload = BLEN(bp) + ecb->espivlen;
/* Make space to fit ip header */
- bp = padblock(bp, EsphdrSize + ecb->espivlen);
+ bp = padblock(bp, vers.hdrlen + ecb->espivlen);
+ getpktspiaddrs(bp->rp, &vers);
align = 4;
if(ecb->espblklen > align)
@@ -266,7 +373,7 @@
align = ecb->espblklen;
if(align % ecb->ahblklen != 0)
panic("espkick: ahblklen is important after all");
- pad = (align-1) - (payload + EsptailSize-1)%align;
+ pad = (align-1) - (payload + Esptaillen-1)%align;
/*
* Make space for tail
@@ -273,70 +380,88 @@
* this is done by calling padblock with a negative size
* Padblock does not change bp->wp!
*/
- bp = padblock(bp, -(pad+EsptailSize+ecb->ahlen));
- bp->wp += pad+EsptailSize+ecb->ahlen;
+ bp = padblock(bp, -(pad+Esptaillen+ecb->ahlen));
+ bp->wp += pad+Esptaillen+ecb->ahlen;
- eh = (Esphdr *)(bp->rp);
- et = (Esptail*)(bp->rp + EsphdrSize + payload + pad);
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload + pad);
- // fill in tail
+ /* fill in tail */
et->pad = pad;
et->nexthdr = nexthdr;
- ecb->cipher(ecb, bp->rp+EsphdrSize, payload+pad+EsptailSize);
- auth = bp->rp + EsphdrSize + payload + pad + EsptailSize;
+ /* encrypt the payload */
+ ecb->cipher(ecb, bp->rp + vers.hdrlen, payload + pad + Esptaillen);
+ auth = bp->rp + vers.hdrlen + payload + pad + Esptaillen;
- // fill in head
- eh->vihl = IP_VER4;
- hnputl(eh->espspi, ecb->spi);
- hnputl(eh->espseq, ++ecb->seq);
- v6tov4(eh->espsrc, c->laddr);
- v6tov4(eh->espdst, c->raddr);
- eh->espproto = IP_ESPPROTO;
- eh->frag[0] = 0;
- eh->frag[1] = 0;
+ /* fill in head; construct a new IP header and an ESP header */
+ if (vers.version == V4) {
+ eh4 = (Esp4hdr *)bp->rp;
+ eh4->vihl = IP_VER4;
+ v6tov4(eh4->espsrc, c->laddr);
+ v6tov4(eh4->espdst, c->raddr);
+ eh4->espproto = IP_ESPPROTO;
+ eh4->frag[0] = 0;
+ eh4->frag[1] = 0;
- ecb->auth(ecb, bp->rp+IphdrSize, (EsphdrSize-IphdrSize)+payload+pad+EsptailSize, auth);
+ hnputl(eh4->espspi, ecb->spi);
+ hnputl(eh4->espseq, ++ecb->seq);
+ } else {
+ eh6 = (Esp6hdr *)bp->rp;
+ eh6->vcf[0] = IP_VER6;
+ ipmove(eh6->src, c->laddr);
+ ipmove(eh6->dst, c->raddr);
+ eh6->proto = IP_ESPPROTO;
+ hnputl(eh6->espspi, ecb->spi);
+ hnputl(eh6->espseq, ++ecb->seq);
+ }
+
+ /* compute secure hash */
+ ecb->auth(ecb, bp->rp + vers.iphdrlen, (vers.hdrlen - vers.iphdrlen) +
+ payload + pad + Esptaillen, auth);
+
qunlock(c);
- //print("esp: pass down: %uld\n", BLEN(bp));
- ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ /* print("esp: pass down: %uld\n", BLEN(bp)); */
+ if (vers.version == V4)
+ ipoput4(c->p->f, bp, 0, c->ttl, c->tos, c);
+ else
+ ipoput6(c->p->f, bp, 0, c->ttl, c->tos, c);
}
+/*
+ * decapsulate IP packet from IP/ESP packet in bp and
+ * pass the result up the spi's Conv's read queue.
+ */
void
espiput(Proto *esp, Ipifc*, Block *bp)
{
- Esphdr *eh;
- Esptail *et;
- Userhdr *uh;
+ int payload, nexthdr;
+ uchar *auth, *espspi;
Conv *c;
Espcb *ecb;
- uchar raddr[IPaddrlen], laddr[IPaddrlen];
+ Esptail *et;
Fs *f;
- uchar *auth;
- ulong spi;
- int payload, nexthdr;
+ Userhdr *uh;
+ Versdep vers;
f = esp->f;
- bp = pullupblock(bp, EsphdrSize+EsptailSize);
+ getverslens(pktipvers(f, &bp), &vers);
+
+ bp = pullupblock(bp, vers.hdrlen + Esptaillen);
if(bp == nil) {
netlog(f, Logesp, "esp: short packet\n");
return;
}
+ getpktspiaddrs(bp->rp, &vers);
- eh = (Esphdr*)(bp->rp);
- spi = nhgetl(eh->espspi);
- v4tov6(raddr, eh->espsrc);
- v4tov6(laddr, eh->espdst);
-
qlock(esp);
/* Look for a conversation structure for this port */
- c = convlookup(esp, spi);
+ c = convlookup(esp, vers.spi);
if(c == nil) {
qunlock(esp);
- netlog(f, Logesp, "esp: no conv %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: no conv %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
icmpnoconv(f, bp);
freeblist(bp);
return;
@@ -346,76 +471,83 @@
qunlock(esp);
ecb = c->ptcl;
- // too hard to do decryption/authentication on block lists
- if(bp->next)
+ /* too hard to do decryption/authentication on block lists */
+ if(bp->next != nil)
bp = concatblock(bp);
- if(BLEN(bp) < EsphdrSize + ecb->espivlen + EsptailSize + ecb->ahlen) {
+ if(BLEN(bp) < vers.hdrlen + ecb->espivlen + Esptaillen + ecb->ahlen) {
qunlock(c);
- netlog(f, Logesp, "esp: short block %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short block %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- eh = (Esphdr*)(bp->rp);
auth = bp->wp - ecb->ahlen;
- if(!ecb->auth(ecb, eh->espspi, auth-eh->espspi, auth)) {
+ espspi = vers.version == V4? ((Esp4hdr*)bp->rp)->espspi:
+ ((Esp6hdr*)bp->rp)->espspi;
+
+ /* compute secure hash and authenticate */
+ if(!ecb->auth(ecb, espspi, auth - espspi, auth)) {
qunlock(c);
-print("esp: bad auth %I -> %I!%ld\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: bad auth %I -> %I!%d\n", raddr,
- laddr, spi);
+print("esp: bad auth %I -> %I!%ld\n", vers.raddr, vers.laddr, vers.spi);
+ netlog(f, Logesp, "esp: bad auth %I -> %I!%lud\n", vers.raddr,
+ vers.laddr, vers.spi);
freeb(bp);
return;
}
- payload = BLEN(bp)-EsphdrSize-ecb->ahlen;
- if(payload<=0 || payload%4 != 0 || payload%ecb->espblklen!=0) {
+ payload = BLEN(bp) - vers.hdrlen - ecb->ahlen;
+ if(payload <= 0 || payload % 4 != 0 || payload % ecb->espblklen != 0) {
qunlock(c);
- netlog(f, Logesp, "esp: bad length %I -> %I!%d payload=%d BLEN=%d\n", raddr,
- laddr, spi, payload, BLEN(bp));
+ netlog(f, Logesp, "esp: bad length %I -> %I!%lud payload=%d BLEN=%zd\n",
+ vers.raddr, vers.laddr, vers.spi, payload, BLEN(bp));
freeb(bp);
return;
}
- if(!ecb->cipher(ecb, bp->rp+EsphdrSize, payload)) {
+
+ /* decrypt payload */
+ if(!ecb->cipher(ecb, bp->rp + vers.hdrlen, payload)) {
qunlock(c);
-print("esp: cipher failed %I -> %I!%ld: %r\n", raddr, laddr, spi);
- netlog(f, Logesp, "esp: cipher failed %I -> %I!%d: %r\n", raddr,
- laddr, spi);
+print("esp: cipher failed %I -> %I!%ld: %s\n", vers.raddr, vers.laddr, vers.spi, up->errstr);
+ netlog(f, Logesp, "esp: cipher failed %I -> %I!%lud: %s\n",
+ vers.raddr, vers.laddr, vers.spi, up->errstr);
freeb(bp);
return;
}
- payload -= EsptailSize;
- et = (Esptail*)(bp->rp + EsphdrSize + payload);
+ payload -= Esptaillen;
+ et = (Esptail*)(bp->rp + vers.hdrlen + payload);
payload -= et->pad + ecb->espivlen;
nexthdr = et->nexthdr;
if(payload <= 0) {
qunlock(c);
- netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%d\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: short packet after decrypt %I -> %I!%lud\n",
+ vers.raddr, vers.laddr, vers.spi);
freeb(bp);
return;
}
- // trim packet
- bp->rp += EsphdrSize + ecb->espivlen;
+ /* trim packet */
+ bp->rp += vers.hdrlen + ecb->espivlen; /* toss original IP & ESP hdrs */
bp->wp = bp->rp + payload;
if(ecb->header) {
- // assume UserhdrSize < EsphdrSize
- bp->rp -= UserhdrSize;
+ /* assume Userhdrlen < Esp4hdrlen < Esp6hdrlen */
+ bp->rp -= Userhdrlen;
uh = (Userhdr*)bp->rp;
- memset(uh, 0, UserhdrSize);
+ memset(uh, 0, Userhdrlen);
uh->nexthdr = nexthdr;
}
+ /* ingress filtering here? */
+
if(qfull(c->rq)){
- netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", raddr,
- laddr, spi);
+ netlog(f, Logesp, "esp: qfull %I -> %I.%uld\n", vers.raddr,
+ vers.laddr, vers.spi);
freeblist(bp);
}else {
-//print("esp: pass up: %uld\n", BLEN(bp));
- qpass(c->rq, bp);
+// print("esp: pass up: %uld\n", BLEN(bp));
+ qpass(c->rq, bp); /* pass packet up the read queue */
}
qunlock(c);
@@ -440,19 +572,19 @@
return e;
}
+/* called from icmp(v6) for unreachable hosts, time exceeded, etc. */
void
espadvise(Proto *esp, Block *bp, char *msg)
{
- Esphdr *h;
Conv *c;
- ulong spi;
+ Versdep vers;
- h = (Esphdr*)(bp->rp);
+ getverslens(pktipvers(esp->f, &bp), &vers);
+ getpktspiaddrs(bp->rp, &vers);
- spi = nhgets(h->espspi);
qlock(esp);
- c = convlookup(esp, spi);
- if(c != nil) {
+ c = convlookup(esp, vers.spi);
+ if(c != nil && !c->ignoreadvice) {
qhangup(c->rq, msg);
qhangup(c->wq, msg);
}
@@ -466,7 +598,7 @@
Esppriv *upriv;
upriv = esp->priv;
- return snprint(buf, len, "%lud %lud\n",
+ return snprint(buf, len, "%llud %lud\n",
upriv->in,
upriv->inerrors);
}
@@ -520,10 +652,10 @@
setalg(Espcb *ecb, char **f, int n, Algorithm *alg)
{
uchar *key;
- int i, nbyte, nchar;
- int c;
+ int c, nbyte, nchar;
+ uint i;
- if(n < 2)
+ if(n < 2 || n > 3)
return "bad format";
for(; alg->name; alg++)
if(strcmp(f[1], alg->name) == 0)
@@ -531,10 +663,14 @@
if(alg->name == nil)
return "unknown algorithm";
- if(n != 3)
- return "bad format";
nbyte = (alg->keylen + 7) >> 3;
- nchar = strlen(f[2]);
+ if (n == 2)
+ nchar = 0;
+ else
+ nchar = strlen(f[2]);
+ if(nchar != 2 * nbyte) /* TODO: maybe < is ok */
+ return "key not required length";
+ /* convert hex digits from ascii, in place */
for(i=0; i<nchar; i++) {
c = f[2][i];
if(c >= '0' && c <= '9')
@@ -544,21 +680,27 @@
else if(c >= 'A' && c <= 'F')
f[2][i] -= 'A'-10;
else
- return "bad character in key";
+ return "non-hex character in key";
}
- key = smalloc(nbyte);
- for(i=0; i<nchar && i*2<nbyte; i++) {
+ /* collapse hex digits into complete bytes in reverse order in key */
+ key = secalloc(nbyte);
+ for(i = 0; i < nchar && i/2 < nbyte; i++) {
c = f[2][nchar-i-1];
if(i&1)
c <<= 4;
- key[i>>1] |= c;
+ key[i/2] |= c;
}
-
+ memset(f[2], 0, nchar);
alg->init(ecb, alg->name, key, alg->keylen);
- free(key);
+ secfree(key);
return nil;
}
+
+/*
+ * null encryption
+ */
+
static int
nullcipher(Espcb*, uchar*, int)
{
@@ -566,7 +708,7 @@
}
static void
-nullespinit(Espcb *ecb, char *name, uchar*, int)
+nullespinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->espalg = name;
ecb->espblklen = 1;
@@ -581,7 +723,7 @@
}
static void
-nullahinit(Espcb *ecb, char *name, uchar*, int)
+nullahinit(Espcb *ecb, char *name, uchar*, unsigned)
{
ecb->ahalg = name;
ecb->ahblklen = 1;
@@ -589,26 +731,28 @@
ecb->auth = nullauth;
}
-void
+
+/*
+ * sha1
+ */
+
+static void
seanq_hmac_sha1(uchar hash[SHA1dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[SHA1dlen];
DigestState *digest;
- uchar innerhash[SHA1dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = sha1(ipad, 64, nil, nil);
+ digest = sha1(ipad, Hmacblksz, nil, nil);
sha1(t, tlen, innerhash, digest);
- digest = sha1(opad, 64, nil, nil);
+ digest = sha1(opad, Hmacblksz, nil, nil);
sha1(innerhash, SHA1dlen, hash, digest);
}
@@ -615,11 +759,11 @@
static int
shaauth(Espcb *ecb, uchar *t, int tlen, uchar *auth)
{
- uchar hash[SHA1dlen];
int r;
+ uchar hash[SHA1dlen];
memset(hash, 0, SHA1dlen);
- seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_sha1(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -626,40 +770,162 @@
}
static void
-shaahinit(Espcb *ecb, char *name, uchar *key, int klen)
+shaahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("shaahinit: bad keylen");
- klen >>= 8; // convert to bytes
+ klen /= BI2BY;
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = shaauth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
-void
+
+/*
+ * aes
+ */
+static int
+aescbccipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aescbcespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aeskeysz], ivec[Aeskeysz];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aeskeysz);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aescbccipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+static int
+aesctrcipher(Espcb *ecb, uchar *p, int n) /* 128-bit blocks */
+{
+ uchar tmp[AESbsize], q[AESbsize];
+ uchar *pp, *tp, *ip, *eip, *ep;
+ AESstate *ds = ecb->espstate;
+
+ ep = p + n;
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, AESbsize);
+ p += AESbsize;
+ while(p < ep){
+ memmove(tmp, p, AESbsize);
+ aes_decrypt(ds->dkey, ds->rounds, p, q);
+ memmove(p, q, AESbsize);
+ tp = tmp;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; ){
+ *p++ ^= *ip;
+ *ip++ = *tp++;
+ }
+ }
+ } else {
+ memmove(p, ds->ivec, AESbsize);
+ for(p += AESbsize; p < ep; p += AESbsize){
+ pp = p;
+ ip = ds->ivec;
+ for(eip = ip + AESbsize; ip < eip; )
+ *pp++ ^= *ip++;
+ aes_encrypt(ds->ekey, ds->rounds, p, q);
+ memmove(ds->ivec, q, AESbsize);
+ memmove(p, q, AESbsize);
+ }
+ }
+ return 1;
+}
+
+static void
+aesctrespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
+{
+ uchar key[Aesblk], ivec[Aesblk];
+
+ n = BITS2BYTES(n);
+ if(n > Aeskeysz)
+ n = Aeskeysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Aesblk);
+ ecb->espalg = name;
+ ecb->espblklen = Aesblk;
+ ecb->espivlen = Aesblk;
+ ecb->cipher = aesctrcipher;
+ ecb->espstate = secalloc(sizeof(AESstate));
+ setupAESstate(ecb->espstate, key, n /* keybytes */, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
+}
+
+
+/*
+ * md5
+ */
+
+static void
seanq_hmac_md5(uchar hash[MD5dlen], uchar *t, long tlen, uchar *key, long klen)
{
- uchar ipad[65], opad[65];
int i;
+ uchar ipad[Hmacblksz+1], opad[Hmacblksz+1], innerhash[MD5dlen];
DigestState *digest;
- uchar innerhash[MD5dlen];
- for(i=0; i<64; i++){
- ipad[i] = 0x36;
- opad[i] = 0x5c;
- }
- ipad[64] = opad[64] = 0;
- for(i=0; i<klen; i++){
+ memset(ipad, 0x36, Hmacblksz);
+ memset(opad, 0x5c, Hmacblksz);
+ ipad[Hmacblksz] = opad[Hmacblksz] = 0;
+ for(i = 0; i < klen; i++){
ipad[i] ^= key[i];
opad[i] ^= key[i];
}
- digest = md5(ipad, 64, nil, nil);
+ digest = md5(ipad, Hmacblksz, nil, nil);
md5(t, tlen, innerhash, digest);
- digest = md5(opad, 64, nil, nil);
+ digest = md5(opad, Hmacblksz, nil, nil);
md5(innerhash, MD5dlen, hash, digest);
}
@@ -670,7 +936,7 @@
int r;
memset(hash, 0, MD5dlen);
- seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, 16);
+ seanq_hmac_md5(hash, t, tlen, (uchar*)ecb->ahstate, BITS2BYTES(128));
r = memcmp(auth, hash, ecb->ahlen) == 0;
memmove(auth, hash, ecb->ahlen);
return r;
@@ -677,168 +943,102 @@
}
static void
-md5ahinit(Espcb *ecb, char *name, uchar *key, int klen)
+md5ahinit(Espcb *ecb, char *name, uchar *key, unsigned klen)
{
if(klen != 128)
panic("md5ahinit: bad keylen");
- klen >>= 3; // convert to bytes
-
-
+ klen = BITS2BYTES(klen);
ecb->ahalg = name;
ecb->ahblklen = 1;
- ecb->ahlen = 12;
+ ecb->ahlen = BITS2BYTES(96);
ecb->auth = md5auth;
- ecb->ahstate = smalloc(klen);
+ ecb->ahstate = secalloc(klen);
memmove(ecb->ahstate, key, klen);
}
+
+/*
+ * des, single and triple
+ */
+
static int
descipher(Espcb *ecb, uchar *p, int n)
{
- uchar tmp[8];
- uchar *pp, *tp, *ip, *eip, *ep;
DESstate *ds = ecb->espstate;
- ep = p + n;
if(ecb->incoming) {
- memmove(ds->ivec, p, 8);
- p += 8;
- while(p < ep){
- memmove(tmp, p, 8);
- block_cipher(ds->expanded, p, 1);
- tp = tmp;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; ){
- *p++ ^= *ip;
- *ip++ = *tp++;
- }
- }
+ memmove(ds->ivec, p, Desblk);
+ desCBCdecrypt(p + Desblk, n - Desblk, ds);
} else {
- memmove(p, ds->ivec, 8);
- for(p += 8; p < ep; p += 8){
- pp = p;
- ip = ds->ivec;
- for(eip = ip+8; ip < eip; )
- *pp++ ^= *ip++;
- block_cipher(ds->expanded, p, 0);
- memmove(ds->ivec, p, 8);
- }
+ memmove(p, ds->ivec, Desblk);
+ desCBCencrypt(p + Desblk, n - Desblk, ds);
}
return 1;
}
-
+
+static int
+des3cipher(Espcb *ecb, uchar *p, int n)
+{
+ DES3state *ds = ecb->espstate;
+
+ if(ecb->incoming) {
+ memmove(ds->ivec, p, Desblk);
+ des3CBCdecrypt(p + Desblk, n - Desblk, ds);
+ } else {
+ memmove(p, ds->ivec, Desblk);
+ des3CBCencrypt(p + Desblk, n - Desblk, ds);
+ }
+ return 1;
+}
+
static void
-desespinit(Espcb *ecb, char *name, uchar *k, int n)
+desespinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- uchar key[8];
- uchar ivec[8];
- int i;
-
- // bits to bytes
- n = (n+7)>>3;
- if(n > 8)
- n = 8;
+ uchar key[Desblk], ivec[Desblk];
+
+ n = BITS2BYTES(n);
+ if(n > Desblk)
+ n = Desblk;
memset(key, 0, sizeof(key));
memmove(key, k, n);
- for(i=0; i<8; i++)
- ivec[i] = nrand(256);
+ prng(ivec, Desblk);
ecb->espalg = name;
- ecb->espblklen = 8;
- ecb->espivlen = 8;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
+
ecb->cipher = descipher;
- ecb->espstate = smalloc(sizeof(DESstate));
+ ecb->espstate = secalloc(sizeof(DESstate));
setupDESstate(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static int
-rc4cipher(Espcb *ecb, uchar *p, int n)
+static void
+des3espinit(Espcb *ecb, char *name, uchar *k, unsigned n)
{
- Esprc4 *esprc4;
- RC4state tmpstate;
- ulong seq;
- long d, dd;
+ uchar key[3][Desblk], ivec[Desblk];
- if(n < 4)
- return 0;
+ n = BITS2BYTES(n);
+ if(n > Des3keysz)
+ n = Des3keysz;
+ memset(key, 0, sizeof(key));
+ memmove(key, k, n);
+ prng(ivec, Desblk);
+ ecb->espalg = name;
+ ecb->espblklen = Desblk;
+ ecb->espivlen = Desblk;
- esprc4 = ecb->espstate;
- if(ecb->incoming) {
- seq = nhgetl(p);
- p += 4;
- n -= 4;
- d = seq-esprc4->cseq;
- if(d == 0) {
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- if(esprc4->ovalid) {
- dd = esprc4->cseq - esprc4->lgseq;
- if(dd > RC4back)
- esprc4->ovalid = 0;
- }
- } else if(d > 0) {
-print("missing packet: %uld %ld\n", seq, d);
- // this link is hosed
- if(d > RC4forward) {
- strcpy(up->errstr, "rc4cipher: skipped too much");
- return 0;
- }
- esprc4->lgseq = seq;
- if(!esprc4->ovalid) {
- esprc4->ovalid = 1;
- esprc4->oseq = esprc4->cseq;
- memmove(&esprc4->old, &esprc4->current, sizeof(RC4state));
- }
- rc4skip(&esprc4->current, d);
- rc4(&esprc4->current, p, n);
- esprc4->cseq = seq+n;
- } else {
-print("reordered packet: %uld %ld\n", seq, d);
- dd = seq - esprc4->oseq;
- if(!esprc4->ovalid || -d > RC4back || dd < 0) {
- strcpy(up->errstr, "rc4cipher: too far back");
- return 0;
- }
- memmove(&tmpstate, &esprc4->old, sizeof(RC4state));
- rc4skip(&tmpstate, dd);
- rc4(&tmpstate, p, n);
- return 1;
- }
-
- // move old state up
- if(esprc4->ovalid) {
- dd = esprc4->cseq - RC4back - esprc4->oseq;
- if(dd > 0) {
- rc4skip(&esprc4->old, dd);
- esprc4->oseq += dd;
- }
- }
- } else {
- hnputl(p, esprc4->cseq);
- p += 4;
- n -= 4;
- rc4(&esprc4->current, p, n);
- esprc4->cseq += n;
- }
- return 1;
+ ecb->cipher = des3cipher;
+ ecb->espstate = secalloc(sizeof(DES3state));
+ setupDES3state(ecb->espstate, key, ivec);
+ memset(ivec, 0, sizeof(ivec));
+ memset(key, 0, sizeof(key));
}
-static void
-rc4espinit(Espcb *ecb, char *name, uchar *k, int n)
-{
- Esprc4 *esprc4;
- // bits to bytes
- n = (n+7)>>3;
- esprc4 = smalloc(sizeof(Esprc4));
- memset(esprc4, 0, sizeof(Esprc4));
- setupRC4state(&esprc4->current, k, n);
- ecb->espalg = name;
- ecb->espblklen = 4;
- ecb->espivlen = 4;
- ecb->cipher = rc4cipher;
- ecb->espstate = esprc4;
-}
-
+/*
+ * interfacing to devip
+ */
void
espinit(Fs *fs)
{
diff -u a/os/ip//ethermedium.c b/os/ip//ethermedium.c
--- a/os/ip//ethermedium.c
+++ b/os/ip//ethermedium.c
@@ -5,9 +5,9 @@
#include "fns.h"
#include "../port/error.h"
+#include "../port/netif.h"
#include "ip.h"
#include "ipv6.h"
-#include "kernel.h"
typedef struct Etherhdr Etherhdr;
struct Etherhdr
@@ -18,10 +18,10 @@
};
static uchar ipbroadcast[IPaddrlen] = {
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
- 0xff,0xff,0xff,0xff,
0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
+ 0xff,0xff,0xff,0xff,
};
static uchar etherbroadcast[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
@@ -33,12 +33,12 @@
static void etherbwrite(Ipifc *ifc, Block *bp, int version, uchar *ip);
static void etheraddmulti(Ipifc *ifc, uchar *a, uchar *ia);
static void etherremmulti(Ipifc *ifc, uchar *a, uchar *ia);
+static void etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
static Block* multicastarp(Fs *f, Arpent *a, Medium*, uchar *mac);
static void sendarp(Ipifc *ifc, Arpent *a);
-static void sendgarp(Ipifc *ifc, uchar*);
+static void sendndp(Ipifc *ifc, Arpent *a);
static int multicastea(uchar *ea, uchar *ip);
static void recvarpproc(void*);
-static void resolveaddr6(Ipifc *ifc, Arpent *a);
static void etherpref2addr(uchar *pref, uchar *ea);
Medium ethermedium =
@@ -53,8 +53,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -70,8 +69,7 @@
.bwrite= etherbwrite,
.addmulti= etheraddmulti,
.remmulti= etherremmulti,
-.ares= arpenter,
-.areg= sendgarp,
+.areg= etherareg,
.pref2addr= etherpref2addr,
};
@@ -94,9 +92,6 @@
*/
enum
{
- ETARP = 0x0806,
- ETIP4 = 0x0800,
- ETIP6 = 0x86DD,
ARPREQUEST = 1,
ARPREPLY = 2,
};
@@ -127,128 +122,92 @@
static void
etherbind(Ipifc *ifc, int argc, char **argv)
{
- Chan *mchan4, *cchan4, *achan, *mchan6, *cchan6;
- char addr[Maxpath]; //char addr[2*KNAMELEN];
- char dir[Maxpath]; //char dir[2*KNAMELEN];
- char *buf;
- int fd, cfd, n;
- char *ptr;
+ char addr[Maxpath], dir[Maxpath];
Etherrock *er;
+ Chan *c;
+ int n;
if(argc < 2)
error(Ebadarg);
- mchan4 = cchan4 = achan = mchan6 = cchan6 = nil;
- buf = nil;
+ /*
+ * get mac address
+ */
+ snprint(addr, sizeof(addr), "%s/addr", argv[2]);
+ c = namec(addr, Aopen, OREAD, 0);
if(waserror()){
- if(mchan4 != nil)
- cclose(mchan4);
- if(cchan4 != nil)
- cclose(cchan4);
- if(achan != nil)
- cclose(achan);
- if(mchan6 != nil)
- cclose(mchan6);
- if(cchan6 != nil)
- cclose(cchan6);
- if(buf != nil)
- free(buf);
- nexterror();
+ cclose(c);
+ nexterror();
}
+ n = devtab[c->type]->read(c, addr, sizeof(addr)-1, 0);
+ if(n < 0)
+ error(Eio);
+ addr[n] = 0;
+ if(parsemac(ifc->mac, addr, sizeof(ifc->mac)) != 6)
+ error("could not find mac address");
+ cclose(c);
+ poperror();
+ er = smalloc(sizeof(*er));
+ er->read4p = er->read6p = er->arpp = (void*)-1;
+ er->mchan4 = er->cchan4 = er->mchan6 = er->cchan6 = er->achan = nil;
+ er->f = ifc->conv->p->f;
+
+ if(waserror()){
+ if(er->mchan4 != nil)
+ cclose(er->mchan4);
+ if(er->cchan4 != nil)
+ cclose(er->cchan4);
+ if(er->mchan6 != nil)
+ cclose(er->mchan6);
+ if(er->cchan6 != nil)
+ cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
+ free(er);
+ nexterror();
+ }
+
/*
- * open ip converstation
+ * open ipv4 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x800", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x800 failed: %s", up->env->errstr);
- mchan4 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan4 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x800", argv[2]); /* ETIP4 */
+ er->mchan4 = chandial(addr, nil, dir, &er->cchan4);
/*
* make it non-blocking
*/
- devtab[cchan4->type]->write(cchan4, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan4->type]->write(er->cchan4, nbmsg, strlen(nbmsg), 0);
/*
- * get mac address and speed
- */
- snprint(addr, sizeof(addr), "%s/stats", dir);
- fd = kopen(addr, OREAD);
- if(fd < 0)
- errorf("can't open ether stats: %s", up->env->errstr);
-
- buf = smalloc(512);
- n = kread(fd, buf, 511);
- kclose(fd);
- if(n <= 0)
- error(Eio);
- buf[n] = 0;
-
- ptr = strstr(buf, "addr: ");
- if(!ptr)
- error(Eio);
- ptr += 6;
- parsemac(ifc->mac, ptr, 6);
-
- ptr = strstr(buf, "mbps: ");
- if(ptr){
- ptr += 6;
- ifc->mbps = atoi(ptr);
- } else
- ifc->mbps = 100;
-
- /*
- * open arp conversation
- */
- snprint(addr, sizeof(addr), "%s!0x806", argv[2]);
- fd = kdial(addr, nil, nil, nil);
- if(fd < 0)
- errorf("dial 0x806 failed: %s", up->env->errstr);
- achan = commonfdtochan(fd, ORDWR, 0, 1);
- kclose(fd);
-
- /*
- * open ip conversation
+ * open ipv6 conversation
*
* the dial will fail if the type is already open on
* this device.
*/
- snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]);
- fd = kdial(addr, nil, dir, &cfd);
- if(fd < 0)
- errorf("dial 0x86DD failed: %s", up->env->errstr);
- mchan6 = commonfdtochan(fd, ORDWR, 0, 1);
- cchan6 = commonfdtochan(cfd, ORDWR, 0, 1);
- kclose(fd);
- kclose(cfd);
+ snprint(addr, sizeof(addr), "%s!0x86DD", argv[2]); /* ETIP6 */
+ er->mchan6 = chandial(addr, nil, dir, &er->cchan6);
/*
* make it non-blocking
*/
- devtab[cchan6->type]->write(cchan6, nbmsg, strlen(nbmsg), 0);
+ devtab[er->cchan6->type]->write(er->cchan6, nbmsg, strlen(nbmsg), 0);
- er = smalloc(sizeof(*er));
- er->mchan4 = mchan4;
- er->cchan4 = cchan4;
- er->achan = achan;
- er->mchan6 = mchan6;
- er->cchan6 = cchan6;
- er->f = ifc->conv->p->f;
- ifc->arg = er;
-
- free(buf);
+ /*
+ * open arp conversation
+ */
+ snprint(addr, sizeof(addr), "%s!0x806", argv[2]); /* ETARP */
+ er->achan = chandial(addr, nil, nil, nil);
poperror();
- kproc("etherread4", etherread4, ifc, 0);
- kproc("recvarpproc", recvarpproc, ifc, 0);
- kproc("etherread6", etherread6, ifc, 0);
+ ifc->arg = er;
+
+ kproc("etherread4", etherread4, ifc);
+ kproc("etherread6", etherread6, ifc);
+ kproc("recvarpproc", recvarpproc, ifc);
}
/*
@@ -259,21 +218,35 @@
{
Etherrock *er = ifc->arg;
- if(er->read4p)
+ while(waserror())
+ ;
+
+ /* wait for readers to start */
+ while(er->arpp == (void*)-1 || er->read4p == (void*)-1 || er->read6p == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(er->read4p != nil)
postnote(er->read4p, 1, "unbind", 0);
- if(er->read6p)
+ if(er->read6p != nil)
postnote(er->read6p, 1, "unbind", 0);
- if(er->arpp)
+ if(er->arpp != nil)
postnote(er->arpp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for readers to die */
- while(er->arpp != 0 || er->read4p != 0 || er->read6p != 0)
+ while(er->arpp != nil || er->read4p != nil || er->read6p != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan4 != nil)
cclose(er->mchan4);
- if(er->achan != nil)
- cclose(er->achan);
if(er->cchan4 != nil)
cclose(er->cchan4);
if(er->mchan6 != nil)
@@ -280,6 +253,8 @@
cclose(er->mchan6);
if(er->cchan6 != nil)
cclose(er->cchan6);
+ if(er->achan != nil)
+ cclose(er->achan);
free(er);
}
@@ -297,16 +272,16 @@
/* get mac address of destination */
a = arpget(er->f->arp, bp, version, ifc, ip, mac);
- if(a){
+ if(a != nil){
/* check for broadcast or multicast */
bp = multicastarp(er->f, a, ifc->m, mac);
- if(bp==nil){
+ if(bp == nil){
switch(version){
case V4:
sendarp(ifc, a);
break;
- case V6:
- resolveaddr6(ifc, a);
+ case V6:
+ sendndp(ifc, a);
break;
default:
panic("etherbwrite: version %d", version);
@@ -317,8 +292,6 @@
/* make it a single block with space for the ether header */
bp = padblock(bp, ifc->m->hsize);
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
eh = (Etherhdr*)bp->rp;
@@ -358,29 +331,28 @@
ifc = a;
er = ifc->arg;
er->read4p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read4p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan4->type]->bread(er->mchan4, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput4(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read4p = nil;
+ pexit("hangup", 1);
}
@@ -397,29 +369,28 @@
ifc = a;
er = ifc->arg;
er->read6p = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->read6p = 0;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan6->type]->bread(er->mchan6, ifc->maxtu, 0);
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(bp == nil)
+ break;
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
ifc->in++;
- bp->rp += ifc->m->hsize;
- if(ifc->lifc == nil)
+ if(ifc->lifc == nil || BLEN(bp) <= ifc->m->hsize)
freeb(bp);
- else
+ else {
+ bp->rp += ifc->m->hsize;
ipiput6(er->f, ifc, bp);
+ }
runlock(ifc);
poperror();
}
+ er->read6p = nil;
+ pexit("hangup", 1);
}
static void
@@ -477,6 +448,7 @@
Block *bp;
Etherarp *e;
Etherrock *er = ifc->arg;
+ uchar targ[IPv4addrlen], src[IPv4addrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < 1000){
@@ -484,6 +456,9 @@
return;
}
+ /* try to keep it around for a second more */
+ a->ctime = NOW;
+
/* remove all but the last message */
while((bp = a->hold) != nil){
if(bp == a->last)
@@ -492,18 +467,20 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
+ memmove(targ, a->ip+IPv4off, IPv4addrlen);
arprelease(er->f->arp, a);
+ if(!ipv4local(ifc, src, 0, targ))
+ return;
+
n = sizeof(Etherarp);
- if(n < a->type->mintu)
- n = a->type->mintu;
+ if(n < ifc->m->mintu)
+ n = ifc->m->mintu;
bp = allocb(n);
memset(bp->rp, 0, n);
e = (Etherarp*)bp->rp;
- memmove(e->tpa, a->ip+IPv4off, sizeof(e->tpa));
- ipv4local(ifc, e->spa);
+ memmove(e->tpa, targ, sizeof(e->tpa));
+ memmove(e->spa, src, sizeof(e->spa));
memmove(e->sha, ifc->mac, sizeof(e->sha));
memset(e->d, 0xff, sizeof(e->d)); /* ethernet broadcast */
memmove(e->s, ifc->mac, sizeof(e->s));
@@ -516,18 +493,14 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("arp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
-resolveaddr6(Ipifc *ifc, Arpent *a)
+sendndp(Ipifc *ifc, Arpent *a)
{
- int sflag;
Block *bp;
Etherrock *er = ifc->arg;
- uchar ipsrc[IPaddrlen];
/* don't do anything if it's been less than a second since the last */
if(NOW - a->ctime < ReTransTimer){
@@ -543,19 +516,7 @@
freeblist(bp);
}
- /* try to keep it around for a second more */
- a->ctime = NOW;
- a->rtime = NOW + ReTransTimer;
- if(a->rxtsrem <= 0) {
- arprelease(er->f->arp, a);
- return;
- }
-
- a->rxtsrem--;
- arprelease(er->f->arp, a);
-
- if(sflag = ipv6anylocal(ifc, ipsrc))
- icmpns(er->f, ipsrc, sflag, a->ip, TARG_MULTI, ifc->mac);
+ ndpsendsol(er->f, ifc, a); /* unlocks arp */
}
/*
@@ -569,10 +530,6 @@
Etherarp *e;
Etherrock *er = ifc->arg;
- /* don't arp for our initial non address */
- if(ipcmp(ip, IPnoaddr) == 0)
- return;
-
n = sizeof(Etherarp);
if(n < ifc->m->mintu)
n = ifc->m->mintu;
@@ -593,15 +550,13 @@
hnputs(e->op, ARPREQUEST);
bp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, bp, 0);
- if(n < 0)
- print("garp: send: %r\n");
+ devtab[er->achan->type]->bwrite(er->achan, bp, 0);
}
static void
recvarp(Ipifc *ifc)
{
- int n;
+ int n, forme;
Block *ebp, *rbp;
Etherarp *e, *r;
uchar ip[IPaddrlen];
@@ -609,11 +564,11 @@
Etherrock *er = ifc->arg;
ebp = devtab[er->achan->type]->bread(er->achan, ifc->maxtu, 0);
- if(ebp == nil) {
- print("arp: rcv: %r\n");
+ if(ebp == nil)
return;
- }
+ rlock(ifc);
+
e = (Etherarp*)ebp->rp;
switch(nhgets(e->op)) {
default:
@@ -620,9 +575,13 @@
break;
case ARPREPLY:
+ /* make sure not to enter multi/broadcat address */
+ if(e->sha[0] & 1)
+ break;
+
/* check for machine using my ip address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
print("arprep: 0x%E/0x%E also has ip addr %V\n",
e->s, e->sha, e->spa);
@@ -630,46 +589,47 @@
}
}
- /* make sure we're not entering broadcast addresses */
- if(ipcmp(ip, ipbroadcast) == 0 ||
- !memcmp(e->sha, etherbroadcast, sizeof(e->sha))){
- print("arprep: 0x%E/0x%E cannot register broadcast address %I\n",
- e->s, e->sha, e->spa);
- break;
- }
-
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 0);
+ /* refresh what we know about sender */
+ arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, 1);
break;
case ARPREQUEST:
+ /* don't reply to multi/broadcat addresses */
+ if(e->sha[0] & 1)
+ break;
+
/* don't answer arps till we know who we are */
- if(ifc->lifc == 0)
+ if(ifc->lifc == nil)
break;
/* check for machine using my ip or ether address */
v4tov6(ip, e->spa);
- if(iplocalonifc(ifc, ip) || ipproxyifc(er->f, ifc, ip)){
+ if(iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip)){
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) != 0){
- if (memcmp(eprinted, e->spa, sizeof(e->spa))){
+ if(memcmp(eprinted, e->spa, sizeof(e->spa)) != 0){
/* print only once */
- print("arpreq: 0x%E also has ip addr %V\n", e->sha, e->spa);
+ print("arpreq: 0x%E also has ip addr %V\n",
+ e->sha, e->spa);
memmove(eprinted, e->spa, sizeof(e->spa));
}
+ break;
}
} else {
if(memcmp(e->sha, ifc->mac, sizeof(e->sha)) == 0){
- print("arpreq: %V also has ether addr %E\n", e->spa, e->sha);
+ print("arpreq: %V also has ether addr %E\n",
+ e->spa, e->sha);
break;
}
}
- /* refresh what we know about sender */
- arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), 1);
-
- /* answer only requests for our address or systems we're proxying for */
+ /*
+ * when request is for our address or systems we're proxying for,
+ * enter senders address into arp table and reply, otherwise just
+ * refresh the senders address.
+ */
v4tov6(ip, e->tpa);
- if(!iplocalonifc(ifc, ip))
- if(!ipproxyifc(er->f, ifc, ip))
+ forme = iplocalonifc(ifc, ip) != nil || ipproxyifc(er->f, ifc, ip);
+ if(arpenter(er->f, V4, e->spa, e->sha, sizeof(e->sha), e->tpa, ifc, !forme) < 0 || !forme)
break;
n = sizeof(Etherarp);
@@ -692,10 +652,14 @@
memmove(r->s, ifc->mac, sizeof(r->s));
rbp->wp += n;
- n = devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
- if(n < 0)
- print("arp: write: %r\n");
+ runlock(ifc);
+ freeb(ebp);
+
+ devtab[er->achan->type]->bwrite(er->achan, rbp, 0);
+ return;
}
+
+ runlock(ifc);
freeb(ebp);
}
@@ -707,7 +671,7 @@
er->arpp = up;
if(waserror()){
- er->arpp = 0;
+ er->arpp = nil;
pexit("hangup", 1);
}
for(;;)
@@ -749,14 +713,9 @@
multicastarp(Fs *f, Arpent *a, Medium *medium, uchar *mac)
{
/* is it broadcast? */
- switch(ipforme(f, a->ip)){
- case Runi:
- return nil;
- case Rbcast:
- memset(mac, 0xff, 6);
+ if(ipforme(f, a->ip) == Rbcast){
+ memset(mac, 0xff, medium->maclen);
return arpresolve(f->arp, a, medium, mac);
- default:
- break;
}
/* if multicast, fill in mac */
@@ -778,11 +737,11 @@
}
-static void
+static void
etherpref2addr(uchar *pref, uchar *ea)
{
- pref[8] = ea[0] | 0x2;
- pref[9] = ea[1];
+ pref[8] = ea[0] ^ 0x2;
+ pref[9] = ea[1];
pref[10] = ea[2];
pref[11] = 0xFF;
pref[12] = 0xFE;
@@ -789,4 +748,41 @@
pref[13] = ea[3];
pref[14] = ea[4];
pref[15] = ea[5];
+}
+
+static void
+etherareg(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
+{
+ static char tdad[] = "dad6";
+ uchar a[IPaddrlen];
+
+ if(ipcmp(ip, IPnoaddr) == 0 || ipcmp(ip, v4prefix) == 0)
+ return;
+
+ if(isv4(ip)){
+ sendgarp(ifc, ip);
+ return;
+ }
+
+ if((lifc->type&Rv4) != 0)
+ return;
+
+ if(!lifc->tentative){
+ icmpna(f, lifc->local, v6allnodesL, ip, ifc->mac, 1<<5);
+ return;
+ }
+
+ if(ipcmp(lifc->local, ip) != 0)
+ return;
+
+ /* temporarily add route for duplicate address detection */
+ ipv62smcast(a, ip);
+ addroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ if(waserror()){
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
+ nexterror();
+ }
+ icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+ remroute(f, a, IPallbits, v6Unspecified, IPallbits, ip, Rmulti, ifc, tdad);
}
diff -u a/os/ip//gre.c b/os/ip//gre.c
--- a/os/ip//gre.c
+++ b/os/ip//gre.c
@@ -1,3 +1,6 @@
+/*
+ * Generic Routing Encapsulation over IPv4, rfc1702
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,10 +10,7 @@
#include "ip.h"
-#define DPRINT if(0)print
-
-enum
-{
+enum {
GRE_IPONLY = 12, /* size of ip header */
GRE_IPPLUSGRE = 12, /* minimum size of GRE header */
IP_GREPROTO = 47,
@@ -18,10 +18,33 @@
GRErxms = 200,
GREtickms = 100,
GREmaxxmit = 10,
+
+ K = 1024,
+ GREqlen = 256 * K,
+
+ GRE_cksum = 0x8000,
+ GRE_routing = 0x4000,
+ GRE_key = 0x2000,
+ GRE_seq = 0x1000,
+
+ Nring = 1 << 10, /* power of two, please */
+ Ringmask = Nring - 1,
+
+ GREctlraw = 0,
+ GREctlcooked,
+ GREctlretunnel,
+ GREctlreport,
+ GREctldlsuspend,
+ GREctlulsuspend,
+ GREctldlresume,
+ GREctlulresume,
+ GREctlforward,
+ GREctlulkey,
+ Ncmds,
};
-typedef struct GREhdr
-{
+typedef struct GREhdr GREhdr;
+struct GREhdr{
/* ip header */
uchar vihl; /* Version and header length */
uchar tos; /* Type of service */
@@ -28,7 +51,7 @@
uchar len[2]; /* packet length (including headers) */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar ttl;
uchar proto; /* Protocol */
uchar cksum[2]; /* checksum */
uchar src[4]; /* Ip source */
@@ -37,21 +60,115 @@
/* gre header */
uchar flags[2];
uchar eproto[2]; /* encapsulation protocol */
-} GREhdr;
+};
typedef struct GREpriv GREpriv;
-struct GREpriv
-{
- int raw; /* Raw GRE mode */
-
+struct GREpriv{
/* non-MIB stats */
- ulong csumerr; /* checksum errors */
- ulong lenerr; /* short packet */
+ uvlong lenerr; /* short packet */
};
+typedef struct Bring Bring;
+struct Bring{
+ Block *ring[Nring];
+ long produced;
+ long consumed;
+};
+
+typedef struct GREconv GREconv;
+struct GREconv{
+ int raw;
+
+ /* Retunnelling information. v4 only */
+ uchar north[4]; /* HA */
+ uchar south[4]; /* Base station */
+ uchar hoa[4]; /* Home address */
+ uchar coa[4]; /* Careof address */
+ ulong seq; /* Current sequence # */
+ int dlsusp; /* Downlink suspended? */
+ int ulsusp; /* Uplink suspended? */
+ ulong ulkey; /* GRE key */
+
+ QLock lock; /* Lock for rings */
+ Bring dlpending; /* Ring of pending packets */
+ Bring dlbuffered; /* Received while suspended */
+ Bring ulbuffered; /* Received while suspended */
+};
+
+typedef struct Metablock Metablock;
+struct Metablock{
+ uchar *rp;
+ ulong seq;
+};
+
+static char *grectlcooked(Conv *, int, char **);
+static char *grectldlresume(Conv *, int, char **);
+static char *grectldlsuspend(Conv *, int, char **);
+static char *grectlforward(Conv *, int, char **);
+static char *grectlraw(Conv *, int, char **);
+static char *grectlreport(Conv *, int, char **);
+static char *grectlretunnel(Conv *, int, char **);
+static char *grectlulkey(Conv *, int, char **);
+static char *grectlulresume(Conv *, int, char **);
+static char *grectlulsuspend(Conv *, int, char **);
+
+static struct{
+ char *cmd;
+ int argc;
+ char *(*f)(Conv *, int, char **);
+} grectls[Ncmds] = {
+[GREctlraw] = { "raw", 1, grectlraw, },
+[GREctlcooked] = { "cooked", 1, grectlcooked, },
+[GREctlretunnel]= { "retunnel", 5, grectlretunnel, },
+[GREctlreport] = { "report", 2, grectlreport, },
+[GREctldlsuspend]= { "dlsuspend", 1, grectldlsuspend,},
+[GREctlulsuspend]= { "ulsuspend", 1, grectlulsuspend,},
+[GREctldlresume]= { "dlresume", 1, grectldlresume, },
+[GREctlulresume]= { "ulresume", 1, grectlulresume, },
+[GREctlforward] = { "forward", 2, grectlforward, },
+[GREctlulkey] = { "ulkey", 2, grectlulkey, },
+};
+
+static uchar nulladdr[4];
+static char *sessend = "session end";
+
static void grekick(void *x, Block *bp);
+static char *gresetup(Conv *, char *, char *, char *);
-static char*
+uvlong grepdin, grepdout, grebdin, grebdout;
+uvlong grepuin, grepuout, grebuin, grebuout;
+
+static Block *
+getring(Bring *r)
+{
+	Block **slot, *head;
+
+	/* nothing queued when both counters agree */
+	if(r->produced == r->consumed)
+		return nil;
+	slot = &r->ring[r->consumed++ & Ringmask];
+	head = *slot;
+	*slot = nil;	/* the caller now owns the block */
+	return head;
+}
+
+static void
+addring(Bring *r, Block *bp) /* store bp in the next producer slot of r */
+{
+ Block *tbp;
+
+ if(r->produced - r->consumed > Ringmask){
+ /* Full! free the block in the slot about to be reused and count it as consumed */
+ tbp = r->ring[r->produced & Ringmask];
+ assert(tbp);
+ freeb(tbp);
+ r->consumed++;
+ }
+ r->ring[r->produced & Ringmask] = bp;
+ r->produced++;
+}
+
+static char *
greconnect(Conv *c, char **argv, int argc)
{
Proto *p;
@@ -91,7 +208,7 @@
static void
grecreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, c);
+ c->rq = qopen(GREqlen, Qmsg, 0, c);
c->wq = qbypass(grekick, c);
}
@@ -98,44 +215,88 @@
static int
grestate(Conv *c, char *state, int n)
{
- USED(c);
- return snprint(state, n, "%s", "Datagram");
+ GREconv *grec;
+ char *ep, *p;
+
+ grec = c->ptcl;
+ p = state;
+ ep = p + n;
+ p = seprint(p, ep, "%s%s%s%shoa %V north %V south %V seq %ulx "
+ "pending %uld %uld buffered dl %uld %uld ul %uld %uld ulkey %.8ulx\n",
+ c->inuse? "Open ": "Closed ",
+ grec->raw? "raw ": "",
+ grec->dlsusp? "DL suspended ": "",
+ grec->ulsusp? "UL suspended ": "",
+ grec->hoa, grec->north, grec->south, grec->seq,
+ grec->dlpending.consumed, grec->dlpending.produced,
+ grec->dlbuffered.consumed, grec->dlbuffered.produced,
+ grec->ulbuffered.consumed, grec->ulbuffered.produced,
+ grec->ulkey);
+ return p - state;
}
static char*
greannounce(Conv*, char**, int)
{
- return "pktifc does not support announce";
+ return "gre does not support announce";
}
static void
greclose(Conv *c)
{
- qclose(c->rq);
- qclose(c->wq);
- qclose(c->eq);
+ GREconv *grec;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ /* Make sure we don't forward any more packets */
+ memset(grec->hoa, 0, sizeof grec->hoa);
+ memset(grec->north, 0, sizeof grec->north);
+ memset(grec->south, 0, sizeof grec->south);
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->dlpending)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->dlbuffered)) != nil)
+ freeb(bp);
+
+ while((bp = getring(&grec->ulbuffered)) != nil)
+ freeb(bp);
+
+ grec->dlpending.produced = grec->dlpending.consumed = 0;
+ grec->dlbuffered.produced = grec->dlbuffered.consumed = 0;
+ grec->ulbuffered.produced = grec->ulbuffered.consumed = 0;
+ qunlock(&grec->lock);
+
+ grec->raw = 0;
+ grec->seq = 0;
+ grec->dlsusp = grec->ulsusp = 1;
+
+ qhangup(c->rq, sessend);
+ qhangup(c->wq, sessend);
+ qhangup(c->eq, sessend);
ipmove(c->laddr, IPnoaddr);
ipmove(c->raddr, IPnoaddr);
- c->lport = 0;
- c->rport = 0;
+ c->lport = c->rport = 0;
}
-int drop;
-
static void
grekick(void *x, Block *bp)
{
- Conv *c = x;
- GREhdr *ghp;
+ Conv *c;
+ GREconv *grec;
+ GREhdr *gre;
uchar laddr[IPaddrlen], raddr[IPaddrlen];
if(bp == nil)
return;
+ c = x;
+ grec = c->ptcl;
+
/* Make space to fit ip header (gre header already there) */
bp = padblock(bp, GRE_IPONLY);
- if(bp == nil)
- return;
/* make sure the message has a GRE header */
bp = pullupblock(bp, GRE_IPONLY+GRE_IPPLUSGRE);
@@ -142,90 +303,344 @@
if(bp == nil)
return;
- ghp = (GREhdr *)(bp->rp);
- ghp->vihl = IP_VER4;
+ gre = (GREhdr *)bp->rp;
+ gre->vihl = IP_VER4;
- if(!((GREpriv*)c->p->priv)->raw){
- v4tov6(raddr, ghp->dst);
+ if(grec->raw == 0){
+ v4tov6(raddr, gre->dst);
if(ipcmp(raddr, v4prefix) == 0)
- memmove(ghp->dst, c->raddr + IPv4off, IPv4addrlen);
- v4tov6(laddr, ghp->src);
+ memmove(gre->dst, c->raddr + IPv4off, IPv4addrlen);
+ v4tov6(laddr, gre->src);
if(ipcmp(laddr, v4prefix) == 0){
if(ipcmp(c->laddr, IPnoaddr) == 0)
- findlocalip(c->p->f, c->laddr, raddr); /* pick interface closest to dest */
- memmove(ghp->src, c->laddr + IPv4off, IPv4addrlen);
+ /* pick interface closest to dest */
+ findlocalip(c->p->f, c->laddr, raddr);
+ memmove(gre->src, c->laddr + IPv4off, sizeof gre->src);
}
- hnputs(ghp->eproto, c->rport);
+ hnputs(gre->eproto, c->rport);
}
- ghp->proto = IP_GREPROTO;
- ghp->frag[0] = 0;
- ghp->frag[1] = 0;
+ gre->proto = IP_GREPROTO;
+ gre->frag[0] = gre->frag[1] = 0;
+ grepdout++;
+ grebdout += BLEN(bp);
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
static void
-greiput(Proto *gre, Ipifc*, Block *bp)
+gredownlink(Conv *c, Block *bp)
{
- int len;
- GREhdr *ghp;
- Conv *c, **p;
- ushort eproto;
+	Metablock *m;
+	GREconv *grec;
+	GREhdr *gre;
+	int hdrlen, suspended, extra;
+	ushort flags;
+	ulong seq;
+	/* Downlink: re-sequence bp and send it south, or park it while the downlink is suspended. */
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){	/* it is forwarded with ttl - 1, so it would expire here */
+		freeb(bp);
+		return;
+	}
+
+	/*
+	 * We've received a packet with a GRE header and we need to
+	 * re-adjust the packet header to strip all unwanted parts
+	 * but leave room for only a sequence number.
+	 */
+	grec = c->ptcl;
+	flags = nhgets(gre->flags);
+	hdrlen = 0;
+	if(flags & GRE_cksum)
+		hdrlen += 2;
+	if(flags & GRE_routing){
+		print("%V routing info present. Discarding packet\n", gre->src);
+		freeb(bp);
+		return;
+	}
+	if(flags & (GRE_cksum|GRE_routing))
+		hdrlen += 2;	/* Offset field */
+	if(flags & GRE_key)
+		hdrlen += 4;
+	if(flags & GRE_seq)
+		hdrlen += 4;
+
+	/*
+	 * The outgoing packet only has the sequence number set. Make room
+	 * for the sequence number.
+	 */
+	if(hdrlen != sizeof(ulong)){
+		extra = hdrlen - sizeof(ulong);	/* < 0: grow the header, > 0: shrink it */
+		if(extra < 0 && bp->rp - bp->base < -extra){
+			print("gredownlink: cannot add sequence number\n");
+			freeb(bp);
+			return;
+		}
+		memmove(bp->rp + extra, bp->rp, sizeof(GREhdr));
+		bp->rp += extra;
+		assert(BLEN(bp) >= sizeof(GREhdr) + sizeof(ulong));
+		gre = (GREhdr *)bp->rp;
+	}
+	seq = grec->seq++;
+	hnputs(gre->flags, GRE_seq);
+	hnputl(bp->rp + sizeof(GREhdr), seq);
+
+	/*
+	 * Keep rp and seq at the base. ipoput4 consumes rp for
+	 * refragmentation.
+	 */
+	assert(bp->rp - bp->base >= sizeof(Metablock));
+	m = (Metablock *)bp->base;
+	m->rp = bp->rp;
+	m->seq = seq;
+
+	/*
+	 * Here we make a decision what we're doing with the packet. We're
+	 * doing this w/o holding a lock which means that later on in the
+	 * process we may discover we've done the wrong thing. I don't want
+	 * to call ipoput with the lock held.
+	 */
+restart:
+	suspended = grec->dlsusp;
+	if(suspended){
+		if(!canqlock(&grec->lock)){
+			/*
+			 * just give up. too bad, we lose a packet. this
+			 * is just too hard and my brain already hurts.
+			 */
+			freeb(bp);
+			return;
+		}
+
+		if(!grec->dlsusp){
+			/*
+			 * suspend race. We thought we were suspended, but
+			 * we really weren't.
+			 */
+			qunlock(&grec->lock);
+			goto restart;
+		}
+
+		/* Still suspended: park the packet on the dlbuffered ring */
+		addring(&grec->dlbuffered, bp);
+		qunlock(&grec->lock);
+		return;
+	}
+
+	/*
+	 * When we get here, we're not suspended. Proceed to send the
+	 * packet.
+	 */
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->south, sizeof gre->dst);
+
+	ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+	grepdout++;
+	grebdout += BLEN(bp);
+
+	/*
+	 * Now make sure we didn't do the wrong thing.
+	 */
+	if(!canqlock(&grec->lock)){
+		freeb(bp);	/* The packet just goes away */
+		return;
+	}
+
+	/* We did the right thing */
+	addring(&grec->dlpending, bp);
+	qunlock(&grec->lock);
+}
+
+static void
+greuplink(Conv *c, Block *bp)
+{
+	GREconv *grec;
+	GREhdr *gre;
+	ushort flags;
+	/* Uplink: readdress bp (coa -> north), add the key if set, then send or buffer it; bp is always consumed. */
+	gre = (GREhdr *)bp->rp;
+	if(gre->ttl == 1){	/* would expire on this hop: free it, as gredownlink does */
+		freeb(bp);
+		return;
+	}
+
+	grec = c->ptcl;
+	memmove(gre->src, grec->coa, sizeof gre->src);
+	memmove(gre->dst, grec->north, sizeof gre->dst);
+
+	/* Add a key, if needed. */
+	if(grec->ulkey){
+		flags = nhgets(gre->flags);
+		if(flags & (GRE_cksum|GRE_routing)){
+			print("%V routing info present. Discarding packet\n",
+				gre->src);
+			freeb(bp);
+			return;
+		}
+
+		if((flags & GRE_key) == 0){
+			/* Make room for the key */
+			if(bp->rp - bp->base < sizeof(ulong)){
+				print("%V can't add key\n", gre->src);
+				freeb(bp);
+				return;
+			}
+
+			bp->rp -= 4;
+			memmove(bp->rp, bp->rp + 4, sizeof(GREhdr));
+
+			gre = (GREhdr *)bp->rp;
+			hnputs(gre->flags, flags | GRE_key);
+		}
+
+		/* Add the key */
+		hnputl(bp->rp + sizeof(GREhdr), grec->ulkey);
+	}
+
+	if(!canqlock(&grec->lock)){
+		freeb(bp);
+		return;
+	}
+
+	if(grec->ulsusp)
+		addring(&grec->ulbuffered, bp);
+	else{
+		grepuout++;
+		grebuout += BLEN(bp);	/* read the length before ipoput4 is handed the block */
+		ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+	}
+	qunlock(&grec->lock);
+}
+
+static void
+greiput(Proto *proto, Ipifc *, Block *bp)
+{
+ int len, hdrlen;
+ ushort eproto, flags;
uchar raddr[IPaddrlen];
+ Conv *c, **p;
+ GREconv *grec;
+ GREhdr *gre;
GREpriv *gpriv;
+ Ip4hdr *ip;
- gpriv = gre->priv;
- ghp = (GREhdr*)(bp->rp);
+ /*
+ * We don't want to deal with block lists. Ever. The problem is
+ * that when the block is forwarded, devether.c puts the block into
+ * a queue that also uses ->next. Just do not use ->next here!
+ */
+ if(bp->next != nil)
+ bp = pullupblock(bp, blocklen(bp));
- v4tov6(raddr, ghp->src);
- eproto = nhgets(ghp->eproto);
- qlock(gre);
+ gre = (GREhdr *)bp->rp;
+ if(BLEN(bp) < sizeof(GREhdr) || gre->proto != IP_GREPROTO){
+ freeb(bp);
+ return;
+ }
- /* Look for a conversation structure for this port and address */
- c = nil;
- for(p = gre->conv; *p; p++) {
+ v4tov6(raddr, gre->src);
+ eproto = nhgets(gre->eproto);
+ flags = nhgets(gre->flags);
+ hdrlen = sizeof(GREhdr);
+
+ if(flags & GRE_cksum)
+ hdrlen += 2;
+ if(flags & GRE_routing){
+ print("%I routing info present. Discarding packet\n", raddr);
+ freeb(bp);
+ return;
+ }
+ if(flags & (GRE_cksum|GRE_routing))
+ hdrlen += 2; /* Offset field */
+ if(flags & GRE_key)
+ hdrlen += 4;
+ if(flags & GRE_seq)
+ hdrlen += 4;
+
+ qlock(proto);
+
+ if(eproto != 0x880B && BLEN(bp) - hdrlen >= sizeof(Ip4hdr)){
+ ip = (Ip4hdr *)(bp->rp + hdrlen);
+
+ /*
+ * Look for a conversation structure for this port and address, or
+ * match the retunnel part, or match on the raw flag.
+ */
+ for(p = proto->conv; *p; p++) {
+ c = *p;
+
+ if(c->inuse == 0)
+ continue;
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(memcmp(ip->dst, grec->hoa, sizeof ip->dst) == 0){
+ grepdin++;
+ grebdin += BLEN(bp);
+ gredownlink(c, bp);
+ qunlock(proto);
+ return;
+ }
+
+ if(memcmp(ip->src, grec->hoa, sizeof ip->src) == 0){
+ grepuin++;
+ grebuin += BLEN(bp);
+ greuplink(c, bp);
+ qunlock(proto);
+ return;
+ }
+ }
+ }
+
+
+ /*
+ * when we get here, none of the forwarding tunnels matched. now
+ * try to match on raw and conversational sessions.
+ */
+ for(c = nil, p = proto->conv; *p; p++) {
c = *p;
+
if(c->inuse == 0)
continue;
- if(c->rport == eproto &&
- (gpriv->raw || ipcmp(c->raddr, raddr) == 0))
+
+ /*
+ * Do not stop this session - blocking here
+ * implies that etherread is blocked.
+ */
+ grec = c->ptcl;
+ if(c->rport == eproto &&
+ (grec->raw || ipcmp(c->raddr, raddr) == 0))
break;
}
- if(*p == nil) {
- qunlock(gre);
- freeblist(bp);
+ qunlock(proto);
+
+ if(*p == nil){
+ freeb(bp);
return;
}
- qunlock(gre);
-
/*
* Trim the packet down to data size
*/
- len = nhgets(ghp->len) - GRE_IPONLY;
+ len = nhgets(gre->len) - GRE_IPONLY;
if(len < GRE_IPPLUSGRE){
- freeblist(bp);
+ freeb(bp);
return;
}
+
bp = trimblock(bp, GRE_IPONLY, len);
if(bp == nil){
+ gpriv = proto->priv;
gpriv->lenerr++;
return;
}
- /*
- * Can't delimit packet so pull it all into one block.
- */
- if(qlen(c->rq) > 64*1024)
- freeblist(bp);
- else{
- bp = concatblock(bp);
- if(bp == 0)
- panic("greiput");
- qpass(c->rq, bp);
- }
+ qpass(c->rq, bp);
}
int
@@ -234,29 +649,258 @@
GREpriv *gpriv;
gpriv = gre->priv;
+ return snprint(buf, len,
+ "gre: %llud %llud %llud %llud %llud %llud %llud %llud, lenerrs %llud\n",
+ grepdin, grepdout, grepuin, grepuout,
+ grebdin, grebdout, grebuin, grebuout, gpriv->lenerr);
+}
- return snprint(buf, len, "gre: len %lud\n", gpriv->lenerr);
+static char *
+grectlraw(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->raw = 1;
+ return nil;
}
-char*
-grectl(Conv *c, char **f, int n)
+static char *
+grectlcooked(Conv *c, int, char **)
{
- GREpriv *gpriv;
+ GREconv *grec;
- gpriv = c->p->priv;
- if(n == 1){
- if(strcmp(f[0], "raw") == 0){
- gpriv->raw = 1;
- return nil;
- }
- else if(strcmp(f[0], "cooked") == 0){
- gpriv->raw = 0;
- return nil;
- }
+ grec = c->ptcl;
+ grec->raw = 0;
+ return nil;
+}
+
+static char *
+grectlretunnel(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+ uchar ipaddr[4];
+
+ grec = c->ptcl;
+ if(memcmp(grec->hoa, nulladdr, sizeof grec->hoa))
+ return "tunnel already set up";
+
+ v4parseip(ipaddr, argv[1]);
+ if(memcmp(ipaddr, nulladdr, sizeof ipaddr) == 0)
+ return "bad hoa";
+ memmove(grec->hoa, ipaddr, sizeof grec->hoa);
+ v4parseip(ipaddr, argv[2]);
+ memmove(grec->north, ipaddr, sizeof grec->north);
+ v4parseip(ipaddr, argv[3]);
+ memmove(grec->south, ipaddr, sizeof grec->south);
+ v4parseip(ipaddr, argv[4]);
+ memmove(grec->coa, ipaddr, sizeof grec->coa);
+ grec->ulsusp = 1;
+ grec->dlsusp = 0;
+
+ return nil;
+}
+
+static char *
+grectlreport(Conv *c, int, char **argv)
+{
+ ulong seq;
+ Block *bp;
+ Bring *r;
+ GREconv *grec;
+ Metablock *m;
+ /* "report seq": free blocks on the dlpending ring whose recorded sequence number precedes seq */
+ grec = c->ptcl;
+ seq = strtoul(argv[1], nil, 0);
+
+ qlock(&grec->lock);
+ r = &grec->dlpending;
+ while(r->produced - r->consumed > 0){
+ bp = r->ring[r->consumed & Ringmask]; /* peek at the oldest block without removing it */
+
+ assert(bp && bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base; /* gredownlink stores rp and seq at the block base */
+ if((long)(seq - m->seq) <= 0) /* difference taken as signed: stop at the first block with m->seq not before seq */
+ break;
+
+ r->ring[r->consumed & Ringmask] = nil;
+ r->consumed++;
+
+ freeb(bp);
}
- return "unknown control request";
+ qunlock(&grec->lock);
+ return nil;
}
+static char *
+grectldlsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->dlsusp)
+ return "already suspended";
+
+ grec->dlsusp = 1;
+ return nil;
+}
+
+static char *
+grectlulsuspend(Conv *c, int, char **)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ if(grec->ulsusp)
+ return "already suspended";
+
+ grec->ulsusp = 1;
+ return nil;
+}
+
+static char *
+grectldlresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+ /* "dlresume": send every block on the dlbuffered ring, then clear dlsusp */
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ qunlock(&grec->lock);
+ /* the lock is dropped around ipoput4; a copy is sent so bp itself can be kept */
+ ipoput4(c->p->f, copyblock(bp, BLEN(bp)), 0, gre->ttl - 1, gre->tos, nil);
+ /* NOTE(review): unlike grectlforward, gre->src/dst are not rewritten before sending - confirm intended */
+ qlock(&grec->lock);
+ addring(&grec->dlpending, bp); /* keep the original on the pending ring */
+ }
+ grec->dlsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulresume(Conv *c, int, char **)
+{
+ GREconv *grec;
+ GREhdr *gre;
+ Block *bp;
+
+ grec = c->ptcl;
+
+ qlock(&grec->lock);
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ grec->ulsusp = 0;
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlforward(Conv *c, int, char **argv)
+{
+ Block *bp;
+ GREconv *grec;
+ GREhdr *gre;
+ Metablock *m;
+
+ grec = c->ptcl;
+
+ v4parseip(grec->south, argv[1]);
+ memmove(grec->north, grec->south, sizeof grec->north);
+
+ qlock(&grec->lock);
+ if(!grec->dlsusp){
+ qunlock(&grec->lock);
+ return "not suspended";
+ }
+ grec->dlsusp = 0;
+ grec->ulsusp = 0;
+
+ while((bp = getring(&grec->dlpending)) != nil){
+
+ assert(bp->rp - bp->base >= sizeof(Metablock));
+ m = (Metablock *)bp->base;
+ assert(m->rp >= bp->base && m->rp < bp->lim);
+
+ bp->rp = m->rp;
+
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->dlbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+
+ while((bp = getring(&grec->ulbuffered)) != nil){
+ gre = (GREhdr *)bp->rp;
+
+ memmove(gre->src, grec->coa, sizeof gre->dst);
+ memmove(gre->dst, grec->south, sizeof gre->dst);
+
+ qunlock(&grec->lock);
+ ipoput4(c->p->f, bp, 0, gre->ttl - 1, gre->tos, nil);
+ qlock(&grec->lock);
+ }
+ qunlock(&grec->lock);
+ return nil;
+}
+
+static char *
+grectlulkey(Conv *c, int, char **argv)
+{
+ GREconv *grec;
+
+ grec = c->ptcl;
+ grec->ulkey = strtoul(argv[1], nil, 0);
+ return nil;
+}
+
+char *
+grectl(Conv *c, char **f, int n)
+{
+	int i, want;
+
+	if(n < 1)
+		return "too few arguments";
+
+	/* find f[0] in grectls[]; a table argc of 0 accepts any argument count */
+	for(i = 0; i < Ncmds; i++){
+		if(strcmp(f[0], grectls[i].cmd) != 0)
+			continue;
+		want = grectls[i].argc;
+		if(want != 0 && want != n)
+			return "incorrect number of arguments";
+		return grectls[i].f(c, n, f);
+	}
+	return "no such command";
+}
+
void
greinit(Fs *fs)
{
@@ -276,7 +920,7 @@
gre->stats = grestats;
gre->ipproto = IP_GREPROTO;
gre->nc = 64;
- gre->ptclsize = 0;
+ gre->ptclsize = sizeof(GREconv);
Fsproto(fs, gre);
}
diff -u a/os/ip//icmp.c b/os/ip//icmp.c
--- a/os/ip//icmp.c
+++ b/os/ip//icmp.c
@@ -44,11 +44,6 @@
Maxtype = 18,
};
-enum
-{
- MinAdvise = 24, /* minimum needed for us to advise another protocol */
-};
-
char *icmpnames[Maxtype+1] =
{
[EchoReply] "EchoReply",
@@ -70,6 +65,8 @@
IP_ICMPPROTO = 1,
ICMP_IPSIZE = 20,
ICMP_HDRSIZE = 8,
+
+ MinAdvise = ICMP_IPSIZE+4, /* minimum needed for us to advise another protocol */
};
enum
@@ -113,7 +110,7 @@
c->wq = qbypass(icmpkick, c);
}
-extern char*
+char*
icmpconnect(Conv *c, char **argv, int argc)
{
char *e;
@@ -126,11 +123,11 @@
return nil;
}
-extern int
+int
icmpstate(Conv *c, char *state, int n)
{
USED(c);
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
"Datagram",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -137,7 +134,7 @@
);
}
-extern char*
+char*
icmpannounce(Conv *c, char **argv, int argc)
{
char *e;
@@ -150,7 +147,7 @@
return nil;
}
-extern void
+void
icmpclose(Conv *c)
{
qclose(c->rq);
@@ -169,8 +166,7 @@
if(bp == nil)
return;
-
- if(blocklen(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
+ if(BLEN(bp) < ICMP_IPSIZE + ICMP_HDRSIZE){
freeblist(bp);
return;
}
@@ -190,21 +186,50 @@
ipoput4(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-extern void
-icmpttlexceeded(Fs *f, uchar *ia, Block *bp)
+static int
+ip4reply(Fs *f, uchar ip4[4])
+{
+	uchar v6[IPaddrlen];
+	int kind;
+	/* nonzero when ip4 is not multicast and ipforme() yields 0 or Runi for it */
+	v4tov6(v6, ip4);
+	if(ipismulticast(v6))
+		return 0;
+	kind = ipforme(f, v6);
+	return kind == Runi || kind == 0;
+}
+
+static int
+ip4me(Fs *f, uchar ip4[4])
+{
+	uchar v6[IPaddrlen];
+
+	/* nonzero only when ip4 is not multicast and ipforme() yields exactly Runi */
+	v4tov6(v6, ip4);
+
+	return !ipismulticast(v6) && ipforme(f, v6) == Runi;
+}
+
+void
+icmpttlexceeded(Fs *f, Ipifc *ifc, Block *bp)
+{
Block *nbp;
Icmp *p, *np;
+ uchar ia[IPv4addrlen];
p = (Icmp *)bp->rp;
+ if(!ip4reply(f, p->src) || !ipv4local(ifc, ia, 0, p->src))
+ return;
- netlog(f, Logicmp, "sending icmpttlexceeded -> %V\n", p->src);
+ netlog(f, Logicmp, "sending icmpttlexceeded %V -> src %V dst %V\n",
+ ia, p->src, p->dst);
+
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
nbp->wp += ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8;
np = (Icmp *)nbp->rp;
np->vihl = IP_VER4;
+ memmove(np->src, ia, sizeof(np->src));
memmove(np->dst, p->src, sizeof(np->dst));
- v6tov4(np->src, ia);
memmove(np->data, bp->rp, ICMP_IPSIZE + 8);
np->type = TimeExceed;
np->code = 0;
@@ -214,7 +239,6 @@
memset(np->cksum, 0, sizeof(np->cksum));
hnputs(np->cksum, ptclcsum(nbp, ICMP_IPSIZE, blocklen(nbp) - ICMP_IPSIZE));
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
-
}
static void
@@ -222,20 +246,10 @@
{
Block *nbp;
Icmp *p, *np;
- int i;
- uchar addr[IPaddrlen];
p = (Icmp *)bp->rp;
-
- /* only do this for unicast sources and destinations */
- v4tov6(addr, p->dst);
- i = ipforme(f, addr);
- if((i&Runi) == 0)
+ if(!ip4me(f, p->dst) || !ip4reply(f, p->src))
return;
- v4tov6(addr, p->src);
- i = ipforme(f, addr);
- if(i != 0 && (i&Runi) == 0)
- return;
netlog(f, Logicmp, "sending icmpnoconv -> %V\n", p->src);
nbp = allocb(ICMP_IPSIZE + ICMP_HDRSIZE + ICMP_IPSIZE + 8);
@@ -255,13 +269,13 @@
ipoput4(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmpnoconv(Fs *f, Block *bp)
{
icmpunreachable(f, bp, 3, 0);
}
-extern void
+void
icmpcantfrag(Fs *f, Block *bp, int mtu)
{
icmpunreachable(f, bp, 4, mtu);
@@ -270,35 +284,34 @@
static void
goticmpkt(Proto *icmp, Block *bp)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
- v4tov6(dst, p->src);
+ v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
- if(ipcmp(s->raddr, dst) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ if(ipcmp(s->laddr, dst) == 0 || ipcmp(s->raddr, src) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
freeblist(bp);
}
static Block *
-mkechoreply(Block *bp)
+mkechoreply(Block *bp, Fs *f)
{
Icmp *q;
uchar ip[4];
q = (Icmp *)bp->rp;
+ if(!ip4me(f, q->dst) || !ip4reply(f, q->src))
+ return nil;
+
q->vihl = IP_VER4;
memmove(ip, q->src, sizeof(q->dst));
memmove(q->src, q->dst, sizeof(q->src));
@@ -318,12 +331,22 @@
[3] "port unreachable",
[4] "fragmentation needed and DF set",
[5] "source route failed",
+[6] "destination network unknown",
+[7] "destination host unknown",
+[8] "source host isolated",
+[9] "network administratively prohibited",
+[10] "host administratively prohibited",
+[11] "network unreachable for tos",
+[12] "host unreachable for tos",
+[13] "communication administratively prohibited",
+[14] "host precedence violation",
+[15] "precedence cutoff in effect",
};
static void
icmpiput(Proto *icmp, Ipifc*, Block *bp)
{
- int n, iplen;
+ int n;
Icmp *p;
Block *r;
Proto *pr;
@@ -332,12 +355,10 @@
Icmppriv *ipriv;
ipriv = icmp->priv;
-
ipriv->stats[InMsgs]++;
- p = (Icmp *)bp->rp;
- netlog(icmp->f, Logicmp, "icmpiput %d %d\n", p->type, p->code);
- n = blocklen(bp);
+ bp = concatblock(bp);
+ n = BLEN(bp);
if(n < ICMP_IPSIZE+ICMP_HDRSIZE){
ipriv->stats[InErrors]++;
ipriv->stats[HlenErrs]++;
@@ -344,69 +365,57 @@
netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
goto raise;
}
- iplen = nhgets(p->length);
- if(iplen > n || (iplen % 1)){
- ipriv->stats[LenErrs]++;
+ if(ptclcsum(bp, ICMP_IPSIZE, n - ICMP_IPSIZE)){
ipriv->stats[InErrors]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto raise;
- }
- if(ptclcsum(bp, ICMP_IPSIZE, iplen - ICMP_IPSIZE)){
- ipriv->stats[InErrors]++;
ipriv->stats[CsumErrs]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto raise;
}
+ p = (Icmp *)bp->rp;
+ netlog(icmp->f, Logicmp, "icmpiput %s (%d) %d\n",
+ (p->type < nelem(icmpnames)? icmpnames[p->type]: ""),
+ p->type, p->code);
if(p->type <= Maxtype)
ipriv->in[p->type]++;
switch(p->type) {
case EchoRequest:
- if (iplen < n)
- bp = trimblock(bp, 0, iplen);
- r = mkechoreply(bp);
+ r = mkechoreply(bp, icmp->f);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput4(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case Unreachable:
- if(p->code > 5)
- msg = unreachcode[1];
- else
+ if(p->code >= nelem(unreachcode)) {
+ snprint(m2, sizeof m2, "unreachable %V -> %V code %d",
+ p->src, p->dst, p->code);
+ msg = m2;
+ } else
msg = unreachcode[p->code];
+ Advise:
bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs]++;
goto raise;
}
p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
+ if((nhgets(p->frag) & IP_FO) == 0){
+ pr = Fsrcvpcolx(icmp->f, p->proto);
+ if(pr != nil && pr->advise != nil) {
+ (*pr->advise)(pr, bp, msg);
+ return;
+ }
}
-
bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
goticmpkt(icmp, bp);
break;
case TimeExceed:
if(p->code == 0){
- sprint(m2, "ttl exceeded at %V", p->src);
-
- bp->rp += ICMP_IPSIZE+ICMP_HDRSIZE;
- if(blocklen(bp) < MinAdvise){
- ipriv->stats[LenErrs]++;
- goto raise;
- }
- p = (Icmp *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
- return;
- }
- bp->rp -= ICMP_IPSIZE+ICMP_HDRSIZE;
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %V", p->src);
+ goto Advise;
}
-
goticmpkt(icmp, bp);
break;
default:
@@ -419,22 +428,25 @@
freeblist(bp);
}
-void
+static void
icmpadvise(Proto *icmp, Block *bp, char *msg)
{
+ ushort recid;
+ uchar dst[IPaddrlen], src[IPaddrlen];
Conv **c, *s;
Icmp *p;
- uchar dst[IPaddrlen];
- ushort recid;
p = (Icmp *) bp->rp;
v4tov6(dst, p->dst);
+ v4tov6(src, p->src);
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, src) == 0)
if(ipcmp(s->raddr, dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -443,7 +455,7 @@
freeblist(bp);
}
-int
+static int
icmpstats(Proto *icmp, char *buf, int len)
{
Icmppriv *priv;
@@ -456,7 +468,7 @@
for(i = 0; i < Nstats; i++)
p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
for(i = 0; i <= Maxtype; i++){
- if(icmpnames[i])
+ if(icmpnames[i] != nil)
p = seprint(p, e, "%s: %lud %lud\n", icmpnames[i], priv->in[i], priv->out[i]);
else
p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
@@ -463,13 +475,7 @@
}
return p - buf;
}
-
-int
-icmpgc(Proto *icmp)
-{
- return natgc(icmp->ipproto);
-}
-
+
void
icmpinit(Fs *fs)
{
@@ -487,7 +493,7 @@
icmp->stats = icmpstats;
icmp->ctl = nil;
icmp->advise = icmpadvise;
- icmp->gc = icmpgc;
+ icmp->gc = nil;
icmp->ipproto = IP_ICMPPROTO;
icmp->nc = 128;
icmp->ptclsize = 0;
diff -u a/os/ip//icmp6.c b/os/ip//icmp6.c
--- a/os/ip//icmp6.c
+++ b/os/ip//icmp6.c
@@ -1,3 +1,6 @@
+/*
+ * Internet Control Message Protocol for IPv6
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -7,41 +10,36 @@
#include "ip.h"
#include "ipv6.h"
-typedef struct ICMPpkt ICMPpkt;
-typedef struct IPICMP IPICMP;
-typedef struct Ndpkt Ndpkt;
-typedef struct NdiscC NdiscC;
+enum
+{
+ InMsgs6,
+ InErrors6,
+ OutMsgs6,
+ CsumErrs6,
+ LenErrs6,
+ HlenErrs6,
+ HoplimErrs6,
+ IcmpCodeErrs6,
+ TargetErrs6,
+ OptlenErrs6,
+ AddrmxpErrs6,
+ RouterAddrErrs6,
-struct ICMPpkt {
- uchar type;
- uchar code;
- uchar cksum[2];
- uchar icmpid[2];
- uchar seq[2];
+ Nstats6,
};
-struct IPICMP {
- Ip6hdr;
- ICMPpkt;
+enum {
+ ICMP_USEAD6 = 40,
};
-struct NdiscC
-{
- IPICMP;
- uchar target[IPaddrlen];
+enum {
+ Oflag = 1<<5,
+ Sflag = 1<<6,
+ Rflag = 1<<7,
};
-struct Ndpkt
-{
- NdiscC;
- uchar otype;
- uchar olen; // length in units of 8 octets(incl type, code),
- // 1 for IEEE 802 addresses
- uchar lnaddr[6]; // link-layer address
-};
-
-enum {
- // ICMPv6 types
+enum {
+ /* ICMPv6 types */
EchoReply = 0,
UnreachableV6 = 1,
PacketTooBigV6 = 2,
@@ -69,6 +67,65 @@
Maxtype6 = 137,
};
+enum {
+ MinAdvise = IP6HDR+4, /* minimum needed for us to advise another protocol */
+};
+
+/* on-the-wire packet formats */
+typedef struct IPICMP IPICMP;
+typedef struct Ndpkt Ndpkt;
+typedef struct NdiscC NdiscC;
+
+/* we do this to avoid possible struct padding */
+#define ICMPHDR \
+ IPV6HDR; \
+ uchar type; \
+ uchar code; \
+ uchar cksum[2]; \
+ uchar icmpid[2]; \
+ uchar seq[2]
+
+struct IPICMP {
+ ICMPHDR;
+ uchar payload[];
+};
+
+#define IPICMPSZ offsetof(IPICMP, payload[0])
+
+struct NdiscC {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar payload[];
+};
+
+#define NDISCSZ offsetof(NdiscC, payload[0])
+
+struct Ndpkt {
+ ICMPHDR;
+ uchar target[IPaddrlen];
+ uchar otype;
+ uchar olen; /* length in units of 8 octets(incl type, code),
+ * 1 for IEEE 802 addresses */
+ uchar lnaddr[6]; /* link-layer address */
+ uchar payload[];
+};
+
+#define NDPKTSZ offsetof(Ndpkt, payload[0])
+
+typedef struct Icmppriv6
+{
+ ulong stats[Nstats6];
+
+ /* message counts */
+ ulong in[Maxtype6+1];
+ ulong out[Maxtype6+1];
+} Icmppriv6;
+
+typedef struct Icmpcb6
+{
+ uchar headers;
+} Icmpcb6;
+
char *icmpnames6[Maxtype6+1] =
{
[EchoReply] "EchoReply",
@@ -95,24 +152,6 @@
[RedirectV6] "RedirectV6",
};
-enum
-{
- InMsgs6,
- InErrors6,
- OutMsgs6,
- CsumErrs6,
- LenErrs6,
- HlenErrs6,
- HoplimErrs6,
- IcmpCodeErrs6,
- TargetErrs6,
- OptlenErrs6,
- AddrmxpErrs6,
- RouterAddrErrs6,
-
- Nstats6,
-};
-
static char *statnames6[Nstats6] =
{
[InMsgs6] "InMsgs",
@@ -129,49 +168,18 @@
[RouterAddrErrs6] "RouterAddrErrs",
};
-typedef struct Icmppriv6
-{
- ulong stats[Nstats6];
-
- /* message counts */
- ulong in[Maxtype6+1];
- ulong out[Maxtype6+1];
-} Icmppriv6;
-
-typedef struct Icmpcb6
-{
- QLock;
- uchar headers;
-} Icmpcb6;
-
static char *unreachcode[] =
{
-[icmp6_no_route] "no route to destination",
-[icmp6_ad_prohib] "comm with destination administratively prohibited",
-[icmp6_unassigned] "icmp unreachable: unassigned error code (2)",
-[icmp6_adr_unreach] "address unreachable",
-[icmp6_port_unreach] "port unreachable",
-[icmp6_unkn_code] "icmp unreachable: unknown code",
+[Icmp6_no_route] "no route to destination",
+[Icmp6_ad_prohib] "comm with destination administratively prohibited",
+[Icmp6_out_src_scope] "beyond scope of source address",
+[Icmp6_adr_unreach] "address unreachable",
+[Icmp6_port_unreach] "port unreachable",
+[Icmp6_gress_src_fail] "source address failed ingress/egress policy",
+[Icmp6_rej_route] "reject route to destination",
+[Icmp6_unknown] "icmp unreachable: unknown code",
};
-enum {
- ICMP_USEAD6 = 40,
-};
-
-enum {
- Oflag = 1<<5,
- Sflag = 1<<6,
- Rflag = 1<<7,
-};
-
-enum {
- slladd = 1,
- tlladd = 2,
- prfinfo = 3,
- redhdr = 4,
- mtuopt = 5,
-};
-
static void icmpkick6(void *x, Block *bp);
static void
@@ -185,13 +193,14 @@
set_cksum(Block *bp)
{
IPICMP *p = (IPICMP *)(bp->rp);
+ int n = blocklen(bp);
- hnputl(p->vcf, 0); // borrow IP header as pseudoheader
- hnputs(p->ploadlen, blocklen(bp)-IPV6HDR_LEN);
+ hnputl(p->vcf, 0); /* borrow IP header as pseudoheader */
+ hnputs(p->ploadlen, n - IP6HDR);
p->proto = 0;
- p->ttl = ICMPv6; // ttl gets set later
+ p->ttl = ICMPv6; /* ttl gets set later */
hnputs(p->cksum, 0);
- hnputs(p->cksum, ptclcsum(bp, 0, blocklen(bp)));
+ hnputs(p->cksum, ptclcsum(bp, 0, n));
p->proto = ICMPv6;
}
@@ -198,7 +207,8 @@
static Block *
newIPICMP(int packetlen)
{
- Block *nbp;
+ Block *nbp;
+
nbp = allocb(packetlen);
nbp->wp += packetlen;
memset(nbp->rp, 0, packetlen);
@@ -205,20 +215,22 @@
return nbp;
}
-void
+static void
icmpadvise6(Proto *icmp, Block *bp, char *msg)
{
- Conv **c, *s;
- IPICMP *p;
- ushort recid;
+ ushort recid;
+ Conv **c, *s;
+ IPICMP *p;
- p = (IPICMP *) bp->rp;
+ p = (IPICMP *)bp->rp;
recid = nhgets(p->icmpid);
- for(c = icmp->conv; *c; c++) {
- s = *c;
+ for(c = icmp->conv; (s = *c) != nil; c++){
if(s->lport == recid)
+ if(ipcmp(s->laddr, p->src) == 0)
if(ipcmp(s->raddr, p->dst) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -230,9 +242,9 @@
static void
icmpkick6(void *x, Block *bp)
{
+ uchar laddr[IPaddrlen], raddr[IPaddrlen];
Conv *c = x;
IPICMP *p;
- uchar laddr[IPaddrlen], raddr[IPaddrlen];
Icmppriv6 *ipriv = c->p->priv;
Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
@@ -249,10 +261,10 @@
bp->rp += IPaddrlen;
ipmove(raddr, bp->rp);
bp->rp += IPaddrlen;
- bp = padblock(bp, sizeof(Ip6hdr));
+ bp = padblock(bp, IP6HDR);
}
- if(blocklen(bp) < sizeof(IPICMP)){
+ if(BLEN(bp) < IPICMPSZ){
freeblist(bp);
return;
}
@@ -268,23 +280,20 @@
set_cksum(bp);
p->vcf[0] = 0x06 << 4;
- if(p->type <= Maxtype6)
+ if(p->type <= Maxtype6)
ipriv->out[p->type]++;
ipoput6(c->p->f, bp, 0, c->ttl, c->tos, nil);
}
-char*
+static char*
icmpctl6(Conv *c, char **argv, int argc)
{
Icmpcb6 *icb;
icb = (Icmpcb6*) c->ptcl;
-
- if(argc==1) {
- if(strcmp(argv[0], "headers")==0) {
- icb->headers = 6;
- return nil;
- }
+ if(argc==1 && strcmp(argv[0], "headers")==0) {
+ icb->headers = 6;
+ return nil;
}
return "unknown control request";
}
@@ -292,41 +301,39 @@
static void
goticmpkt6(Proto *icmp, Block *bp, int muxkey)
{
- Conv **c, *s;
- IPICMP *p = (IPICMP *)bp->rp;
- ushort recid;
- uchar *addr;
+ ushort recid;
+ uchar *addr;
+ Conv **c, *s;
+ IPICMP *p = (IPICMP *)bp->rp;
if(muxkey == 0) {
recid = nhgets(p->icmpid);
addr = p->src;
- }
- else {
+ } else {
recid = muxkey;
addr = p->dst;
}
-
- for(c = icmp->conv; *c; c++){
- s = *c;
- if(s->lport == recid && ipcmp(s->raddr, addr) == 0){
- bp = concatblock(bp);
- if(bp != nil)
- qpass(s->rq, bp);
- return;
- }
+ for(c = icmp->conv; (s = *c) != nil; c++){
+ if(s->lport == recid)
+ if(ipcmp(s->laddr, p->dst) == 0 || ipcmp(s->raddr, addr) == 0)
+ qpass(s->rq, copyblock(bp, blocklen(bp)));
}
-
freeblist(bp);
}
static Block *
-mkechoreply6(Block *bp)
+mkechoreply6(Block *bp, Ipifc *ifc)
{
+ uchar addr[IPaddrlen];
IPICMP *p = (IPICMP *)(bp->rp);
- uchar addr[IPaddrlen];
+ if(isv6mcast(p->src))
+ return nil;
ipmove(addr, p->src);
- ipmove(p->src, p->dst);
+ if(!isv6mcast(p->dst))
+ ipmove(p->src, p->dst);
+ else if (!ipv6local(ifc, p->src, 0, addr))
+ return nil;
ipmove(p->dst, addr);
p->type = EchoReplyV6;
set_cksum(bp);
@@ -335,49 +342,43 @@
/*
* sends out an ICMPv6 neighbor solicitation
- * suni == SRC_UNSPEC or SRC_UNI,
+ * suni == SRC_UNSPEC or SRC_UNI,
* tuni == TARG_MULTI => multicast for address resolution,
* and tuni == TARG_UNI => neighbor reachability.
*/
-
-extern void
+void
icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
-
- nbp = newIPICMP(sizeof(Ndpkt));
+ nbp = newIPICMP(NDPKTSZ);
np = (Ndpkt*) nbp->rp;
+ if(suni == SRC_UNSPEC)
+ ipmove(np->src, v6Unspecified);
+ else
+ ipmove(np->src, src);
- if(suni == SRC_UNSPEC)
- memmove(np->src, v6Unspecified, IPaddrlen);
- else
- memmove(np->src, src, IPaddrlen);
-
if(tuni == TARG_UNI)
- memmove(np->dst, targ, IPaddrlen);
+ ipmove(np->dst, targ);
else
ipv62smcast(np->dst, targ);
np->type = NbrSolicit;
np->code = 0;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
if(suni != SRC_UNSPEC) {
- np->otype = SRC_LLADDRESS;
- np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
+ np->otype = SRC_LLADDR;
+ np->olen = 1; /* 1+1+6 = 8 = 1 8-octet */
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
- }
- else {
- int r = sizeof(Ndpkt)-sizeof(NdiscC);
- nbp->wp -= r;
- }
+ } else
+ nbp->wp -= NDPKTSZ - NDISCSZ;
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrSolicit]++;
@@ -388,122 +389,101 @@
/*
* sends out an ICMPv6 neighbor advertisement. pktflags == RSO flags.
*/
-extern void
+void
icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags)
{
- Block *nbp;
+ Block *nbp;
Ndpkt *np;
Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
- nbp = newIPICMP(sizeof(Ndpkt));
- np = (Ndpkt*) nbp->rp;
+ nbp = newIPICMP(NDPKTSZ);
+ np = (Ndpkt*)nbp->rp;
- memmove(np->src, src, IPaddrlen);
- memmove(np->dst, dst, IPaddrlen);
+ ipmove(np->src, src);
+ ipmove(np->dst, dst);
np->type = NbrAdvert;
np->code = 0;
np->icmpid[0] = flags;
- memmove(np->target, targ, IPaddrlen);
+ ipmove(np->target, targ);
- np->otype = TARGET_LLADDRESS;
- np->olen = 1;
+ np->otype = TARGET_LLADDR;
+ np->olen = 1;
memmove(np->lnaddr, mac, sizeof(np->lnaddr));
set_cksum(nbp);
- np = (Ndpkt*) nbp->rp;
+ np = (Ndpkt*)nbp->rp;
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[NbrAdvert]++;
- netlog(f, Logicmp, "sending neighbor advertisement %I\n", src);
+ netlog(f, Logicmp, "sending neighbor advertisement %I\n", targ);
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
-icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free)
+void
+icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
+ return;
- if(isv6mcast(p->src))
- goto clean;
+ netlog(f, Logicmp, "send icmphostunr %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
-
- rlock(ifc);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmphostunr -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmphostunr fail -> s%I d%I\n", p->src, p->dst);
- freeblist(nbp);
- if(free)
- goto clean;
- else
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = UnreachableV6;
np->code = code;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
ipriv->out[UnreachableV6]++;
- if(free)
+ if(tome)
ipiput6(f, ifc, nbp);
- else {
+ else
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
- return;
- }
-
-clean:
- runlock(ifc);
- freeblist(bp);
}
-extern void
+void
icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
+ netlog(f, Logicmp, "send icmpttlexceeded6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
+
nbp = newIPICMP(sz);
np = (IPICMP *) nbp->rp;
-
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmpttlexceeded6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmpttlexceeded6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = TimeExceedV6;
np->code = 0;
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -511,38 +491,33 @@
ipoput6(f, nbp, 0, MAXTTL, DFLTTOS, nil);
}
-extern void
+void
icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp)
{
+ int osz = BLEN(bp);
+ int sz = MIN(IPICMPSZ + osz, v6MINTU);
Block *nbp;
IPICMP *np;
- Ip6hdr *p;
- int osz = BLEN(bp);
- int sz = MIN(sizeof(IPICMP) + osz, v6MINTU);
- Proto *icmp = f->t2p[ICMPv6];
+ Ip6hdr *p;
+ Proto *icmp = f->t2p[ICMPv6];
Icmppriv6 *ipriv = icmp->priv;
+ uchar ia[IPaddrlen];
- p = (Ip6hdr *) bp->rp;
-
- if(isv6mcast(p->src))
+ p = (Ip6hdr *)bp->rp;
+ if(isv6mcast(p->dst) || isv6mcast(p->src) || !ipv6local(ifc, ia, 0, p->src))
return;
- nbp = newIPICMP(sz);
- np = (IPICMP *) nbp->rp;
+ netlog(f, Logicmp, "send icmppkttoobig6 %I -> src %I dst %I\n",
+ ia, p->src, p->dst);
- if(ipv6anylocal(ifc, np->src)) {
- netlog(f, Logicmp, "send icmppkttoobig6 -> s%I d%I\n", p->src, p->dst);
- }
- else {
- netlog(f, Logicmp, "icmppkttoobig6 fail -> s%I d%I\n", p->src, p->dst);
- return;
- }
-
- memmove(np->dst, p->src, IPaddrlen);
+ nbp = newIPICMP(sz);
+ np = (IPICMP *)nbp->rp;
+ ipmove(np->src, ia);
+ ipmove(np->dst, p->src);
np->type = PacketTooBigV6;
np->code = 0;
hnputl(np->icmpid, ifc->maxtu - ifc->m->hsize);
- memmove(nbp->rp + sizeof(IPICMP), bp->rp, sz - sizeof(IPICMP));
+ memmove(nbp->rp + IPICMPSZ, bp->rp, sz - IPICMPSZ);
set_cksum(nbp);
np->ttl = HOP_LIMIT;
np->vcf[0] = 0x06 << 4;
@@ -554,31 +529,23 @@
* RFC 2461, pages 39-40, pages 57-58.
*/
static int
-valid(Proto *icmp, Ipifc *ifc, Block *bp, Icmppriv6 *ipriv) {
- int sz, osz, unsp, n, ttl, iplen;
- int pktsz = BLEN(bp);
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *) packet;
- Ndpkt *np;
+valid(Proto *icmp, Ipifc *, Block *bp, Icmppriv6 *ipriv)
+{
+ int sz, osz, unsp, ttl;
+ int pktsz = BLEN(bp);
+ uchar *packet = bp->rp;
+ IPICMP *p = (IPICMP *) packet;
+ Ndpkt *np;
- USED(ifc);
- n = blocklen(bp);
- if(n < sizeof(IPICMP)) {
+ if(pktsz < IPICMPSZ) {
ipriv->stats[HlenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp hlen %d\n", n);
+ netlog(icmp->f, Logicmp, "icmp hlen %d\n", pktsz);
goto err;
}
- iplen = nhgets(p->ploadlen);
- if(iplen > n-IPV6HDR_LEN || (iplen % 1)) {
- ipriv->stats[LenErrs6]++;
- netlog(icmp->f, Logicmp, "icmp length %d\n", iplen);
- goto err;
- }
-
- // Rather than construct explicit pseudoheader, overwrite IPv6 header
+ /* Rather than construct explicit pseudoheader, overwrite IPv6 header */
if(p->proto != ICMPv6) {
- // This code assumes no extension headers!!!
+ /* This code assumes no extension headers!!! */
netlog(icmp->f, Logicmp, "icmp error: extension header\n");
goto err;
}
@@ -586,7 +553,7 @@
ttl = p->ttl;
p->ttl = p->proto;
p->proto = 0;
- if(ptclcsum(bp, 0, iplen + IPV6HDR_LEN)) {
+ if(ptclcsum(bp, 0, pktsz)) {
ipriv->stats[CsumErrs6]++;
netlog(icmp->f, Logicmp, "icmp checksum error\n");
goto err;
@@ -595,19 +562,16 @@
p->ttl = ttl;
/* additional tests for some pkt types */
- if( (p->type == NbrSolicit) ||
- (p->type == NbrAdvert) ||
- (p->type == RouterAdvert) ||
- (p->type == RouterSolicit) ||
- (p->type == RedirectV6) ) {
-
+ if (p->type == NbrSolicit || p->type == NbrAdvert ||
+ p->type == RouterAdvert || p->type == RouterSolicit ||
+ p->type == RedirectV6) {
if(p->ttl != HOP_LIMIT) {
- ipriv->stats[HoplimErrs6]++;
- goto err;
+ ipriv->stats[HoplimErrs6]++;
+ goto err;
}
if(p->code != 0) {
- ipriv->stats[IcmpCodeErrs6]++;
- goto err;
+ ipriv->stats[IcmpCodeErrs6]++;
+ goto err;
}
switch (p->type) {
@@ -615,82 +579,78 @@
case NbrAdvert:
np = (Ndpkt*) p;
if(isv6mcast(np->target)) {
- ipriv->stats[TargetErrs6]++;
- goto err;
+ ipriv->stats[TargetErrs6]++;
+ goto err;
}
- if(optexsts(np) && (np->olen == 0)) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ if(optexsts(np) && np->olen == 0) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
-
- if(p->type == NbrSolicit) {
- if(ipcmp(np->src, v6Unspecified) == 0) {
- if(!issmcast(np->dst) || optexsts(np)) {
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
- }
+
+ if (p->type == NbrSolicit &&
+ ipcmp(np->src, v6Unspecified) == 0)
+ if(!issmcast(np->dst) || optexsts(np)) {
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
-
- if(p->type == NbrAdvert) {
- if((isv6mcast(np->dst))&&(nhgets(np->icmpid) & Sflag)){
- ipriv->stats[AddrmxpErrs6]++;
- goto err;
+
+ if(p->type == NbrAdvert)
+ if(isv6mcast(np->dst) &&
+ (nhgets(np->icmpid) & Sflag)){
+ ipriv->stats[AddrmxpErrs6]++;
+ goto err;
}
- }
break;
-
+
case RouterAdvert:
- if(pktsz - sizeof(Ip6hdr) < 16) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 16) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
if(!islinklocal(p->src)) {
- ipriv->stats[RouterAddrErrs6]++;
- goto err;
+ ipriv->stats[RouterAddrErrs6]++;
+ goto err;
}
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
if(osz <= 0) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
- }
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
+ }
sz += 8*osz;
}
break;
-
+
case RouterSolicit:
- if(pktsz - sizeof(Ip6hdr) < 8) {
- ipriv->stats[HlenErrs6]++;
- goto err;
+ if(pktsz - IP6HDR < 8) {
+ ipriv->stats[HlenErrs6]++;
+ goto err;
}
unsp = (ipcmp(p->src, v6Unspecified) == 0);
- sz = sizeof(IPICMP) + 8;
- while ((sz+1) < pktsz) {
- osz = *(packet+sz+1);
- if((osz <= 0) ||
- (unsp && (*(packet+sz) == slladd)) ) {
- ipriv->stats[OptlenErrs6]++;
- goto err;
+ sz = IPICMPSZ + 8;
+ while (sz+8 <= pktsz) {
+ osz = packet[sz+1];
+ if(osz <= 0 ||
+ (unsp && packet[sz] == SRC_LLADDR)) {
+ ipriv->stats[OptlenErrs6]++;
+ goto err;
}
sz += 8*osz;
}
break;
-
+
case RedirectV6:
- //to be filled in
+ /* to be filled in */
break;
-
+
default:
goto err;
}
}
-
return 1;
-
err:
- ipriv->stats[InErrors6]++;
+ ipriv->stats[InErrors6]++;
return 0;
}
@@ -700,169 +660,162 @@
Iplifc *lifc;
int t;
- rlock(ifc);
- if(ipproxyifc(f, ifc, target)) {
- runlock(ifc);
- return t_uniproxy;
- }
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, target) == 0) {
- t = (lifc->tentative) ? t_unitent : t_unirany;
- runlock(ifc);
- return t;
- }
- }
-
- runlock(ifc);
- return 0;
+ if((lifc = iplocalonifc(ifc, target)) != nil)
+ t = lifc->tentative? Tunitent: Tunirany;
+ else if(ipproxyifc(f, ifc, target))
+ t = Tuniproxy;
+ else
+ t = 0;
+ return t;
}
static void
-icmpiput6(Proto *icmp, Ipifc *ipifc, Block *bp)
+icmpiput6(Proto *icmp, Ipifc *ifc, Block *bp)
{
- uchar *packet = bp->rp;
- IPICMP *p = (IPICMP *)packet;
- Icmppriv6 *ipriv = icmp->priv;
- Block *r;
- Proto *pr;
- char *msg, m2[128];
- Ndpkt* np;
+ char *msg, m2[128];
uchar pktflags;
- uchar lsrc[IPaddrlen];
- int refresh = 1;
+ uchar ia[IPaddrlen];
+ Block *r;
+ IPICMP *p;
+ Icmppriv6 *ipriv = icmp->priv;
Iplifc *lifc;
+ Ndpkt* np;
+ Proto *pr;
- if(!valid(icmp, ipifc, bp, ipriv))
- goto raise;
+ bp = concatblock(bp);
+ p = (IPICMP*)bp->rp;
- if(p->type <= Maxtype6)
- ipriv->in[p->type]++;
- else
+ if(!valid(icmp, ifc, bp, ipriv) || p->type > Maxtype6)
goto raise;
+ ipriv->in[p->type]++;
+
switch(p->type) {
case EchoRequestV6:
- r = mkechoreply6(bp);
+ r = mkechoreply6(bp, ifc);
+ if(r == nil)
+ goto raise;
ipriv->out[EchoReply]++;
ipoput6(icmp->f, r, 0, MAXTTL, DFLTTOS, nil);
break;
case UnreachableV6:
- if(p->code > 4)
- msg = unreachcode[icmp6_unkn_code];
+ if(p->code >= nelem(unreachcode))
+ msg = unreachcode[Icmp6_unknown];
else
msg = unreachcode[p->code];
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
+ Advise:
+ bp->rp += IPICMPSZ;
+ if(BLEN(bp) < MinAdvise){
ipriv->stats[LenErrs6]++;
goto raise;
}
p = (IPICMP *)bp->rp;
- pr = Fsrcvpcolx(icmp->f, p->proto);
- if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, msg);
- return;
- }
- bp->rp -= sizeof(IPICMP);
- goticmpkt6(icmp, bp, 0);
- break;
-
- case TimeExceedV6:
- if(p->code == 0){
- sprint(m2, "ttl exceeded at %I", p->src);
-
- bp->rp += sizeof(IPICMP);
- if(blocklen(bp) < 8){
- ipriv->stats[LenErrs6]++;
- goto raise;
+ /* get rid of fragment header if this is the first fragment */
+ if(p->proto == FH && BLEN(bp) >= MinAdvise+IP6FHDR && MinAdvise > IP6HDR){
+ Fraghdr6 *fh = (Fraghdr6*)(bp->rp + IP6HDR);
+ if((nhgets(fh->offsetRM) & ~7) == 0){ /* first fragment */
+ p->proto = fh->nexthdr;
+ /* copy down payload over fragment header */
+ bp->rp += IP6HDR;
+ bp->wp -= IP6FHDR;
+ memmove(bp->rp, bp->rp+IP6FHDR, BLEN(bp));
+ hnputs(p->ploadlen, BLEN(bp));
+ bp->rp -= IP6HDR;
}
- p = (IPICMP *)bp->rp;
+ }
+ if(p->proto != FH){
pr = Fsrcvpcolx(icmp->f, p->proto);
if(pr != nil && pr->advise != nil) {
- (*pr->advise)(pr, bp, m2);
+ (*pr->advise)(pr, bp, msg);
return;
}
- bp->rp -= sizeof(IPICMP);
}
+ bp->rp -= IPICMPSZ;
+ goticmpkt6(icmp, bp, 0);
+ break;
+ case TimeExceedV6:
+ if(p->code == 0){
+ snprint(msg = m2, sizeof m2, "ttl exceeded at %I", p->src);
+ goto Advise;
+ }
+ if(p->code == 1){
+ snprint(msg = m2, sizeof m2, "frag time exceeded at %I", p->src);
+ goto Advise;
+ }
goticmpkt6(icmp, bp, 0);
break;
+ case PacketTooBigV6:
+ snprint(msg = m2, sizeof(m2), "packet too big for %lud mtu at %I",
+ (ulong)nhgetl(p->icmpid), p->src);
+ goto Advise;
+
case RouterAdvert:
case RouterSolicit:
- /* using lsrc as a temp, munge hdr for goticmp6
- memmove(lsrc, p->src, IPaddrlen);
- memmove(p->src, p->dst, IPaddrlen);
- memmove(p->dst, lsrc, IPaddrlen); */
-
goticmpkt6(icmp, bp, p->type);
break;
case NbrSolicit:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
pktflags = 0;
- switch (targettype(icmp->f, ipifc, np->target)) {
- case t_unirany:
+ if(ifc->sendra6)
+ pktflags |= Rflag;
+ switch (targettype(icmp->f, ifc, np->target)) {
+ case Tunirany:
pktflags |= Oflag;
/* fall through */
- case t_uniproxy:
- if(ipcmp(np->src, v6Unspecified) != 0) {
- arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, 0);
+ case Tuniproxy:
+ if(ipv6local(ifc, ia, 0, np->src)) {
+ if(arpenter(icmp->f, V6, np->src, np->lnaddr, 8*np->olen-2, ia, ifc, 0) < 0)
+ break;
pktflags |= Sflag;
- }
- if(ipv6local(ipifc, lsrc)) {
- icmpna(icmp->f, lsrc,
- (ipcmp(np->src, v6Unspecified)==0)?v6allnodesL:np->src,
- np->target, ipifc->mac, pktflags);
- }
- else
- freeblist(bp);
+ } else
+ ipmove(ia, np->target);
+ icmpna(icmp->f, ia, (pktflags & Sflag)? np->src: v6allnodesL,
+ np->target, ifc->mac, pktflags);
break;
-
- case t_unitent:
- /* not clear what needs to be done. send up
- * an icmp mesg saying don't use this address? */
-
- default:
- freeblist(bp);
+ case Tunitent:
+ /*
+ * not clear what needs to be done. send up
+ * an icmp mesg saying don't use this address?
+ */
+ break;
}
-
+ freeblist(bp);
break;
case NbrAdvert:
- np = (Ndpkt*) p;
+ np = (Ndpkt*)p;
- /* if the target address matches one of the local interface
- * address and the local interface address has tentative bit set,
- * then insert into ARP table. this is so the duplication address
- * detection part of ipconfig can discover duplication through
- * the arp table
+ /*
+ * if the target address matches one of the local interface
+ * addresses and the local interface address has tentative bit
+ * set, insert into ARP table. this is so the duplicate address
+ * detection part of ipconfig can discover duplication through
+ * the arp table.
*/
- lifc = iplocalonifc(ipifc, np->target);
- if(lifc && lifc->tentative)
- refresh = 0;
- arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, refresh);
+ lifc = iplocalonifc(ifc, np->target);
+ if(lifc != nil && lifc->tentative)
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, np->target, ifc, 0);
+ else if(ipv6local(ifc, ia, 0, np->target))
+ arpenter(icmp->f, V6, np->target, np->lnaddr, 8*np->olen-2, ia, ifc, 1);
freeblist(bp);
break;
- case PacketTooBigV6:
-
default:
goticmpkt6(icmp, bp, 0);
break;
}
return;
-
raise:
freeblist(bp);
-
}
-int
+static int
icmpstats6(Proto *icmp6, char *buf, int len)
{
Icmppriv6 *priv;
@@ -874,23 +827,28 @@
e = p+len;
for(i = 0; i < Nstats6; i++)
p = seprint(p, e, "%s: %lud\n", statnames6[i], priv->stats[i]);
- for(i = 0; i <= Maxtype6; i++){
+ for(i = 0; i <= Maxtype6; i++)
if(icmpnames6[i])
- p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i], priv->in[i], priv->out[i]);
-/* else
- p = seprint(p, e, "%d: %lud %lud\n", i, priv->in[i], priv->out[i]);
-*/
- }
+ p = seprint(p, e, "%s: %lud %lud\n", icmpnames6[i],
+ priv->in[i], priv->out[i]);
return p - buf;
}
-// need to import from icmp.c
+/* import from icmp.c */
extern int icmpstate(Conv *c, char *state, int n);
extern char* icmpannounce(Conv *c, char **argv, int argc);
extern char* icmpconnect(Conv *c, char **argv, int argc);
extern void icmpclose(Conv *c);
+static void
+icmpclose6(Conv *c)
+{
+ Icmpcb6 *icb = (Icmpcb6*)c->ptcl;
+ icb->headers = 0;
+ icmpclose(c);
+}
+
void
icmp6init(Fs *fs)
{
@@ -902,7 +860,7 @@
icmp6->announce = icmpannounce;
icmp6->state = icmpstate;
icmp6->create = icmpcreate6;
- icmp6->close = icmpclose;
+ icmp6->close = icmpclose6;
icmp6->rcv = icmpiput6;
icmp6->stats = icmpstats6;
icmp6->ctl = icmpctl6;
@@ -914,4 +872,3 @@
Fsproto(fs, icmp6);
}
-
diff -u a/os/ip//igmp.c b/os/ip//igmp.c
--- a/os/ip//igmp.c
+++ b/os/ip//igmp.c
@@ -1,3 +1,7 @@
+/*
+ * igmp - internet group management protocol
+ * unfinished.
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -40,8 +44,12 @@
uchar unused;
uchar igmpcksum[2]; /* checksum of igmp portion */
uchar group[IPaddrlen]; /* multicast group */
+
+ uchar payload[];
};
+#define IGMPPKTSZ offsetof(IGMPpkt, payload[0])
+
/*
* lists for group reports
*/
@@ -49,7 +57,7 @@
struct IGMPrep
{
IGMPrep *next;
- Media *m;
+ Medium *m;
int ticks;
Multicast *multi;
};
@@ -76,19 +84,17 @@
} stats;
void
-igmpsendreport(Media *m, uchar *addr)
+igmpsendreport(Medium *m, uchar *addr)
{
IGMPpkt *p;
Block *bp;
bp = allocb(sizeof(IGMPpkt));
- if(bp == nil)
- return;
p = (IGMPpkt*)bp->wp;
p->vihl = IP_VER4;
- bp->wp += sizeof(IGMPpkt);
- memset(bp->rp, 0, sizeof(IGMPpkt));
- hnputl(p->src, Mediagetaddr(m));
+ bp->wp += IGMPPKTSZ;
+ memset(bp->rp, 0, IGMPPKTSZ);
+ hnputl(p->src, Mediumgetaddr(m));
hnputl(p->dst, Ipallsys);
p->vertype = (1<<4) | IGMPreport;
p->proto = IP_IGMPPROTO;
@@ -166,7 +172,7 @@
}
void
-igmpiput(Media *m, Ipifc *, Block *bp)
+igmpiput(Medium *m, Ipifc *, Block *bp)
{
int n;
IGMPpkt *ghp;
@@ -206,7 +212,7 @@
if(rp != nil)
break; /* already reporting */
- mp = Mediacopymulti(m);
+ mp = Mediumcopymulti(m);
if(mp == nil)
break;
@@ -285,7 +291,7 @@
igmp.ptclsize = 0;
igmpreportfn = igmpsendreport;
- kproc("igmpproc", igmpproc, 0, 0);
+ kproc("igmpproc", igmpproc, 0);
Fsproto(fs, &igmp);
}
diff -u a/os/ip//il.c b/os/ip//il.c
--- a/os/ip//il.c
+++ b/os/ip//il.c
@@ -189,7 +189,7 @@
{
Ipht ht;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
ulong csumerr; /* checksum errors */
ulong hlenerr; /* header length error */
@@ -208,7 +208,7 @@
void ilrcvmsg(Conv*, Block*);
-void ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
+int ilsendctl(Conv*, Ilhdr*, int, ulong, ulong, int);
void ilackq(Ilcb*, Block*);
void ilprocess(Conv*, Ilhdr*, Block*);
void ilpullup(Conv*);
@@ -251,6 +251,9 @@
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
+ if(c->ipversion != V4)
+ return "only IP version 4 supported";
+
return ilstart(c, IL_CONNECT, fast);
}
@@ -260,7 +263,7 @@
Ilcb *ic;
ic = (Ilcb*)(c->ptcl);
- return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d",
+ return snprint(state, n, "%s qin %d qout %d del %5.5d Br %5.5d md %5.5d una %5.5lud rex %5.5d rxq %5.5d max %5.5d\n",
ilstates[ic->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
@@ -434,7 +437,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -548,6 +551,9 @@
ih = (Ilhdr *)bp->rp;
plen = blocklen(bp);
+ if(plen > 0 && (ih->vihl&0xF0)!=IP_VER4)
+ goto raise; /* ignore non V4 packets */
+
if(plen < IL_IPSIZE+IL_HDRSIZE){
netlog(il->f, Logil, "il: hlenerr\n");
ipriv->stats[HlenErrs]++;
@@ -572,7 +578,7 @@
else
st = iltype[ih->iltype];
ipriv->stats[CsumErrs]++;
- netlog(il->f, Logil, "il: cksum %ux %ux, pkt(%s id %lud ack %lud %I/%d->%d)\n",
+ netlog(il->f, Logil, "il: cksum %ux %s, pkt(%ux id %ud ack %I/%d->%d)\n",
csum, st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -595,7 +601,7 @@
else
st = iltype[ih->iltype];
ilreject(il->f, ih); /* no channel and not sync */
- netlog(il->f, Logil, "il: no channel, pkt(%s id %lud ack %lud %I/%ud->%ud)\n",
+ netlog(il->f, Logil, "il: no channel, pkt(%s id %ud ack %ud %I/%ud->%ud)\n",
st, nhgetl(ih->ilid), nhgetl(ih->ilack), raddr, sp, dp);
goto raise;
}
@@ -829,7 +835,7 @@
c = ic->conv;
id = nhgetl(h->ilid);
- netlog(c->p->f, Logil, "il: rexmit %d %ud: %d %d: %i %d/%d\n", id, ic->recvd,
+ netlog(c->p->f, Logil, "il: rexmit %lud %lud: %d %lud: %I %d/%d\n", id, ic->recvd,
ic->rexmit, ic->timeout,
c->raddr, c->lport, c->rport);
@@ -852,7 +858,7 @@
ic = (Ilcb*)s->ptcl;
USED(ic);
- netlog(s->p->f, Logilmsg, "%11s rcv %d/%d snt %d/%d pkt(%s id %d ack %d %d->%d) ",
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud/%lud snt %lud/%lud pkt(%s id %d ack %ud %ud->%ud) ",
ilstates[ic->state], ic->rstart, ic->recvd, ic->start,
ic->next, iltype[h->iltype], nhgetl(h->ilid),
nhgetl(h->ilack), nhgets(h->ilsrc), nhgets(h->ildst));
@@ -859,7 +865,7 @@
_ilprocess(s, h, bp);
- netlog(s->p->f, Logilmsg, "%11s rcv %d snt %d\n", ilstates[ic->state], ic->recvd, ic->next);
+ netlog(s->p->f, Logilmsg, "%11s rcv %lud snt %lud\n", ilstates[ic->state], ic->recvd, ic->next);
}
void
@@ -917,17 +923,12 @@
bp->list = nil;
dlen = nhgets(oh->illen)-IL_HDRSIZE;
bp = trimblock(bp, IL_IPSIZE+IL_HDRSIZE, dlen);
+
/*
* Upper levels don't know about multiple-block
* messages so copy all into one (yick).
*/
- bp = concatblock(bp);
- if(bp == 0)
- panic("ilpullup");
- bp = packblock(bp);
- if(bp == 0)
- panic("ilpullup2");
- qpass(s->rq, bp);
+ qpass(s->rq, packblock(concatblock(bp)));
}
qunlock(&ic->outo);
}
@@ -948,7 +949,7 @@
id = nhgetl(h->ilid);
/* Window checks */
if(id <= ic->recvd || id > ic->recvd+ic->window) {
- netlog(s->p->f, Logil, "il: message outside window %ud <%ud-%ud>: %i %d/%d\n",
+ netlog(s->p->f, Logil, "il: message outside window %lud <%lud-%lud>: %I %d/%d\n",
id, ic->recvd, ic->recvd+ic->window, s->raddr, s->lport, s->rport);
freeblist(bp);
return;
@@ -983,7 +984,7 @@
qunlock(&ic->outo);
}
-void
+int
ilsendctl(Conv *ipc, Ilhdr *inih, int type, ulong id, ulong ack, int ilspec)
{
Ilhdr *ih;
@@ -1034,7 +1035,7 @@
hnputs(ih->ilsum, ptclcsum(bp, IL_IPSIZE, IL_HDRSIZE));
if(ipc==nil)
- panic("ipc is nil caller is %.8lux", getcallerpc(&ipc));
+ panic("ipc is nil caller is %#p", getcallerpc(&ipc));
if(ipc->p==nil)
panic("ipc->p is nil");
@@ -1042,7 +1043,7 @@
iltype[ih->iltype], nhgetl(ih->ilid), nhgetl(ih->ilack),
nhgets(ih->ilsrc), nhgets(ih->ildst));
- ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
+ return ipoput4(ipc->p->f, bp, 0, ttl, tos, ipc);
}
void
@@ -1145,6 +1146,8 @@
il = x;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Iltickms);
for(s = il->conv; s && *s; s++) {
@@ -1248,7 +1251,7 @@
qlock(&ipriv->apl);
if(ipriv->ackprocstarted == 0){
sprint(kpname, "#I%dilack", c->p->f->dev);
- kproc(kpname, ilackproc, c->p, 0);
+ kproc(kpname, ilackproc, c->p);
ipriv->ackprocstarted = 1;
}
qunlock(&ipriv->apl);
@@ -1280,7 +1283,8 @@
case IL_CONNECT:
ic->state = Ilsyncer;
iphtadd(&ipriv->ht, c);
- ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0);
+ if(ilsendctl(c, nil, Ilsync, ic->start, ic->recvd, 0) < 0)
+ ilhangup(c, "no route");
break;
}
@@ -1332,6 +1336,8 @@
if(s->lport == psource)
if(ipcmp(s->laddr, source) == 0)
if(ipcmp(s->raddr, dest) == 0){
+ if(s->ignoreadvice)
+ break;
qunlock(il);
ic = (Ilcb*)s->ptcl;
switch(ic->state){
@@ -1380,12 +1386,6 @@
}
}
-int
-ilgc(Proto *il)
-{
- return natgc(il->ipproto);
-}
-
void
ilinit(Fs *f)
{
@@ -1406,7 +1406,7 @@
il->advise = iladvise;
il->stats = ilxstats;
il->inuse = ilinuse;
- il->gc = ilgc;
+ il->gc = nil;
il->ipproto = IP_ILPROTO;
il->nc = scalednconv();
il->ptclsize = sizeof(Ilcb);
diff -u a/os/ip//ip.c b/os/ip//ip.c
--- a/os/ip//ip.c
+++ b/os/ip//ip.c
@@ -7,94 +7,6 @@
#include "ip.h"
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = 64*1024, /* Maximum Internet packet size */
-};
-
-#define BLKIPVER(xp) (((Ip4hdr*)((xp)->rp))->vihl&0xF0)
-
-/* MIB II counters */
-enum
-{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
-
- Nstats,
-};
-
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
-
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
-
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
static char *statnames[] =
{
[Forwarding] "Forwarding",
@@ -118,45 +30,11 @@
[FragCreates] "FragCreates",
};
-#define BLKIP(xp) ((Ip4hdr*)((xp)->rp))
-/*
- * This sleazy macro relies on the media header size being
- * larger than sizeof(Ipfrag). ipreassemble checks this is true
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
+static Block* ip4reassemble(IP*, int, Block*);
+static void ipfragfree4(IP*, Fragment4*);
+static Fragment4* ipfragallo4(IP*);
-ushort ipcsum(uchar*);
-Block* ip4reassemble(IP*, int, Block*, Ip4hdr*);
-void ipfragfree4(IP*, Fragment4*);
-Fragment4* ipfragallo4(IP*);
-
-
-void
-ip_init_6(Fs *f)
-{
- V6params *v6p;
-
- v6p = smalloc(sizeof(V6params));
-
- v6p->rp.mflag = 0; // default not managed
- v6p->rp.oflag = 0;
- v6p->rp.maxraint = 600000; // millisecs
- v6p->rp.minraint = 200000;
- v6p->rp.linkmtu = 0; // no mtu sent
- v6p->rp.reachtime = 0;
- v6p->rp.rxmitra = 0;
- v6p->rp.ttl = MAXTTL;
- v6p->rp.routerlt = 3*(v6p->rp.maxraint);
-
- v6p->hp.rxmithost = 1000; // v6 RETRANS_TIMER
-
- v6p->cdrouter = -1;
-
- f->v6p = v6p;
-
-}
-
-void
+static void
initfrag(IP *ip, int size)
{
Fragment4 *fq4, *eq4;
@@ -189,6 +67,7 @@
IP *ip;
ip = smalloc(sizeof(IP));
+ ip->stats[DefaultTTL] = MAXTTL;
initfrag(ip, 100);
f->ip = ip;
@@ -202,11 +81,11 @@
if(f->ip->iprouting==0)
f->ip->stats[Forwarding] = 2;
else
- f->ip->stats[Forwarding] = 1;
+ f->ip->stats[Forwarding] = 1;
}
int
-ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput4(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
Ipifc *ifc;
uchar *gate;
@@ -213,66 +92,41 @@
ulong fragoff;
Block *xp, *nb;
Ip4hdr *eh, *feh;
- int lid, len, seglen, chunk, dlen, blklen, offset, medialen;
- Route *r, *sr;
+ int lid, len, seglen, chunk, hlen, dlen, blklen, offset, medialen;
+ Route *r;
IP *ip;
int rv = 0;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip4hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip4hdr*)bp->rp;
+ assert(BLEN(bp) >= IP4HDR);
len = blocklen(bp);
-
- if(gating){
- chunk = nhgets(eh->length);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk < len)
- len = chunk;
- }
if(len >= IP_MAX){
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v4lookup(f, eh->dst, c);
- if(r == nil){
+ r = v4lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %V\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v4lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v4.gate;
- if(!gating)
- eh->vihl = IP_VER4|IP_HLEN4;
- eh->ttl = ttl;
- if(!gating)
- eh->tos = tos;
-
- if(!canrlock(ifc))
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
+ }
if(waserror()){
runlock(ifc);
nexterror();
@@ -280,17 +134,18 @@
if(ifc->m == nil)
goto raise;
- /* Output NAT */
- if(nato(bp, ifc, f) != 0)
- goto raise;
+ if(!gating){
+ eh->vihl = IP_VER4|IP_HLEN4;
+ eh->tos = tos;
+ }
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- if(!gating)
- hnputs(eh->id, incref(&ip->id4));
hnputs(eh->length, len);
if(!gating){
+ hnputs(eh->id, incref(&ip->id4));
eh->frag[0] = 0;
eh->frag[1] = 0;
}
@@ -297,31 +152,31 @@
eh->cksum[0] = 0;
eh->cksum[1] = 0;
hnputs(eh->cksum, ipcsum(&eh->vihl));
- ifc->m->bwrite(ifc, bp, V4, gate);
+
+ ipifcoput(ifc, bp, V4, gate);
runlock(ifc);
poperror();
return 0;
}
-if((eh->frag[0] & (IP_DF>>8)) && !gating) print("%V: DF set\n", eh->dst);
-
if(eh->frag[0] & (IP_DF>>8)){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
icmpcantfrag(f, bp, medialen);
- netlog(f, Logip, "%V: eh->frag[0] & (IP_DF>>8)\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with DF flag set\n", eh->src, eh->dst);
goto raise;
}
- seglen = (medialen - IP4HDR) & ~7;
+ hlen = (eh->vihl & 0xF)<<2;
+ seglen = (medialen - hlen) & ~7;
if(seglen < 8){
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%V seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%V -> %V: can't fragment with seglen < 8\n", eh->src, eh->dst);
goto raise;
}
- dlen = len - IP4HDR;
+ dlen = len - hlen;
xp = bp;
if(gating)
lid = nhgets(eh->id);
@@ -328,8 +183,8 @@
else
lid = incref(&ip->id4);
- offset = IP4HDR;
- while(xp != nil && offset && offset >= BLEN(xp)) {
+ offset = hlen;
+ while(offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
@@ -341,30 +196,30 @@
fragoff = 0;
dlen += fragoff;
for(; fragoff < dlen; fragoff += seglen) {
- nb = allocb(IP4HDR+seglen);
- feh = (Ip4hdr*)(nb->rp);
+ nb = allocb(hlen+seglen);
+ feh = (Ip4hdr*)nb->rp;
- memmove(nb->wp, eh, IP4HDR);
- nb->wp += IP4HDR;
+ memmove(nb->wp, eh, hlen);
+ nb->wp += hlen;
if((fragoff + seglen) >= dlen) {
seglen = dlen - fragoff;
hnputs(feh->frag, fragoff>>3);
}
- else
+ else
hnputs(feh->frag, (fragoff>>3)|IP_MF);
- hnputs(feh->length, seglen + IP4HDR);
+ hnputs(feh->length, seglen + hlen);
hnputs(feh->id, lid);
/* Copy up the data area */
chunk = seglen;
while(chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk %d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk %d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -376,12 +231,13 @@
chunk -= blklen;
if(xp->rp == xp->wp)
xp = xp->next;
- }
+ }
feh->cksum[0] = 0;
feh->cksum[1] = 0;
hnputs(feh->cksum, ipcsum(&feh->vihl));
- ifc->m->bwrite(ifc, nb, V4, gate);
+
+ ipifcoput(ifc, nb, V4, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -396,17 +252,14 @@
void
ipiput4(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos, proto, olen;
+ int hl, len, hop, tos;
+ uchar v6dst[IPaddrlen];
+ ushort frag;
Ip4hdr *h;
Proto *p;
- ushort frag;
- int notforme;
- uchar *dp, v6dst[IPaddrlen];
IP *ip;
- Route *r;
- if(BLKIPVER(bp) != IP_VER4) {
+ if((bp->rp[0]&0xF0) != IP_VER4) {
ipiput6(f, ifc, bp);
return;
}
@@ -430,58 +283,45 @@
return;
}
- h = (Ip4hdr*)(bp->rp);
-
- /* Input NAT */
- nati(bp, ifc);
-
- /* dump anything that whose header doesn't checksum */
+ h = (Ip4hdr*)bp->rp;
+ hl = (h->vihl & 0xF)<<2;
+ if(hl < IP4HDR || hl > BLEN(bp)) {
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bad ip header length: %d\n", h->src, h->dst, hl);
+ goto drop;
+ }
if((bp->flag & Bipck) == 0 && ipcsum(&h->vihl)) {
ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: checksum error %V\n", h->src);
- freeblist(bp);
+ netlog(f, Logip, "%V -> %V: bad ip header checksum\n", h->src, h->dst);
+ goto drop;
+ }
+ len = nhgets(h->length);
+ if(len < hl || (bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%V -> %V: bogus packet length: %d\n", h->src, h->dst, len);
+ if(bp != nil)
+ goto drop;
return;
}
- v4tov6(v6dst, h->dst);
- notforme = ipforme(f, v6dst) == 0;
+ h = (Ip4hdr*)bp->rp;
- /* Check header length and version */
- if((h->vihl&0x0F) != IP_HLEN4) {
- hl = (h->vihl&0xF)<<2;
- if(hl < (IP_HLEN4<<2)) {
- ip->stats[InHdrErrors]++;
- netlog(f, Logip, "ip: %V bad hivl %ux\n", h->src, h->vihl);
- freeblist(bp);
- return;
- }
- /* If this is not routed strip off the options */
- if(notforme == 0) {
- olen = nhgets(h->length);
- dp = bp->rp + (hl - (IP_HLEN4<<2));
- memmove(dp, h, IP_HLEN4<<2);
- bp->rp = dp;
- h = (Ip4hdr*)(bp->rp);
- h->vihl = (IP_VER4|IP_HLEN4);
- hnputs(h->length, olen-hl+(IP_HLEN4<<2));
- }
- }
-
/* route */
- if(notforme) {
- Conv conv;
+ v4tov6(v6dst, h->dst);
+ if(!ipforme(f, v6dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
- if(!ip->iprouting){
- freeb(bp);
- return;
- }
+ if(!ip->iprouting)
+ goto drop;
/* don't forward to source's network */
- conv.r = nil;
- r = v4lookup(f, h->dst, &conv);
- if(r == nil || r->ifc == ifc){
+ rh.r = nil;
+ r = v4lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -488,23 +328,18 @@
hop = h->ttl;
if(hop < 1) {
ip->stats[InHdrErrors]++;
- icmpttlexceeded(f, ifc->lifc->local, bp);
- freeblist(bp);
- return;
+ icmpttlexceeded(f, ifc, bp);
+ goto drop;
}
/* reassemble if the interface expects it */
-if(r->ifc == nil) panic("nil route rfc");
- if(r->ifc->reassemble){
+ if(nifc->reassemble){
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
}
@@ -511,27 +346,30 @@
ip->stats[ForwDatagrams]++;
tos = h->tos;
hop = h->ttl;
- ipoput4(f, bp, 1, hop - 1, tos, &conv);
+ ipoput4(f, bp, 1, hop - 1, tos, &rh);
return;
}
+ /* If this is not routed strip off the options */
+ if(hl > IP4HDR) {
+ hl -= IP4HDR;
+ len -= hl;
+ bp->rp += hl;
+ memmove(bp->rp, h, IP4HDR);
+ h = (Ip4hdr*)bp->rp;
+ h->vihl = IP_VER4|IP_HLEN4;
+ hnputs(h->length, len);
+ }
+
frag = nhgets(h->frag);
- if(frag) {
- h->tos = 0;
- if(frag & IP_MF)
- h->tos = 1;
- bp = ip4reassemble(ip, frag, bp, h);
+ if(frag & (IP_MF|IP_FO)) {
+ bp = ip4reassemble(ip, frag, bp);
if(bp == nil)
return;
- h = (Ip4hdr*)(bp->rp);
+ h = (Ip4hdr*)bp->rp;
}
- /* don't let any frag info go up the stack */
- h->frag[0] = 0;
- h->frag[1] = 0;
-
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -539,6 +377,7 @@
}
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -550,45 +389,43 @@
int i;
ip = f->ip;
- ip->stats[DefaultTTL] = MAXTTL;
-
p = buf;
e = p+len;
- for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], ip->stats[i]);
+ for(i = 0; i < Nipstats; i++)
+ p = seprint(p, e, "%s: %llud\n", statnames[i], ip->stats[i]);
return p - buf;
}
-Block*
-ip4reassemble(IP *ip, int offset, Block *bp, Ip4hdr *ih)
+static Block*
+ip4reassemble(IP *ip, int offset, Block *bp)
{
- int fend;
+ int ovlap, fragsize, len;
+ ulong src, dst;
ushort id;
+ Block *bl, **l, *prev;
Fragment4 *f, *fnext;
- ulong src, dst;
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Ipfrag *fp, *fq;
+ Ip4hdr *ih;
+ /*
+ * block lists are too hard, concatblock into a single block
+ */
+ bp = concatblock(bp);
+
+ ih = (Ip4hdr*)bp->rp;
src = nhgetl(ih->src);
dst = nhgetl(ih->dst);
id = nhgets(ih->id);
+ fragsize = BLEN(bp) - ((ih->vihl&0xF)<<2);
- /*
- * block lists are too hard, pullupblock into a single block
- */
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip4hdr*)(bp->rp);
- }
-
qlock(&ip->fraglock4);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead4; f; f = fnext){
+ for(f = ip->flisthead4; f != nil; f = fnext){
fnext = f->next; /* because ipfragfree4 changes the list */
- if(f->src == src && f->dst == dst && f->id == id)
+ if(f->id == id && f->src == src && f->dst == dst)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -601,22 +438,23 @@
* and get rid of any fragments that might go
* with it.
*/
- if(!ih->tos && (offset & ~(IP_MF|IP_DF)) == 0) {
+ if((offset & (IP_MF|IP_FO)) == 0) {
if(f != nil) {
- ipfragfree4(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree4(ip, f);
}
qunlock(&ip->fraglock4);
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset<<3;
- BKFG(bp)->flen = nhgets(ih->length)-IP4HDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = (offset & IP_FO)<<3;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -627,8 +465,9 @@
f->blist = bp;
- qunlock(&ip->fraglock4);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock4);
+
return nil;
}
@@ -638,7 +477,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -645,15 +484,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock4);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -662,26 +502,26 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
if(ovlap <= 0)
break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, IP4HDR);
- (*l)->rp += ovlap;
+ if(ovlap < fq->flen) {
+ /* move up ip header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -689,35 +529,50 @@
* look for a complete packet. if we get to a fragment
* without IP_MF set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
- if((BLKIP(bl)->frag[0]&(IP_MF>>8)) == 0) {
- bl = f->blist;
- len = nhgets(BLKIP(bl)->length);
- bl->wp = bl->rp + len;
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += IP4HDR;
- bl->wp = bl->rp + fragsize;
- }
+ ih = (Ip4hdr*)bl->rp;
+ if(ih->frag[0]&(IP_MF>>8))
+ continue;
- bl = f->blist;
- f->blist = nil;
+ bl = f->blist;
+ len = BLEN(bl);
+
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
+
+ if(len >= IP_MAX){
ipfragfree4(ip, f);
- ih = BLKIP(bl);
- hnputs(ih->length, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock4);
- ip->stats[ReasmOKs]++;
- return bl;
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree4(ip, f);
+
+ ih = (Ip4hdr*)bl->rp;
+ ih->frag[0] = 0;
+ ih->frag[1] = 0;
+ hnputs(ih->length, len);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock4);
+
+ return bl;
}
qunlock(&ip->fraglock4);
return nil;
@@ -726,20 +581,20 @@
/*
* ipfragfree4 - Free a list of fragments - assume hold fraglock4
*/
-void
+static void
ipfragfree4(IP *ip, Fragment4 *frag)
{
Fragment4 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- frag->src = 0;
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ frag->src = 0;
+ frag->dst = 0;
l = &ip->flisthead4;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -755,7 +610,7 @@
/*
* ipfragallo4 - allocate a reassembly queue - assume hold fraglock4
*/
-Fragment4 *
+static Fragment4*
ipfragallo4(IP *ip)
{
Fragment4 *f;
@@ -762,7 +617,7 @@
while(ip->fragfree4 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead4; f->next; f = f->next)
+ for(f = ip->flisthead4; f->next != nil; f = f->next)
;
ipfragfree4(ip, f);
}
diff -u a/os/ip//ip.h b/os/ip//ip.h
--- a/os/ip//ip.h
+++ b/os/ip//ip.h
@@ -1,35 +1,33 @@
typedef struct Conv Conv;
+typedef struct Fragment4 Fragment4;
+typedef struct Fragment6 Fragment6;
typedef struct Fs Fs;
typedef union Hwaddr Hwaddr;
typedef struct IP IP;
typedef struct IPaux IPaux;
+typedef struct Ip4hdr Ip4hdr;
+typedef struct Ipfrag Ipfrag;
typedef struct Ipself Ipself;
typedef struct Ipselftab Ipselftab;
typedef struct Iplink Iplink;
typedef struct Iplifc Iplifc;
typedef struct Ipmulti Ipmulti;
-typedef struct IProuter IProuter;
typedef struct Ipifc Ipifc;
typedef struct Iphash Iphash;
typedef struct Ipht Ipht;
typedef struct Netlog Netlog;
-typedef struct Ifclog Ifclog;
typedef struct Medium Medium;
typedef struct Proto Proto;
typedef struct Arpent Arpent;
typedef struct Arp Arp;
typedef struct Route Route;
+typedef struct Routehint Routehint;
typedef struct Routerparams Routerparams;
typedef struct Hostparams Hostparams;
-typedef struct V6router V6router;
-typedef struct V6params V6params;
+typedef struct v6params v6params;
-typedef struct Ip4hdr Ip4hdr;
-typedef struct Nat Nat;
-
#pragma incomplete Arp
-#pragma incomplete Ifclog
#pragma incomplete Ipself
#pragma incomplete Ipselftab
#pragma incomplete IP
@@ -39,10 +37,9 @@
{
Addrlen= 64,
Maxproto= 20,
- Nhash= 64,
- Maxincall= 5,
- Nchans= 16383,
- MAClen= 16, /* longest mac address */
+ Maxincall= 10,
+ Nchans= 1024,
+ MAClen= 8, /* longest mac address */
MAXTTL= 255,
DFLTTOS= 0,
@@ -57,6 +54,12 @@
V6= 6,
IP_VER4= 0x40,
IP_VER6= 0x60,
+ IP_HLEN4= 5, /* v4: Header length in words */
+ IP_DF= 0x4000, /* v4: Don't fragment */
+ IP_MF= 0x2000, /* v4: More fragments */
+ IP_FO= 0x1fff, /* v4: Fragment offset */
+ IP4HDR= IP_HLEN4<<2, /* sizeof(Ip4hdr) */
+ IP_MAX= 64*1024, /* Max. Internet packet size, v4 & v6 */
/* 2^Lroot trees in the root table */
Lroot= 10,
@@ -73,6 +76,79 @@
Connected= 4,
};
+/* MIB II counters */
+enum
+{
+ Forwarding,
+ DefaultTTL,
+ InReceives,
+ InHdrErrors,
+ InAddrErrors,
+ ForwDatagrams,
+ InUnknownProtos,
+ InDiscards,
+ InDelivers,
+ OutRequests,
+ OutDiscards,
+ OutNoRoutes,
+ ReasmTimeout,
+ ReasmReqds,
+ ReasmOKs,
+ ReasmFails,
+ FragOKs,
+ FragFails,
+ FragCreates,
+
+ Nipstats,
+};
+
+struct Fragment4
+{
+ Block* blist;
+ Fragment4* next;
+ ulong src;
+ ulong dst;
+ ushort id;
+ ulong age;
+};
+
+struct Fragment6
+{
+ Block* blist;
+ Fragment6* next;
+ uchar src[IPaddrlen];
+ uchar dst[IPaddrlen];
+ uint id;
+ ulong age;
+};
+
+struct Ipfrag
+{
+ ushort foff;
+ ushort flen;
+ uchar payload[];
+};
+
+#define IPFRAGSZ offsetof(Ipfrag, payload[0])
+
+/* an instance of IP */
+struct IP
+{
+ uvlong stats[Nipstats];
+
+ QLock fraglock4;
+ Fragment4* flisthead4;
+ Fragment4* fragfree4;
+ Ref id4;
+
+ QLock fraglock6;
+ Fragment6* flisthead6;
+ Fragment6* fragfree6;
+ Ref id6;
+
+ int iprouting; /* true if we route like a gateway */
+};
+
/* on the wire packet header */
struct Ip4hdr
{
@@ -86,9 +162,14 @@
uchar cksum[2]; /* Header checksum */
uchar src[4]; /* IP source */
uchar dst[4]; /* IP destination */
- uchar data[1]; /* start of data */
};
+struct Routehint
+{
+ Route *r; /* last route used */
+ ulong rgen; /* routetable generation for *r */
+};
+
/*
* one per conversation directory
*/
@@ -100,9 +181,9 @@
Proto* p;
int restricted; /* remote port is restricted */
+ int ignoreadvice; /* don't terminate connection on icmp errors */
uint ttl; /* max time to live */
uint tos; /* type of service */
- int ignoreadvice; /* don't terminate connection on icmp errors */
uchar ipversion;
uchar laddr[IPaddrlen]; /* local IP address */
@@ -139,8 +220,7 @@
void* ptcl; /* protocol specific stuff */
- Route *r; /* last route used */
- ulong rgen; /* routetable generation for *r */
+ Routehint;
};
struct Medium
@@ -161,18 +241,8 @@
/* process packets written to 'data' */
void (*pktin)(Fs *f, Ipifc *ifc, Block *bp);
- /* routes for router boards */
- void (*addroute)(Ipifc *ifc, int, uchar*, uchar*, uchar*, int);
- void (*remroute)(Ipifc *ifc, int, uchar*, uchar*);
- void (*flushroutes)(Ipifc *ifc);
-
- /* for routing multicast groups */
- void (*joinmulti)(Ipifc *ifc, uchar *a, uchar *ia);
- void (*leavemulti)(Ipifc *ifc, uchar *a, uchar *ia);
-
/* address resolution */
- void (*ares)(Fs*, int, uchar*, uchar*, int, int); /* resolve */
- void (*areg)(Ipifc*, uchar*); /* register */
+ void (*areg)(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip);
/* v6 address generation */
void (*pref2addr)(uchar *pref, uchar *ea);
@@ -187,12 +257,13 @@
uchar mask[IPaddrlen];
uchar remote[IPaddrlen];
uchar net[IPaddrlen];
+ uchar type; /* route type */
uchar tentative; /* =1 => v6 dup disc on, =0 => confirmed unique */
uchar onlink; /* =1 => onlink, =0 offlink. */
uchar autoflag; /* v6 autonomous flag */
- long validlt; /* v6 valid lifetime */
- long preflt; /* v6 preferred lifetime */
- long origint; /* time when addr was added */
+ ulong validlt; /* v6 valid lifetime */
+ ulong preflt; /* v6 preferred lifetime */
+ ulong origint; /* time when addr was added */
Iplink *link; /* addresses linked to this lifc */
Iplifc *next;
};
@@ -203,25 +274,25 @@
Ipself *self;
Iplifc *lifc;
Iplink *selflink; /* next link for this local address */
- Iplink *lifclink; /* next link for this ifc */
- ulong expire;
+ Iplink *lifclink; /* next link for this lifc */
Iplink *next; /* free list */
+ ulong expire;
int ref;
};
-/* rfc 2461, pp.40--43. */
+/* rfc 2461, pp.40—43. */
/* default values, one per stack */
struct Routerparams {
- int mflag;
- int oflag;
- int maxraint;
- int minraint;
- int linkmtu;
- int reachtime;
- int rxmitra;
- int ttl;
- int routerlt;
+ int mflag; /* flag: managed address configuration */
+ int oflag; /* flag: other stateful configuration */
+ int maxraint; /* max. router adv interval (ms) */
+ int minraint; /* min. router adv interval (ms) */
+ int linkmtu; /* mtu options */
+ int reachtime; /* reachable time */
+ int rxmitra; /* retransmit interval */
+ int ttl; /* cur hop count limit */
+ int routerlt; /* router lifetime */
};
struct Hostparams {
@@ -231,22 +302,18 @@
struct Ipifc
{
RWlock;
-
+
Conv *conv; /* link to its conversation structure */
char dev[64]; /* device we're attached to */
Medium *m; /* Media pointer */
int maxtu; /* Maximum transfer unit */
int mintu; /* Minumum tranfer unit */
- int mbps; /* megabits per second */
void *arg; /* medium specific */
- int reassemble; /* reassemble IP packets before forwarding */
- /* these are used so that we can unbind on the fly */
- Lock idlock;
+ uchar reflect; /* allow forwarded packets to go out the same interface */
+ uchar reassemble; /* reassemble IP packets before forwarding to this interface */
+
uchar ifcid; /* incremented each 'bind/unbind/add/remove' */
- int ref; /* number of proc's using this ipifc */
- Rendez wait; /* where unbinder waits for ref == 0 */
- int unbinding;
uchar mac[MAClen]; /* MAC address */
@@ -255,10 +322,16 @@
ulong in, out; /* message statistics */
ulong inerr, outerr; /* ... */
- uchar sendra6; /* == 1 => send router advs on this ifc */
- uchar recvra6; /* == 1 => recv router advs on this ifc */
- Routerparams rp; /* router parameters as in RFC 2461, pp.40--43.
+ uchar sendra6; /* flag: send router advs on this ifc */
+ uchar recvra6; /* flag: recv router advs on this ifc */
+ Routerparams rp; /* router parameters as in RFC 2461, pp.40—43.
used only if node is router */
+
+ int speed; /* link speed in bits per second */
+ int delay; /* burst delay in ms */
+ int burst; /* burst delay in bytes */
+ int load; /* bytes in flight */
+ ulong ticks;
};
/*
@@ -330,20 +403,11 @@
int nc; /* number of conversations */
int ac;
Qid qid; /* qid for protocol directory */
- ushort nextport;
ushort nextrport;
void *priv;
};
-/*
- * Stream for sending packets to user level
- */
-struct IProuter {
- QLock;
- int opens;
- Queue *q;
-};
/*
* one per IP protocol stack
@@ -362,8 +426,7 @@
IP *ip;
Ipselftab *self;
Arp *arp;
- V6params *v6p;
- IProuter iprouter;
+ v6params *v6p;
Route *v4root[1<<Lroot]; /* v4 routing forest */
Route *v6root[1<<Lroot]; /* v6 routing forest */
@@ -370,7 +433,6 @@
Route *queue; /* used as temp when reinjecting routes */
Netlog *alog;
- Ifclog *ilog;
char ndb[1024]; /* an ndb entry for this interface */
int ndbvers;
@@ -377,23 +439,10 @@
long ndbmtime;
};
-/* one per default router known to host */
-struct V6router {
- uchar inuse;
- Ipifc *ifc;
- int ifcid;
- uchar routeraddr[IPaddrlen];
- long ltorigin;
- Routerparams rp;
-};
-
-struct V6params
+struct v6params
{
Routerparams rp; /* v6 params, one copy per node now */
Hostparams hp;
- V6router v6rlist[3]; /* max 3 default routers, currently */
- int cdrouter; /* uses only v6rlist[cdrouter] if */
- /* cdrouter >= 0. */
};
@@ -410,8 +459,7 @@
char* Fsstdbind(Conv*, char**, int);
ulong scalednconv(void);
void closeconv(Conv*);
-
-/*
+/*
* logging
*/
enum
@@ -434,7 +482,6 @@
Logrudpmsg= 1<<16,
Logesp= 1<<17,
Logtcpwin= 1<<18,
- Lognat= 1<<19,
};
void netloginit(Fs*);
@@ -449,17 +496,17 @@
void ifclogopen(Fs*, Chan*);
void ifclogclose(Fs*, Chan*);
+#pragma varargck argpos netlog 3
+
/*
* iproute.c
*/
typedef struct RouteTree RouteTree;
-typedef struct Routewalk Routewalk;
typedef struct V4route V4route;
typedef struct V6route V6route;
enum
{
-
/* type bits */
Rv4= (1<<0), /* this is a version 4 route */
Rifc= (1<<1), /* this route is a directly connected interface */
@@ -468,27 +515,18 @@
Rbcast= (1<<4), /* a broadcast self address */
Rmulti= (1<<5), /* a multicast self address */
Rproxy= (1<<6), /* this route should be proxied */
+ Rsrc= (1<<7), /* source specific route */
};
-struct Routewalk
-{
- int o;
- int h;
- char* p;
- char* e;
- void* state;
- void (*walk)(Route*, Routewalk*);
-};
-
struct RouteTree
{
- Route* right;
- Route* left;
- Route* mid;
+ Route *mid;
+ Route *left;
+ Route *right;
+ Ipifc *ifc;
+ uchar ifcid; /* must match ifc->id */
uchar depth;
uchar type;
- uchar ifcid; /* must match ifc->id */
- Ipifc *ifc;
char tag[4];
int ref;
};
@@ -497,6 +535,10 @@
{
ulong address;
ulong endaddress;
+
+ ulong source;
+ ulong endsource;
+
uchar gate[IPv4addrlen];
};
@@ -504,6 +546,10 @@
{
ulong address[IPllen];
ulong endaddress[IPllen];
+
+ ulong source[IPllen];
+ ulong endsource[IPllen];
+
uchar gate[IPaddrlen];
};
@@ -516,17 +562,16 @@
V4route v4;
};
};
-extern void v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type);
-extern void v4delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern void v6delroute(Fs *f, uchar *a, uchar *mask, int dolock);
-extern Route* v4lookup(Fs *f, uchar *a, Conv *c);
-extern Route* v6lookup(Fs *f, uchar *a, Conv *c);
+
+extern void addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern void remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag);
+extern Route* v4lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v6lookup(Fs *f, uchar *a, uchar *s, Routehint *h);
+extern Route* v4source(Fs *f, uchar *a, uchar *s);
+extern Route* v6source(Fs *f, uchar *a, uchar *s);
extern long routeread(Fs *f, char*, ulong, int);
extern long routewrite(Fs *f, Chan*, char*, int);
-extern void routetype(int, char*);
-extern void ipwalkroutes(Fs*, Routewalk*);
-extern void convroute(Route*, uchar*, uchar*, uchar*, char*, int*);
+extern void routetype(int type, char p[8]);
/*
* devip.c
@@ -543,7 +588,6 @@
};
extern IPaux* newipaux(char*, char*);
-extern void setlport(Conv*);
/*
* arp.c
@@ -552,18 +596,16 @@
{
uchar ip[IPaddrlen];
uchar mac[MAClen];
- Medium *type; /* media type */
- Arpent* hash;
- Block* hold;
- Block* last;
- uint ctime; /* time entry was created or refreshed */
- uint utime; /* time entry was last used */
- uchar state;
+ Arpent *hash;
Arpent *nextrxt; /* re-transmit chain */
- uint rtime; /* time for next retransmission */
- uchar rxtsrem;
+ Block *hold;
+ Block *last;
Ipifc *ifc;
uchar ifcid; /* must match ifc->id */
+ uchar state;
+ uchar rxtsrem; /* re-tranmissions remaining */
+ ulong ctime; /* time entry was created or refreshed */
+ ulong utime; /* time entry was last used */
};
extern void arpinit(Fs*);
@@ -572,15 +614,17 @@
extern Arpent* arpget(Arp*, Block *bp, int version, Ipifc *ifc, uchar *ip, uchar *h);
extern void arprelease(Arp*, Arpent *a);
extern Block* arpresolve(Arp*, Arpent *a, Medium *type, uchar *mac);
-extern void arpenter(Fs*, int version, uchar *ip, uchar *mac, int len, int norefresh);
+extern int arpenter(Fs*, int version, uchar *ip, uchar *mac, int n, uchar *ia, Ipifc *ifc, int refresh);
+extern void ndpsendsol(Fs*, Ipifc*, Arpent*);
/*
* ipaux.c
*/
-extern int myetheraddr(uchar*, char*);
-extern ulong parseip(uchar*, char*);
-extern ulong parseipmask(uchar*, char*);
+extern int parseether(uchar*, char*);
+extern vlong parseip(uchar*, char*);
+extern vlong parseipmask(uchar*, char*, int);
+extern vlong parseipandmask(uchar*, uchar*, char*, char*);
extern char* v4parseip(uchar*, char*);
extern void maskip(uchar *from, uchar *mask, uchar *to);
extern int parsemac(uchar *to, char *from, int len);
@@ -589,12 +633,10 @@
extern void v4tov6(uchar *v6, uchar *v4);
extern int v6tov4(uchar *v4, uchar *v6);
extern int eipfmt(Fmt*);
+extern int convipvers(Conv *c);
#define ipmove(x, y) memmove(x, y, IPaddrlen)
#define ipcmp(x, y) ( (x)[IPaddrlen-1] != (y)[IPaddrlen-1] || memcmp(x, y, IPaddrlen) )
-
-#define ip4move(x, y) memmove(x, y, IPv4addrlen)
-#define ip4cmp(x, y) ( (x)[IPv4addrlen-1] != (y)[IPv4addrlen-1] || memcmp(x, y, IPv4addrlen) )
extern uchar IPv4bcast[IPaddrlen];
extern uchar IPv4bcastobs[IPaddrlen];
@@ -612,7 +654,6 @@
extern Medium ethermedium;
extern Medium nullmedium;
extern Medium pktmedium;
-extern Medium tripmedium;
/*
* ipifc.c
@@ -619,33 +660,24 @@
*/
extern Medium* ipfindmedium(char *name);
extern void addipmedium(Medium *med);
+extern void ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip);
extern int ipforme(Fs*, uchar *addr);
-extern int iptentative(Fs*, uchar *addr);
-extern int ipisbm(uchar *);
-extern int ipismulticast(uchar *);
-extern Ipifc* findipifc(Fs*, uchar *remote, int type);
-extern void findprimaryip(Fs*, uchar*);
+extern int ipismulticast(uchar *ip);
+extern Ipifc* findipifc(Fs*, uchar *local, uchar *remote, int type);
+extern Ipifc* findipifcstr(Fs *f, char *s);
extern void findlocalip(Fs*, uchar *local, uchar *remote);
-extern int ipv4local(Ipifc *ifc, uchar *addr);
-extern int ipv6local(Ipifc *ifc, uchar *addr);
-extern int ipv6anylocal(Ipifc *ifc, uchar *addr);
+extern int ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
+extern int ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote);
extern Iplifc* iplocalonifc(Ipifc *ifc, uchar *ip);
+extern Iplifc* ipremoteonifc(Ipifc *ifc, uchar *ip);
extern int ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip);
-extern int ipismulticast(uchar *ip);
-extern int ipisbooting(void);
-extern int ipifccheckin(Ipifc *ifc, Medium *med);
-extern void ipifccheckout(Ipifc *ifc);
-extern int ipifcgrab(Ipifc *ifc);
-extern void ipifcaddroute(Fs*, int, uchar*, uchar*, uchar*, int);
-extern void ipifcremroute(Fs*, int, uchar*, uchar*);
extern void ipifcremmulti(Conv *c, uchar *ma, uchar *ia);
extern void ipifcaddmulti(Conv *c, uchar *ma, uchar *ia);
extern char* ipifcrem(Ipifc *ifc, char **argv, int argc);
extern char* ipifcadd(Ipifc *ifc, char **argv, int argc, int tentative, Iplifc *lifcp);
extern long ipselftabread(Fs*, char *a, ulong offset, int n);
-extern char* ipifcaddpref6(Ipifc *ifc, char**argv, int argc);
-extern void ipsendra6(Fs *f, int on);
-
+extern char* ipifcadd6(Ipifc *ifc, char**argv, int argc);
+extern char* ipifcremove6(Ipifc *ifc, char**argv, int argc);
/*
* ip.c
*/
@@ -652,37 +684,26 @@
extern void iprouting(Fs*, int);
extern void icmpnoconv(Fs*, Block*);
extern void icmpcantfrag(Fs*, Block*, int);
-extern void icmpttlexceeded(Fs*, uchar*, Block*);
+extern void icmpttlexceeded(Fs*, Ipifc*, Block*);
extern ushort ipcsum(uchar*);
extern void ipiput4(Fs*, Ipifc*, Block*);
extern void ipiput6(Fs*, Ipifc*, Block*);
-extern int ipoput4(Fs*, Block*, int, int, int, Conv*);
-extern int ipoput6(Fs*, Block*, int, int, int, Conv*);
+extern int ipoput4(Fs*, Block*, int, int, int, Routehint*);
+extern int ipoput6(Fs*, Block*, int, int, int, Routehint*);
extern int ipstats(Fs*, char*, int);
extern ushort ptclbsum(uchar*, int);
extern ushort ptclcsum(Block*, int, int);
extern void ip_init(Fs*);
-extern void update_mtucache(uchar*, ulong);
-extern ulong restrict_mtu(uchar*, ulong);
+extern void ip_init_6(Fs*);
/*
* bootp.c
*/
-char* (*bootp)(Ipifc*);
-int (*bootpread)(char*, ulong, int);
+extern int bootpread(char*, ulong, int);
/*
- * iprouter.c
- */
-void useriprouter(Fs*, Ipifc*, Block*);
-void iprouteropen(Fs*);
-void iprouterclose(Fs*);
-long iprouterread(Fs*, void*, int);
-
-/*
* resolving inferno/plan9 differences
*/
-Chan* commonfdtochan(int, int, int, int);
char* commonuser(void);
char* commonerror(void);
@@ -695,15 +716,3 @@
* global to all of the stack
*/
extern void (*igmpreportfn)(Ipifc*, uchar*);
-
-/*
- * nat.c
- */
-extern int nato(Block*, Ipifc*, Fs*);
-extern void nati(Block*, Ipifc*);
-extern int natgc(uchar);
-
-extern int addnataddr(uchar*, uchar*, Iplifc*);
-extern int removenataddr(uchar*, uchar*, Iplifc*);
-extern void shownataddr(void);
-extern void flushnataddr(void);
diff -u a/os/ip//ipaux.c b/os/ip//ipaux.c
--- a/os/ip//ipaux.c
+++ b/os/ip//ipaux.c
@@ -5,49 +5,8 @@
#include "fns.h"
#include "../port/error.h"
#include "ip.h"
-#include "ipv6.h"
+#include "ipv6.h"
-/*
- * well known IP addresses
- */
-uchar IPv4bcast[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-uchar IPv4allsys[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x01
-};
-uchar IPv4allrouter[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0xe0, 0, 0, 0x02
-};
-uchar IPallbits[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff
-};
-
-uchar IPnoaddr[IPaddrlen];
-
-/*
- * prefix of all v4 addresses
- */
-uchar v4prefix[IPaddrlen] = {
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0xff, 0xff,
- 0, 0, 0, 0
-};
-
-
char *v6hdrtypes[Maxhdrtype] =
{
[HBH] "HopbyHop",
@@ -54,7 +13,7 @@
[ICMP] "ICMP",
[IGMP] "IGMP",
[GGP] "GGP",
- [IPINIP] "IP",
+ [IPINIP] "IP",
[ST] "ST",
[TCP] "TCP",
[UDP] "UDP",
@@ -87,6 +46,7 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+
uchar v6linklocal[IPaddrlen] = {
0xfe, 0x80, 0, 0,
0, 0, 0, 0,
@@ -99,26 +59,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6llpreflen = 8; // link-local prefix length
-uchar v6sitelocal[IPaddrlen] = {
- 0xfe, 0xc0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-uchar v6sitelocalmask[IPaddrlen] = {
- 0xff, 0xff, 0xff, 0xff,
- 0xff, 0xff, 0xff, 0xff,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6slpreflen = 6; // site-local prefix length
-uchar v6glunicast[IPaddrlen] = {
- 0x08, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
+int v6llpreflen = 8; /* link-local prefix length in bytes */
+
uchar v6multicast[IPaddrlen] = {
0xff, 0, 0, 0,
0, 0, 0, 0,
@@ -131,7 +73,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6mcpreflen = 1; // multicast prefix length
+int v6mcpreflen = 1; /* multicast prefix length */
+
uchar v6allnodesN[IPaddrlen] = {
0xff, 0x01, 0, 0,
0, 0, 0, 0,
@@ -138,6 +81,12 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
+uchar v6allroutersN[IPaddrlen] = { /* bytes ff 01 00 ... 00 02, i.e. ff01::2; presumably the node-local all-routers group -- confirm */
+ 0xff, 0x01, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0x02
+};
uchar v6allnodesNmask[IPaddrlen] = {
0xff, 0xff, 0, 0,
0, 0, 0, 0,
@@ -144,7 +93,8 @@
0, 0, 0, 0,
0, 0, 0, 0
};
-int v6aNpreflen = 2; // all nodes (N) prefix
+int v6aNpreflen = 2; /* all nodes (N) prefix */
+
uchar v6allnodesL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -151,19 +101,6 @@
0, 0, 0, 0,
0, 0, 0, 0x01
};
-uchar v6allnodesLmask[IPaddrlen] = {
- 0xff, 0xff, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0
-};
-int v6aLpreflen = 2; // all nodes (L) prefix
-uchar v6allroutersN[IPaddrlen] = {
- 0xff, 0x01, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0,
- 0, 0, 0, 0x02
-};
uchar v6allroutersL[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -170,12 +107,14 @@
0, 0, 0, 0,
0, 0, 0, 0x02
};
-uchar v6allroutersS[IPaddrlen] = {
- 0xff, 0x05, 0, 0,
+uchar v6allnodesLmask[IPaddrlen] = {
+ 0xff, 0xff, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
- 0, 0, 0, 0x02
+ 0, 0, 0, 0
};
+int v6aLpreflen = 2; /* all nodes (L) prefix */
+
uchar v6solicitednode[IPaddrlen] = {
0xff, 0x02, 0, 0,
0, 0, 0, 0,
@@ -190,9 +129,6 @@
};
int v6snpreflen = 13;
-
-
-
ushort
ptclcsum(Block *bp, int offset, int len)
{
@@ -215,7 +151,7 @@
if(bp->next == nil) {
if(blocklen < len)
len = blocklen;
- return ~ptclbsum(addr, len) & 0xffff;
+ return ptclbsum(addr, len) ^ 0xffff;
}
losum = 0;
@@ -247,7 +183,7 @@
while((csum = losum>>16) != 0)
losum = csum + (losum & 0xffff);
- return ~losum & 0xffff;
+ return losum ^ 0xffff;
}
enum
@@ -255,306 +191,9 @@
Isprefix= 16,
};
-static uchar prefixvals[256] =
-{
-[0x00] 0 | Isprefix,
-[0x80] 1 | Isprefix,
-[0xC0] 2 | Isprefix,
-[0xE0] 3 | Isprefix,
-[0xF0] 4 | Isprefix,
-[0xF8] 5 | Isprefix,
-[0xFC] 6 | Isprefix,
-[0xFE] 7 | Isprefix,
-[0xFF] 8 | Isprefix,
-};
-
-int
-eipfmt(Fmt *f)
-{
- char buf[5*8];
- static char *efmt = "%.2lux%.2lux%.2lux%.2lux%.2lux%.2lux";
- static char *ifmt = "%d.%d.%d.%d";
- uchar *p, ip[16];
- ulong *lp;
- ushort s;
- int i, j, n, eln, eli;
-
- switch(f->r) {
- case 'E': /* Ethernet address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, efmt, p[0], p[1], p[2], p[3], p[4], p[5]);
-
- case 'I': /* Ip address */
- p = va_arg(f->args, uchar*);
-common:
- if(memcmp(p, v4prefix, 12) == 0)
- return fmtprint(f, ifmt, p[12], p[13], p[14], p[15]);
-
- /* find longest elision */
- eln = eli = -1;
- for(i = 0; i < 16; i += 2){
- for(j = i; j < 16; j += 2)
- if(p[j] != 0 || p[j+1] != 0)
- break;
- if(j > i && j - i > eln){
- eli = i;
- eln = j - i;
- }
- }
-
- /* print with possible elision */
- n = 0;
- for(i = 0; i < 16; i += 2){
- if(i == eli){
- n += sprint(buf+n, "::");
- i += eln;
- if(i >= 16)
- break;
- } else if(i != 0)
- n += sprint(buf+n, ":");
- s = (p[i]<<8) + p[i+1];
- n += sprint(buf+n, "%ux", s);
- }
- return fmtstrcpy(f, buf);
-
- case 'i': /* v6 address as 4 longs */
- lp = va_arg(f->args, ulong*);
- for(i = 0; i < 4; i++)
- hnputl(ip+4*i, *lp++);
- p = ip;
- goto common;
-
- case 'V': /* v4 ip address */
- p = va_arg(f->args, uchar*);
- return fmtprint(f, ifmt, p[0], p[1], p[2], p[3]);
-
- case 'M': /* ip mask */
- p = va_arg(f->args, uchar*);
-
- /* look for a prefix mask */
- for(i = 0; i < 16; i++)
- if(p[i] != 0xff)
- break;
- if(i < 16){
- if((prefixvals[p[i]] & Isprefix) == 0)
- goto common;
- for(j = i+1; j < 16; j++)
- if(p[j] != 0)
- goto common;
- n = 8*i + (prefixvals[p[i]] & ~Isprefix);
- } else
- n = 8*16;
-
- /* got one, use /xx format */
- return fmtprint(f, "/%d", n);
- }
- return fmtstrcpy(f, "(eipfmt)");
-}
-
#define CLASS(p) ((*(uchar*)(p))>>6)
-extern char*
-v4parseip(uchar *to, char *from)
-{
- int i;
- char *p;
-
- p = from;
- for(i = 0; i < 4 && *p; i++){
- to[i] = strtoul(p, &p, 0);
- if(*p == '.')
- p++;
- }
- switch(CLASS(to)){
- case 0: /* class A - 1 uchar net */
- case 1:
- if(i == 3){
- to[3] = to[2];
- to[2] = to[1];
- to[1] = 0;
- } else if(i == 2){
- to[3] = to[1];
- to[1] = 0;
- }
- break;
- case 2: /* class B - 2 uchar net */
- if(i == 3){
- to[3] = to[2];
- to[2] = 0;
- }
- break;
- }
- return p;
-}
-
-int
-isv4(uchar *ip)
-{
- return memcmp(ip, v4prefix, IPv4off) == 0;
-}
-
-
-/*
- * the following routines are unrolled with no memset's to speed
- * up the usual case
- */
void
-v4tov6(uchar *v6, uchar *v4)
-{
- v6[0] = 0;
- v6[1] = 0;
- v6[2] = 0;
- v6[3] = 0;
- v6[4] = 0;
- v6[5] = 0;
- v6[6] = 0;
- v6[7] = 0;
- v6[8] = 0;
- v6[9] = 0;
- v6[10] = 0xff;
- v6[11] = 0xff;
- v6[12] = v4[0];
- v6[13] = v4[1];
- v6[14] = v4[2];
- v6[15] = v4[3];
-}
-
-int
-v6tov4(uchar *v4, uchar *v6)
-{
- if(v6[0] == 0
- && v6[1] == 0
- && v6[2] == 0
- && v6[3] == 0
- && v6[4] == 0
- && v6[5] == 0
- && v6[6] == 0
- && v6[7] == 0
- && v6[8] == 0
- && v6[9] == 0
- && v6[10] == 0xff
- && v6[11] == 0xff)
- {
- v4[0] = v6[12];
- v4[1] = v6[13];
- v4[2] = v6[14];
- v4[3] = v6[15];
- return 0;
- } else {
- memset(v4, 0, 4);
- return -1;
- }
-}
-
-ulong
-parseip(uchar *to, char *from)
-{
- int i, elipsis = 0, v4 = 1;
- ulong x;
- char *p, *op;
-
- memset(to, 0, IPaddrlen);
- p = from;
- for(i = 0; i < 16 && *p; i+=2){
- op = p;
- x = strtoul(p, &p, 16);
- if(*p == '.' || (*p == 0 && i == 0)){
- p = v4parseip(to+i, op);
- i += 4;
- break;
- } else {
- to[i] = x>>8;
- to[i+1] = x;
- }
- if(*p == ':'){
- v4 = 0;
- if(*++p == ':'){
- elipsis = i+2;
- p++;
- }
- }
- }
- if(i < 16){
- memmove(&to[elipsis+16-i], &to[elipsis], i-elipsis);
- memset(&to[elipsis], 0, 16-i);
- }
- if(v4){
- to[10] = to[11] = 0xff;
- return nhgetl(to+12);
- } else
- return 6;
-}
-
-/*
- * hack to allow ip v4 masks to be entered in the old
- * style
- */
-ulong
-parseipmask(uchar *to, char *from)
-{
- ulong x;
- int i;
- uchar *p;
-
- if(*from == '/'){
- /* as a number of prefix bits */
- i = atoi(from+1);
- if(i < 0)
- i = 0;
- if(i > 128)
- i = 128;
- memset(to, 0, IPaddrlen);
- for(p = to; i >= 8; i -= 8)
- *p++ = 0xff;
- if(i > 0)
- *p = ~((1<<(8-i))-1);
- x = nhgetl(to+IPv4off);
- } else {
- /* as a straight bit mask */
- x = parseip(to, from);
- if(memcmp(to, v4prefix, IPv4off) == 0)
- memset(to, 0xff, IPv4off);
- }
- return x;
-}
-
-void
-maskip(uchar *from, uchar *mask, uchar *to)
-{
- int i;
-
- for(i = 0; i < IPaddrlen; i++)
- to[i] = from[i] & mask[i];
-}
-
-uchar classmask[4][16] = {
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0x00,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0x00,0x00,
- 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0xff, 0xff,0xff,0xff,0x00,
-};
-
-uchar*
-defmask(uchar *ip)
-{
- if(isv4(ip))
- return classmask[ip[IPv4off]>>6];
- else {
- if(ipcmp(ip, v6loopback) == 0)
- return IPallbits;
- else if(memcmp(ip, v6linklocal, v6llpreflen) == 0)
- return v6linklocalmask;
- else if(memcmp(ip, v6sitelocal, v6slpreflen) == 0)
- return v6sitelocalmask;
- else if(memcmp(ip, v6solicitednode, v6snpreflen) == 0)
- return v6solicitednodemask;
- else if(memcmp(ip, v6multicast, v6mcpreflen) == 0)
- return v6multicastmask;
- return IPallbits;
- }
-}
-
-void
ipv62smcast(uchar *smcast, uchar *a)
{
assert(IPaddrlen == 16);
@@ -599,7 +238,7 @@
ulong
iphash(uchar *sa, ushort sp, uchar *da, ushort dp)
{
- return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nhash;
+ return ((sa[IPaddrlen-1]<<24) ^ (sp << 16) ^ (da[IPaddrlen-1]<<8) ^ dp ) % Nipht;
}
void
@@ -678,7 +317,7 @@
return c;
}
}
-
+
/* match local address and port */
hv = iphash(IPnoaddr, 0, da, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -690,7 +329,7 @@
return c;
}
}
-
+
/* match just port */
hv = iphash(IPnoaddr, 0, IPnoaddr, dp);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -702,7 +341,7 @@
return c;
}
}
-
+
/* match local address */
hv = iphash(IPnoaddr, 0, da, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -714,7 +353,7 @@
return c;
}
}
-
+
/* look for something that matches anything */
hv = iphash(IPnoaddr, 0, IPnoaddr, 0);
for(h = ht->tab[hv]; h != nil; h = h->next){
@@ -726,4 +365,13 @@
}
unlock(ht);
return nil;
+}
+
+int
+convipvers(Conv *c) /* choose V4 or V6 for conversation c from its addresses */
+{
+ if(isv4(c->raddr) && isv4(c->laddr) || ipcmp(c->raddr, IPnoaddr) == 0) /* both ends pass isv4, or raddr equals IPnoaddr */
+ return V4;
+ else
+ return V6;
}
diff -u a/os/ip//ipifc.c b/os/ip//ipifc.c
--- a/os/ip//ipifc.c
+++ b/os/ip//ipifc.c
@@ -11,17 +11,14 @@
#define DPRINT if(0)print
enum {
- Maxmedia = 32,
- Nself = Maxmedia*5,
- NHASH = (1<<6),
- NCACHE = 256,
- QMAX = 64*1024-1,
+ Maxmedia = 32,
+ Nself = Maxmedia*5,
+ NHASH = 1<<6,
+ NCACHE = 256,
+ QMAX = 192*1024-1,
};
-Medium *media[Maxmedia] =
-{
- 0
-};
+Medium *media[Maxmedia] = { 0 };
/*
* cache of local addresses (addresses we answer to)
@@ -29,12 +26,10 @@
struct Ipself
{
uchar a[IPaddrlen];
- Ipself *hnext; /* next address in the hash table */
+ Ipself *next; /* next address in the hash table */
Iplink *link; /* binding twixt Ipself and Ipifc */
ulong expire;
uchar type; /* type of address */
- int ref;
- Ipself *next; /* free list */
};
struct Ipselftab
@@ -64,11 +59,47 @@
static void addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type);
static void remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a);
-static char* ipifcjoinmulti(Ipifc *ifc, char **argv, int argc);
-static char* ipifcleavemulti(Ipifc *ifc, char **argv, int argc);
-static void ipifcregisterproxy(Fs*, Ipifc*, uchar*);
-static char* ipifcremlifc(Ipifc*, Iplifc*);
+static void ipifcregisteraddr(Fs*, Ipifc*, Iplifc*, uchar*);
+static void ipifcregisterproxy(Fs*, Ipifc*, uchar*, int);
+static char* ipifcremlifc(Ipifc*, Iplifc**);
+static char Ebound[] = "interface already bound";
+static char Eunbound[] = "interface not bound";
+
+enum { /* address classes; v6addrtype returns values from this set */
+ unknownv6, /* UGH; v6addrtype returns this for v4 addresses and for IPnoaddr */
+ unspecifiedv6, /* never returned by v6addrtype */
+ linklocalv6, /* link-local, v6loopback, or multicast whose addr[1] low nibble <= Link_local_scop */
+ globalv6, /* anything else */
+};
+
+static int
+v6addrtype(uchar *addr) /* classify addr as unknownv6, linklocalv6 or globalv6 */
+{
+ if(isv4(addr) || ipcmp(addr, IPnoaddr) == 0) /* v4 address or equal to IPnoaddr: no v6 class */
+ return unknownv6;
+ else if(islinklocal(addr) || ipcmp(addr, v6loopback) == 0 ||
+ isv6mcast(addr) && (addr[1] & 0xF) <= Link_local_scop) /* low nibble of addr[1] is presumably the multicast scope -- confirm */
+ return linklocalv6;
+ else
+ return globalv6;
+}
+
+static int
+comprefixlen(uchar *a, uchar *b, int n) /* count the leading bits that the n-byte strings a and b have in common */
+{
+ int i, c;
+
+ for(i = 0; i < n; i++){
+ if((c = a[i] ^ b[i]) == 0) /* bytes identical: keep scanning */
+ continue;
+ for(i <<= 3; (c & 0x80) == 0; i++) /* turn the byte index into a bit count, then add the equal high bits of the first differing byte */
+ c <<= 1;
+ return i;
+ }
+ return i << 3; /* no differing byte found: every scanned bit matches */
+}
+
/*
* link in a new medium
*/
@@ -121,7 +152,7 @@
wlock(ifc);
if(ifc->m != nil){
wunlock(ifc);
- return "interface already bound";
+ return Ebound;
}
if(waserror()){
wunlock(ifc);
@@ -142,18 +173,14 @@
ifc->m = m;
ifc->mintu = ifc->m->mintu;
ifc->maxtu = ifc->m->maxtu;
+ ifc->delay = 40;
+ ifc->speed = 0;
if(ifc->m->unbindonclose == 0)
ifc->conv->inuse++;
- ifc->rp.mflag = 0; // default not managed
- ifc->rp.oflag = 0;
- ifc->rp.maxraint = 600000; // millisecs
- ifc->rp.minraint = 200000;
- ifc->rp.linkmtu = 0; // no mtu sent
- ifc->rp.reachtime = 0;
- ifc->rp.rxmitra = 0;
- ifc->rp.ttl = MAXTTL;
- ifc->rp.routerlt = 3*(ifc->rp.maxraint);
+ /* default router parameters */
+ ifc->rp = c->p->f->v6p->rp;
+
/* any ancillary structures (like routes) no longer pertain */
ifc->ifcid++;
@@ -170,29 +197,44 @@
/*
* detach a device from an interface, close the interface
- * called with ifc->conv closed
*/
static char*
ipifcunbind(Ipifc *ifc)
{
- char *err;
+ Medium *m;
- if(waserror()){
+ wlock(ifc);
+ m = ifc->m;
+ if(m == nil){
wunlock(ifc);
- nexterror();
+ return Eunbound;
}
- wlock(ifc);
- /* dissociate routes */
- if(ifc->m != nil && ifc->m->unbindonclose == 0)
- ifc->conv->inuse--;
- ifc->ifcid++;
+ /* disassociate logical interfaces (before zeroing ifc->arg) */
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
/* disassociate device */
- if(ifc->m != nil && ifc->m->unbind)
- (*ifc->m->unbind)(ifc);
+ if(m->unbind != nil){
+ extern Medium nullmedium;
+
+ /*
+ * unbind() might unlock the ifc, so change the medium
+ * to the nullmedium to prevent packets from getting
+ * sent while the medium is shutting down.
+ */
+ ifc->m = &nullmedium;
+
+ if(!waserror()){
+ (*m->unbind)(ifc);
+ poperror();
+ }
+ }
+
memset(ifc->dev, 0, sizeof(ifc->dev));
ifc->arg = nil;
+
+ ifc->reflect = 0;
ifc->reassemble = 0;
/* close queues to stop queuing of packets */
@@ -200,26 +242,22 @@
qclose(ifc->conv->wq);
qclose(ifc->conv->sq);
- /* disassociate logical interfaces */
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
-
+ /* dissociate routes */
+ ifc->ifcid++;
+ if(m->unbindonclose == 0)
+ ifc->conv->inuse--;
ifc->m = nil;
wunlock(ifc);
- poperror();
+
return nil;
}
+char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d"
+" maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d"
+" pktin %lud pktout %lud errin %lud errout %lud speed %d delay %d\n";
-
-char sfixedformat[] = "device %s maxtu %d sendra %d recvra %d mflag %d oflag %d maxraint %d minraint %d linkmtu %d reachtime %d rxmitra %d ttl %d routerlt %d pktin %lud pktout %lud errin %lud errout %lud\n";
-
char slineformat[] = " %-40I %-10M %-40I %-12lud %-12lud\n";
-
static int
ipifcstate(Conv *c, char *state, int n)
{
@@ -228,19 +266,18 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
m = snprint(state, n, sfixedformat,
ifc->dev, ifc->maxtu, ifc->sendra6, ifc->recvra6,
ifc->rp.mflag, ifc->rp.oflag, ifc->rp.maxraint,
ifc->rp.minraint, ifc->rp.linkmtu, ifc->rp.reachtime,
ifc->rp.rxmitra, ifc->rp.ttl, ifc->rp.routerlt,
- ifc->in, ifc->out, ifc->inerr, ifc->outerr);
+ ifc->in, ifc->out, ifc->inerr, ifc->outerr,
+ ifc->speed, ifc->delay);
rlock(ifc);
- for(lifc = ifc->lifc; lifc && n > m; lifc = lifc->next)
- m += snprint(state+m, n - m, slineformat,
- lifc->local, lifc->mask, lifc->remote,
- lifc->validlt, lifc->preflt);
+ for(lifc = ifc->lifc; lifc != nil && n > m; lifc = lifc->next)
+ m += snprint(state+m, n - m, slineformat, lifc->local,
+ lifc->mask, lifc->remote, lifc->validlt, lifc->preflt);
if(ifc->lifc == nil)
m += snprint(state+m, n - m, "\n");
runlock(ifc);
@@ -256,13 +293,11 @@
int m;
ifc = (Ipifc*)c->ptcl;
-
- m = 0;
-
rlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ m = 0;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
m += snprint(state+m, n - m, "%-40.40I ->", lifc->local);
- for(link = lifc->link; link; link = link->lifclink)
+ for(link = lifc->link; link != nil; link = link->lifclink)
m += snprint(state+m, n - m, " %-40.40I", link->self->a);
m += snprint(state+m, n - m, "\n");
}
@@ -279,6 +314,59 @@
return ifc->m != nil;
}
+static void
+ipifcadjustburst(Ipifc *ifc) /* recompute ifc->burst from ifc->delay, ifc->speed and ifc->maxtu */
+{
+ int burst;
+
+ burst = ((vlong)ifc->delay * ifc->speed) / 8000; /* product taken in vlong; units presumably ms * bits/s -> bytes -- confirm */
+ if(burst < ifc->maxtu) /* burst is never smaller than maxtu */
+ burst = ifc->maxtu;
+ ifc->burst = burst;
+}
+
+static void
+ipifcsetdelay(Ipifc *ifc, int delay) /* store delay clamped to 0..1000 and refresh ifc->burst */
+{
+ if(delay < 0)
+ delay = 0;
+ else if(delay > 1000)
+ delay = 1000;
+ ifc->delay = delay;
+ ipifcadjustburst(ifc); /* burst is derived from delay */
+}
+
+static void
+ipifcsetspeed(Ipifc *ifc, int speed) /* store speed (negative becomes 0), zero ifc->load, refresh ifc->burst */
+{
+ if(speed < 0)
+ speed = 0;
+ ifc->speed = speed;
+ ifc->load = 0; /* load accounting restarts from zero */
+ ipifcadjustburst(ifc); /* burst is derived from speed */
+}
+
+void
+ipifcoput(Ipifc *ifc, Block *bp, int version, uchar *ip) /* pass bp to the medium's bwrite, or free it when ifc->load exceeds ifc->burst */
+{
+ if(ifc->speed){ /* the load check runs only when speed is nonzero */
+ ulong now = MACHP(0)->ticks;
+ int dt = TK2MS(now - ifc->ticks); /* time since ifc->ticks was last stored, converted by TK2MS (presumably ms) */
+ ifc->ticks = now;
+ ifc->load -= ((vlong)dt * ifc->speed) / 8000; /* reduce load in proportion to elapsed time and speed */
+ if(ifc->load < 0 || dt < 0 || dt > 1000) /* went negative, or dt outside 0..1000: reset load */
+ ifc->load = 0;
+ else if(ifc->load > ifc->burst){ /* over the burst allowance: drop the packet */
+ freeblist(bp);
+ return;
+ }
+ }
+ bp = concatblock(bp);
+ ifc->load += BLEN(bp); /* load grows by the packet length even when speed is 0 */
+ ifc->m->bwrite(ifc, bp, version, ip);
+}
+
+
/*
* called when a process writes to an interface's 'data'
*/
@@ -294,18 +382,15 @@
return;
ifc = (Ipifc*)c->ptcl;
- if(!canrlock(ifc)){
- freeb(bp);
- return;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil || ifc->m->pktin == nil)
- freeb(bp);
- else
+ if(ifc->m != nil && ifc->m->pktin != nil)
(*ifc->m->pktin)(c->p->f, ifc, bp);
+ else
+ freeb(bp);
runlock(ifc);
poperror();
}
@@ -319,27 +404,26 @@
Ipifc *ifc;
c->rq = qopen(QMAX, 0, 0, 0);
- c->sq = qopen(2*QMAX, 0, 0, 0);
c->wq = qopen(QMAX, Qkick, ipifckick, c);
+ c->sq = qopen(QMAX, 0, 0, 0);
+ if(c->rq == nil || c->wq == nil || c->sq == nil)
+ error(Enomem);
ifc = (Ipifc*)c->ptcl;
ifc->conv = c;
- ifc->unbinding = 0;
ifc->m = nil;
+ ifc->reflect = 0;
ifc->reassemble = 0;
}
/*
* called after last close of ipifc data or ctl
- * called with c locked, we must unlock
*/
static void
ipifcclose(Conv *c)
{
- Ipifc *ifc;
- Medium *m;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
+ Medium *m = ifc->m;
- ifc = (Ipifc*)c->ptcl;
- m = ifc->m;
if(m != nil && m->unbindonclose)
ipifcunbind(ifc);
}
@@ -347,19 +431,17 @@
/*
* change an interface's mtu
*/
-char*
-ipifcsetmtu(Ipifc *ifc, char **argv, int argc)
+static char*
+ipifcsetmtu(Ipifc *ifc, int mtu)
{
- int mtu;
+ Medium *m = ifc->m;
- if(argc < 2)
+ if(m == nil)
+ return Eunbound;
+ if(mtu < m->mintu || mtu > m->maxtu)
return Ebadarg;
- if(ifc->m == nil)
- return Ebadarg;
- mtu = strtoul(argv[1], 0, 0);
- if(mtu < ifc->m->mintu || mtu > ifc->m->maxtu)
- return Ebadarg;
ifc->maxtu = mtu;
+ ipifcadjustburst(ifc);
return nil;
}
@@ -374,13 +456,8 @@
Iplifc *lifc, **l;
int i, type, mtu;
Fs *f;
- int sendnbrdisc = 0;
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- f = ifc->conv->p->f;
-
+ mtu = 0;
type = Rifc;
memset(ip, 0, IPaddrlen);
memset(mask, 0, IPaddrlen);
@@ -392,23 +469,21 @@
/* fall through */
case 5:
mtu = strtoul(argv[4], 0, 0);
- if(mtu >= ifc->m->mintu && mtu <= ifc->m->maxtu)
- ifc->maxtu = mtu;
/* fall through */
case 4:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
- parseip(rem, argv[3]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1 || parseip(rem, argv[3]) == -1)
+ return Ebadip;
maskip(rem, mask, net);
break;
case 3:
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if (parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
maskip(ip, mask, rem);
maskip(rem, mask, net);
break;
case 2:
- parseip(ip, argv[1]);
+ if (parseip(ip, argv[1]) == -1)
+ return Ebadip;
memmove(mask, defmask(ip), IPaddrlen);
maskip(ip, mask, rem);
maskip(rem, mask, net);
@@ -415,26 +490,55 @@
break;
default:
return Ebadarg;
- break;
}
- if(isv4(ip))
+
+ /* check for point-to-point interface */
+ if(ipcmp(ip, v6loopback) != 0) /* skip v6 loopback, it's a special address */
+ if(ipcmp(mask, IPallbits) == 0)
+ type |= Rptpt;
+
+ if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0){
+ type |= Rv4;
tentative = 0;
+ }
+
wlock(ifc);
+ if(ifc->m == nil){
+ wunlock(ifc);
+ return Eunbound;
+ }
+ f = ifc->conv->p->f;
+ if(waserror()){
+ wunlock(ifc);
+ return up->errstr;
+ }
+ if(mtu > 0)
+ ipifcsetmtu(ifc, mtu);
+
/* ignore if this is already a local address for this ifc */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next) {
- if(ipcmp(lifc->local, ip) == 0) {
- if(lifc->tentative != tentative)
- lifc->tentative = tentative;
- if(lifcp != nil) {
- lifc->onlink = lifcp->onlink;
- lifc->autoflag = lifcp->autoflag;
- lifc->validlt = lifcp->validlt;
- lifc->preflt = lifcp->preflt;
- lifc->origint = lifcp->origint;
+ if((lifc = iplocalonifc(ifc, ip)) != nil){
+ if(lifcp != nil) {
+ if(!lifc->onlink && lifcp->onlink){
+ lifc->onlink = 1;
+ addroute(f, lifc->remote, lifc->mask, ip, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, lifc->remote, lifc->mask, ip, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
}
- goto out;
+ lifc->autoflag = lifcp->autoflag;
+ lifc->validlt = lifcp->validlt;
+ lifc->preflt = lifcp->preflt;
+ lifc->origint = lifcp->origint;
}
+ if(lifc->tentative != tentative){
+ lifc->tentative = tentative;
+ goto done;
+ }
+ wunlock(ifc);
+ poperror();
+ return nil;
}
/* add the address to the list of logical ifc's for this ifc */
@@ -443,6 +547,7 @@
ipmove(lifc->mask, mask);
ipmove(lifc->remote, rem);
ipmove(lifc->net, net);
+ lifc->type = type;
lifc->tentative = tentative;
if(lifcp != nil) {
lifc->onlink = lifcp->onlink;
@@ -450,39 +555,34 @@
lifc->validlt = lifcp->validlt;
lifc->preflt = lifcp->preflt;
lifc->origint = lifcp->origint;
+ } else { /* default values */
+ lifc->onlink = lifc->autoflag = 1;
+ lifc->validlt = lifc->preflt = ~0UL;
+ lifc->origint = NOW / 1000;
}
- else { // default values
- lifc->onlink = 1;
- lifc->autoflag = 1;
- lifc->validlt = 0xffffffff;
- lifc->preflt = 0xffffffff;
- lifc->origint = NOW / 10^3;
- }
lifc->next = nil;
- for(l = &ifc->lifc; *l; l = &(*l)->next)
+ for(l = &ifc->lifc; *l != nil; l = &(*l)->next)
;
*l = lifc;
- /* check for point-to-point interface */
- if(ipcmp(ip, v6loopback)) /* skip v6 loopback, it's a special address */
- if(ipcmp(mask, IPallbits) == 0)
- type |= Rptpt;
+ /* add route for this logical interface */
+ if(lifc->onlink){
+ addroute(f, rem, mask, ip, IPallbits, rem, type, ifc, tifc);
+ if(v6addrtype(ip) != linklocalv6)
+ addroute(f, rem, mask, ip, IPnoaddr, rem, type, ifc, tifc);
+ }
- /* add local routes */
- if(isv4(ip))
- v4addroute(f, tifc, rem+IPv4off, mask+IPv4off, rem+IPv4off, type);
- else
- v6addroute(f, tifc, rem, mask, rem, type);
-
addselfcache(f, ifc, lifc, ip, Runi);
- if((type & (Rproxy|Rptpt)) == (Rproxy|Rptpt)){
- ipifcregisterproxy(f, ifc, rem);
- goto out;
+ /* register proxy */
+ if(type & Rptpt){
+ if(type & Rproxy)
+ ipifcregisterproxy(f, ifc, rem, 1);
+ goto done;
}
- if(isv4(ip) || ipcmp(ip, IPnoaddr) == 0) {
+ if(type & Rv4) {
/* add subnet directed broadcast address to the self cache */
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) | ~mask[i];
@@ -504,174 +604,135 @@
for(i = 0; i < IPaddrlen; i++)
bcast[i] = (ip[i] & mask[i]) & mask[i];
addselfcache(f, ifc, lifc, bcast, Rbcast);
-
+
addselfcache(f, ifc, lifc, IPv4bcast, Rbcast);
- }
- else {
+ } else {
if(ipcmp(ip, v6loopback) == 0) {
/* add node-local mcast address */
addselfcache(f, ifc, lifc, v6allnodesN, Rmulti);
/* add route for all node multicast */
- v6addroute(f, tifc, v6allnodesN, v6allnodesNmask, v6allnodesN, Rmulti);
+ addroute(f, v6allnodesN, v6allnodesNmask,
+ ip, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
}
/* add all nodes multicast address */
addselfcache(f, ifc, lifc, v6allnodesL, Rmulti);
-
+
/* add route for all nodes multicast */
- v6addroute(f, tifc, v6allnodesL, v6allnodesLmask, v6allnodesL, Rmulti);
-
+ addroute(f, v6allnodesL, v6allnodesLmask,
+ ip, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
+
/* add solicited-node multicast address */
ipv62smcast(bcast, ip);
addselfcache(f, ifc, lifc, bcast, Rmulti);
-
- sendnbrdisc = 1;
}
- /* register the address on this network for address resolution */
- if(isv4(ip) && ifc->m->areg != nil)
- (*ifc->m->areg)(ifc, ip);
-
-out:
+done:
wunlock(ifc);
- if(tentative && sendnbrdisc)
- icmpns(f, 0, SRC_UNSPEC, ip, TARG_MULTI, ifc->mac);
+ poperror();
+
+ rlock(ifc);
+ ipifcregisteraddr(f, ifc, lifc, ip);
+ runlock(ifc);
+
return nil;
}
/*
* remove a logical interface from an ifc
- * always called with ifc wlock'd
+ * called with ifc wlock'd
*/
static char*
-ipifcremlifc(Ipifc *ifc, Iplifc *lifc)
+ipifcremlifc(Ipifc *ifc, Iplifc **l)
{
- Iplifc **l;
- Fs *f;
+ Iplifc *lifc = *l;
+ Fs *f = ifc->conv->p->f;
- f = ifc->conv->p->f;
-
- /*
- * find address on this interface and remove from chain.
- * for pt to pt we actually specify the remote address as the
- * addresss to remove.
- */
- for(l = &ifc->lifc; *l != nil && *l != lifc; l = &(*l)->next)
- ;
- if(*l == nil)
+ if(lifc == nil)
return "address not on this interface";
*l = lifc->next;
/* disassociate any addresses */
- while(lifc->link)
+ while(lifc->link != nil)
remselfcache(f, ifc, lifc, lifc->link->self->a);
/* remove the route for this logical interface */
- if(isv4(lifc->local))
- v4delroute(f, lifc->remote+IPv4off, lifc->mask+IPv4off, 1);
- else {
- v6delroute(f, lifc->remote, lifc->mask, 1);
+ if(lifc->onlink){
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPallbits,
+ lifc->remote, lifc->type, ifc, tifc);
+ if(v6addrtype(lifc->local) != linklocalv6)
+ remroute(f, lifc->remote, lifc->mask,
+ lifc->local, IPnoaddr,
+ lifc->remote, lifc->type, ifc, tifc);
+ }
+
+ /* unregister proxy */
+ if(lifc->type & Rptpt){
+ if(lifc->type & Rproxy)
+ ipifcregisterproxy(f, ifc, lifc->remote, 0);
+ goto done;
+ }
+
+ /* remove route for all nodes multicast */
+ if((lifc->type & Rv4) == 0){
if(ipcmp(lifc->local, v6loopback) == 0)
- /* remove route for all node multicast */
- v6delroute(f, v6allnodesN, v6allnodesNmask, 1);
- else if(memcmp(lifc->local, v6linklocal, v6llpreflen) == 0)
- /* remove route for all link multicast */
- v6delroute(f, v6allnodesL, v6allnodesLmask, 1);
+ remroute(f, v6allnodesN, v6allnodesNmask,
+ lifc->local, IPallbits,
+ v6allnodesN, Rmulti, ifc, tifc);
+
+ remroute(f, v6allnodesL, v6allnodesLmask,
+ lifc->local, IPallbits,
+ v6allnodesL, Rmulti, ifc, tifc);
}
+done:
free(lifc);
return nil;
-
}
/*
* remove an address from an interface.
- * called with c locked
*/
char*
ipifcrem(Ipifc *ifc, char **argv, int argc)
{
- uchar ip[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar rem[IPaddrlen];
- Iplifc *lifc;
- char *rv;
+ uchar ip[IPaddrlen], mask[IPaddrlen], rem[IPaddrlen];
+ Iplifc *lifc, **l;
+ char *err;
if(argc < 3)
return Ebadarg;
-
- parseip(ip, argv[1]);
- parseipmask(mask, argv[2]);
+ if(parseipandmask(ip, mask, argv[1], argv[2]) == -1)
+ return Ebadip;
if(argc < 4)
maskip(ip, mask, rem);
- else
- parseip(rem, argv[3]);
+ else if(parseip(rem, argv[3]) == -1)
+ return Ebadip;
- wlock(ifc);
-
/*
* find address on this interface and remove from chain.
* for pt to pt we actually specify the remote address as the
* addresss to remove.
*/
+ wlock(ifc);
+ l = &ifc->lifc;
for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next) {
- if (memcmp(ip, lifc->local, IPaddrlen) == 0
- && memcmp(mask, lifc->mask, IPaddrlen) == 0
- && memcmp(rem, lifc->remote, IPaddrlen) == 0)
+ if(ipcmp(ip, lifc->local) == 0
+ && ipcmp(mask, lifc->mask) == 0
+ && ipcmp(rem, lifc->remote) == 0)
break;
+ l = &lifc->next;
}
-
- rv = ipifcremlifc(ifc, lifc);
+ err = ipifcremlifc(ifc, l);
wunlock(ifc);
- return rv;
+ return err;
}
/*
- * distribute routes to active interfaces like the
- * TRIP linecards
- */
-void
-ipifcaddroute(Fs *f, int vers, uchar *addr, uchar *mask, uchar *gate, int type)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->addroute != nil)
- m->addroute(ifc, vers, addr, mask, gate, type);
- }
- }
-}
-
-void
-ipifcremroute(Fs *f, int vers, uchar *addr, uchar *mask)
-{
- Medium *m;
- Conv **cp, **e;
- Ipifc *ifc;
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp != nil) {
- ifc = (Ipifc*)(*cp)->ptcl;
- m = ifc->m;
- if(m == nil)
- continue;
- if(m->remroute != nil)
- m->remroute(ifc, vers, addr, mask);
- }
- }
-}
-
-/*
* associate an address with the interface. This wipes out any previous
* addresses. This is a macro that means, remove all the old interfaces
* and add a new one.
@@ -679,170 +740,89 @@
static char*
ipifcconnect(Conv* c, char **argv, int argc)
{
+ Ipifc *ifc = (Ipifc*)c->ptcl;
char *err;
- Ipifc *ifc;
- ifc = (Ipifc*)c->ptcl;
-
- if(ifc->m == nil)
- return "ipifc not yet bound to device";
-
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
wlock(ifc);
- while(ifc->lifc){
- err = ipifcremlifc(ifc, ifc->lifc);
- if(err)
- error(err);
- }
+ while(ifc->lifc != nil)
+ ipifcremlifc(ifc, &ifc->lifc);
wunlock(ifc);
- poperror();
err = ipifcadd(ifc, argv, argc, 0, nil);
- if(err)
+ if(err != nil)
return err;
Fsconnected(c, nil);
-
return nil;
}
char*
-ipifcsetpar6(Ipifc *ifc, char **argv, int argc)
+ipifcra6(Ipifc *ifc, char **argv, int argc)
{
- int i, argsleft, vmax = ifc->rp.maxraint, vmin = ifc->rp.minraint;
+ int i, argsleft;
+ uchar sendra, recvra;
+ Routerparams rp;
- argsleft = argc - 1;
i = 1;
-
- if(argsleft % 2 != 0)
+ argsleft = argc - 1;
+ if((argsleft % 2) != 0)
return Ebadarg;
+ sendra = ifc->sendra6;
+ recvra = ifc->recvra6;
+ rp = ifc->rp;
+
while (argsleft > 1) {
- if(strcmp(argv[i],"recvra")==0)
- ifc->recvra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"sendra")==0)
- ifc->sendra6 = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"mflag")==0)
- ifc->rp.mflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"oflag")==0)
- ifc->rp.oflag = (atoi(argv[i+1]) != 0);
- else if(strcmp(argv[i],"maxraint")==0)
- ifc->rp.maxraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"minraint")==0)
- ifc->rp.minraint = atoi(argv[i+1]);
- else if(strcmp(argv[i],"linkmtu")==0)
- ifc->rp.linkmtu = atoi(argv[i+1]);
- else if(strcmp(argv[i],"reachtime")==0)
- ifc->rp.reachtime = atoi(argv[i+1]);
- else if(strcmp(argv[i],"rxmitra")==0)
- ifc->rp.rxmitra = atoi(argv[i+1]);
- else if(strcmp(argv[i],"ttl")==0)
- ifc->rp.ttl = atoi(argv[i+1]);
- else if(strcmp(argv[i],"routerlt")==0)
- ifc->rp.routerlt = atoi(argv[i+1]);
+ if(strcmp(argv[i], "recvra") == 0)
+ recvra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "sendra") == 0)
+ sendra = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "mflag") == 0)
+ rp.mflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "oflag") == 0)
+ rp.oflag = atoi(argv[i+1]) != 0;
+ else if(strcmp(argv[i], "maxraint") == 0)
+ rp.maxraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "minraint") == 0)
+ rp.minraint = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "linkmtu") == 0)
+ rp.linkmtu = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "reachtime") == 0)
+ rp.reachtime = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "rxmitra") == 0)
+ rp.rxmitra = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "ttl") == 0)
+ rp.ttl = atoi(argv[i+1]);
+ else if(strcmp(argv[i], "routerlt") == 0)
+ rp.routerlt = atoi(argv[i+1]);
else
- return Ebadarg;
+ return Ebadarg;
argsleft -= 2;
i += 2;
}
- // consistency check
- if(ifc->rp.maxraint < ifc->rp.minraint) {
- ifc->rp.maxraint = vmax;
- ifc->rp.minraint = vmin;
+ /* consistency check */
+ if(rp.maxraint < rp.minraint)
return Ebadarg;
- }
- return nil;
-}
+ ifc->rp = rp;
+ ifc->sendra6 = sendra;
+ ifc->recvra6 = recvra;
-char*
-ipifcsendra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->sendra6 = (i!=0);
return nil;
}
-char*
-ipifcrecvra6(Ipifc *ifc, char **argv, int argc)
-{
- int i;
-
- i = 0;
- if(argc > 1)
- i = atoi(argv[1]);
- ifc->recvra6 = (i!=0);
- return nil;
-}
-
-char*
-ipifcnat(Ipifc *ifc, char **argv, int argc)
-{
- uchar src[IPaddrlen], mask[IPaddrlen], dst[IPaddrlen];
- Iplifc *lifc;
-
- if(argc == 2){
- if((strcmp(argv[1], "show") == 0)){
- shownataddr();
- return nil;
- }else if((strcmp(argv[1], "flush") == 0)){
- flushnataddr();
- return nil;
- }else
- return Ebadarg;
- }
-
- if(argc != 5)
- return Ebadarg;
-
- if (parseip(src, argv[2]) == -1)
- return Ebadip;
-
- if (parseipmask(mask, argv[3]) == -1)
- return Ebadip;
-
- if (parseip(dst, argv[4]) == -1)
- return Ebadip;
-
- if((lifc=iplocalonifc(ifc, dst)) == nil)
- return Ebadip;
-
- if(strcmp(argv[1], "add") == 0){
- if(addnataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else if(strcmp(argv[1], "remove") == 0){
- if(removenataddr(src, mask, lifc) != 0)
- return Ebadarg;
- }else
- return Ebadarg;
-
- return nil;
-}
-
/*
* non-standard control messages.
- * called with c locked.
*/
static char*
-ipifcctl(Conv* c, char**argv, int argc)
+ipifcctl(Conv* c, char **argv, int argc)
{
- Ipifc *ifc;
- int i;
+ Ipifc *ifc = (Ipifc*)c->ptcl;
- ifc = (Ipifc*)c->ptcl;
if(strcmp(argv[0], "add") == 0)
return ipifcadd(ifc, argv, argc, 0, nil);
- else if(strcmp(argv[0], "bootp") == 0)
- return bootp(ifc);
else if(strcmp(argv[0], "try") == 0)
return ipifcadd(ifc, argv, argc, 1, nil);
else if(strcmp(argv[0], "remove") == 0)
@@ -849,36 +829,38 @@
return ipifcrem(ifc, argv, argc);
else if(strcmp(argv[0], "unbind") == 0)
return ipifcunbind(ifc);
- else if(strcmp(argv[0], "joinmulti") == 0)
- return ipifcjoinmulti(ifc, argv, argc);
- else if(strcmp(argv[0], "leavemulti") == 0)
- return ipifcleavemulti(ifc, argv, argc);
else if(strcmp(argv[0], "mtu") == 0)
- return ipifcsetmtu(ifc, argv, argc);
- else if(strcmp(argv[0], "reassemble") == 0){
- ifc->reassemble = 1;
+ return ipifcsetmtu(ifc, argc>1? strtoul(argv[1], 0, 0): 0);
+ else if(strcmp(argv[0], "speed") == 0){
+ ipifcsetspeed(ifc, argc>1? atoi(argv[1]): 0);
return nil;
}
+ else if(strcmp(argv[0], "delay") == 0){
+ ipifcsetdelay(ifc, argc>1? atoi(argv[1]): 0);
+ return nil;
+ }
else if(strcmp(argv[0], "iprouting") == 0){
- i = 1;
- if(argc > 1)
- i = atoi(argv[1]);
- iprouting(c->p->f, i);
+ iprouting(c->p->f, argc>1? atoi(argv[1]): 1);
return nil;
}
- else if(strcmp(argv[0], "addpref6") == 0)
- return ipifcaddpref6(ifc, argv, argc);
- else if(strcmp(argv[0], "setpar6") == 0)
- return ipifcsetpar6(ifc, argv, argc);
- else if(strcmp(argv[0], "sendra6") == 0)
- return ipifcsendra6(ifc, argv, argc);
- else if(strcmp(argv[0], "recvra6") == 0)
- return ipifcrecvra6(ifc, argv, argc);
- else if(strcmp(argv[0], "nat") == 0)
- return ipifcnat(ifc, argv, argc);
+ else if(strcmp(argv[0], "reflect") == 0){
+ ifc->reflect = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "reassemble") == 0){
+ ifc->reassemble = argc>1? atoi(argv[1]): 1;
+ return nil;
+ }
+ else if(strcmp(argv[0], "add6") == 0)
+ return ipifcadd6(ifc, argv, argc);
+ else if(strcmp(argv[0], "remove6") == 0)
+ return ipifcremove6(ifc, argv, argc);
+ else if(strcmp(argv[0], "ra6") == 0)
+ return ipifcra6(ifc, argv, argc);
return "unsupported ctl";
}
+int
ipifcstats(Proto *ipifc, char *buf, int len)
{
return ipstats(ipifc->f, buf, len);
@@ -907,7 +889,7 @@
ipifc->nc = Maxmedia;
ipifc->ptclsize = sizeof(Ipifc);
- f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
+ f->ipifc = ipifc; /* hack for ipifcremroute, findipifc, ... */
f->self = smalloc(sizeof(Ipselftab)); /* hack for ipforme */
Fsproto(f, ipifc);
@@ -915,21 +897,25 @@
/*
* add to self routing cache
- * called with c locked
*/
static void
addselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a, int type)
{
- Ipself *p;
Iplink *lp;
+ Ipself *p;
int h;
+ type |= (lifc->type & Rv4);
qlock(f->self);
+ if(waserror()){
+ qunlock(f->self);
+ nexterror();
+ }
/* see if the address already exists */
h = hashipa(a);
- for(p = f->self->hash[h]; p; p = p->next)
- if(memcmp(a, p->a, IPaddrlen) == 0)
+ for(p = f->self->hash[h]; p != nil; p = p->next)
+ if(ipcmp(a, p->a) == 0)
break;
/* allocate a local address and add to hash chain */
@@ -946,7 +932,7 @@
}
/* look for a link for this lifc */
- for(lp = p->link; lp; lp = lp->selflink)
+ for(lp = p->link; lp != nil; lp = lp->selflink)
if(lp->lifc == lifc)
break;
@@ -962,18 +948,19 @@
lifc->link = lp;
/* add to routing table */
- if(isv4(a))
- v4addroute(f, tifc, a+IPv4off, IPallbits+IPv4off, a+IPv4off, type);
- else
- v6addroute(f, tifc, a, IPallbits, a, type);
+ addroute(f, a, IPallbits,
+ lifc->local,
+ ((type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, type, ifc, tifc);
if((type & Rmulti) && ifc->m->addmulti != nil)
(*ifc->m->addmulti)(ifc, a, lifc->local);
- } else {
+ } else
lp->ref++;
- }
qunlock(f->self);
+ poperror();
}
/*
@@ -992,8 +979,8 @@
ulong now = NOW;
l = &freeiplink;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1000,10 +987,11 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
+
static void
ipselffree(Ipself *p)
{
@@ -1011,8 +999,8 @@
ulong now = NOW;
l = &freeipself;
- for(np = *l; np; np = *l){
- if(np->expire > now){
+ for(np = *l; np != nil; np = *l){
+ if((long)(now - np->expire) >= 0){
*l = np->next;
free(np);
continue;
@@ -1019,7 +1007,7 @@
}
l = &np->next;
}
- p->expire = now + 5000; /* give other threads 5 secs to get out */
+ p->expire = now + 5000; /* give other threads 5 secs to get out */
p->next = nil;
*l = p;
}
@@ -1027,7 +1015,6 @@
/*
* Decrement reference for this address on this link.
* Unlink from selftab if this is the last ref.
- * called with c locked
*/
static void
remselfcache(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *a)
@@ -1039,7 +1026,7 @@
/* find the unique selftab entry */
l = &f->self->hash[hashipa(a)];
- for(p = *l; p; p = *l){
+ for(p = *l; p != nil; p = *l){
if(ipcmp(p->a, a) == 0)
break;
l = &p->next;
@@ -1053,7 +1040,7 @@
* that matches the selftab entry
*/
l_lifc = &lifc->link;
- for(link = *l_lifc; link; link = *l_lifc){
+ for(link = *l_lifc; link != nil; link = *l_lifc){
if(link->self == p)
break;
l_lifc = &link->lifclink;
@@ -1067,8 +1054,8 @@
* the one we just found
*/
l_self = &p->link;
- for(link = *l_self; link; link = *l_self){
- if(link == *(l_lifc))
+ for(link = *l_self; link != nil; link = *l_self){
+ if(link == *l_lifc)
break;
l_self = &link->selflink;
}
@@ -1079,9 +1066,20 @@
if(--(link->ref) != 0)
goto out;
- if((p->type & Rmulti) && ifc->m->remmulti != nil)
- (*ifc->m->remmulti)(ifc, a, lifc->local);
+ /* remove from routing table */
+ remroute(f, a, IPallbits,
+ lifc->local,
+ ((p->type & (Rbcast|Rmulti)) != 0 || v6addrtype(a) == linklocalv6) ?
+ IPallbits : IPnoaddr,
+ a, p->type, ifc, tifc);
+ if((p->type & Rmulti) && ifc->m->remmulti != nil){
+ if(!waserror()){
+ (*ifc->m->remmulti)(ifc, a, lifc->local);
+ poperror();
+ }
+ }
+
/* ref == 0, remove from both chains and free the link */
*l_lifc = link->lifclink;
*l_self = link->selflink;
@@ -1090,30 +1088,18 @@
if(p->link != nil)
goto out;
- /* remove from routing table */
- if(isv4(a))
- v4delroute(f, a+IPv4off, IPallbits+IPv4off, 1);
- else
- v6delroute(f, a, IPallbits, 1);
-
+ /* if null address, forget */
+ if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
+ f->self->acceptall = 0;
+
/* no more links, remove from hash and free */
*l = p->next;
ipselffree(p);
- /* if IPnoaddr, forget */
- if(ipcmp(a, v4prefix) == 0 || ipcmp(a, IPnoaddr) == 0)
- f->self->acceptall = 0;
-
out:
qunlock(f->self);
}
-static char *stformat = "%-44.44I %2.2d %4.4s\n";
-enum
-{
- Nstformat= 41,
-};
-
long
ipselftabread(Fs *f, char *cp, ulong offset, int n)
{
@@ -1124,14 +1110,14 @@
m = 0;
off = offset;
- qlock(f->self);
for(i = 0; i < NHASH && m < n; i++){
for(p = f->self->hash[i]; p != nil && m < n; p = p->next){
nifc = 0;
- for(link = p->link; link; link = link->selflink)
+ for(link = p->link; link != nil; link = link->selflink)
nifc++;
routetype(p->type, state);
- m += snprint(cp + m, n - m, stformat, p->a, nifc, state);
+ m += snprint(cp + m, n - m, "%-44.44I %2.2d %4.4s\n",
+ p->a, nifc, state);
if(off > 0){
off -= m;
m = 0;
@@ -1138,30 +1124,15 @@
}
}
}
- qunlock(f->self);
return m;
}
-int
-iptentative(Fs *f, uchar *addr)
-{
- Ipself *p;
-
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
- if(ipcmp(addr, p->a) == 0) {
- return p->link->lifc->tentative;
- }
- }
- return 0;
-}
-
/*
* returns
* 0 - no match
* Runi
* Rbcast
- * Rmcast
+ * Rmulti
*/
int
ipforme(Fs *f, uchar *addr)
@@ -1168,11 +1139,9 @@
{
Ipself *p;
- p = f->self->hash[hashipa(addr)];
- for(; p; p = p->next){
+ for(p = f->self->hash[hashipa(addr)]; p != nil; p = p->next)
if(ipcmp(addr, p->a) == 0)
- return p->type;
- }
+ return p->type & (Runi|Rbcast|Rmulti);
/* hack to say accept anything */
if(f->self->acceptall)
@@ -1186,254 +1155,237 @@
* return nil.
*/
Ipifc*
-findipifc(Fs *f, uchar *remote, int type)
+findipifc(Fs *f, uchar *local, uchar *remote, int type)
{
+ uchar gnet[IPaddrlen];
+ int spec, xspec;
Ipifc *ifc, *x;
Iplifc *lifc;
- Conv **cp, **e;
- uchar gnet[IPaddrlen];
- uchar xmask[IPaddrlen];
+ Conv **cp;
- x = nil; memset(xmask, 0, IPaddrlen);
-
- /* find most specific match */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
-
+ x = nil;
+ xspec = 0;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ if(!canrlock(ifc))
+ continue;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(type & Runi){
+ if(ipcmp(remote, lifc->local) == 0){
+ Found:
+ runlock(ifc);
+ return ifc;
+ }
+ } else if(type & (Rbcast|Rmulti)) {
+ if(ipcmp(local, lifc->local) == 0)
+ goto Found;
+ }
maskip(remote, lifc->mask, gnet);
if(ipcmp(gnet, lifc->net) == 0){
- if(x == nil || ipcmp(lifc->mask, xmask) > 0){
+ spec = comprefixlen(remote, lifc->local, IPaddrlen);
+ if(spec > xspec){
x = ifc;
- ipmove(xmask, lifc->mask);
+ xspec = spec;
}
}
}
+ runlock(ifc);
}
- if(x != nil)
- return x;
+ return x;
+}
- /* for now for broadcast and multicast, just use first interface */
- if(type & (Rbcast|Rmulti)){
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
- ifc = (Ipifc*)(*cp)->ptcl;
- if(ifc->lifc != nil)
- return ifc;
- }
+Ipifc*
+findipifcstr(Fs *f, char *s)
+{
+ uchar ip[IPaddrlen];
+ Conv *c;
+ char *p;
+ long x;
+
+ x = strtol(s, &p, 10);
+ if(p > s && *p == '\0'){
+ if(x < 0)
+ return nil;
+ if(x < f->ipifc->nc && (c = f->ipifc->conv[x]) != nil && ipifcinuse(c))
+ return (Ipifc*)c->ptcl;
}
-
+ if(parseip(ip, s) != -1)
+ return findipifc(f, ip, ip, Runi);
return nil;
}
-enum {
- unknownv6,
- multicastv6,
- unspecifiedv6,
- linklocalv6,
- sitelocalv6,
- globalv6,
-};
-
-int
-v6addrtype(uchar *addr)
-{
- if(isv6global(addr))
- return globalv6;
- if(islinklocal(addr))
- return linklocalv6;
- if(isv6mcast(addr))
- return multicastv6;
- if(issitelocal(addr))
- return sitelocalv6;
- return unknownv6;
-}
-
-#define v6addrcurr(lifc) (( (lifc)->origint + (lifc)->preflt >= (NOW/10^3) ) || ( (lifc)->preflt == 0xffffffff ))
-
+/*
+ * find "best" (global > link local > unspecified)
+ * local address; address must be current.
+ */
static void
findprimaryipv6(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
- Iplifc *lifc;
+ ulong now = NOW/1000;
int atype, atypel;
+ Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
ipmove(local, v6Unspecified);
atype = unspecifiedv6;
- /* find "best" (global > sitelocal > link local > unspecified)
- * local address; address must be current */
-
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
atypel = v6addrtype(lifc->local);
if(atypel > atype)
- if(v6addrcurr(lifc)) {
+ if(lifc->preflt == ~0UL || lifc->preflt >= now-lifc->origint) {
ipmove(local, lifc->local);
atype = atypel;
- if(atype == globalv6)
+ if(atype == globalv6){
+ runlock(ifc);
return;
+ }
}
}
+ runlock(ifc);
}
}
/*
- * returns first ip address configured
+ * returns first v4 address configured
*/
static void
findprimaryipv4(Fs *f, uchar *local)
{
- Conv **cp, **e;
- Ipifc *ifc;
Iplifc *lifc;
+ Ipifc *ifc;
+ Conv **cp;
/* find first ifc local address */
- e = &f->ipifc->conv[f->ipifc->nc];
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == 0)
- continue;
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
ifc = (Ipifc*)(*cp)->ptcl;
- if((lifc = ifc->lifc) != nil){
- ipmove(local, lifc->local);
- return;
+ rlock(ifc);
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) != 0){
+ ipmove(local, lifc->local);
+ runlock(ifc);
+ return;
+ }
}
+ runlock(ifc);
}
+ ipmove(local, IPnoaddr);
}
/*
- * find the local address 'closest' to the remote system, copy it to
- * local and return the ifc for that address
+ * ipv4local, ipv6local:
+ * return a local address associated with an interface close to remote.
+ * prefixlen is the number of leading bits in the local address that
+ * have to match an interface address to be considered. this is used
+ * by source specific routes to filter on the source address.
+ * return non-zero on success or zero when no address was found.
+ *
+ * for ipv4local, all addresses are 4 byte format.
*/
-void
-findlocalip(Fs *f, uchar *local, uchar *remote)
+int
+ipv4local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
- Ipifc *ifc;
Iplifc *lifc;
- Route *r;
- uchar gate[IPaddrlen];
- uchar gnet[IPaddrlen];
- int version;
- int atype = unspecifiedv6, atypel = unknownv6;
+ int a, b;
- USED(atype);
- USED(atypel);
- qlock(f->ipifc);
- r = v6lookup(f, remote, nil);
- version = (memcmp(remote, v4prefix, IPv4off) == 0) ? V4 : V6;
-
- if(r != nil){
- ifc = r->ifc;
- if(r->type & Rv4)
- v4tov6(gate, r->v4.gate);
- else {
- ipmove(gate, r->v6.gate);
- ipmove(local, v6Unspecified);
- }
+ b = -1;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if((lifc->type & Rv4) == 0 || ipcmp(lifc->local, IPnoaddr) == 0)
+ continue;
- /* find ifc address closest to the gateway to use */
- switch(version) {
- case V4:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0){
- ipmove(local, lifc->local);
- goto out;
- }
- }
- break;
- case V6:
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- atypel = v6addrtype(lifc->local);
- maskip(gate, lifc->mask, gnet);
- if(ipcmp(gnet, lifc->net) == 0)
- if(atypel > atype)
- if(v6addrcurr(lifc)) {
- ipmove(local, lifc->local);
- atype = atypel;
- if(atype == globalv6)
- break;
- }
- }
- if(atype > unspecifiedv6)
- goto out;
- break;
- default:
- panic("findlocalip: version %d", version);
+ if(prefixlen && comprefixlen(lifc->local+IPv4off, local, IPv4addrlen) < prefixlen)
+ continue;
+
+ a = comprefixlen(lifc->local+IPv4off, remote, IPv4addrlen);
+ if(a > b){
+ b = a;
+ memmove(local, lifc->local+IPv4off, IPv4addrlen);
}
}
-
- switch(version){
- case V4:
- findprimaryipv4(f, local);
- break;
- case V6:
- findprimaryipv6(f, local);
- break;
- default:
- panic("findlocalip2: version %d", version);
- }
-
-out:
- qunlock(f->ipifc);
+ return b >= 0;
}
-/*
- * return first v4 address associated with an interface
- */
int
-ipv4local(Ipifc *ifc, uchar *addr)
+ipv6local(Ipifc *ifc, uchar *local, int prefixlen, uchar *remote)
{
+ struct {
+ int atype;
+ int deprecated;
+ int comprefixlen;
+ } a, b;
+ int atype;
+ ulong now;
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(isv4(lifc->local)){
- memmove(addr, lifc->local+IPv4off, IPv4addrlen);
- return 1;
- }
+ if(isv4(remote)){
+ memmove(local, v4prefix, IPv4off);
+ if((prefixlen -= IPv4off*8) < 0)
+ prefixlen = 0;
+ return ipv4local(ifc, local+IPv4off, prefixlen, remote+IPv4off);
}
- return 0;
-}
-/*
- * return first v6 address associated with an interface
- */
-int
-ipv6local(Ipifc *ifc, uchar *addr)
-{
- Iplifc *lifc;
+ atype = v6addrtype(remote);
+ b.atype = unknownv6;
+ b.deprecated = 1;
+ b.comprefixlen = 0;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local) && !(lifc->tentative)){
- ipmove(addr, lifc->local);
- return 1;
+ now = NOW/1000;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ if(lifc->tentative)
+ continue;
+
+ if(prefixlen && comprefixlen(lifc->local, local, IPaddrlen) < prefixlen)
+ continue;
+
+ a.atype = v6addrtype(lifc->local);
+ a.deprecated = lifc->preflt != ~0UL && lifc->preflt < now-lifc->origint;
+ a.comprefixlen = comprefixlen(lifc->local, remote, IPaddrlen);
+
+ /* prefer appropriate scope */
+ if(a.atype != b.atype){
+ if(a.atype > b.atype && b.atype < atype ||
+ a.atype < b.atype && b.atype > atype)
+ goto Good;
+ continue;
}
+ /* prefer non-deprecated addresses */
+ if(a.deprecated != b.deprecated){
+ if(b.deprecated)
+ goto Good;
+ continue;
+ }
+ /* prefer longer common prefix */
+ if(a.comprefixlen != b.comprefixlen){
+ if(a.comprefixlen > b.comprefixlen)
+ goto Good;
+ continue;
+ }
+ continue;
+ Good:
+ b = a;
+ ipmove(local, lifc->local);
}
- return 0;
+
+ return b.atype >= atype;
}
-int
-ipv6anylocal(Ipifc *ifc, uchar *addr)
+/*
+ * find the local address for a remote destination
+ */
+void
+findlocalip(Fs *f, uchar *local, uchar *remote)
{
- Iplifc *lifc;
-
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- if(!isv4(lifc->local)){
- ipmove(addr, lifc->local);
- return SRC_UNI;
- }
+ if(isv4(remote)) {
+ memmove(local, v4prefix, IPv4off);
+ if(v4source(f, remote+IPv4off, local+IPv4off) == nil)
+ findprimaryipv4(f, local);
+ } else {
+ if(v6source(f, remote, local) == nil)
+ findprimaryipv6(f, local);
}
- return SRC_UNSPEC;
}
/*
@@ -1444,13 +1396,28 @@
{
Iplifc *lifc;
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
if(ipcmp(ip, lifc->local) == 0)
return lifc;
+
return nil;
}
+Iplifc*
+ipremoteonifc(Ipifc *ifc, uchar *ip)
+{
+ uchar net[IPaddrlen];
+ Iplifc *lifc;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next){
+ maskip(ip, lifc->mask, net);
+ if(ipcmp(net, lifc->remote) == 0)
+ return lifc;
+ }
+ return nil;
+}
+
+
/*
* See if we're proxying for this address on this interface
*/
@@ -1458,24 +1425,13 @@
ipproxyifc(Fs *f, Ipifc *ifc, uchar *ip)
{
Route *r;
- uchar net[IPaddrlen];
- Iplifc *lifc;
/* see if this is a direct connected pt to pt address */
- r = v6lookup(f, ip, nil);
- if(r == nil)
+ r = v6lookup(f, ip, ip, nil);
+ if(r == nil || (r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
return 0;
- if((r->type & (Rifc|Rproxy)) != (Rifc|Rproxy))
- return 0;
- /* see if this is on the right interface */
- for(lifc = ifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0)
- return 1;
- }
-
- return 0;
+ return ipremoteonifc(ifc, ip) != nil;
}
/*
@@ -1487,73 +1443,53 @@
if(isv4(ip)){
if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
}
+ else if(ip[0] == 0xff)
+ return V6;
return 0;
}
-int
-ipisbm(uchar *ip)
-{
- if(isv4(ip)){
- if(ip[IPv4off] >= 0xe0 && ip[IPv4off] < 0xf0)
- return V4;
- if(ipcmp(ip, IPv4bcast) == 0)
- return V4;
- } else {
- if(ip[0] == 0xff)
- return V6;
- }
- return 0;
-}
-
-
/*
- * add a multicast address to an interface, called with c locked
+ * add a multicast address to an interface.
*/
void
ipifcaddmulti(Conv *c, uchar *ma, uchar *ia)
{
- Ipifc *ifc;
- Iplifc *lifc;
- Conv **p;
Ipmulti *multi, **l;
+ Iplifc *lifc;
+ Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ if(isv4(ma) != isv4(ia))
+ error("incompatible multicast/interface ip address");
+
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
return; /* it's already there */
- multi = *l = smalloc(sizeof(*multi));
- ipmove(multi->ma, ma);
- ipmove(multi->ia, ia);
- multi->next = nil;
-
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
- ifc = (Ipifc*)(*p)->ptcl;
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
if(waserror()){
- wunlock(ifc);
+ runlock(ifc);
nexterror();
}
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
- addselfcache(f, ifc, lifc, ma, Rmulti);
- wunlock(ifc);
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
+ addselfcache(f, ifc, lifc, ma, Rmulti);
+ runlock(ifc);
poperror();
}
+
+ multi = smalloc(sizeof(*multi));
+ ipmove(multi->ma, ma);
+ ipmove(multi->ia, ia);
+ multi->next = nil;
+ *l = multi;
}
/*
- * remove a multicast address from an interface, called with c locked
+ * remove a multicast address from an interface.
*/
void
ipifcremmulti(Conv *c, uchar *ma, uchar *ia)
@@ -1560,15 +1496,11 @@
{
Ipmulti *multi, **l;
Iplifc *lifc;
- Conv **p;
Ipifc *ifc;
Fs *f;
- f = c->p->f;
-
- for(l = &c->multi; *l; l = &(*l)->next)
- if(ipcmp(ma, (*l)->ma) == 0)
- if(ipcmp(ia, (*l)->ia) == 0)
+ for(l = &c->multi; *l != nil; l = &(*l)->next)
+ if(ipcmp(ma, (*l)->ma) == 0 && ipcmp(ia, (*l)->ia) == 0)
break;
multi = *l;
@@ -1576,161 +1508,101 @@
return; /* we don't have it open */
*l = multi->next;
+ multi->next = nil;
- for(p = f->ipifc->conv; *p; p++){
- if((*p)->inuse == 0)
- continue;
-
- ifc = (Ipifc*)(*p)->ptcl;
- if(waserror()){
- wunlock(ifc);
- nexterror();
- }
- wlock(ifc);
- for(lifc = ifc->lifc; lifc; lifc = lifc->next)
- if(ipcmp(ia, lifc->local) == 0)
+ f = c->p->f;
+ if((ifc = findipifc(f, ia, ma, Rmulti)) != nil){
+ rlock(ifc);
+ if(!waserror()){
+ if((lifc = iplocalonifc(ifc, ia)) != nil)
remselfcache(f, ifc, lifc, ma);
- wunlock(ifc);
- poperror();
+ poperror();
+ }
+ runlock(ifc);
}
-
free(multi);
}
-/*
- * make lifc's join and leave multicast groups
- */
-static char*
-ipifcjoinmulti(Ipifc *ifc, char **argv, int argc)
+/* register the address on this network for address resolution */
+static void
+ipifcregisteraddr(Fs *f, Ipifc *ifc, Iplifc *lifc, uchar *ip)
{
- USED(ifc, argv, argc);
- return nil;
+ if(waserror()){
+ print("ipifcregisteraddr %s %I %I: %s\n", ifc->dev, lifc->local, ip, up->errstr);
+ return;
+ }
+ if(ifc->m != nil && ifc->m->areg != nil)
+ (*ifc->m->areg)(f, ifc, lifc, ip);
+ poperror();
}
-static char*
-ipifcleavemulti(Ipifc *ifc, char **argv, int argc)
-{
- USED(ifc, argv, argc);
- return nil;
-}
-
static void
-ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip)
+ipifcregisterproxy(Fs *f, Ipifc *ifc, uchar *ip, int add)
{
- Conv **cp, **e;
- Ipifc *nifc;
+ uchar a[IPaddrlen];
Iplifc *lifc;
- Medium *m;
- uchar net[IPaddrlen];
+ Ipifc *nifc;
+ Conv **cp;
- /* register the address on any network that will proxy for us */
- e = &f->ipifc->conv[f->ipifc->nc];
+ /* register the address on any interface that will proxy for the ip */
+ for(cp = f->ipifc->conv; *cp != nil; cp++){
+ nifc = (Ipifc*)(*cp)->ptcl;
+ if(nifc == ifc || !canrlock(nifc))
+ continue;
- if(!isv4(ip)) { // V6
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->addmulti == nil) {
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0) { /* add solicited-node multicast address */
- ipv62smcast(net, ip);
- addselfcache(f, nifc, lifc, net, Rmulti);
- arpenter(f, V6, ip, nifc->mac, 6, 0);
- //(*m->addmulti)(nifc, net, ip);
- break;
- }
- }
+ if(nifc->m == nil
+ || (lifc = ipremoteonifc(nifc, ip)) == nil
+ || (lifc->type & Rptpt) != 0
+ || waserror()){
runlock(nifc);
+ continue;
}
- return;
- }
- else { // V4
- for(cp = f->ipifc->conv; cp < e; cp++){
- if(*cp == nil)
- continue;
- nifc = (Ipifc*)(*cp)->ptcl;
- if(nifc == ifc)
- continue;
-
- rlock(nifc);
- m = nifc->m;
- if(m == nil || m->areg == nil){
- runlock(nifc);
- continue;
- }
- for(lifc = nifc->lifc; lifc; lifc = lifc->next){
- maskip(ip, lifc->mask, net);
- if(ipcmp(net, lifc->remote) == 0){
- (*m->areg)(nifc, ip);
- break;
- }
- }
- runlock(nifc);
+ if((lifc->type & Rv4) == 0){
+ /* add solicited-node multicast addr */
+ ipv62smcast(a, ip);
+ if(add)
+ addselfcache(f, nifc, lifc, a, Rmulti);
+ else
+ remselfcache(f, nifc, lifc, a);
}
+ if(add)
+ ipifcregisteraddr(f, nifc, lifc, ip);
+ runlock(nifc);
+ poperror();
}
}
-
-// added for new v6 mesg types
-static void
-adddefroute6(Fs *f, uchar *gate, int force)
-{
- Route *r;
-
- r = v6lookup(f, v6Unspecified, nil);
- if(r!=nil)
- if(!(force) && (strcmp(r->tag,"ra")!=0)) // route entries generated
- return; // by all other means take
- // precedence over router annc
-
- v6delroute(f, v6Unspecified, v6Unspecified, 1);
- v6addroute(f, "ra", v6Unspecified, v6Unspecified, gate, 0);
-}
-
-enum
-{
- Ngates = 3,
-};
-
char*
-ipifcaddpref6(Ipifc *ifc, char**argv, int argc)
+ipifcadd6(Ipifc *ifc, char **argv, int argc)
{
- uchar onlink = 1;
- uchar autoflag = 1;
- long validlt = 0xffffffff;
- long preflt = 0xffffffff;
- long origint = NOW / 10^3;
- uchar prefix[IPaddrlen];
- int plen = 64;
- Iplifc *lifc;
- char addr[40], preflen[6];
- char *params[3];
+ int plen = 64;
+ char addr[40], preflen[6];
+ char *params[3];
+ uchar prefix[IPaddrlen];
+ Iplifc lifc;
+ Medium *m;
+ lifc.onlink = 1;
+ lifc.autoflag = 1;
+ lifc.validlt = lifc.preflt = ~0UL;
+ lifc.origint = NOW / 1000;
+
switch(argc) {
case 7:
- preflt = atoi(argv[6]);
+ lifc.preflt = strtoul(argv[6], 0, 10);
/* fall through */
case 6:
- validlt = atoi(argv[5]);
+ lifc.validlt = strtoul(argv[5], 0, 10);
/* fall through */
case 5:
- autoflag = atoi(argv[4]);
+ lifc.autoflag = atoi(argv[4]) != 0;
/* fall through */
case 4:
- onlink = atoi(argv[3]);
+ lifc.onlink = atoi(argv[3]) != 0;
/* fall through */
case 3:
plen = atoi(argv[2]);
+ /* fall through */
case 2:
break;
default:
@@ -1737,25 +1609,16 @@
return Ebadarg;
}
- if((parseip(prefix, argv[1])!=6) ||
- (validlt < preflt) ||
- (plen < 0) || (plen > 64) ||
- (islinklocal(prefix))
- )
+ if (parseip(prefix, argv[1]) != 6 || lifc.validlt < lifc.preflt || plen < 0 ||
+ plen > 64 || islinklocal(prefix))
return Ebadarg;
- lifc = smalloc(sizeof(Iplifc));
- lifc->onlink = (onlink!=0);
- lifc->autoflag = (autoflag!=0);
- lifc->validlt = validlt;
- lifc->preflt = preflt;
- lifc->origint = origint;
+ /* issue "add" ctl msg for v6 link-local addr and prefix len */
+ m = ifc->m;
+ if(m == nil || m->pref2addr == nil)
+ return Eunbound;
+ (*m->pref2addr)(prefix, ifc->mac); /* mac → v6 link-local addr */
- if(ifc->m->pref2addr!=nil)
- ifc->m->pref2addr(prefix, ifc->mac);
- else
- return Ebadarg;
-
sprint(addr, "%I", prefix);
sprint(preflen, "/%d", plen);
params[0] = "add";
@@ -1762,6 +1625,28 @@
params[1] = addr;
params[2] = preflen;
- return ipifcadd(ifc, params, 3, 0, lifc);
+ return ipifcadd(ifc, params, 3, 0, &lifc);
}
+char*
+ipifcremove6(Ipifc *ifc, char**, int argc)
+{
+ Iplifc *lifc, **l;
+ ulong now;
+
+ if(argc != 1)
+ return Ebadarg;
+
+ wlock(ifc);
+ now = NOW/1000;
+ for(l = &ifc->lifc; (lifc = *l) != nil;) {
+ if((lifc->type & Rv4) == 0)
+ if(lifc->validlt != ~0UL && lifc->validlt < now-lifc->origint)
+ if(ipifcremlifc(ifc, l) == nil)
+ continue;
+ l = &lifc->next;
+ }
+ wunlock(ifc);
+
+ return nil;
+}
diff -u a/os/ip//ipmux.c b/os/ip//ipmux.c
--- a/os/ip//ipmux.c
+++ b/os/ip//ipmux.c
@@ -1,3 +1,6 @@
+/*
+ * IP packet filter
+ */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
@@ -6,30 +9,14 @@
#include "../port/error.h"
#include "ip.h"
-#define DPRINT if(0)print
+#include "ipv6.h"
typedef struct Ipmuxrock Ipmuxrock;
typedef struct Ipmux Ipmux;
-typedef struct Ip6hdr Ip6hdr;
enum
{
- IPHDR = 20, /* sizeof(Ip4hdr) */
-};
-
-struct Ip6hdr
-{
- uchar vcf[4]; /* version, class label, and flow label */
- uchar ploadlen[2]; /* payload length */
- uchar proto; /* next header, i.e. proto */
- uchar ttl; /* hop limit, i.e. ttl */
- uchar src[16]; /* IP source */
- uchar dst[16]; /* IP destination */
-};
-
-
-enum
-{
+ Tver,
Tproto,
Tdata,
Tiph,
@@ -36,28 +23,8 @@
Tdst,
Tsrc,
Tifc,
-
- Cother = 0,
- Cbyte, /* single byte */
- Cmbyte, /* single byte with mask */
- Cshort, /* single short */
- Cmshort, /* single short with mask */
- Clong, /* single long */
- Cmlong, /* single long with mask */
- Cifc,
- Cmifc,
};
-char *ftname[] =
-{
-[Tproto] "proto",
-[Tdata] "data",
-[Tiph] "iph",
-[Tdst] "dst",
-[Tsrc] "src",
-[Tifc] "ifc",
-};
-
/*
* a node in the decision tree
*/
@@ -66,16 +33,12 @@
Ipmux *yes;
Ipmux *no;
uchar type; /* type of field(Txxxx) */
- uchar ctype; /* tupe of comparison(Cxxxx) */
uchar len; /* length in bytes of item to compare */
uchar n; /* number of items val points to */
- short off; /* offset of comparison */
- short eoff; /* end offset of comparison */
- uchar skiphdr; /* should offset start after ipheader */
+ int off; /* offset of comparison */
uchar *val;
uchar *mask;
uchar *e; /* val+n*len*/
-
int ref; /* so we can garbage collect */
Conv *conv;
};
@@ -90,6 +53,7 @@
static int ipmuxsprint(Ipmux*, int, char*, int);
static void ipmuxkick(void *x);
+static void ipmuxfree(Ipmux *f);
static char*
skipwhite(char *p)
@@ -122,27 +86,33 @@
Ipmux *f;
p = skipwhite(p);
- if(strncmp(p, "dst", 3) == 0){
+ if(strncmp(p, "ver", 3) == 0){
+ type = Tver;
+ off = 0;
+ len = 1;
+ p += 3;
+ }
+ else if(strncmp(p, "dst", 3) == 0){
type = Tdst;
- off = offsetof(Ip4hdr, dst[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, dst[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "src", 3) == 0){
type = Tsrc;
- off = offsetof(Ip4hdr, src[0]);
- len = IPv4addrlen;
+ off = offsetof(Ip6hdr, src[0]);
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "ifc", 3) == 0){
type = Tifc;
- off = -IPv4addrlen;
- len = IPv4addrlen;
+ off = -IPaddrlen;
+ len = IPaddrlen;
p += 3;
}
else if(strncmp(p, "proto", 5) == 0){
type = Tproto;
- off = offsetof(Ip4hdr, proto);
+ off = offsetof(Ip6hdr, proto);
len = 1;
p += 5;
}
@@ -160,7 +130,7 @@
return nil;
p++;
off = strtoul(p, &p, 0);
- if(off < 0 || off > (64-IPHDR))
+ if(off < 0)
return nil;
p = skipwhite(p);
if(*p != ':')
@@ -189,11 +159,6 @@
f->mask = nil;
f->n = 1;
f->ref = 1;
- if(type == Tdata)
- f->skiphdr = 1;
- else
- f->skiphdr = 0;
-
return f;
}
@@ -229,7 +194,7 @@
static Ipmux*
parsemux(char *p)
{
- int n, nomask;
+ int n;
Ipmux *f;
char *val;
char *mask;
@@ -247,7 +212,7 @@
goto parseerror;
/* parse mask */
- mask = follows(val, '&');
+ mask = follows(p, '&');
if(mask != nil){
switch(f->type){
case Tsrc:
@@ -254,7 +219,7 @@
case Tdst:
case Tifc:
f->mask = smalloc(f->len);
- v4parseip(f->mask, mask);
+ parseipmask(f->mask, mask, 0);
break;
case Tdata:
case Tiph:
@@ -264,15 +229,13 @@
default:
goto parseerror;
}
- nomask = 0;
- } else {
- nomask = 1;
+ } else if(f->type == Tver){
f->mask = smalloc(f->len);
- memset(f->mask, 0xff, f->len);
+ f->mask[0] = 0xF0;
}
/* parse vals */
- f->n = getfields(val, vals, sizeof(vals)/sizeof(char*), 1, "|");
+ f->n = getfields(val, vals, nelem(vals), 1, "|");
if(f->n == 0)
goto parseerror;
f->val = smalloc(f->n*f->len);
@@ -279,10 +242,21 @@
v = f->val;
for(n = 0; n < f->n; n++){
switch(f->type){
+ case Tver:
+ if(f->n != 1)
+ goto parseerror;
+ if(strcmp(vals[n], "6") == 0)
+ *v = IP_VER6;
+ else if(strcmp(vals[n], "4") == 0)
+ *v = IP_VER4;
+ else
+ goto parseerror;
+ break;
case Tsrc:
case Tdst:
case Tifc:
- v4parseip(v, vals[n]);
+ if(parseip(v, vals[n]) == -1)
+ goto parseerror;
break;
case Tproto:
case Tdata:
@@ -292,34 +266,11 @@
}
v += f->len;
}
-
- f->eoff = f->off + f->len;
f->e = f->val + f->n*f->len;
- f->ctype = Cother;
- if(f->n == 1){
- switch(f->len){
- case 1:
- f->ctype = nomask ? Cbyte : Cmbyte;
- break;
- case 2:
- f->ctype = nomask ? Cshort : Cmshort;
- break;
- case 4:
- if(f->type == Tifc)
- f->ctype = nomask ? Cifc : Cmifc;
- else
- f->ctype = nomask ? Clong : Cmlong;
- break;
- }
- }
return f;
parseerror:
- if(f->mask)
- free(f->mask);
- if(f->val)
- free(f->val);
- free(f);
+ ipmuxfree(f);
return nil;
}
@@ -342,8 +293,7 @@
return n;
/* compare offsets, call earlier ones more specific */
- n = (a->off+((int)a->skiphdr)*offsetof(Ip4hdr, data[0])) -
- (b->off+((int)b->skiphdr)*offsetof(Ip4hdr, data[0]));
+ n = a->off - b->off;
if(n != 0)
return n;
@@ -413,6 +363,10 @@
*nf = *f;
nf->no = ipmuxcopy(f->no);
nf->yes = ipmuxcopy(f->yes);
+ if(f->mask != nil){
+ nf->mask = smalloc(f->len);
+ memmove(nf->mask, f->mask, f->len);
+ }
nf->val = smalloc(f->n*f->len);
nf->e = nf->val + f->len*f->n;
memmove(nf->val, f->val, f->n*f->len);
@@ -422,8 +376,10 @@
static void
ipmuxfree(Ipmux *f)
{
- if(f->val != nil)
- free(f->val);
+ if(f == nil)
+ return;
+ free(f->val);
+ free(f->mask);
free(f);
}
@@ -432,10 +388,8 @@
{
if(f == nil)
return;
- if(f->no != nil)
- ipmuxfree(f->no);
- if(f->yes != nil)
- ipmuxfree(f->yes);
+ ipmuxfree(f->no);
+ ipmuxfree(f->yes);
ipmuxfree(f);
}
@@ -510,6 +464,8 @@
return ipmuxremove(&ft->no, f);
}
+ ipmuxremove(&ft->no, f->no);
+
/* we found a match */
if(--(ft->ref) == 0){
/*
@@ -531,8 +487,55 @@
}
/*
+ * convert to ipv4 filter
+ */
+static Ipmux*
+ipmuxconv4(Ipmux *f)
+{
+ int i, n;
+
+ if(f == nil)
+ return nil;
+
+ switch(f->type){
+ case Tproto:
+ f->off = offsetof(Ip4hdr, proto);
+ break;
+ case Tdst:
+ f->off = offsetof(Ip4hdr, dst[0]);
+ if(0){
+ case Tsrc:
+ f->off = offsetof(Ip4hdr, src[0]);
+ }
+ if(f->len != IPaddrlen)
+ break;
+ n = 0;
+ for(i = 0; i < f->n; i++){
+ if(isv4(f->val + i*IPaddrlen)){
+ memmove(f->val + n*IPv4addrlen, f->val + i*IPaddrlen + IPv4off, IPv4addrlen);
+ n++;
+ }
+ }
+ if(n == 0){
+ ipmuxtreefree(f);
+ return nil;
+ }
+ f->n = n;
+ f->len = IPv4addrlen;
+ if(f->mask != nil)
+ memmove(f->mask, f->mask+IPv4off, IPv4addrlen);
+ }
+ f->e = f->val + f->n*f->len;
+
+ f->yes = ipmuxconv4(f->yes);
+ f->no = ipmuxconv4(f->no);
+
+ return f;
+}
+
+/*
* connection request is a semi separated list of filters
- * e.g. proto=17;dat[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
+ * e.g. ver=4;proto=17;data[0:4]=11aa22bb;ifc=135.104.9.2&255.255.255.0
*
* there's no protection against overlapping specs.
*/
@@ -568,6 +571,18 @@
return Ebadarg;
mux->conv = c;
+ if(chain->type != Tver) {
+ char ver6[] = "ver=6";
+ mux = parsemux(ver6);
+ mux->yes = chain;
+ mux->no = ipmuxcopy(chain);
+ chain = mux;
+ }
+ if(*chain->val == IP_VER4)
+ chain->yes = ipmuxconv4(chain->yes);
+ else
+ chain->no = ipmuxconv4(chain->no);
+
/* save a copy of the chain so we can later remove it */
mux = ipmuxcopy(chain);
r = (Ipmuxrock*)(c->ptcl);
@@ -642,95 +657,84 @@
Block *bp;
bp = qget(c->wq);
- if(bp == nil)
- return;
- else {
+ if(bp != nil) {
Ip4hdr *ih4 = (Ip4hdr*)(bp->rp);
- if((ih4->vihl)&0xF0 != 0x60)
+
+ if((ih4->vihl & 0xF0) != IP_VER6)
ipoput4(c->p->f, bp, 0, ih4->ttl, ih4->tos, nil);
- else {
- Ip6hdr *ih6 = (Ip6hdr*)(bp->rp);
- ipoput6(c->p->f, bp, 0, ih6->ttl, 0, nil);
- }
+ else
+ ipoput6(c->p->f, bp, 0, ((Ip6hdr*)ih4)->ttl, 0, nil);
}
}
+static int
+maskmemcmp(uchar *m, uchar *v, uchar *c, int n)
+{
+ int i;
+
+ if(m == nil)
+ return memcmp(v, c, n) != 0;
+
+ for(i = 0; i < n; i++)
+ if((v[i] & m[i]) != c[i])
+ return 1;
+ return 0;
+}
+
static void
ipmuxiput(Proto *p, Ipifc *ifc, Block *bp)
{
- int len, hl;
Fs *f = p->f;
- uchar *m, *h, *v, *e, *ve, *hp;
Conv *c;
+ Iplifc *lifc;
Ipmux *mux;
- Ip4hdr *ip;
+ uchar *v;
+ Ip4hdr *ip4;
Ip6hdr *ip6;
+ int off, hl;
- ip = (Ip4hdr*)bp->rp;
- hl = (ip->vihl&0x0F)<<2;
+ ip4 = (Ip4hdr*)bp->rp;
+ if((ip4->vihl & 0xF0) == IP_VER4) {
+ hl = (ip4->vihl&0x0F)<<2;
+ ip6 = nil;
+ } else {
+ hl = IP6HDR;
+ ip6 = (Ip6hdr*)ip4;
+ }
if(p->priv == nil)
goto nomatch;
- h = bp->rp;
- len = BLEN(bp);
+ c = nil;
+ lifc = nil;
- /* run the v4 filter */
+ /* run the filter */
rlock(f);
- c = nil;
mux = f->ipmux->priv;
while(mux != nil){
- if(mux->eoff > len){
- mux = mux->no;
- continue;
- }
- hp = h + mux->off + ((int)mux->skiphdr)*hl;
- switch(mux->ctype){
- case Cbyte:
- if(*mux->val == *hp)
- goto yes;
+ switch(mux->type){
+ case Tifc:
+ if(mux->len != IPaddrlen)
+ goto no;
+ for(lifc = ifc->lifc; lifc != nil; lifc = lifc->next)
+ for(v = mux->val; v < mux->e; v += IPaddrlen)
+ if(maskmemcmp(mux->mask, lifc->local, v, IPaddrlen) == 0)
+ goto yes;
+ goto no;
+ case Tdata:
+ off = hl;
break;
- case Cmbyte:
- if((*hp & *mux->mask) == *mux->val)
- goto yes;
- break;
- case Cshort:
- if(*((ushort*)mux->val) == *(ushort*)hp)
- goto yes;
- break;
- case Cmshort:
- if((*(ushort*)hp & (*((ushort*)mux->mask))) == *((ushort*)mux->val))
- goto yes;
- break;
- case Clong:
- if(*((ulong*)mux->val) == *(ulong*)hp)
- goto yes;
- break;
- case Cmlong:
- if((*(ulong*)hp & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
- case Cifc:
- if(*((ulong*)mux->val) == *(ulong*)(ifc->lifc->local + IPv4off))
- goto yes;
- break;
- case Cmifc:
- if((*(ulong*)(ifc->lifc->local + IPv4off) & (*((ulong*)mux->mask))) == *((ulong*)mux->val))
- goto yes;
- break;
default:
- v = mux->val;
- for(e = mux->e; v < e; v = ve){
- m = mux->mask;
- hp = h + mux->off;
- for(ve = v + mux->len; v < ve; v++){
- if((*hp++ & *m++) != *v)
- break;
- }
- if(v == ve)
- goto yes;
- }
+ off = 0;
+ break;
}
+ off += mux->off;
+ if(off < 0 || off + mux->len > BLEN(bp))
+ goto no;
+ for(v = mux->val; v < mux->e; v += mux->len)
+ if(maskmemcmp(mux->mask, bp->rp + off, v, mux->len) == 0)
+ goto yes;
+no:
mux = mux->no;
continue;
yes:
@@ -743,28 +747,24 @@
if(c != nil){
/* tack on interface address */
bp = padblock(bp, IPaddrlen);
- ipmove(bp->rp, ifc->lifc->local);
- bp = concatblock(bp);
- if(bp != nil)
- if(qpass(c->rq, bp) < 0)
- print("Q");
+ if(lifc == nil)
+ lifc = ifc->lifc;
+ ipmove(bp->rp, lifc != nil ? lifc->local : IPnoaddr);
+ qpass(c->rq, concatblock(bp));
return;
}
nomatch:
/* doesn't match any filter, hand it to the specific protocol handler */
- ip = (Ip4hdr*)bp->rp;
- if((ip->vihl&0xF0)==0x40) {
- p = f->t2p[ip->proto];
- } else {
- ip6 = (Ip6hdr*)bp->rp;
+ if(ip6 != nil)
p = f->t2p[ip6->proto];
- }
- if(p && p->rcv)
- (*p->rcv)(p, ifc, bp);
else
- freeblist(bp);
- return;
+ p = f->t2p[ip4->proto];
+ if(p != nil && p->rcv != nil){
+ (*p->rcv)(p, ifc, bp);
+ return;
+ }
+ freeblist(bp);
}
static int
@@ -780,11 +780,14 @@
n += snprint(buf+n, len-n, "\n");
return n;
}
- n += snprint(buf+n, len-n, "h[%d:%d]&",
- mux->off+((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])),
- mux->off+(((int)mux->skiphdr)*((int)offsetof(Ip4hdr, data[0])))+mux->len-1);
- for(i = 0; i < mux->len; i++)
- n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ n += snprint(buf+n, len-n, "%s[%d:%d]",
+ mux->type == Tdata ? "data": "iph",
+ mux->off, mux->off+mux->len-1);
+ if(mux->mask != nil){
+ n += snprint(buf+n, len-n, "&");
+ for(i = 0; i < mux->len; i++)
+ n += snprint(buf+n, len - n, "%2.2ux", mux->mask[i]);
+ }
n += snprint(buf+n, len-n, "=");
v = mux->val;
for(j = 0; j < mux->n; j++){
diff -u a/os/ip//iproute.c b/os/ip//iproute.c
--- a/os/ip//iproute.c
+++ b/os/ip//iproute.c
@@ -12,10 +12,10 @@
static void calcd(Route*);
/* these are used for all instances of IP */
-Route* v4freelist;
-Route* v6freelist;
-RWlock routelock;
-ulong v4routegeneration, v6routegeneration;
+static Route* v4freelist;
+static Route* v6freelist;
+static RWlock routelock;
+static ulong v4routegeneration, v6routegeneration;
static void
freeroute(Route *r)
@@ -22,6 +22,7 @@
{
Route **l;
+ r->ref = 0;
r->left = nil;
r->right = nil;
if(r->type & Rv4)
@@ -35,9 +36,8 @@
static Route*
allocroute(int type)
{
- Route *r;
+ Route *r, **l;
int n;
- Route **l;
if(type & Rv4){
n = sizeof(RouteTree) + sizeof(V4route);
@@ -72,9 +72,9 @@
return;
l = allocroute(r->type);
+ l->left = r;
l->mid = *q;
*q = l;
- l->left = r;
}
/*
@@ -99,11 +99,11 @@
*/
enum
{
- Rpreceeds,
- Rfollows,
- Requals,
- Rcontains,
- Rcontained,
+ Rpreceeds, /* a left of b */
+ Rfollows, /* a right of b */
+ Requals, /* a equals b */
+ Rcontains, /* a contains b */
+ Roverlaps, /* a overlaps b */
};
static int
@@ -112,44 +112,88 @@
if(a->type & Rv4){
if(a->v4.endaddress < b->v4.address)
return Rpreceeds;
-
if(a->v4.address > b->v4.endaddress)
return Rfollows;
-
if(a->v4.address <= b->v4.address
&& a->v4.endaddress >= b->v4.endaddress){
if(a->v4.address == b->v4.address
- && a->v4.endaddress == b->v4.endaddress)
- return Requals;
+ && a->v4.endaddress == b->v4.endaddress){
+ if(a->v4.source <= b->v4.source
+ && a->v4.endsource >= b->v4.endsource){
+ if(a->v4.source == b->v4.source
+ && a->v4.endsource == b->v4.endsource)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
- return Rcontained;
+ return Roverlaps;
}
if(lcmp(a->v6.endaddress, b->v6.address) < 0)
return Rpreceeds;
-
if(lcmp(a->v6.address, b->v6.endaddress) > 0)
return Rfollows;
-
if(lcmp(a->v6.address, b->v6.address) <= 0
&& lcmp(a->v6.endaddress, b->v6.endaddress) >= 0){
if(lcmp(a->v6.address, b->v6.address) == 0
- && lcmp(a->v6.endaddress, b->v6.endaddress) == 0)
- return Requals;
+ && lcmp(a->v6.endaddress, b->v6.endaddress) == 0){
+ if(lcmp(a->v6.source, b->v6.source) <= 0
+ && lcmp(a->v6.endsource, b->v6.endsource) >= 0){
+ if(lcmp(a->v6.source, b->v6.source) == 0
+ && lcmp(a->v6.endsource, b->v6.endsource) == 0)
+ return Requals;
+ return Rcontains;
+ }
+ return Roverlaps;
+ }
return Rcontains;
}
+ return Roverlaps;
+}
- return Rcontained;
+/* return 1 if a matches b, otherwise 0 */
+static int
+matchroute(Route *a, Route *b)
+{
+ if(a == b)
+ return 1;
+
+ if((a->type^b->type) & (Rifc|Runi|Rmulti|Rbcast))
+ return 0;
+
+ if(a->type & Rv4){
+ if(memcmp(a->v4.gate, IPnoaddr+IPv4off, IPv4addrlen) != 0
+ && memcmp(a->v4.gate, b->v4.gate, IPv4addrlen) != 0)
+ return 0;
+ } else {
+ if(ipcmp(a->v6.gate, IPnoaddr) != 0
+ && ipcmp(a->v6.gate, b->v6.gate) != 0)
+ return 0;
+ }
+
+ if(a->ifc != nil && b->ifc != nil && (a->ifc != b->ifc || a->ifcid != b->ifcid))
+ return 0;
+
+ if(*a->tag != 0 && strncmp(a->tag, b->tag, sizeof(a->tag)) != 0)
+ return 0;
+
+ return 1;
}
static void
copygate(Route *old, Route *new)
{
+ old->type = new->type;
+ old->ifc = new->ifc;
+ old->ifcid = new->ifcid;
if(new->type & Rv4)
memmove(old->v4.gate, new->v4.gate, IPv4addrlen);
else
- memmove(old->v6.gate, new->v6.gate, IPaddrlen);
+ ipmove(old->v6.gate, new->v6.gate);
+ strncpy(old->tag, new->tag, sizeof(new->tag));
}
/*
@@ -162,12 +206,12 @@
l = p->left;
r = p->right;
- p->left = 0;
- p->right = 0;
+ p->left = nil;
+ p->right = nil;
addnode(f, root, p);
- if(l)
+ if(l != nil)
walkadd(f, root, l);
- if(r)
+ if(r != nil)
walkadd(f, root, r);
}
@@ -180,16 +224,16 @@
Route *q;
int d;
- if(p) {
+ if(p != nil) {
d = 0;
q = p->left;
- if(q)
+ if(q != nil)
d = q->depth;
q = p->right;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
q = p->mid;
- if(q && q->depth > d)
+ if(q != nil && q->depth > d)
d = q->depth;
p->depth = d+1;
}
@@ -210,8 +254,8 @@
* rotate tree node
*/
p = *cur;
- dl = 0; if(l = p->left) dl = l->depth;
- dr = 0; if(r = p->right) dr = r->depth;
+ dl = 0; if((l = p->left) != nil) dl = l->depth;
+ dr = 0; if((r = p->right) != nil) dr = r->depth;
if(dl > dr+1) {
p->left = l->right;
@@ -239,7 +283,7 @@
Route *p;
p = *cur;
- if(p == 0) {
+ if(p == nil) {
*cur = new;
new->depth = 1;
return;
@@ -269,15 +313,13 @@
* supercede the old entry if the old one isn't
* a local interface.
*/
- if((p->type & Rifc) == 0){
- p->type = new->type;
- p->ifcid = -1;
+ if((p->type & Rifc) == 0)
copygate(p, new);
- } else if(new->type & Rifc)
+ else if(new->type & Rifc)
p->ref++;
freeroute(new);
break;
- case Rcontained:
+ case Roverlaps:
addnode(f, &p->mid, new);
break;
}
@@ -285,241 +327,316 @@
balancetree(cur);
}
-#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
-
-void
-v4addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+/*
+ * find node matching r
+ */
+static Route**
+looknode(Route **cur, Route *r)
{
Route *p;
- ulong sa;
- ulong m;
- ulong ea;
- int h, eh;
- m = nhgetl(mask);
- sa = nhgetl(a) & m;
- ea = sa | ~m;
+ for(;;){
+ p = *cur;
+ if(p == nil)
+ return nil;
+ switch(rangecompare(r, p)){
+ case Rcontains:
+ return nil;
+ case Rpreceeds:
+ cur = &p->left;
+ break;
+ case Rfollows:
+ cur = &p->right;
+ break;
+ case Roverlaps:
+ cur = &p->mid;
+ break;
+ case Requals:
+ if((p->type & Rifc) == 0 && !matchroute(r, p))
+ return nil;
+ return cur;
+ }
+ }
+}
- eh = V4H(ea);
- for(h=V4H(sa); h<=eh; h++) {
- p = allocroute(Rv4 | type);
- p->v4.address = sa;
- p->v4.endaddress = ea;
- memmove(p->v4.gate, gate, sizeof(p->v4.gate));
- memmove(p->tag, tag, sizeof(p->tag));
+static Route*
+looknodetag(Route *r, char *tag)
+{
+ Route *x;
- wlock(&routelock);
- addnode(f, &f->v4root[h], p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- wunlock(&routelock);
+ if(r == nil)
+ return nil;
+
+ if((x = looknodetag(r->mid, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->left, tag)) != nil)
+ return x;
+ if((x = looknodetag(r->right, tag)) != nil)
+ return x;
+
+ if((r->type & Rifc) == 0){
+ if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0)
+ return r;
}
- v4routegeneration++;
- ipifcaddroute(f, Rv4, a, mask, gate, type);
+ return nil;
}
-#define V6H(a) (((a)[IPllen-1] & 0x07ffffff)>>(32-Lroot-5))
-#define ISDFLT(a, mask, tag) ((ipcmp((a),v6Unspecified)==0) && (ipcmp((mask),v6Unspecified)==0) && (strcmp((tag), "ra")!=0))
+#define V4H(a) ((a&0x07ffffff)>>(32-Lroot-5))
+#define V6H(a) (((a)[IPllen-1]&0x07ffffff)>>(32-Lroot-5))
-void
-v6addroute(Fs *f, char *tag, uchar *a, uchar *mask, uchar *gate, int type)
+static void
+routeadd(Fs *f, Route *r)
{
- Route *p;
- ulong sa[IPllen], ea[IPllen];
- ulong x, y;
- int h, eh;
+ Route **h, **e, *p;
- /*
- if(ISDFLT(a, mask, tag))
- f->v6p->cdrouter = -1;
- */
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+ for(; h <= e; h++) {
+ p = allocroute(r->type);
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- sa[h] = x & y;
- ea[h] = x | ~y;
- }
+ p->ifc = r->ifc;
+ p->ifcid = r->ifcid;
- eh = V6H(ea);
- for(h = V6H(sa); h <= eh; h++) {
- p = allocroute(type);
- memmove(p->v6.address, sa, IPaddrlen);
- memmove(p->v6.endaddress, ea, IPaddrlen);
- memmove(p->v6.gate, gate, IPaddrlen);
- memmove(p->tag, tag, sizeof(p->tag));
+ if(r->type & Rv4)
+ memmove(&p->v4, &r->v4, sizeof(r->v4));
+ else
+ memmove(&p->v6, &r->v6, sizeof(r->v6));
- wlock(&routelock);
- addnode(f, &f->v6root[h], p);
- while(p = f->queue) {
+ memmove(p->tag, r->tag, sizeof(r->tag));
+
+ addnode(f, h, p);
+ while((p = f->queue) != nil) {
f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
+ walkadd(f, h, p->left);
freeroute(p);
}
- wunlock(&routelock);
}
- v6routegeneration++;
- ipifcaddroute(f, 0, a, mask, gate, type);
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-Route**
-looknode(Route **cur, Route *r)
+static void
+routerem(Fs *f, Route *r)
{
- Route *p;
+ Route **h, **e, **l, *p;
- for(;;){
- p = *cur;
- if(p == 0)
- return 0;
-
- switch(rangecompare(r, p)){
- case Rcontains:
- return 0;
- case Rpreceeds:
- cur = &p->left;
- break;
- case Rfollows:
- cur = &p->right;
- break;
- case Rcontained:
- cur = &p->mid;
- break;
- case Requals:
- return cur;
+ if(r->type & Rv4){
+ h = &f->v4root[V4H(r->v4.address)];
+ e = &f->v4root[V4H(r->v4.endaddress)];
+ } else {
+ h = &f->v6root[V6H(r->v6.address)];
+ e = &f->v6root[V6H(r->v6.endaddress)];
+ }
+
+ for(; h <= e; h++) {
+ if((l = looknode(h, r)) == nil)
+ continue;
+ p = *l;
+ if(--(p->ref) != 0)
+ continue;
+ *l = nil;
+ addqueue(&f->queue, p->left);
+ addqueue(&f->queue, p->mid);
+ addqueue(&f->queue, p->right);
+ freeroute(p);
+
+ while((p = f->queue) != nil) {
+ f->queue = p->mid;
+ walkadd(f, h, p->left);
+ freeroute(p);
}
}
+
+ if(r->type & Rv4)
+ v4routegeneration++;
+ else
+ v6routegeneration++;
}
-void
-v4delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+static Route
+mkroute(uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong m;
+ ulong x, y;
+ Route r;
+ int h;
- m = nhgetl(mask);
- rt.v4.address = nhgetl(a) & m;
- rt.v4.endaddress = rt.v4.address | ~m;
- rt.type = Rv4;
+ memset(&r, 0, sizeof(r));
- eh = V4H(rt.v4.endaddress);
- for(h=V4H(rt.v4.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v4root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v4root[h], p->left);
- freeroute(p);
- }
- }
+ r.type = type;
+
+ if(type & Rv4){
+ x = nhgetl(a+IPv4off);
+ y = nhgetl(mask+IPv4off);
+ r.v4.address = x & y;
+ r.v4.endaddress = x | ~y;
+
+ x = nhgetl(s+IPv4off);
+ y = nhgetl(smask+IPv4off);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v4.source = x & y;
+ r.v4.endsource = x | ~y;
+
+ memmove(r.v4.gate, gate+IPv4off, IPv4addrlen);
+ } else {
+ for(h = 0; h < IPllen; h++){
+ x = nhgetl(a+4*h);
+ y = nhgetl(mask+4*h);
+ r.v6.address[h] = x & y;
+ r.v6.endaddress[h] = x | ~y;
+
+ x = nhgetl(s+4*h);
+ y = nhgetl(smask+4*h);
+ if(y != 0)
+ r.type |= Rsrc;
+ r.v6.source[h] = x & y;
+ r.v6.endsource[h] = x | ~y;
}
- if(dolock)
- wunlock(&routelock);
+
+ memmove(r.v6.gate, gate, IPaddrlen);
}
- v4routegeneration++;
- ipifcremroute(f, Rv4, a, mask);
+ if(ifc != nil){
+ r.ifc = ifc;
+ r.ifcid = ifc->ifcid;
+ }
+
+ if(tag != nil)
+ strncpy(r.tag, tag, sizeof(r.tag));
+
+ return r;
}
void
-v6delroute(Fs *f, uchar *a, uchar *mask, int dolock)
+addroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
{
- Route **r, *p;
- Route rt;
- int h, eh;
- ulong x, y;
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routeadd(f, &r);
+ wunlock(&routelock);
+}
- for(h = 0; h < IPllen; h++){
- x = nhgetl(a+4*h);
- y = nhgetl(mask+4*h);
- rt.v6.address[h] = x & y;
- rt.v6.endaddress[h] = x | ~y;
+void
+remroute(Fs *f, uchar *a, uchar *mask, uchar *s, uchar *smask, uchar *gate, int type, Ipifc *ifc, char *tag)
+{
+ Route r = mkroute(a, mask, s, smask, gate, type, ifc, tag);
+ wlock(&routelock);
+ routerem(f, &r);
+ wunlock(&routelock);
+}
+
+/* get the outgoing interface for route r */
+static Ipifc*
+routefindipifc(Route *r, Fs *f)
+{
+ uchar local[IPaddrlen], gate[IPaddrlen];
+ Ipifc *ifc;
+ int i;
+
+ ifc = r->ifc;
+ if(ifc != nil && ifc->ifcid == r->ifcid)
+ return ifc;
+
+ if(r->type & Rsrc) {
+ if(r->type & Rv4) {
+ hnputl(local+IPv4off, r->v4.source);
+ memmove(local, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(local+4*i, r->v6.source[i]);
+ }
+ } else {
+ ipmove(local, IPnoaddr);
}
- rt.type = 0;
- eh = V6H(rt.v6.endaddress);
- for(h=V6H(rt.v6.address); h<=eh; h++) {
- if(dolock)
- wlock(&routelock);
- r = looknode(&f->v6root[h], &rt);
- if(r) {
- p = *r;
- if(--(p->ref) == 0){
- *r = 0;
- addqueue(&f->queue, p->left);
- addqueue(&f->queue, p->mid);
- addqueue(&f->queue, p->right);
- freeroute(p);
- while(p = f->queue) {
- f->queue = p->mid;
- walkadd(f, &f->v6root[h], p->left);
- freeroute(p);
- }
- }
+ if(r->type & Rifc) {
+ if(r->type & Rv4) {
+ hnputl(gate+IPv4off, r->v4.address);
+ memmove(gate, v4prefix, IPv4off);
+ } else {
+ for(i = 0; i < IPllen; i++)
+ hnputl(gate+4*i, r->v6.address[i]);
}
- if(dolock)
- wunlock(&routelock);
+ } else {
+ if(r->type & Rv4)
+ v4tov6(gate, r->v4.gate);
+ else
+ ipmove(gate, r->v6.gate);
}
- v6routegeneration++;
- ipifcremroute(f, 0, a, mask);
+ if((ifc = findipifc(f, local, gate, r->type)) == nil)
+ return nil;
+
+ r->ifc = ifc;
+ r->ifcid = ifc->ifcid;
+ return ifc;
}
+/*
+ * v4lookup, v6lookup:
+ * lookup a route to destination address a from source address s
+ * and return the route. returns nil if no route was found.
+ * an optional Routehint can be passed in rh to cache the lookup.
+ *
+ * for v4lookup, addresses are in 4 byte format.
+ */
Route*
-v4lookup(Fs *f, uchar *a, Conv *c)
+v4lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
+ ulong la, ls;
Route *p, *q;
- ulong la;
- uchar gate[IPaddrlen];
Ipifc *ifc;
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v4routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v4routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
la = nhgetl(a);
+ ls = nhgetl(s);
q = nil;
- for(p=f->v4root[V4H(la)]; p;)
- if(la >= p->v4.address) {
- if(la <= p->v4.endaddress) {
- q = p;
- p = p->mid;
- } else
- p = p->right;
- } else
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
p = p->left;
-
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- hnputl(gate+IPv4off, q->v4.address);
- memmove(gate, v4prefix, IPv4off);
- } else
- v4tov6(gate, q->v4.gate);
- ifc = findipifc(f, gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ if(p->type & Rsrc){
+ if(ls < p->v4.source){
+ p = p->mid;
+ continue;
+ }
+ if(ls > p->v4.endsource){
+ p = p->mid;
+ continue;
+ }
+ }
+ q = p;
+ p = p->mid;
}
- if(c != nil){
- c->r = q;
- c->rgen = v4routegeneration;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v4routegeneration;
}
return q;
@@ -526,29 +643,35 @@
}
Route*
-v6lookup(Fs *f, uchar *a, Conv *c)
+v6lookup(Fs *f, uchar *a, uchar *s, Routehint *rh)
{
- Route *p, *q;
- ulong la[IPllen];
- int h;
+ ulong la[IPllen], ls[IPllen];
ulong x, y;
- uchar gate[IPaddrlen];
+ Route *p, *q;
Ipifc *ifc;
+ int h;
- if(memcmp(a, v4prefix, IPv4off) == 0){
- q = v4lookup(f, a+IPv4off, c);
- if(q != nil)
- return q;
+ if(isv4(s)){
+ if(isv4(a))
+ return v4lookup(f, a+IPv4off, s+IPv4off, rh);
+ return nil;
}
- if(c != nil && c->r != nil && c->r->ifc != nil && c->rgen == v6routegeneration)
- return c->r;
+ if(rh != nil
+ && rh->rgen == v6routegeneration
+ && (q = rh->r) != nil
+ && (ifc = q->ifc) != nil
+ && q->ifcid == ifc->ifcid
+ && q->ref > 0)
+ return q;
- for(h = 0; h < IPllen; h++)
+ for(h = 0; h < IPllen; h++){
la[h] = nhgetl(a+4*h);
+ ls[h] = nhgetl(s+4*h);
+ }
- q = 0;
- for(p=f->v6root[V6H(la)]; p;){
+ q = nil;
+ for(p = f->v6root[V6H(la)]; p != nil;){
for(h = 0; h < IPllen; h++){
x = la[h];
y = p->v6.address[h];
@@ -571,42 +694,202 @@
}
break;
}
+ if(p->type & Rsrc){
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.source[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = ls[h];
+ y = p->v6.endsource[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->mid;
+ goto next;
+ }
+ break;
+ }
+ }
q = p;
p = p->mid;
next: ;
}
- if(q && (q->ifc == nil || q->ifcid != q->ifc->ifcid)){
- if(q->type & Rifc) {
- for(h = 0; h < IPllen; h++)
- hnputl(gate+4*h, q->v6.address[h]);
- ifc = findipifc(f, gate, q->type);
- } else
- ifc = findipifc(f, q->v6.gate, q->type);
- if(ifc == nil)
- return nil;
- q->ifc = ifc;
- q->ifcid = ifc->ifcid;
+ if(q == nil || q->ref == 0 || routefindipifc(q, f) == nil)
+ return nil;
+
+ if(rh != nil){
+ rh->r = q;
+ rh->rgen = v6routegeneration;
}
- if(c != nil){
- c->r = q;
- c->rgen = v6routegeneration;
- }
return q;
}
+/*
+ * v4source, v6source:
+ * lookup a route to destination address a and also find
+ * a suitable source address s on the outgoing interface.
+ * return the route on success or nil when no route
+ * was found.
+ *
+ * for v4source, addresses are in 4 byte format.
+ */
+Route*
+v4source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPv4addrlen];
+ int splen;
+ ulong x, la;
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ la = nhgetl(a);
+ rlock(&routelock);
+ for(p = f->v4root[V4H(la)]; p != nil;){
+ if(la < p->v4.address){
+ p = p->left;
+ continue;
+ }
+ if(la > p->v4.endaddress){
+ p = p->right;
+ continue;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(x = ~(p->v4.endsource ^ p->v4.source); x & 0x80000000UL; x <<= 1)
+ splen++;
+ hnputl(src, p->v4.source);
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv4local(ifc, src, splen, (p->type & (Rifc|Rbcast|Rmulti|Rv4))==Rv4? p->v4.gate: a)){
+ p = p->mid;
+ continue;
+ }
+ memmove(s, src, IPv4addrlen);
+ q = p;
+ p = p->mid;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+Route*
+v6source(Fs *f, uchar *a, uchar *s)
+{
+ uchar src[IPaddrlen];
+ int splen, h;
+ ulong x, y, la[IPllen];
+ Route *p, *q;
+ Ipifc *ifc;
+
+ q = nil;
+ for(h = 0; h < IPllen; h++)
+ la[h] = nhgetl(a+4*h);
+ rlock(&routelock);
+ for(p = f->v6root[V6H(la)]; p != nil;){
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.address[h];
+ if(x == y)
+ continue;
+ if(x < y){
+ p = p->left;
+ goto next;
+ }
+ break;
+ }
+ for(h = 0; h < IPllen; h++){
+ x = la[h];
+ y = p->v6.endaddress[h];
+ if(x == y)
+ continue;
+ if(x > y){
+ p = p->right;
+ goto next;
+ }
+ break;
+ }
+ splen = 0;
+ if(p->type & Rsrc){
+ /* calculate local prefix length for source specific routes */
+ for(h = 0; h < IPllen; h++){
+ hnputl(src+4*h, p->v6.source[h]);
+ if((x = ~(p->v6.endsource[h] ^ p->v6.source[h])) != ~0UL){
+ for(; x & 0x80000000UL; x <<= 1)
+ splen++;
+ break;
+ }
+ splen += 32;
+ }
+ }
+ if((ifc = routefindipifc(p, f)) == nil
+ || !ipv6local(ifc, src, splen, a)){
+ p = p->mid;
+ continue;
+ }
+ ipmove(s, src);
+ q = p;
+ p = p->mid;
+next: ;
+ }
+ runlock(&routelock);
+ return q;
+}
+
+static int
+parseroutetype(char *p)
+{
+ int type = 0;
+ switch(*p++){
+ default: return -1;
+ case '4': type |= Rv4;
+ case '6': break;
+ }
+ for(;;) switch(*p++){
+ default:
+ return -1;
+ case 'i':
+ if(((type ^= Rifc) & Rifc) != Rifc) return -1;
+ break;
+ case 'u':
+ if(((type ^= Runi) & (Runi|Rbcast|Rmulti)) != Runi) return -1;
+ break;
+ case 'b':
+ if(((type ^= Rbcast) & (Runi|Rbcast|Rmulti)) != Rbcast) return -1;
+ break;
+ case 'm':
+ if(((type ^= Rmulti) & (Runi|Rbcast|Rmulti)) != Rmulti) return -1;
+ break;
+ case 'p':
+ if(((type ^= Rptpt) & Rptpt) != Rptpt) return -1;
+ break;
+ case '\0':
+ return type;
+ }
+}
+
void
-routetype(int type, char *p)
+routetype(int type, char p[8])
{
- memset(p, ' ', 4);
- p[4] = 0;
if(type & Rv4)
*p++ = '4';
else
*p++ = '6';
+
if(type & Rifc)
*p++ = 'i';
+
if(type & Runi)
*p++ = 'u';
else if(type & Rbcast)
@@ -613,14 +896,14 @@
*p++ = 'b';
else if(type & Rmulti)
*p++ = 'm';
+
if(type & Rptpt)
- *p = 'p';
+ *p++ = 'p';
+ *p = 0;
}
-char *rformat = "%-15I %-4M %-15I %4.4s %4.4s %3s\n";
-
-void
-convroute(Route *r, uchar *addr, uchar *mask, uchar *gate, char *t, int *nifc)
+static void
+convroute(Route *r, uchar *addr, uchar *mask, uchar *src, uchar *smask, uchar *gate)
{
int i;
@@ -627,8 +910,16 @@
if(r->type & Rv4){
memmove(addr, v4prefix, IPv4off);
hnputl(addr+IPv4off, r->v4.address);
+
memset(mask, 0xff, IPv4off);
hnputl(mask+IPv4off, ~(r->v4.endaddress ^ r->v4.address));
+
+ memmove(src, v4prefix, IPv4off);
+ hnputl(src+IPv4off, r->v4.source);
+
+ memset(smask, 0xff, IPv4off);
+ hnputl(smask+IPv4off, ~(r->v4.endsource ^ r->v4.source));
+
memmove(gate, v4prefix, IPv4off);
memmove(gate+IPv4off, r->v4.gate, IPv4addrlen);
} else {
@@ -635,162 +926,186 @@
for(i = 0; i < IPllen; i++){
hnputl(addr + 4*i, r->v6.address[i]);
hnputl(mask + 4*i, ~(r->v6.endaddress[i] ^ r->v6.address[i]));
+ hnputl(src + 4*i, r->v6.source[i]);
+ hnputl(smask + 4*i, ~(r->v6.endsource[i] ^ r->v6.source[i]));
}
memmove(gate, r->v6.gate, IPaddrlen);
}
+}
- routetype(r->type, t);
+static char*
+seprintroute(char *p, char *e, Route *r)
+{
+ uchar addr[IPaddrlen], mask[IPaddrlen], src[IPaddrlen], smask[IPaddrlen], gate[IPaddrlen];
+ char type[8], ifbuf[4], *iname;
- if(r->ifc)
- *nifc = r->ifc->conv->x;
+ convroute(r, addr, mask, src, smask, gate);
+ routetype(r->type, type);
+ if(r->ifc != nil && r->ifcid == r->ifc->ifcid)
+ snprint(iname = ifbuf, sizeof ifbuf, "%d", r->ifc->conv->x);
else
- *nifc = -1;
+ iname = "-";
+ return seprint(p, e, "%-15I %-4M %-15I %-4s %4.4s %3s %-15I %-4M\n",
+ addr, mask, gate, type, r->tag, iname, src, smask);
}
-/*
- * this code is not in rr to reduce stack size
- */
-static void
-sprintroute(Route *r, Routewalk *rw)
+typedef struct Routewalk Routewalk;
+struct Routewalk
{
- int nifc, n;
- char t[5], *iname, ifbuf[5];
- uchar addr[IPaddrlen], mask[IPaddrlen], gate[IPaddrlen];
- char *p;
+ int o;
+ int h;
+ char* p;
+ char* e;
+};
- convroute(r, addr, mask, gate, t, &nifc);
- iname = "-";
- if(nifc != -1) {
- iname = ifbuf;
- snprint(ifbuf, sizeof ifbuf, "%d", nifc);
- }
- p = seprint(rw->p, rw->e, rformat, addr, mask, gate, t, r->tag, iname);
+static int
+rr1(Routewalk *rw, Route *r)
+{
+ int n = seprintroute(rw->p, rw->e, r) - rw->p;
if(rw->o < 0){
- n = p - rw->p;
if(n > -rw->o){
- memmove(rw->p, rw->p-rw->o, n+rw->o);
- rw->p = p + rw->o;
+ memmove(rw->p, rw->p - rw->o, n + rw->o);
+ rw->p += n + rw->o;
}
rw->o += n;
} else
- rw->p = p;
+ rw->p += n;
+ return rw->p < rw->e;
}
-/*
- * recurse descending tree, applying the function in Routewalk
- */
static int
rr(Route *r, Routewalk *rw)
{
int h;
- if(rw->e <= rw->p)
- return 0;
if(r == nil)
return 1;
-
if(rr(r->left, rw) == 0)
return 0;
-
if(r->type & Rv4)
h = V4H(r->v4.address);
else
h = V6H(r->v6.address);
-
- if(h == rw->h)
- rw->walk(r, rw);
-
+ if(h == rw->h){
+ if(rr1(rw, r) == 0)
+ return 0;
+ }
if(rr(r->mid, rw) == 0)
return 0;
-
return rr(r->right, rw);
}
-void
-ipwalkroutes(Fs *f, Routewalk *rw)
+long
+routeread(Fs *f, char *p, ulong offset, int n)
{
+ Routewalk rw[1];
+
+ rw->p = p;
+ rw->e = p+n;
+ rw->o = -offset;
+ if(rw->o > 0)
+ return 0;
+
rlock(&routelock);
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v4root); rw->h++)
if(rr(f->v4root[rw->h], rw) == 0)
break;
}
- if(rw->e > rw->p) {
+ if(rw->p < rw->e) {
for(rw->h = 0; rw->h < nelem(f->v6root); rw->h++)
if(rr(f->v6root[rw->h], rw) == 0)
break;
}
runlock(&routelock);
-}
-long
-routeread(Fs *f, char *p, ulong offset, int n)
-{
- Routewalk rw;
-
- rw.p = p;
- rw.e = p+n;
- rw.o = -offset;
- rw.walk = sprintroute;
-
- ipwalkroutes(f, &rw);
-
- return rw.p - p;
+ return rw->p - p;
}
/*
- * this code is not in routeflush to reduce stack size
+ * 4 add addr mask gate
+ * 5 add addr mask gate ifc
+ * 6 add addr mask gate src smask
+ * 7 add addr mask gate ifc src smask
+ * 8 add addr mask gate tag ifc src smask
+ * 9 add addr mask gate type tag ifc src smask
+ * 3 remove addr mask
+ * 4 remove addr mask gate
+ * 5 remove addr mask src smask
+ * 6 remove addr mask gate src smask
+ * 7 remove addr mask gate ifc src smask
+ * 8 remove addr mask gate tag ifc src smask
+ * 9 remove addr mask gate type tag ifc src smask
*/
-void
-delroute(Fs *f, Route *r, int dolock)
+static Route
+parseroute(Fs *f, char **argv, int argc)
{
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
+ uchar addr[IPaddrlen], mask[IPaddrlen];
+ uchar src[IPaddrlen], smask[IPaddrlen];
uchar gate[IPaddrlen];
- char t[5];
- int nifc;
+ Ipifc *ifc;
+ char *tag;
+ int type;
- convroute(r, addr, mask, gate, t, &nifc);
- if(r->type & Rv4)
- v4delroute(f, addr+IPv4off, mask+IPv4off, dolock);
- else
- v6delroute(f, addr, mask, dolock);
-}
+ type = 0;
+ tag = nil;
+ ifc = nil;
+ ipmove(gate, IPnoaddr);
+ ipmove(src, IPnoaddr);
+ ipmove(smask, IPnoaddr);
-/*
- * recurse until one route is deleted
- * returns 0 if nothing is deleted, 1 otherwise
- */
-int
-routeflush(Fs *f, Route *r, char *tag)
-{
- if(r == nil)
- return 0;
- if(routeflush(f, r->mid, tag))
- return 1;
- if(routeflush(f, r->left, tag))
- return 1;
- if(routeflush(f, r->right, tag))
- return 1;
- if((r->type & Rifc) == 0){
- if(tag == nil || strncmp(tag, r->tag, sizeof(r->tag)) == 0){
- delroute(f, r, 0);
- return 1;
- }
+ if(argc < 3)
+ error(Ebadctl);
+ if(parseipandmask(addr, mask, argv[1], argv[2]) == -1)
+ error(Ebadip);
+
+ if(strcmp(argv[0], "add") == 0 || (argc > 3 && argc != 5)){
+ if(argc < 4)
+ error(Ebadctl);
+ if(parseip(gate, argv[3]) == -1)
+ error(Ebadip);
}
- return 0;
+ if(argc > 4 && (strcmp(argv[0], "add") != 0 || argc != 5)){
+ if(parseipandmask(src, smask, argv[argc-2], argv[argc-1]) == -1)
+ error(Ebadip);
+ }
+ if(argc == 5 && strcmp(argv[0], "add") == 0)
+ ifc = findipifcstr(f, argv[4]);
+ if(argc > 6)
+ ifc = findipifcstr(f, argv[argc-3]);
+ if(argc > 7)
+ tag = argv[argc-4];
+ if(argc > 8){
+ if((type = parseroutetype(argv[argc-5])) < 0)
+ error(Ebadctl);
+ } else {
+ if(isv4(addr))
+ type |= Rv4;
+ }
+ if(argc > 9)
+ error(Ebadctl);
+
+ if(type & Rv4){
+ if(!isv4(addr))
+ error(Ebadip);
+ if(ipcmp(smask, IPnoaddr) != 0 && !isv4(src))
+ error(Ebadip);
+ if(ipcmp(gate, IPnoaddr) != 0 && !isv4(gate))
+ error(Ebadip);
+ } else {
+ if(isv4(addr))
+ error(Ebadip);
+ }
+
+ return mkroute(addr, mask, src, smask, gate, type, ifc, tag);
}
long
routewrite(Fs *f, Chan *c, char *p, int n)
{
- int h, changed;
- char *tag;
Cmdbuf *cb;
- uchar addr[IPaddrlen];
- uchar mask[IPaddrlen];
- uchar gate[IPaddrlen];
- IPaux *a, *na;
+ IPaux *a;
+ Route *x, r;
cb = parsecmd(p, n);
if(waserror()){
@@ -797,54 +1112,44 @@
free(cb);
nexterror();
}
-
+ if(cb->nf < 1)
+ error("short control request");
if(strcmp(cb->f[0], "flush") == 0){
- tag = cb->f[1];
+ char *tag = cb->nf < 2 ? nil : cb->f[1];
+ int h;
+
+ wlock(&routelock);
for(h = 0; h < nelem(f->v4root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v4root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v4root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V4route));
+ routerem(f, &r);
}
for(h = 0; h < nelem(f->v6root); h++)
- for(changed = 1; changed;){
- wlock(&routelock);
- changed = routeflush(f, f->v6root[h], tag);
- wunlock(&routelock);
+ while((x = looknodetag(f->v6root[h], tag)) != nil){
+ memmove(&r, x, sizeof(RouteTree) + sizeof(V6route));
+ routerem(f, &r);
}
- } else if(strcmp(cb->f[0], "remove") == 0){
- if(cb->nf < 3)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4delroute(f, addr+IPv4off, mask+IPv4off, 1);
- else
- v6delroute(f, addr, mask, 1);
- } else if(strcmp(cb->f[0], "add") == 0){
- if(cb->nf < 4)
- error(Ebadarg);
- parseip(addr, cb->f[1]);
- parseipmask(mask, cb->f[2]);
- parseip(gate, cb->f[3]);
- tag = "none";
- if(c != nil){
+ wunlock(&routelock);
+ } else if(strcmp(cb->f[0], "add") == 0 || strcmp(cb->f[0], "remove") == 0){
+ r = parseroute(f, cb->f, cb->nf);
+ if(*r.tag == 0){
a = c->aux;
- tag = a->tag;
+ strncpy(r.tag, a->tag, sizeof(r.tag));
}
- if(memcmp(addr, v4prefix, IPv4off) == 0)
- v4addroute(f, tag, addr+IPv4off, mask+IPv4off, gate+IPv4off, 0);
+ wlock(&routelock);
+ if(strcmp(cb->f[0], "add") == 0)
+ routeadd(f, &r);
else
- v6addroute(f, tag, addr, mask, gate, 0);
+ routerem(f, &r);
+ wunlock(&routelock);
} else if(strcmp(cb->f[0], "tag") == 0) {
if(cb->nf < 2)
error(Ebadarg);
-
a = c->aux;
- na = newipaux(a->owner, cb->f[1]);
- c->aux = na;
+ c->aux = newipaux(a->owner, cb->f[1]);
free(a);
- }
+ } else
+ error(Ebadctl);
poperror();
free(cb);
diff -u a/os/ip//ipv6.c b/os/ip//ipv6.c
--- a/os/ip//ipv6.c
+++ b/os/ip//ipv6.c
@@ -8,250 +8,127 @@
#include "ip.h"
#include "ipv6.h"
-enum
-{
- IP4HDR = 20, /* sizeof(Ip4hdr) */
- IP6HDR = 40, /* sizeof(Ip6hdr) */
- IP_HLEN4 = 0x05, /* Header length in words */
- IP_DF = 0x4000, /* Don't fragment */
- IP_MF = 0x2000, /* More fragments */
- IP6FHDR = 8, /* sizeof(Fraghdr6) */
- IP_MAX = (32*1024), /* Maximum Internet packet size */
-};
-
-#define IPV6CLASS(hdr) ((hdr->vcf[0]&0x0F)<<2 | (hdr->vcf[1]&0xF0)>>2)
-#define BLKIPVER(xp) (((Ip6hdr*)((xp)->rp))->vcf[0]&0xF0)
-/*
- * This sleazy macro is stolen shamelessly from ip.c, see comment there.
- */
-#define BKFG(xp) ((Ipfrag*)((xp)->base))
-
-typedef struct IP IP;
-typedef struct Fragment4 Fragment4;
-typedef struct Fragment6 Fragment6;
-typedef struct Ipfrag Ipfrag;
-
-Block* ip6reassemble(IP*, int, Block*, Ip6hdr*);
-void ipfragfree6(IP*, Fragment6*);
-Fragment6* ipfragallo6(IP*);
+static Block* ip6reassemble(IP*, int, Block*);
+static Fragment6* ipfragallo6(IP*);
+static void ipfragfree6(IP*, Fragment6*);
+static Block* procopts(Block *bp);
static Block* procxtns(IP *ip, Block *bp, int doreasm);
-int unfraglen(Block *bp, uchar *nexthdr, int setfh);
-Block* procopts(Block *bp);
+static int unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh);
-/* MIB II counters */
-enum
+void
+ip_init_6(Fs *f)
{
- Forwarding,
- DefaultTTL,
- InReceives,
- InHdrErrors,
- InAddrErrors,
- ForwDatagrams,
- InUnknownProtos,
- InDiscards,
- InDelivers,
- OutRequests,
- OutDiscards,
- OutNoRoutes,
- ReasmTimeout,
- ReasmReqds,
- ReasmOKs,
- ReasmFails,
- FragOKs,
- FragFails,
- FragCreates,
+ v6params *v6p;
- Nstats,
-};
+ v6p = smalloc(sizeof(v6params));
-static char *statnames[] =
-{
-[Forwarding] "Forwarding",
-[DefaultTTL] "DefaultTTL",
-[InReceives] "InReceives",
-[InHdrErrors] "InHdrErrors",
-[InAddrErrors] "InAddrErrors",
-[ForwDatagrams] "ForwDatagrams",
-[InUnknownProtos] "InUnknownProtos",
-[InDiscards] "InDiscards",
-[InDelivers] "InDelivers",
-[OutRequests] "OutRequests",
-[OutDiscards] "OutDiscards",
-[OutNoRoutes] "OutNoRoutes",
-[ReasmTimeout] "ReasmTimeout",
-[ReasmReqds] "ReasmReqds",
-[ReasmOKs] "ReasmOKs",
-[ReasmFails] "ReasmFails",
-[FragOKs] "FragOKs",
-[FragFails] "FragFails",
-[FragCreates] "FragCreates",
-};
+ v6p->rp.mflag = 0; /* default not managed */
+ v6p->rp.oflag = 0;
+ v6p->rp.maxraint = 600000; /* millisecs */
+ v6p->rp.minraint = 200000;
+ v6p->rp.linkmtu = 0; /* no mtu sent */
+ v6p->rp.reachtime = 0;
+ v6p->rp.rxmitra = 0;
+ v6p->rp.ttl = MAXTTL;
+ v6p->rp.routerlt = (3 * v6p->rp.maxraint) / 1000;
-struct Fragment4
-{
- Block* blist;
- Fragment4* next;
- ulong src;
- ulong dst;
- ushort id;
- ulong age;
-};
+ v6p->hp.rxmithost = 1000; /* v6 RETRANS_TIMER */
-struct Fragment6
-{
- Block* blist;
- Fragment6* next;
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
- uint id;
- ulong age;
-};
+ f->v6p = v6p;
+}
-struct Ipfrag
-{
- ushort foff;
- ushort flen;
-};
-
-/* an instance of IP */
-struct IP
-{
- ulong stats[Nstats];
-
- QLock fraglock4;
- Fragment4* flisthead4;
- Fragment4* fragfree4;
- Ref id4;
-
- QLock fraglock6;
- Fragment6* flisthead6;
- Fragment6* fragfree6;
- Ref id6;
-
- int iprouting; /* true if we route like a gateway */
-};
-
int
-ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Conv *c)
+ipoput6(Fs *f, Block *bp, int gating, int ttl, int tos, Routehint *rh)
{
- int tentative;
- Ipifc *ifc;
+ int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff;
+ int morefrags, blklen, rv = 0;
uchar *gate, nexthdr;
- Ip6hdr *eh;
- int medialen, len, chunk, uflen, flen, seglen, lid, offset, fragoff, morefrags, blklen;
- Route *r, *sr;
- Fraghdr6 fraghdr;
Block *xp, *nb;
+ Fraghdr6 fraghdr;
IP *ip;
- int rv = 0;
+ Ip6hdr *eh;
+ Ipifc *ifc;
+ Route *r;
ip = f->ip;
-
- /* Fill out the ip header */
- eh = (Ip6hdr*)(bp->rp);
-
ip->stats[OutRequests]++;
- /* Number of uchars in data and ip header to write */
+ /* Fill out the ip header */
+ eh = (Ip6hdr*)bp->rp;
+ assert(BLEN(bp) >= IP6HDR);
len = blocklen(bp);
-
- tentative = iptentative(f, eh->src);
- if(tentative){
- netlog(f, Logip, "reject tx of packet with tentative src address\n");
- goto free;
- }
-
- if(gating){
- chunk = nhgets(eh->ploadlen);
- if(chunk > len){
- ip->stats[OutDiscards]++;
- netlog(f, Logip, "short gated packet\n");
- goto free;
- }
- if(chunk + IPV6HDR_LEN < len)
- len = chunk + IPV6HDR_LEN;
- }
-
if(len >= IP_MAX){
-// print("len > IP_MAX, free\n");
ip->stats[OutDiscards]++;
- netlog(f, Logip, "exceeded ip max size %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: exceeded ip max size: %d\n", eh->src, eh->dst, len);
goto free;
}
- r = v6lookup(f, eh->dst, c);
- if(r == nil){
-// print("no route for %I, src %I free\n", eh->dst, eh->src);
+ r = v6lookup(f, eh->dst, eh->src, rh);
+ if(r == nil || (r->type & Rv4) != 0 || (ifc = r->ifc) == nil){
ip->stats[OutNoRoutes]++;
- netlog(f, Logip, "no interface %I\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: no interface\n", eh->src, eh->dst);
rv = -1;
goto free;
}
- ifc = r->ifc;
- if(r->type & (Rifc|Runi))
+ if(r->type & (Rifc|Runi|Rbcast|Rmulti))
gate = eh->dst;
else
- if(r->type & (Rbcast|Rmulti)) {
- gate = eh->dst;
- sr = v6lookup(f, eh->src, nil);
- if(sr != nil && (sr->type & Runi))
- ifc = sr->ifc;
- }
- else
gate = r->v6.gate;
- if(!gating)
- eh->vcf[0] = IP_VER6;
- eh->ttl = ttl;
- if(!gating) {
- eh->vcf[0] |= (tos >> 4);
- eh->vcf[1] = (tos << 4);
- }
-
- if(!canrlock(ifc)) {
+ if(!canrlock(ifc)){
+ ip->stats[OutDiscards]++;
goto free;
}
-
if(waserror()){
runlock(ifc);
nexterror();
}
- if(ifc->m == nil) {
+ if(ifc->m == nil)
goto raise;
+
+ if(!gating){
+ eh->vcf[0] = IP_VER6;
+ eh->vcf[0] |= tos >> 4;
+ eh->vcf[1] = tos << 4;
}
+ eh->ttl = ttl;
/* If we dont need to fragment just send it */
medialen = ifc->maxtu - ifc->m->hsize;
if(len <= medialen) {
- hnputs(eh->ploadlen, len-IPV6HDR_LEN);
- ifc->m->bwrite(ifc, bp, V6, gate);
+ hnputs(eh->ploadlen, len - IP6HDR);
+ ipifcoput(ifc, bp, V6, gate);
runlock(ifc);
poperror();
return 0;
}
- if(gating)
- if(ifc->reassemble <= 0) {
-
- /* v6 intermediate nodes are not supposed to fragment pkts;
- we fragment if ifc->reassemble is turned on; an exception
- needed for nat.
+ if(gating && !ifc->reassemble) {
+ /*
+ * v6 intermediate nodes are not supposed to fragment pkts;
+ * we fragment if ifc->reassemble is turned on; an exception
+ * needed for nat.
*/
-
ip->stats[OutDiscards]++;
icmppkttoobig6(f, ifc, bp);
- netlog(f, Logip, "%I: gated pkts not fragmented\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: gated pkts not fragmented\n", eh->src, eh->dst);
goto raise;
}
-
+
/* start v6 fragmentation */
- uflen = unfraglen(bp, &nexthdr, 1);
+ uflen = unfraglen(bp, &nexthdr, 1, 0);
+ if(uflen < IP6HDR || nexthdr == FH) {
+ ip->stats[FragFails]++;
+ ip->stats[OutDiscards]++;
+ netlog(f, Logip, "%I -> %I: fragment header botch\n", eh->src, eh->dst);
+ goto raise;
+ }
if(uflen > medialen) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: unfragmentable part too big\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: unfragmentable part too big: %d\n", eh->src, eh->dst, uflen);
goto raise;
}
@@ -260,7 +137,7 @@
if(seglen < 8) {
ip->stats[FragFails]++;
ip->stats[OutDiscards]++;
- netlog(f, Logip, "%I: seglen < 8\n", eh->dst);
+ netlog(f, Logip, "%I -> %I: seglen < 8\n", eh->src, eh->dst);
goto raise;
}
@@ -271,13 +148,13 @@
xp = bp;
offset = uflen;
- while (xp != nil && offset && offset >= BLEN(xp)) {
+ while (offset && offset >= BLEN(xp)) {
offset -= BLEN(xp);
xp = xp->next;
}
xp->rp += offset;
- fragoff = 0;
+ fragoff = 0;
morefrags = 1;
for(; fragoff < flen; fragoff += seglen) {
@@ -292,7 +169,7 @@
memmove(nb->wp, eh, uflen);
nb->wp += uflen;
- hnputs(fraghdr.offsetRM, fragoff); // last 3 bits must be 0
+ hnputs(fraghdr.offsetRM, fragoff); /* last 3 bits must be 0 */
fraghdr.offsetRM[1] |= morefrags;
memmove(nb->wp, &fraghdr, IP6FHDR);
nb->wp += IP6FHDR;
@@ -300,11 +177,11 @@
/* Copy data */
chunk = seglen;
while (chunk) {
- if(!xp) {
+ if(xp == nil) {
ip->stats[OutDiscards]++;
ip->stats[FragFails]++;
freeblist(nb);
- netlog(f, Logip, "!xp: chunk in v6%d\n", chunk);
+ netlog(f, Logip, "xp == nil: chunk in v6%d\n", chunk);
goto raise;
}
blklen = chunk;
@@ -316,10 +193,9 @@
xp->rp += blklen;
chunk -= blklen;
if(xp->rp == xp->wp)
- xp = xp->next;
+ xp = xp->next;
}
-
- ifc->m->bwrite(ifc, nb, V6, gate);
+ ipifcoput(ifc, nb, V6, gate);
ip->stats[FragCreates]++;
}
ip->stats[FragOKs]++;
@@ -328,7 +204,7 @@
runlock(ifc);
poperror();
free:
- freeblist(bp);
+ freeblist(bp);
return rv;
}
@@ -335,16 +211,10 @@
void
ipiput6(Fs *f, Ipifc *ifc, Block *bp)
{
- int hl;
- int hop, tos;
- uchar proto;
+ int hl, len, hop, tos;
+ IP *ip;
Ip6hdr *h;
Proto *p;
- int notforme;
- int tentative;
- uchar v6dst[IPaddrlen];
- IP *ip;
- Route *r, *sr;
ip = f->ip;
ip->stats[InReceives]++;
@@ -365,40 +235,44 @@
return;
}
- h = (Ip6hdr *)(bp->rp);
-
- memmove(&v6dst[0], &(h->dst)[0], IPaddrlen);
- notforme = ipforme(f, v6dst) == 0;
- tentative = iptentative(f, v6dst);
-
- if(tentative && (h->proto != ICMPv6)) {
- print("tentative addr, drop\n");
- freeblist(bp);
- return;
- }
-
/* Check header version */
- if(BLKIPVER(bp) != IP_VER6) {
+ h = (Ip6hdr*)bp->rp;
+ if((h->vcf[0] & 0xF0) != IP_VER6) {
ip->stats[InHdrErrors]++;
netlog(f, Logip, "ip: bad version %ux\n", (h->vcf[0]&0xF0)>>2);
- freeblist(bp);
+ goto drop;
+ }
+ len = IP6HDR + nhgets(h->ploadlen);
+ if((bp = trimblock(bp, 0, len)) == nil){
+ ip->stats[InHdrErrors]++;
+ netlog(f, Logip, "%I -> %I: bogus packet length: %d\n", h->src, h->dst, len);
return;
}
+ h = (Ip6hdr*)bp->rp;
/* route */
- if(notforme) {
- if(!ip->iprouting){
- freeb(bp);
- return;
+ if(!ipforme(f, h->dst)) {
+ Route *r;
+ Routehint rh;
+ Ipifc *nifc;
+
+ if(!ip->iprouting)
+ goto drop;
+
+ /* don't forward to link-local destinations */
+ if(islinklocal(h->dst) ||
+ (isv6mcast(h->dst) && (h->dst[1]&0xF) <= Link_local_scop)){
+ ip->stats[OutDiscards]++;
+ goto drop;
}
+
/* don't forward to source's network */
- sr = v6lookup(f, h->src, nil);
- r = v6lookup(f, h->dst, nil);
-
- if(r == nil || sr == r){
+ rh.r = nil;
+ r = v6lookup(f, h->dst, h->src, &rh);
+ if(r == nil || (nifc = r->ifc) == nil || (r->type & Rv4) != 0
+ || (nifc == ifc && !ifc->reflect)){
ip->stats[OutDiscards]++;
- freeblist(bp);
- return;
+ goto drop;
}
/* don't forward if packet has timed out */
@@ -406,33 +280,29 @@
if(hop < 1) {
ip->stats[InHdrErrors]++;
icmpttlexceeded6(f, ifc, bp);
- freeblist(bp);
- return;
+ goto drop;
}
/* process headers & reassemble if the interface expects it */
- bp = procxtns(ip, bp, r->ifc->reassemble);
-
+ bp = procxtns(ip, bp, nifc->reassemble);
if(bp == nil)
return;
ip->stats[ForwDatagrams]++;
- h = (Ip6hdr *) (bp->rp);
- tos = IPV6CLASS(h);
+ h = (Ip6hdr*)bp->rp;
+ tos = (h->vcf[0]&0x0F)<<2 | (h->vcf[1]&0xF0)>>2;
hop = h->ttl;
- ipoput6(f, bp, 1, hop-1, tos, nil);
+ ipoput6(f, bp, 1, hop-1, tos, &rh);
return;
}
/* reassemble & process headers if needed */
bp = procxtns(ip, bp, 1);
-
if(bp == nil)
return;
- h = (Ip6hdr *) (bp->rp);
- proto = h->proto;
- p = Fsrcvpcol(f, proto);
+ h = (Ip6hdr*)bp->rp;
+ p = Fsrcvpcol(f, h->proto);
if(p != nil && p->rcv != nil) {
ip->stats[InDelivers]++;
(*p->rcv)(p, ifc, bp);
@@ -441,6 +311,7 @@
ip->stats[InDiscards]++;
ip->stats[InUnknownProtos]++;
+drop:
freeblist(bp);
}
@@ -447,20 +318,20 @@
/*
* ipfragfree6 - copied from ipfragfree4 - assume hold fraglock6
*/
-void
+static void
ipfragfree6(IP *ip, Fragment6 *frag)
{
Fragment6 *fl, **l;
- if(frag->blist)
+ if(frag->blist != nil)
freeblist(frag->blist);
-
- memset(frag->src, 0, IPaddrlen);
- frag->id = 0;
frag->blist = nil;
+ frag->id = 0;
+ memset(frag->src, 0, IPaddrlen);
+ memset(frag->dst, 0, IPaddrlen);
l = &ip->flisthead6;
- for(fl = *l; fl; fl = fl->next) {
+ for(fl = *l; fl != nil; fl = fl->next) {
if(fl == frag) {
*l = frag->next;
break;
@@ -470,13 +341,12 @@
frag->next = ip->fragfree6;
ip->fragfree6 = frag;
-
}
/*
* ipfragallo6 - copied from ipfragalloc4
*/
-Fragment6*
+static Fragment6*
ipfragallo6(IP *ip)
{
Fragment6 *f;
@@ -483,7 +353,7 @@
while(ip->fragfree6 == nil) {
/* free last entry on fraglist */
- for(f = ip->flisthead6; f->next; f = f->next)
+ for(f = ip->flisthead6; f->next != nil; f = f->next)
;
ipfragfree6(ip, f);
}
@@ -497,108 +367,109 @@
}
static Block*
-procxtns(IP *ip, Block *bp, int doreasm) {
-
- int offset;
+procxtns(IP *ip, Block *bp, int doreasm)
+{
uchar proto;
- Ip6hdr *h;
+ int offset;
- h = (Ip6hdr *) (bp->rp);
- offset = unfraglen(bp, &proto, 0);
-
- if((proto == FH) && (doreasm != 0)) {
- bp = ip6reassemble(ip, offset, bp, h);
- if(bp == nil)
- return nil;
- offset = unfraglen(bp, &proto, 0);
+ offset = unfraglen(bp, &proto, 0, doreasm);
+ if(offset >= IP6HDR && proto == FH && doreasm) {
+ bp = ip6reassemble(ip, offset, bp);
+ if(bp == nil)
+ return nil;
+ offset = unfraglen(bp, &proto, 0, 0);
+ if(proto == FH)
+ offset = -1;
}
-
- if(proto == DOH || offset > IP6HDR)
+ if(offset < IP6HDR){
+ ip->stats[InHdrErrors]++;
+ ip->stats[InDiscards]++;
+ freeblist(bp);
+ return nil;
+ }
+ if(proto == DOH || offset > IP6HDR)
bp = procopts(bp);
-
return bp;
}
-
-/* returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
- * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
- * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
- * field of the last header in the "Unfragmentable part" is set to FH.
+/*
+ * returns length of "Unfragmentable part", i.e., sum of lengths of ipv6 hdr,
+ * hop-by-hop & routing headers if present; *nexthdr is set to nexthdr value
+ * of the last header in the "Unfragmentable part"; if setfh != 0, nexthdr
+ * field of the last header in the "Unfragmentable part" is set to FH.
+ * When the last header is a fragment header and popfh != 0 then set
+ * the nexthdr value of the previous header to the nexthdr value of the
+ * fragment header. returns -1 on error.
*/
-int
-unfraglen(Block *bp, uchar *nexthdr, int setfh)
+static int
+unfraglen(Block *bp, uchar *nexthdr, int setfh, int popfh)
{
- uchar *p, *q;
- int ufl, hs;
+ uchar *e, *p, *q;
+ e = bp->wp;
p = bp->rp;
- q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
+ q = p+6; /* proto, = p+sizeof(Ip6hdr.vcf)+sizeof(Ip6hdr.ploadlen) */
*nexthdr = *q;
- ufl = IP6HDR;
- p += ufl;
-
- for(;;) {
- if(*nexthdr == HBH || *nexthdr == RH) {
- *nexthdr = *p;
- hs = ((int)*(p+1) + 1) * 8;
- ufl += hs;
- q = p;
- p += hs;
- }
- else
- break;
+ p += IP6HDR;
+ while(*nexthdr == HBH || *nexthdr == RH){
+ if(p+2 > e)
+ return -1;
+ q = p;
+ *nexthdr = *q;
+ p += ((int)p[1] + 1) * 8;
}
-
- if(*nexthdr == FH)
- *q = *p;
-
- if(setfh)
+ if(p > e)
+ return -1;
+ if(*nexthdr == FH){
+ if(p+IP6FHDR > e || *p == FH)
+ return -1;
+ if(popfh)
+ *q = *p;
+ } else if(setfh)
*q = FH;
-
- return ufl;
+ return p - bp->rp;
}
-Block*
+static Block*
procopts(Block *bp)
{
return bp;
}
-Block*
-ip6reassemble(IP* ip, int uflen, Block* bp, Ip6hdr* ih)
+static Block*
+ip6reassemble(IP* ip, int uflen, Block* bp)
{
-
- int fend, offset;
+ int offset, ovlap, fragsize, len;
+ uchar src[IPaddrlen], dst[IPaddrlen];
uint id;
- Fragment6 *f, *fnext;
+ Block *bl, **l, *prev;
Fraghdr6 *fraghdr;
- uchar src[IPaddrlen], dst[IPaddrlen];
- Block *bl, **l, *last, *prev;
- int ovlap, len, fragsize, pktposn;
+ Fragment6 *f, *fnext;
+ Ipfrag *fp, *fq;
+ Ip6hdr* ih;
- fraghdr = (Fraghdr6 *) (bp->rp + uflen);
- memmove(src, ih->src, IPaddrlen);
- memmove(dst, ih->dst, IPaddrlen);
- id = nhgetl(fraghdr->id);
- offset = nhgets(fraghdr->offsetRM) & ~7;
-
/*
- * block lists are too hard, pullupblock into a single block
+ * block lists are too hard, concatblock into a single block
*/
- if(bp->next){
- bp = pullupblock(bp, blocklen(bp));
- ih = (Ip6hdr *)(bp->rp);
- }
+ bp = concatblock(bp);
+ ih = (Ip6hdr*)bp->rp;
+ fraghdr = (Fraghdr6*)(bp->rp + uflen);
+ id = nhgetl(fraghdr->id);
+ offset = nhgets(fraghdr->offsetRM);
+ fragsize = BLEN(bp) - uflen - IP6FHDR;
+ memmove(src, ih->src, IPaddrlen);
+ memmove(dst, ih->dst, IPaddrlen);
+
qlock(&ip->fraglock6);
/*
* find a reassembly queue for this fragment
*/
- for(f = ip->flisthead6; f; f = fnext){
+ for(f = ip->flisthead6; f != nil; f = fnext){
fnext = f->next;
- if(ipcmp(f->src, src)==0 && ipcmp(f->dst, dst)==0 && f->id == id)
+ if(f->id == id && ipcmp(f->src, src) == 0 && ipcmp(f->dst, dst) == 0)
break;
if(f->age < NOW){
ip->stats[ReasmTimeout]++;
@@ -606,28 +477,35 @@
}
}
-
/*
* if this isn't a fragmented packet, accept it
* and get rid of any fragments that might go
* with it.
*/
- if(nhgets(fraghdr->offsetRM)==0) { // first frag is also the last
+ if((offset & ~6) == 0) { /* 1st frag is also last */
if(f != nil) {
- ipfragfree6(ip, f);
ip->stats[ReasmFails]++;
+ ipfragfree6(ip, f);
}
qunlock(&ip->fraglock6);
+
+ /* get rid of frag header */
+ memmove(bp->rp + IP6FHDR, bp->rp, uflen);
+ bp->rp += IP6FHDR;
+ ih = (Ip6hdr*)bp->rp;
+ hnputs(ih->ploadlen, BLEN(bp)-IP6HDR);
+
return bp;
}
- if(bp->base+sizeof(Ipfrag) >= bp->rp){
- bp = padblock(bp, sizeof(Ipfrag));
- bp->rp += sizeof(Ipfrag);
+ if(bp->base+IPFRAGSZ > bp->rp){
+ bp = padblock(bp, IPFRAGSZ);
+ bp->rp += IPFRAGSZ;
}
- BKFG(bp)->foff = offset;
- BKFG(bp)->flen = nhgets(ih->ploadlen) + IP6HDR - uflen - IP6FHDR;
+ fp = (Ipfrag*)bp->base;
+ fp->foff = offset & ~7;
+ fp->flen = fragsize;
/* First fragment allocates a reassembly queue */
if(f == nil) {
@@ -638,8 +516,9 @@
f->blist = bp;
- qunlock(&ip->fraglock6);
ip->stats[ReasmReqds]++;
+ qunlock(&ip->fraglock6);
+
return nil;
}
@@ -649,7 +528,7 @@
prev = nil;
l = &f->blist;
bl = f->blist;
- while(bl != nil && BKFG(bp)->foff > BKFG(bl)->foff) {
+ while(bl != nil && fp->foff > ((Ipfrag*)bl->base)->foff) {
prev = bl;
l = &bl->next;
bl = bl->next;
@@ -656,15 +535,16 @@
}
/* Check overlap of a previous fragment - trim away as necessary */
- if(prev) {
- ovlap = BKFG(prev)->foff + BKFG(prev)->flen - BKFG(bp)->foff;
+ if(prev != nil) {
+ fq = (Ipfrag*)prev->base;
+ ovlap = fq->foff + fq->flen - fp->foff;
if(ovlap > 0) {
- if(ovlap >= BKFG(bp)->flen) {
- freeblist(bp);
+ if(ovlap >= fp->flen) {
qunlock(&ip->fraglock6);
+ freeb(bp);
return nil;
}
- BKFG(prev)->flen -= ovlap;
+ fq->flen -= ovlap;
}
}
@@ -673,29 +553,27 @@
*l = bp;
/* Check to see if succeeding segments overlap */
- if(bp->next) {
+ if(bp->next != nil) {
l = &bp->next;
- fend = BKFG(bp)->foff + BKFG(bp)->flen;
+ offset = fp->foff + fp->flen;
/* Take completely covered segments out */
-
- while(*l) {
- ovlap = fend - BKFG(*l)->foff;
-
- if(ovlap <= 0)
- break;
- if(ovlap < BKFG(*l)->flen) {
- BKFG(*l)->flen -= ovlap;
- BKFG(*l)->foff += ovlap;
- /* move up ih hdrs */
- memmove((*l)->rp + ovlap, (*l)->rp, uflen);
- (*l)->rp += ovlap;
+ while((bl = *l) != nil) {
+ fq = (Ipfrag*)bl->base;
+ ovlap = offset - fq->foff;
+ if(ovlap <= 0)
break;
+ if(ovlap < fq->flen) {
+ /* move up ip and frag header */
+ memmove(bl->rp + ovlap, bl->rp, BLEN(bl) - fq->flen);
+ bl->rp += ovlap;
+ fq->flen -= ovlap;
+ fq->foff += ovlap;
+ break;
}
- last = (*l)->next;
- (*l)->next = nil;
- freeblist(*l);
- *l = last;
+ *l = bl->next;
+ bl->next = nil;
+ freeb(bl);
}
}
@@ -703,45 +581,55 @@
* look for a complete packet. if we get to a fragment
* with the trailing bit of fraghdr->offsetRM[1] set, we're done.
*/
- pktposn = 0;
- for(bl = f->blist; bl; bl = bl->next) {
- if(BKFG(bl)->foff != pktposn)
+ offset = 0;
+ for(bl = f->blist; bl != nil; bl = bl->next, offset += fp->flen) {
+ fp = (Ipfrag*)bl->base;
+ if(fp->foff != offset)
break;
-
- fraghdr = (Fraghdr6 *) (bl->rp + uflen);
- if((fraghdr->offsetRM[1] & 1) == 0) {
- bl = f->blist;
+ fraghdr = (Fraghdr6*)(bl->wp - fp->flen - IP6FHDR);
+ if(fraghdr->offsetRM[1] & 1)
+ continue;
- /* get rid of frag header in first fragment */
+ bl = f->blist;
+ fq = (Ipfrag*)bl->base;
- memmove(bl->rp + IP6FHDR, bl->rp, uflen);
- bl->rp += IP6FHDR;
- len = nhgets(((Ip6hdr*)(bl->rp))->ploadlen) - IP6FHDR;
- bl->wp = bl->rp + len + IP6HDR;
+ /* get rid of frag header in first fragment */
+ memmove(bl->rp + IP6FHDR, bl->rp, BLEN(bl) - fq->flen - IP6FHDR);
+ bl->rp += IP6FHDR;
+ len = BLEN(bl);
- /* Pullup all the fragment headers and
- * return a complete packet
- */
- for(bl = bl->next; bl; bl = bl->next) {
- fragsize = BKFG(bl)->flen;
- len += fragsize;
- bl->rp += uflen + IP6FHDR;
- bl->wp = bl->rp + fragsize;
- }
+ /*
+ * Pullup all the fragment headers and
+ * return a complete packet
+ */
+ for(bl = bl->next; bl != nil && len < IP_MAX; bl = bl->next) {
+ fq = (Ipfrag*)bl->base;
+ fragsize = fq->flen;
+ bl->rp = bl->wp - fragsize;
+ len += fragsize;
+ }
- bl = f->blist;
- f->blist = nil;
+ if(len >= IP_MAX){
ipfragfree6(ip, f);
- ih = (Ip6hdr*)(bl->rp);
- hnputs(ih->ploadlen, len);
+ ip->stats[ReasmFails]++;
qunlock(&ip->fraglock6);
- ip->stats[ReasmOKs]++;
- return bl;
+
+ return nil;
}
- pktposn += BKFG(bl)->flen;
+
+ bl = f->blist;
+ f->blist = nil;
+ ipfragfree6(ip, f);
+
+ ih = (Ip6hdr*)bl->rp;
+ hnputs(ih->ploadlen, len-IP6HDR);
+
+ ip->stats[ReasmOKs]++;
+ qunlock(&ip->fraglock6);
+
+ return bl;
}
qunlock(&ip->fraglock6);
return nil;
}
-
diff -u a/os/ip//ipv6.h b/os/ip//ipv6.h
--- a/os/ip//ipv6.h
+++ b/os/ip//ipv6.h
@@ -1,53 +1,31 @@
-#undef MIN
-#define MIN(a, b) ((a) <= (b) ? (a) : (b))
-
-/* rfc 3513 defines the address prefices */
+/*
+ * Internet Protocol Version 6
+ *
+ * rfc2460 defines the protocol, rfc2461 neighbour discovery, and
+ * rfc2462 address autoconfiguration. rfc4443 defines ICMP; was rfc2463.
+ * rfc4291 defines the address architecture (including prefices), was rfc3513.
+ * rfc4007 defines the scoped address architecture.
+ *
+ * global unicast is anything but unspecified (::), loopback (::1),
+ * multicast (ff00::/8), and link-local unicast (fe80::/10).
+ *
+ * site-local (fec0::/10) is now deprecated, originally by rfc3879.
+ *
+ * Unique Local IPv6 Unicast Addresses are defined by rfc4193.
+ * prefix is fc00::/7, scope is global, routing is limited to roughly a site.
+ */
#define isv6mcast(addr) ((addr)[0] == 0xff)
#define islinklocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0x80)
-#define issitelocal(addr) ((addr)[0] == 0xfe && ((addr)[1] & 0xc0) == 0xc0)
-#define isv6global(addr) (((addr)[0] & 0xe0) == 0x20)
-#define optexsts(np) (nhgets((np)->ploadlen) > 24)
-#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
+#define optexsts(np) (nhgets((np)->ploadlen) > 24)
+#define issmcast(addr) (memcmp((addr), v6solicitednode, 13) == 0)
-/* from RFC 2460 */
+#ifndef MIN
+#define MIN(a, b) ((a) <= (b)? (a): (b))
+#endif
-typedef struct Ip6hdr Ip6hdr;
-typedef struct Opthdr Opthdr;
-typedef struct Routinghdr Routinghdr;
-typedef struct Fraghdr6 Fraghdr6;
-
-struct Ip6hdr {
- uchar vcf[4]; // version:4, traffic class:8, flow label:20
- uchar ploadlen[2]; // payload length: packet length - 40
- uchar proto; // next header type
- uchar ttl; // hop limit
- uchar src[IPaddrlen];
- uchar dst[IPaddrlen];
-};
-
-struct Opthdr {
- uchar nexthdr;
- uchar len;
-};
-
-struct Routinghdr {
- uchar nexthdr;
- uchar len;
- uchar rtetype;
- uchar segrem;
-};
-
-struct Fraghdr6 {
- uchar nexthdr;
- uchar res;
- uchar offsetRM[2]; // Offset, Res, M flag
- uchar id[4];
-};
-
-
-enum { /* Header Types */
- HBH = 0, //?
+enum { /* Header Types */
+ HBH = 0, /* hop-by-hop multicast routing protocol */
ICMP = 1,
IGMP = 2,
GGP = 3,
@@ -72,89 +50,113 @@
Maxhdrtype = 256,
};
-
enum {
- // multicast flgs and scop
+ /* multicast flags and scopes */
- well_known_flg = 0,
- transient_flg = 1,
+// Well_known_flg = 0,
+// Transient_flg = 1,
- node_local_scop = 1,
- link_local_scop = 2,
- site_local_scop = 5,
- org_local_scop = 8,
- global_scop = 14,
+// Interface_local_scop = 1,
+ Link_local_scop = 2,
+// Site_local_scop = 5,
+// Org_local_scop = 8,
+ Global_scop = 14,
- // various prefix lengths
+ /* various prefix lengths */
+ SOLN_PREF_LEN = 13,
- SOLN_PREF_LEN = 13,
+ /* icmpv6 unreachability codes */
+ Icmp6_no_route = 0,
+ Icmp6_ad_prohib = 1,
+ Icmp6_out_src_scope = 2,
+ Icmp6_adr_unreach = 3,
+ Icmp6_port_unreach = 4,
+ Icmp6_gress_src_fail = 5,
+ Icmp6_rej_route = 6,
+ Icmp6_unknown = 7, /* our own invention for internal use */
- // icmpv6 unreach codes
- icmp6_no_route = 0,
- icmp6_ad_prohib = 1,
- icmp6_unassigned = 2,
- icmp6_adr_unreach = 3,
- icmp6_port_unreach = 4,
- icmp6_unkn_code = 5,
+ /* various flags & constants */
+ v6MINTU = 1280,
+ HOP_LIMIT = 255,
+ IP6HDR = 40, /* sizeof(Ip6hdr) = 8 + 2*16 */
+ IP6FHDR = 8, /* sizeof(Fraghdr6) */
- // various flags & constants
+ /* option types */
- v6MINTU = 1280,
- HOP_LIMIT = 255,
- ETHERHDR_LEN = 14,
- IPV6HDR_LEN = 40,
- IPV4HDR_LEN = 20,
+ /* neighbour discovery */
+ SRC_LLADDR = 1,
+ TARGET_LLADDR = 2,
+ PREFIX_INFO = 3,
+ REDIR_HEADER = 4,
+ MTU_OPTION = 5,
+ /* new since rfc2461; see iana.org/assignments/icmpv6-parameters */
+ V6nd_home = 8,
+ V6nd_srcaddrs = 9, /* rfc3122 */
+ V6nd_ip = 17,
+ /* /lib/rfc/drafts/draft-jeong-dnsop-ipv6-dns-discovery-12.txt */
+ V6nd_rdns = 25,
+ /* plan 9 extensions */
+ V6nd_9fs = 250,
+ V6nd_9auth = 251,
- // option types
+ SRC_UNSPEC = 0,
+ SRC_UNI = 1,
+ TARG_UNI = 2,
+ TARG_MULTI = 3,
- SRC_LLADDRESS = 1,
- TARGET_LLADDRESS = 2,
- PREFIX_INFO = 3,
- REDIR_HEADER = 4,
- MTU_OPTION = 5,
+ Tunitent = 1,
+ Tuniproxy = 2,
+ Tunirany = 3,
- SRC_UNSPEC = 0,
- SRC_UNI = 1,
- TARG_UNI = 2,
- TARG_MULTI = 3,
+ /* Node constants */
+ MAX_MULTICAST_SOLICIT = 3,
+ RETRANS_TIMER = 1000,
+};
- t_unitent = 1,
- t_uniproxy = 2,
- t_unirany = 3,
+typedef struct Ip6hdr Ip6hdr;
+typedef struct Opthdr Opthdr;
+typedef struct Routinghdr Routinghdr;
+typedef struct Fraghdr6 Fraghdr6;
- // Router constants (all times in milliseconds)
+/* we do this in case there's padding at the end of Ip6hdr */
+#define IPV6HDR \
+ uchar vcf[4]; /* version:4, traffic class:8, flow label:20 */\
+ uchar ploadlen[2]; /* payload length: packet length - 40 */ \
+ uchar proto; /* next header type */ \
+ uchar ttl; /* hop limit */ \
+ uchar src[IPaddrlen]; \
+ uchar dst[IPaddrlen]
- MAX_INITIAL_RTR_ADVERT_INTERVAL = 16000,
- MAX_INITIAL_RTR_ADVERTISEMENTS = 3,
- MAX_FINAL_RTR_ADVERTISEMENTS = 3,
- MIN_DELAY_BETWEEN_RAS = 3000,
- MAX_RA_DELAY_TIME = 500,
+struct Ip6hdr {
+ IPV6HDR;
+ uchar payload[];
+};
- // Host constants
+struct Opthdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+};
- MAX_RTR_SOLICITATION_DELAY = 1000,
- RTR_SOLICITATION_INTERVAL = 4000,
- MAX_RTR_SOLICITATIONS = 3,
+/*
+ * Beware routing header type 0 (loose source routing); see
+ * http://www.secdev.org/conf/IPv6_RH_security-csw07.pdf.
+ * Type 1 is unused. Type 2 is for MIPv6 (mobile IPv6) filtering
+ * against type 0 header.
+ */
+struct Routinghdr { /* unused */
+ uchar nexthdr;
+ uchar len;
+ uchar rtetype;
+ uchar segrem;
+};
- // Node constants
-
- MAX_MULTICAST_SOLICIT = 3,
- MAX_UNICAST_SOLICIT = 3,
- MAX_ANYCAST_DELAY_TIME = 1000,
- MAX_NEIGHBOR_ADVERTISEMENT = 3,
- REACHABLE_TIME = 30000,
- RETRANS_TIMER = 1000,
- DELAY_FIRST_PROBE_TIME = 5000,
-
+struct Fraghdr6 {
+ uchar nexthdr;
+ uchar res;
+ uchar offsetRM[2]; /* Offset, Res, M flag */
+ uchar id[4];
};
-extern void ipv62smcast(uchar *, uchar *);
-extern void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
-extern void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
-extern void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
-extern void icmphostunr(Fs *f, Ipifc *ifc, Block *bp, int code, int free);
-
extern uchar v6allnodesN[IPaddrlen];
extern uchar v6allnodesL[IPaddrlen];
extern uchar v6allroutersN[IPaddrlen];
@@ -161,23 +163,16 @@
extern uchar v6allroutersL[IPaddrlen];
extern uchar v6allnodesNmask[IPaddrlen];
extern uchar v6allnodesLmask[IPaddrlen];
-extern uchar v6allroutersS[IPaddrlen];
extern uchar v6solicitednode[IPaddrlen];
extern uchar v6solicitednodemask[IPaddrlen];
extern uchar v6Unspecified[IPaddrlen];
extern uchar v6loopback[IPaddrlen];
-extern uchar v6loopbackmask[IPaddrlen];
extern uchar v6linklocal[IPaddrlen];
extern uchar v6linklocalmask[IPaddrlen];
-extern uchar v6sitelocal[IPaddrlen];
-extern uchar v6sitelocalmask[IPaddrlen];
-extern uchar v6glunicast[IPaddrlen];
extern uchar v6multicast[IPaddrlen];
extern uchar v6multicastmask[IPaddrlen];
extern int v6llpreflen;
-extern int v6slpreflen;
-extern int v6lbpreflen;
extern int v6mcpreflen;
extern int v6snpreflen;
extern int v6aNpreflen;
@@ -184,3 +179,10 @@
extern int v6aLpreflen;
extern int ReTransTimer;
+
+void ipv62smcast(uchar *, uchar *);
+void icmpns(Fs *f, uchar* src, int suni, uchar* targ, int tuni, uchar* mac);
+void icmpna(Fs *f, uchar* src, uchar* dst, uchar* targ, uchar* mac, uchar flags);
+void icmpttlexceeded6(Fs *f, Ipifc *ifc, Block *bp);
+void icmppkttoobig6(Fs *f, Ipifc *ifc, Block *bp);
+void icmphostunr6(Fs *f, Ipifc *ifc, Block *bp, int code, int tome);
diff -u a/os/ip//loopbackmedium.c b/os/ip//loopbackmedium.c
--- a/os/ip//loopbackmedium.c
+++ b/os/ip//loopbackmedium.c
@@ -28,13 +28,12 @@
LB *lb;
lb = smalloc(sizeof(*lb));
+ lb->readp = (void*)-1;
lb->f = ifc->conv->p->f;
- /* TO DO: make queue size a function of kernel memory */
- lb->q = qopen(128*1024, Qmsg, nil, nil);
+ lb->q = qopen(1024*1024, Qmsg, nil, nil);
ifc->arg = lb;
- ifc->mbps = 1000;
- kproc("loopbackread", loopbackread, ifc, 0);
+ kproc("loopbackread", loopbackread, ifc);
}
@@ -43,13 +42,29 @@
{
LB *lb = ifc->arg;
- if(lb->readp)
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(lb->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
+ if(lb->readp != nil)
postnote(lb->readp, 1, "unbind", 0);
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
/* wait for reader to die */
- while(lb->readp != 0)
+ while(lb->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
/* clean up */
qfree(lb->q);
free(lb);
@@ -76,23 +91,14 @@
ifc = a;
lb = ifc->arg;
lb->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- lb->readp = 0;
- pexit("hangup", 1);
- }
- for(;;){
- bp = qbread(lb->q, Maxtu);
- if(bp == nil)
- continue;
- ifc->in++;
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ if(!waserror())
+ while((bp = qbread(lb->q, Maxtu)) != nil){
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
}
+ ifc->in++;
if(ifc->lifc == nil)
freeb(bp);
else
@@ -100,6 +106,8 @@
runlock(ifc);
poperror();
}
+ lb->readp = nil;
+ pexit("hangup", 1);
}
Medium loopbackmedium =
diff -u a/os/ip//netdevmedium.c b/os/ip//netdevmedium.c
--- a/os/ip//netdevmedium.c
+++ b/os/ip//netdevmedium.c
@@ -49,12 +49,13 @@
mchan = namec(argv[2], Aopen, ORDWR, 0);
er = smalloc(sizeof(*er));
+ er->readp = (void*)-1;
er->mchan = mchan;
er->f = ifc->conv->p->f;
ifc->arg = er;
- kproc("netdevread", netdevread, ifc, 0);
+ kproc("netdevread", netdevread, ifc);
}
/*
@@ -65,13 +66,29 @@
{
Netdevrock *er = ifc->arg;
+ while(waserror())
+ ;
+
+ /* wait for reader to start */
+ while(er->readp == (void*)-1)
+ tsleep(&up->sleep, return0, 0, 300);
+
if(er->readp != nil)
postnote(er->readp, 1, "unbind", 0);
- /* wait for readers to die */
+ poperror();
+
+ wunlock(ifc);
+ while(waserror())
+ ;
+
+ /* wait for reader to die */
while(er->readp != nil)
tsleep(&up->sleep, return0, 0, 300);
+ poperror();
+ wlock(ifc);
+
if(er->mchan != nil)
cclose(er->mchan);
@@ -86,8 +103,6 @@
{
Netdevrock *er = ifc->arg;
- if(bp->next)
- bp = concatblock(bp);
if(BLEN(bp) < ifc->mintu)
bp = adjustblock(bp, ifc->mintu);
@@ -104,34 +119,22 @@
Ipifc *ifc;
Block *bp;
Netdevrock *er;
- char *argv[1];
ifc = a;
er = ifc->arg;
er->readp = up; /* hide identity under a rock for unbind */
- if(waserror()){
- er->readp = nil;
- pexit("hangup", 1);
- }
+ if(!waserror())
for(;;){
bp = devtab[er->mchan->type]->bread(er->mchan, ifc->maxtu, 0);
if(bp == nil){
- /*
- * get here if mchan is a pipe and other side hangs up
- * clean up this interface & get out
-ZZZ is this a good idea?
- */
poperror();
- er->readp = nil;
- argv[0] = "unbind";
- if(!waserror())
+ if(!waserror()){
+ static char *argv[] = { "unbind" };
ifc->conv->p->ctl(ifc->conv, argv, 1);
- pexit("hangup", 1);
+ }
+ break;
}
- if(!canrlock(ifc)){
- freeb(bp);
- continue;
- }
+ rlock(ifc);
if(waserror()){
runlock(ifc);
nexterror();
@@ -144,6 +147,8 @@
runlock(ifc);
poperror();
}
+ er->readp = nil;
+ pexit("hangup", 1);
}
void
diff -u a/os/ip//netlog.c b/os/ip//netlog.c
--- a/os/ip//netlog.c
+++ b/os/ip//netlog.c
@@ -7,7 +7,7 @@
#include "../ip/ip.h"
enum {
- Nlog = 4*1024,
+ Nlog = 16*1024,
};
/*
@@ -39,12 +39,12 @@
{ "ppp", Logppp, },
{ "ip", Logip, },
{ "fs", Logfs, },
- { "tcp", Logtcp, },
{ "il", Logil, },
+ { "tcp", Logtcp, },
{ "icmp", Logicmp, },
{ "udp", Logudp, },
{ "compress", Logcompress, },
- { "ilmsg", Logil|Logilmsg, },
+ { "logilmsg", Logilmsg, },
{ "gre", Loggre, },
{ "tcpwin", Logtcp|Logtcpwin, },
{ "tcprxmt", Logtcp|Logtcprxmt, },
@@ -85,8 +85,11 @@
nexterror();
}
if(f->alog->opens == 0){
- if(f->alog->buf == nil)
+ if(f->alog->buf == nil){
f->alog->buf = malloc(Nlog);
+ if(f->alog->buf == nil)
+ error(Enomem);
+ }
f->alog->rptr = f->alog->buf;
f->alog->end = f->alog->buf + Nlog;
}
@@ -202,6 +205,7 @@
else
f->alog->iponlyset = 1;
free(cb);
+ poperror();
return;
default:
@@ -227,7 +231,7 @@
void
netlog(Fs *f, int mask, char *fmt, ...)
{
- char buf[128], *t, *fp;
+ char buf[256], *t, *fp;
int i, n;
va_list arg;
diff -u a/os/ip//nullmedium.c b/os/ip//nullmedium.c
--- a/os/ip//nullmedium.c
+++ b/os/ip//nullmedium.c
@@ -19,8 +19,9 @@
}
static void
-nullbwrite(Ipifc*, Block*, int, uchar*)
+nullbwrite(Ipifc*, Block *bp, int, uchar*)
{
+ freeb(bp);
error("nullbwrite");
}
diff -u a/os/ip//pktmedium.c b/os/ip//pktmedium.c
--- a/os/ip//pktmedium.c
+++ b/os/ip//pktmedium.c
@@ -16,10 +16,10 @@
Medium pktmedium =
{
.name= "pkt",
-.hsize= 14,
-.mintu= 40,
+.hsize= 0,
+.mintu= 0,
.maxtu= 4*1024,
-.maclen= 6,
+.maclen= 0,
.bind= pktbind,
.unbind= pktunbind,
.bwrite= pktbwrite,
@@ -28,12 +28,13 @@
};
/*
- * called to bind an IP ifc to an ethernet device
+ * called to bind an IP ifc to a packet device
* called with ifc wlock'd
*/
static void
-pktbind(Ipifc*, int, char**)
+pktbind(Ipifc*, int argc, char **argv)
{
+ USED(argc, argv);
}
/*
@@ -51,7 +52,6 @@
pktbwrite(Ipifc *ifc, Block *bp, int, uchar*)
{
/* enqueue onto the conversation's rq */
- bp = concatblock(bp);
if(ifc->conv->snoopers.ref > 0)
qpass(ifc->conv->sq, copyblock(bp, BLEN(bp)));
qpass(ifc->conv->rq, bp);
diff -u a/os/ip//rudp.c b/os/ip//rudp.c
--- a/os/ip//rudp.c
+++ b/os/ip//rudp.c
@@ -1,4 +1,5 @@
/*
+ * Reliable User Datagram Protocol, currently only for IPv4.
* This protocol is compatible with UDP's packet format.
* It could be done over UDP if need be.
*/
@@ -25,20 +26,17 @@
enum
{
- UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_PHDRSIZE = 12, /* pseudo header */
+// UDP_HDRSIZE = 20, /* pseudo header + udp header */
UDP_RHDRSIZE = 36, /* pseudo header + udp header + rudp header */
UDP_IPHDR = 8, /* ip header */
IP_UDPPROTO = 254,
- UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
- UDP_USEAD4 = 12,
+ UDP_USEAD7 = 52, /* size of new ipv6 headers struct */
Rudprxms = 200,
Rudptickms = 50,
Rudpmaxxmit = 10,
Maxunacked = 100,
-
};
#define Hangupgen 0xffffffff /* used only in hangup messages */
@@ -205,7 +203,7 @@
qlock(&rpriv->apl);
if(rpriv->ackprocstarted == 0){
sprint(kpname, "#I%drudpack", rudp->f->dev);
- kproc(kpname, relackproc, rudp, 0);
+ kproc(kpname, relackproc, rudp);
rpriv->ackprocstarted = 1;
}
qunlock(&rpriv->apl);
@@ -240,6 +238,7 @@
qlock(ucb);
for(r = ucb->r; r; r = r->next)
m += snprint(state+m, n-m, " %I/%ld", r->addr, UNACKED(r));
+ m += snprint(state+m, n-m, "\n");
qunlock(ucb);
return m;
}
@@ -281,7 +280,7 @@
/* force out any delayed acks */
ucb = (Rudpcb*)c->ptcl;
qlock(ucb);
- for(r = ucb->r; r; r = r->next){
+ for(r = ucb->r; r != nil; r = r->next){
if(r->acksent != r->rcvseq)
relsendack(c, r, 0);
}
@@ -374,27 +373,10 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
-
- bp->rp += 4; /* Igonore local port */
- break;
default:
ipmove(raddr, c->raddr);
ipmove(laddr, c->laddr);
rport = c->rport;
-
break;
}
@@ -402,9 +384,6 @@
/* Make space to fit rudp & ip header */
bp = padblock(bp, UDP_IPHDR+UDP_RHDRSIZE);
- if(bp == nil)
- return;
-
uh = (Udphdr *)(bp->rp);
uh->vihl = IP_VER4;
@@ -417,7 +396,6 @@
uh->frag[1] = 0;
hnputs(uh->udpplen, ptcllen);
switch(ucb->headers){
- case 6:
case 7:
v6tov4(uh->udpdst, raddr);
hnputs(uh->udpdport, rport);
@@ -528,7 +506,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.rudpNoPorts++;
qunlock(rudp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -574,45 +552,32 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
default:
/* connection oriented rudp */
if(ipcmp(c->raddr, IPnoaddr) == 0){
- /* save the src address in the conversation */
+ /* reply with the same ip address (if not broadcast) */
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
+ ipmove(c->laddr, laddr);
ipmove(c->raddr, raddr);
c->rport = rport;
-
- /* reply with the same ip address (if not broadcast) */
- if(ipforme(f, laddr) == Runi)
- ipmove(c->laddr, laddr);
- else
- v4tov6(c->laddr, ifc->lifc->local);
}
break;
}
- if(bp->next)
- bp = concatblock(bp);
if(qfull(c->rq)) {
- netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logrudp, "rudp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
+ } else {
+ qpass(c->rq, concatblock(bp));
}
- else
- qpass(c->rq, bp);
-
qunlock(ucb);
}
@@ -629,16 +594,14 @@
if(n < 1)
return rudpunknown;
- if(strcmp(f[0], "headers++4") == 0){
- ucb->headers = 7;
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 6;
- return nil;
} else if(strcmp(f[0], "hangup") == 0){
if(n < 3)
return "bad syntax";
- parseip(ip, f[1]);
+ if (parseip(ip, f[1]) == -1)
+ return Ebadip;
x = atoi(f[2]);
qlock(ucb);
relforget(c, ip, x, 1);
@@ -645,7 +608,7 @@
qunlock(ucb);
return nil;
} else if(strcmp(f[0], "randdrop") == 0){
- x = 10; /* default is 10% */
+ x = 10; /* default is 10% */
if(n > 1)
x = atoi(f[1]);
if(x > 100 || x < 0)
@@ -672,12 +635,13 @@
pdest = nhgets(h->udpdport);
/* Look for a connection */
- for(p = rudp->conv; *p; p++) {
- s = *p;
+ for(p = rudp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qhangup(s->rq, msg);
qhangup(s->wq, msg);
break;
@@ -701,12 +665,6 @@
upriv->orders);
}
-int
-rudpgc(Proto *rudp)
-{
- return natgc(rudp->ipproto);
-}
-
void
rudpinit(Fs *fs)
{
@@ -725,9 +683,8 @@
rudp->rcv = rudpiput;
rudp->advise = rudpadvise;
rudp->stats = rudpstats;
- rudp->gc = rudpgc;
rudp->ipproto = IP_UDPPROTO;
- rudp->nc = 16;
+ rudp->nc = 32;
rudp->ptclsize = sizeof(Rudpcb);
Fsproto(fs, rudp);
@@ -770,6 +727,8 @@
rudp = (Proto *)a;
+ while(waserror())
+ ;
loop:
tsleep(&up->sleep, return0, 0, Rudptickms);
@@ -989,8 +948,6 @@
Fs *f;
bp = allocb(UDP_IPHDR + UDP_RHDRSIZE);
- if(bp == nil)
- return;
bp->wp += UDP_IPHDR + UDP_RHDRSIZE;
f = c->p->f;
uh = (Udphdr *)(bp->rp);
diff -u a/os/ip//tcp.c b/os/ip//tcp.c
--- a/os/ip//tcp.c
+++ b/os/ip//tcp.c
@@ -41,13 +41,13 @@
EOLOPT = 0,
NOOPOPT = 1,
MSSOPT = 2,
- MSS_LENGTH = 4, /* Mean segment size */
+ MSS_LENGTH = 4, /* Maximum segment size */
WSOPT = 3,
WS_LENGTH = 3, /* Bits to scale window size by */
MSL2 = 10,
MSPTICK = 50, /* Milliseconds per timer tick */
- DEF_MSS = 1460, /* Default mean segment */
- DEF_MSS6 = 1280, /* Default mean segment (min) for v6 */
+ DEF_MSS = 1460, /* Default maximum segment */
+ DEF_MSS6 = 1220, /* Default maximum segment (min) for v6 */
DEF_RTT = 500, /* Default round trip */
DEF_KAT = 120000, /* Default time (ms) between keep alives */
TCP_LISTEN = 0, /* Listen connection */
@@ -81,7 +81,13 @@
NLHT = 256, /* hash table size, must be a power of 2 */
LHTMASK = NLHT-1,
- HaveWS = 1<<8,
+ /*
+ * window is 64kb · 2ⁿ
+ * these factors determine the ultimate bandwidth-delay product.
+ * 64kb · 2⁵ = 2mb, or 2x overkill for 100mbps · 70ms.
+ */
+ Maxqscale = 4, /* maximum queuing scale */
+ Defadvscale = 4, /* default advertisement */
};
/* Must correspond to the enumeration above */
@@ -169,8 +175,9 @@
ulong seq;
ulong ack;
uchar flags;
- ushort ws; /* window scale option (if not zero) */
- ulong wnd;
+ uchar update;
+ ushort ws; /* window scale option */
+ ulong wnd; /* prescaled window*/
ushort urg;
ushort mss; /* max segment size option (if not zero) */
ushort len; /* size of data */
@@ -205,44 +212,53 @@
ulong wnd; /* Tcp send window */
ulong urg; /* Urgent data pointer */
ulong wl2;
- int scale; /* how much to right shift window in xmitted packets */
+ uint scale; /* how much to right shift window in xmitted packets */
/* to implement tahoe and reno TCP */
ulong dupacks; /* number of duplicate acks rcvd */
+ ulong partialack;
int recovery; /* loss recovery flag */
- ulong rxt; /* right window marker for recovery */
+ int retransmit; /* retransmit 1 packet @ una flag */
+ int rto;
+ ulong rxt; /* right window marker for recovery "recover" rfc3782 */
} snd;
struct {
ulong nxt; /* Receive pointer to next uchar slot */
ulong wnd; /* Receive window incoming */
+ ulong wsnt; /* Last wptr sent. important to track for large bdp */
+ ulong wptr;
ulong urg; /* Urgent pointer */
+ ulong ackptr; /* last acked sequence */
int blocked;
- int una; /* unacked data segs */
- int scale; /* how much to left shift window in rcved packets */
+ uint scale; /* how much to left shift window in rcv'd packets */
} rcv;
ulong iss; /* Initial sequence number */
- int sawwsopt; /* true if we saw a wsopt on the incoming SYN */
ulong cwind; /* Congestion window */
- int scale; /* desired snd.scale */
- ushort ssthresh; /* Slow start threshold */
+ ulong abcbytes; /* appropriate byte counting rfc 3465 */
+ uint scale; /* desired snd.scale */
+ ulong ssthresh; /* Slow start threshold */
int resent; /* Bytes just resent */
int irs; /* Initial received squence */
- ushort mss; /* Mean segment size */
+ ushort mss; /* Maximum segment size */
int rerecv; /* Overlap of data rerecevived */
- ulong window; /* Recevive window */
+ ulong window; /* Our receive window (queue) */
+ uint qscale; /* Log2 of our receive window (queue) */
uchar backoff; /* Exponential backoff counter */
int backedoff; /* ms we've backed off for rexmits */
uchar flags; /* State flags */
Reseq *reseq; /* Resequencing queue */
+ int nreseq;
+ int reseqlen;
Tcptimer timer; /* Activity timer */
Tcptimer acktimer; /* Acknowledge timer */
Tcptimer rtt_timer; /* Round trip timer */
Tcptimer katimer; /* keep alive timer */
ulong rttseq; /* Round trip sequence */
- int srtt; /* Shortened round trip */
+ int srtt; /* Smoothed round trip */
int mdev; /* Mean deviation of round trip */
int kacounter; /* count down for keep alive */
uint sndsyntime; /* time syn sent */
ulong time; /* time Finwait2 or Syn_received was sent */
+ ulong timeuna; /* snd.una when time was set */
int nochecksum; /* non-zero means don't send checksums */
int flgcnt; /* number of flags in the sequence (FIN,SEQ) */
@@ -285,11 +301,11 @@
};
int tcp_irtt = DEF_RTT; /* Initial guess at round trip time */
-ushort tcp_mss = DEF_MSS; /* Maximum segment size to be sent */
enum {
/* MIB stats */
MaxConn,
+ Mss,
ActiveOpens,
PassiveOpens,
EstabResets,
@@ -297,6 +313,7 @@
InSegs,
OutSegs,
RetransSegs,
+ RetransSegsSent,
RetransTimeouts,
InErrs,
OutRsts,
@@ -305,14 +322,27 @@
CsumErrs,
HlenErrs,
LenErrs,
+ Resequenced,
OutOfOrder,
+ ReseqBytelim,
+ ReseqPktlim,
+ Delayack,
+ Wopenack,
+ Recovery,
+ RecoveryDone,
+ RecoveryRTO,
+ RecoveryNoSeq,
+ RecoveryCwind,
+ RecoveryPA,
+
Nstats
};
-static char *statnames[] =
+static char *statnames[Nstats] =
{
[MaxConn] "MaxConn",
+[Mss] "MaxSegment",
[ActiveOpens] "ActiveOpens",
[PassiveOpens] "PassiveOpens",
[EstabResets] "EstabResets",
@@ -320,6 +350,7 @@
[InSegs] "InSegs",
[OutSegs] "OutSegs",
[RetransSegs] "RetransSegs",
+[RetransSegsSent] "RetransSegsSent",
[RetransTimeouts] "RetransTimeouts",
[InErrs] "InErrs",
[OutRsts] "OutRsts",
@@ -327,6 +358,19 @@
[HlenErrs] "HlenErrs",
[LenErrs] "LenErrs",
[OutOfOrder] "OutOfOrder",
+[Resequenced] "Resequenced",
+[ReseqBytelim] "ReseqBytelim",
+[ReseqPktlim] "ReseqPktlim",
+[Delayack] "Delayack",
+[Wopenack] "Wopenack",
+
+[Recovery] "Recovery",
+[RecoveryDone] "RecoveryDone",
+[RecoveryRTO] "RecoveryRTO",
+
+[RecoveryNoSeq] "RecoveryNoSeq",
+[RecoveryCwind] "RecoveryCwind",
+[RecoveryPA] "RecoveryPA",
};
typedef struct Tcppriv Tcppriv;
@@ -347,7 +391,7 @@
QLock apl;
int ackprocstarted;
- ulong stats[Nstats];
+ uvlong stats[Nstats];
};
/*
@@ -356,34 +400,34 @@
* of DoS attack.
*
* To avoid stateless Conv hogs, we pick a sequence number at random. If
- * it that number gets acked by the other end, we shut down the connection.
- * Look for tcpporthogedefense in the code.
+ * that number gets acked by the other end, we shut down the connection.
+ * Look for tcpporthogdefense in the code.
*/
int tcpporthogdefense = 0;
-int addreseq(Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
-void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
-void localclose(Conv*, char*);
-void procsyn(Conv*, Tcp*);
-void tcpiput(Proto*, Ipifc*, Block*);
-void tcpoutput(Conv*);
-int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-void tcpstart(Conv*, int);
-void tcptimeout(void*);
-void tcpsndsyn(Conv*, Tcpctl*);
-void tcprcvwin(Conv*);
-void tcpacktimer(void*);
-void tcpkeepalive(void*);
-void tcpsetkacounter(Tcpctl*);
-void tcprxmit(Conv*);
-void tcpsettimer(Tcpctl*);
-void tcpsynackrtt(Conv*);
-void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static int addreseq(Fs*, Tcpctl*, Tcppriv*, Tcp*, Block*, ushort);
+static int dumpreseq(Tcpctl*);
+static void getreseq(Tcpctl*, Tcp*, Block**, ushort*);
+static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
+static void limborexmit(Proto*);
+static void localclose(Conv*, char*);
+static void procsyn(Conv*, Tcp*);
+static void tcpacktimer(void*);
+static void tcpiput(Proto*, Ipifc*, Block*);
+static void tcpkeepalive(void*);
+static void tcpoutput(Conv*);
+static void tcprcvwin(Conv*);
+static void tcprxmit(Conv*);
+static void tcpsetkacounter(Tcpctl*);
+static void tcpsetscale(Conv*, Tcpctl*, ushort, ushort);
+static void tcpsettimer(Tcpctl*);
+static void tcpsndsyn(Conv*, Tcpctl*);
+static void tcpstart(Conv*, int);
+static void tcpsynackrtt(Conv*);
+static void tcptimeout(void*);
+static int tcptrim(Tcpctl*, Tcp*, Block**, ushort*);
-static void limborexmit(Proto*);
-static void limbo(Conv*, uchar*, uchar*, Tcp*, int);
-
-void
+static void
tcpsetstate(Conv *s, uchar newstate)
{
Tcpctl *tcb;
@@ -403,11 +447,6 @@
if(newstate == Established)
tpriv->stats[CurrEstab]++;
- /**
- print( "%d/%d %s->%s CurrEstab=%d\n", s->lport, s->rport,
- tcpstates[oldstate], tcpstates[newstate], tpriv->tstats.tcpCurrEstab );
- **/
-
switch(newstate) {
case Closed:
qclose(s->rq);
@@ -430,7 +469,12 @@
tcpconnect(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdconnect(c, argv, argc);
if(e != nil)
return e;
@@ -447,12 +491,14 @@
s = (Tcpctl*)(c->ptcl);
return snprint(state, n,
- "%s qin %d qout %d srtt %d mdev %d cwin %lud swin %lud>>%d rwin %lud>>%d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
+ "%s qin %d qout %d rq %d.%d srtt %d mdev %d sst %lud cwin %lud swin %lud>>%d rwin %lud>>%d qscale %d timer.start %d timer.count %d rerecv %d katimer.start %d katimer.count %d\n",
tcpstates[s->state],
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0,
- s->srtt, s->mdev,
- s->cwind, s->snd.wnd, s->rcv.scale, s->rcv.wnd, s->snd.scale,
+ s->nreseq, s->reseqlen,
+ s->srtt, s->mdev, s->ssthresh,
+ s->cwind, s->snd.wnd, s->snd.scale, s->rcv.wnd, s->rcv.scale,
+ s->qscale,
s->timer.start, s->timer.count, s->rerecv,
s->katimer.start, s->katimer.count);
}
@@ -470,7 +516,12 @@
tcpannounce(Conv *c, char **argv, int argc)
{
char *e;
+ Tcpctl *tcb;
+ tcb = (Tcpctl*)(c->ptcl);
+ if(tcb->state != Closed)
+ return Econinuse;
+
e = Fsstdannounce(c, argv, argc);
if(e != nil)
return e;
@@ -524,7 +575,7 @@
}
}
-void
+static void
tcpkick(void *x)
{
Conv *s = x;
@@ -546,7 +597,6 @@
/*
* Push data
*/
- tcprcvwin(s);
tcpoutput(s);
break;
default:
@@ -558,7 +608,9 @@
poperror();
}
-void
+static int seq_lt(ulong, ulong);
+
+static void
tcprcvwin(Conv *s) /* Call with tcb locked */
{
int w;
@@ -568,12 +620,20 @@
w = tcb->window - qlen(s->rq);
if(w < 0)
w = 0;
- tcb->rcv.wnd = w;
- if(w == 0)
+ /* RFC 1122 § 4.2.2.17 do not move right edge of window left */
+ if(seq_lt(tcb->rcv.nxt + w, tcb->rcv.wptr))
+ w = tcb->rcv.wptr - tcb->rcv.nxt;
+ if(w != tcb->rcv.wnd)
+ if(w>>tcb->rcv.scale == 0 || tcb->window > 4*tcb->mss && w < tcb->mss/4){
tcb->rcv.blocked = 1;
+ netlog(s->p->f, Logtcp, "tcprcvwin: window %lud qlen %d ws %ud lport %d\n",
+ tcb->window, qlen(s->rq), tcb->rcv.scale, s->lport);
+ }
+ tcb->rcv.wnd = w;
+ tcb->rcv.wptr = tcb->rcv.nxt + w;
}
-void
+static void
tcpacktimer(void *v)
{
Tcpctl *tcb;
@@ -589,7 +649,6 @@
qlock(s);
if(tcb->state != Closed){
tcb->flags |= FORCE;
- tcprcvwin(s);
tcpoutput(s);
}
qunlock(s);
@@ -597,10 +656,52 @@
}
static void
+tcpcongestion(Tcpctl *tcb)
+{
+ ulong inflight;
+
+ inflight = tcb->snd.nxt - tcb->snd.una;
+ if(inflight > tcb->cwind)
+ inflight = tcb->cwind;
+ tcb->ssthresh = inflight / 2;
+ if(tcb->ssthresh < 2*tcb->mss)
+ tcb->ssthresh = 2*tcb->mss;
+}
+
+enum {
+ L = 2, /* aggressive slow start; legal values ∈ (1.0, 2.0) */
+};
+
+static void
+tcpabcincr(Tcpctl *tcb, uint acked)
+{
+ uint limit;
+
+ tcb->abcbytes += acked;
+ if(tcb->cwind < tcb->ssthresh){
+ /* slow start */
+ if(tcb->snd.rto)
+ limit = 1*tcb->mss;
+ else
+ limit = L*tcb->mss;
+ tcb->cwind += MIN(tcb->abcbytes, limit);
+ tcb->abcbytes = 0;
+ }
+ else{
+ tcb->snd.rto = 0;
+ /* avoidance */
+ if(tcb->abcbytes >= tcb->cwind){
+ tcb->abcbytes -= tcb->cwind;
+ tcb->cwind += tcb->mss;
+ }
+ }
+}
+
+static void
tcpcreate(Conv *c)
{
c->rq = qopen(QMAX, Qcoalesce, tcpacktimer, c);
- c->wq = qopen((3*QMAX)/2, Qkick, tcpkick, c);
+ c->wq = qopen(QMAX, Qkick, tcpkick, c);
}
static void
@@ -608,7 +709,7 @@
{
if(newstate != TcptimerON){
if(t->state == TcptimerON){
- // unchain
+ /* unchain */
if(priv->timers == t){
priv->timers = t->next;
if(t->prev != nil)
@@ -622,7 +723,7 @@
}
} else {
if(t->state != TcptimerON){
- // chain
+ /* chain */
if(t->prev != nil || t->next != nil)
panic("timerstate2");
t->prev = nil;
@@ -635,7 +736,7 @@
t->state = newstate;
}
-void
+static void
tcpackproc(void *a)
{
Tcptimer *t, *tp, *timeo;
@@ -646,6 +747,9 @@
tcp = a;
priv = tcp->priv;
+ while(waserror())
+ ;
+
for(;;) {
tsleep(&up->sleep, return0, 0, MSPTICK);
@@ -681,7 +785,7 @@
}
}
-void
+static void
tcpgo(Tcppriv *priv, Tcptimer *t)
{
if(t == nil || t->start == 0)
@@ -693,7 +797,7 @@
qunlock(&priv->tl);
}
-void
+static void
tcphalt(Tcppriv *priv, Tcptimer *t)
{
if(t == nil)
@@ -704,17 +808,16 @@
qunlock(&priv->tl);
}
-int
+static int
backoff(int n)
{
return 1 << n;
}
-void
+static void
localclose(Conv *s, char *reason) /* called with tcb locked */
{
Tcpctl *tcb;
- Reseq *rp,*rp1;
Tcppriv *tpriv;
tpriv = s->p->priv;
@@ -728,12 +831,7 @@
tcphalt(tpriv, &tcb->katimer);
/* Flush reassembly queue; nothing more can arrive */
- for(rp = tcb->reseq; rp != nil; rp = rp1) {
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
+ dumpreseq(tcb);
if(tcb->state == Syn_sent)
Fsconnected(s, reason);
@@ -747,45 +845,46 @@
}
/* mtu (- TCP + IP hdr len) of 1st hop */
-int
-tcpmtu(Proto *tcp, uchar *addr, int version, int *scale)
+static int
+tcpmtu(Route *r, int version, uint *scale)
{
Ipifc *ifc;
int mtu;
- ifc = findipifc(tcp->f, addr, 0);
- switch(version){
- default:
- case V4:
- mtu = DEF_MSS;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP4_PKT + TCP4_HDRSIZE);
- break;
- case V6:
- mtu = DEF_MSS6;
- if(ifc != nil)
- mtu = ifc->maxtu - ifc->m->hsize - (TCP6_PKT + TCP6_HDRSIZE);
- break;
- }
- if(ifc != nil){
- if(ifc->mbps > 100)
- *scale = HaveWS | 3;
- else if(ifc->mbps > 10)
- *scale = HaveWS | 1;
- else
- *scale = HaveWS | 0;
- } else
- *scale = HaveWS | 0;
+ /*
+ * set the ws. it doesn't commit us to anything.
+ * ws is the ultimate limit to the bandwidth-delay product.
+ */
+ *scale = Defadvscale;
- return mtu;
+ /*
+ * currently we do not implement path MTU discovery
+ * so use interface MTU *only* if directly reachable
+ * or when we use V4 which allows routers to fragment.
+ * otherwise, we use the default MSS which assumes a
+ * safe minimum MTU of 1280 bytes for V6.
+ */
+ if(r != nil && (ifc = r->ifc) != nil){
+ mtu = ifc->maxtu - ifc->m->hsize;
+ if(version == V4)
+ return mtu - (TCP4_PKT + TCP4_HDRSIZE);
+ mtu -= TCP6_PKT + TCP6_HDRSIZE;
+ if((r->type & (Rifc|Runi)) != 0 || mtu <= DEF_MSS6)
+ return mtu;
+ }
+ if(version == V6)
+ return DEF_MSS6;
+ else
+ return DEF_MSS;
}
-void
+static void
inittcpctl(Conv *s, int mode)
{
Tcpctl *tcb;
Tcp4hdr* h4;
Tcp6hdr* h6;
+ Tcppriv *tpriv;
int mss;
tcb = (Tcpctl*)s->ptcl;
@@ -792,7 +891,7 @@
memset(tcb, 0, sizeof(Tcpctl));
- tcb->ssthresh = 65535;
+ tcb->ssthresh = QMAX; /* reset by tcpsetscale() */
tcb->srtt = tcp_irtt<<LOGAGAIN;
tcb->mdev = 0;
@@ -841,19 +940,18 @@
}
tcb->mss = tcb->cwind = mss;
+ tcb->abcbytes = 0;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
/* default is no window scaling */
- tcb->window = QMAX;
- tcb->rcv.wnd = QMAX;
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- qsetlimit(s->rq, QMAX);
+ tcpsetscale(s, tcb, 0, 0);
}
/*
* called with s qlocked
*/
-void
+static void
tcpstart(Conv *s, int mode)
{
Tcpctl *tcb;
@@ -865,8 +963,8 @@
if(tpriv->ackprocstarted == 0){
qlock(&tpriv->apl);
if(tpriv->ackprocstarted == 0){
- sprint(kpname, "#I%dtcpack", s->p->f->dev);
- kproc(kpname, tcpackproc, s->p, 0);
+ snprint(kpname, sizeof(kpname), "#I%dtcpack", s->p->f->dev);
+ kproc(kpname, tcpackproc, s->p);
tpriv->ackprocstarted = 1;
}
qunlock(&tpriv->apl);
@@ -895,28 +993,28 @@
}
static char*
-tcpflag(ushort flag)
+tcpflag(char *buf, char *e, ushort flag)
{
- static char buf[128];
+ char *p;
- sprint(buf, "%d", flag>>10); /* Head len */
+ p = seprint(buf, e, "%d", flag>>10); /* Head len */
if(flag & URG)
- strcat(buf, " URG");
+ p = seprint(p, e, " URG");
if(flag & ACK)
- strcat(buf, " ACK");
+ p = seprint(p, e, " ACK");
if(flag & PSH)
- strcat(buf, " PSH");
+ p = seprint(p, e, " PSH");
if(flag & RST)
- strcat(buf, " RST");
+ p = seprint(p, e, " RST");
if(flag & SYN)
- strcat(buf, " SYN");
+ p = seprint(p, e, " SYN");
if(flag & FIN)
- strcat(buf, " FIN");
-
+ p = seprint(p, e, " FIN");
+ USED(p);
return buf;
}
-Block *
+static Block*
htontcp6(Tcp *tcph, Block *data, Tcp6hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -940,14 +1038,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP6_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP6_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP6_PKT;
}
@@ -1000,7 +1094,7 @@
return data;
}
-Block *
+static Block*
htontcp4(Tcp *tcph, Block *data, Tcp4hdr *ph, Tcpctl *tcb)
{
int dlen;
@@ -1013,7 +1107,7 @@
if(tcph->flags & SYN){
if(tcph->mss)
hdrlen += MSS_LENGTH;
- if(tcph->ws)
+ if(1)
hdrlen += WS_LENGTH;
optpad = hdrlen & 3;
if(optpad)
@@ -1024,14 +1118,10 @@
if(data) {
dlen = blocklen(data);
data = padblock(data, hdrlen + TCP4_PKT);
- if(data == nil)
- return nil;
}
else {
dlen = 0;
data = allocb(hdrlen + TCP4_PKT + 64); /* the 64 pad is to meet mintu's */
- if(data == nil)
- return nil;
data->wp += hdrlen + TCP4_PKT;
}
@@ -1055,7 +1145,8 @@
hnputs(opt, tcph->mss);
opt += 2;
}
- if(tcph->ws != 0){
+ /* always offer. rfc1323 §2.2 */
+ if(1){
*opt++ = WSOPT;
*opt++ = WS_LENGTH;
*opt++ = tcph->ws;
@@ -1074,7 +1165,7 @@
return data;
}
-int
+static int
ntohtcp6(Tcp *tcph, Block **bpp)
{
Tcp6hdr *h;
@@ -1103,6 +1194,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->ploadlen) - hdrlen;
*bpp = pullupblock(*bpp, hdrlen+TCP6_PKT);
@@ -1127,7 +1219,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1136,7 +1228,7 @@
return hdrlen;
}
-int
+static int
ntohtcp4(Tcp *tcph, Block **bpp)
{
Tcp4hdr *h;
@@ -1166,6 +1258,7 @@
tcph->urg = nhgets(h->tcpurg);
tcph->mss = 0;
tcph->ws = 0;
+ tcph->update = 0;
tcph->len = nhgets(h->length) - (hdrlen + TCP4_PKT);
*bpp = pullupblock(*bpp, hdrlen+TCP4_PKT);
@@ -1190,7 +1283,7 @@
break;
case WSOPT:
if(optlen == WS_LENGTH && *(optr+2) <= 14)
- tcph->ws = HaveWS | *(optr+2);
+ tcph->ws = *(optr+2);
break;
}
n -= optlen;
@@ -1200,16 +1293,19 @@
}
/*
- * For outgiing calls, generate an initial sequence
+ * For outgoing calls, generate an initial sequence
* number and put a SYN on the send queue
*/
-void
+static void
tcpsndsyn(Conv *s, Tcpctl *tcb)
{
+ Tcppriv *tpriv;
+
tcb->iss = (nrand(1<<16)<<16)|nrand(1<<16);
tcb->rttseq = tcb->iss;
tcb->snd.wl2 = tcb->iss;
tcb->snd.una = tcb->iss;
+ tcb->snd.rxt = tcb->iss;
tcb->snd.ptr = tcb->rttseq;
tcb->snd.nxt = tcb->rttseq;
tcb->flgcnt++;
@@ -1217,7 +1313,9 @@
tcb->sndsyntime = NOW;
/* set desired mss and scale */
- tcb->mss = tcpmtu(s->p, s->laddr, s->ipversion, &tcb->scale);
+ tcb->mss = tcpmtu(v6lookup(s->p->f, s->raddr, s->laddr, s), s->ipversion, &tcb->scale);
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
}
void
@@ -1229,7 +1327,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
- netlog(tcp->f, Logtcp, "sndrst: %s", reason);
+ netlog(tcp->f, Logtcp, "sndrst: %s\n", reason);
tpriv = tcp->priv;
@@ -1307,7 +1405,7 @@
* send a reset to the remote side and close the conversation
* called with s qlocked
*/
-char*
+static char*
tcphangup(Conv *s)
{
Tcp seg;
@@ -1322,7 +1420,7 @@
memset(&seg, 0, sizeof seg);
seg.flags = RST | ACK;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
seg.seq = tcb->snd.ptr;
seg.wnd = 0;
seg.urg = 0;
@@ -1353,7 +1451,7 @@
/*
* (re)send a SYN ACK
*/
-int
+static int
sndsynack(Proto *tcp, Limbo *lp)
{
Block *hbp;
@@ -1360,7 +1458,7 @@
Tcp4hdr ph4;
Tcp6hdr ph6;
Tcp seg;
- int scale;
+ uint scale;
/* make pseudo header */
switch(lp->version) {
@@ -1388,11 +1486,12 @@
panic("sndrst: version %d", lp->version);
}
+ memset(&seg, 0, sizeof seg);
seg.seq = lp->iss;
seg.ack = lp->irs+1;
seg.flags = SYN|ACK;
seg.urg = 0;
- seg.mss = tcpmtu(tcp, lp->laddr, lp->version, &scale);
+ seg.mss = tcpmtu(v6lookup(tcp->f, lp->raddr, lp->laddr, nil), lp->version, &scale);
seg.wnd = QMAX;
/* if the other side set scale, we should too */
@@ -1570,6 +1669,18 @@
}
}
+static void
+initialwindow(Tcpctl *tcb)
+{
+ /* RFC 3390 initial window */
+ if(tcb->mss < 1095)
+ tcb->cwind = 4*tcb->mss;
+ else if(tcb->mss < 2190)
+ tcb->cwind = 4380;
+ else
+ tcb->cwind = 2*tcb->mss;
+}
+
/*
* come here when we finally get an ACK to our SYN-ACK.
* lookup call in limbo. if found, create a new conversation
@@ -1596,7 +1707,7 @@
/* find a call in limbo */
h = hashipa(src, segp->source);
for(l = &tpriv->lht[h]; (lp = *l) != nil; l = &lp->next){
- netlog(s->p->f, Logtcp, "tcpincoming s %I,%ux/%I,%ux d %I,%ux/%I,%ux v %d/%d",
+ netlog(s->p->f, Logtcp, "tcpincoming s %I!%ud/%I!%ud d %I!%ud/%I!%ud v %d/%d\n",
src, segp->source, lp->raddr, lp->rport,
dst, segp->dest, lp->laddr, lp->lport,
version, lp->version
@@ -1611,7 +1722,7 @@
/* we're assuming no data with the initial SYN */
if(segp->seq != lp->irs+1 || segp->ack != lp->iss+1){
- netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux",
+ netlog(s->p->f, Logtcp, "tcpincoming s %lux/%lux a %lux %lux\n",
segp->seq, lp->irs+1, segp->ack, lp->iss+1);
lp = nil;
} else {
@@ -1641,6 +1752,8 @@
tcb->irs = lp->irs;
tcb->rcv.nxt = tcb->irs+1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->iss = lp->iss;
@@ -1649,19 +1762,24 @@
tcb->snd.una = tcb->iss+1;
tcb->snd.ptr = tcb->iss+1;
tcb->snd.nxt = tcb->iss+1;
+ tcb->snd.rxt = tcb->iss+1;
tcb->flgcnt = 0;
tcb->flags |= SYNACK;
+ /* set desired mss and scale */
+ tcb->mss = tcpmtu(v6lookup(s->p->f, src, dst, s), version, &tcb->scale);
+
/* our sending max segment size cannot be bigger than what he asked for */
if(lp->mss != 0 && lp->mss < tcb->mss)
tcb->mss = lp->mss;
+ tpriv->stats[Mss] = tcb->mss;
/* window scaling */
tcpsetscale(new, tcb, lp->rcvscale, lp->sndscale);
- /* the congestion window always starts out as a single segment */
+ /* congestion window */
tcb->snd.wnd = segp->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
/* set initial round trip time */
tcb->sndsyntime = lp->lastsend+lp->rexmits*SYNACK_RXTIMER;
@@ -1700,7 +1818,7 @@
return new;
}
-int
+static int
seq_within(ulong x, ulong low, ulong high)
{
if(low <= high){
@@ -1714,25 +1832,25 @@
return 0;
}
-int
+static int
seq_lt(ulong x, ulong y)
{
return (int)(x-y) < 0;
}
-int
+static int
seq_le(ulong x, ulong y)
{
return (int)(x-y) <= 0;
}
-int
+static int
seq_gt(ulong x, ulong y)
{
return (int)(x-y) > 0;
}
-int
+static int
seq_ge(ulong x, ulong y)
{
return (int)(x-y) >= 0;
@@ -1742,7 +1860,7 @@
* use the time between the first SYN and it's ack as the
* initial round trip time
*/
-void
+static void
tcpsynackrtt(Conv *s)
{
Tcpctl *tcb;
@@ -1760,46 +1878,59 @@
tcphalt(tpriv, &tcb->rtt_timer);
}
-void
+static void
update(Conv *s, Tcp *seg)
{
int rtt, delta;
Tcpctl *tcb;
ulong acked;
- ulong expand;
Tcppriv *tpriv;
+ if(seg->update)
+ return;
+ seg->update = 1;
+
tpriv = s->p->priv;
tcb = (Tcpctl*)s->ptcl;
- /* if everything has been acked, force output(?) */
- if(seq_gt(seg->ack, tcb->snd.nxt)) {
- tcb->flags |= FORCE;
- return;
+ /* catch zero-window updates, update window & recover */
+ if(tcb->snd.wnd == 0 && seg->wnd > 0)
+ if(seq_lt(seg->ack, tcb->snd.ptr)){
+ netlog(s->p->f, Logtcp, "tcp: zwu ack %lud una %lud ptr %lud win %lud\n",
+ seg->ack, tcb->snd.una, tcb->snd.ptr, seg->wnd);
+ tcb->snd.wnd = seg->wnd;
+ goto recovery;
}
- /* added by Dong Lin for fast retransmission */
- if(seg->ack == tcb->snd.una
- && tcb->snd.una != tcb->snd.nxt
- && seg->len == 0
- && seg->wnd == tcb->snd.wnd) {
-
- /* this is a pure ack w/o window update */
- netlog(s->p->f, Logtcprxmt, "dupack %lud ack %lud sndwnd %d advwin %d\n",
- tcb->snd.dupacks, seg->ack, tcb->snd.wnd, seg->wnd);
-
- if(++tcb->snd.dupacks == TCPREXMTTHRESH) {
- /*
- * tahoe tcp rxt the packet, half sshthresh,
- * and set cwnd to one packet
- */
+ /* newreno fast retransmit */
+ if(seg->ack == tcb->snd.una)
+ if(tcb->snd.una != tcb->snd.nxt)
+ if(++tcb->snd.dupacks == 3){
+recovery:
+ if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
+ }else if(seq_le(tcb->snd.rxt, seg->ack)){
+ tpriv->stats[Recovery]++;
+ tcb->abcbytes = 0;
tcb->snd.recovery = 1;
+ tcb->snd.partialack = 0;
tcb->snd.rxt = tcb->snd.nxt;
- netlog(s->p->f, Logtcprxmt, "fast rxt %lud, nxt %lud\n", tcb->snd.una, tcb->snd.nxt);
+ tcpcongestion(tcb);
+ tcb->cwind = tcb->ssthresh + 3*tcb->mss;
+ netlog(s->p->f, Logtcpwin, "recovery inflate %ld ss %ld @%lud\n",
+ tcb->cwind, tcb->ssthresh, tcb->snd.rxt);
tcprxmit(s);
- } else {
- /* do reno tcp here. */
+ }else{
+ tpriv->stats[RecoveryNoSeq]++;
+ netlog(s->p->f, Logtcpwin, "!recov %lud not ≤ %lud %ld\n",
+ tcb->snd.rxt, seg->ack, tcb->snd.rxt - seg->ack);
+ /* do not enter fast retransmit */
+ /* do not change ssthresh */
}
+ }else if(tcb->snd.recovery){
+ tpriv->stats[RecoveryCwind]++;
+ tcb->cwind += tcb->mss;
}
/*
@@ -1807,6 +1938,9 @@
*/
if(seq_gt(seg->ack, tcb->snd.wl2)
|| (tcb->snd.wl2 == seg->ack && seg->wnd > tcb->snd.wnd)){
+ /* clear dupack if we advance wl2 */
+ if(tcb->snd.wl2 != seg->ack)
+ tcb->snd.dupacks = 0;
tcb->snd.wnd = seg->wnd;
tcb->snd.wl2 = seg->ack;
}
@@ -1816,22 +1950,11 @@
* don't let us hangup if sending into a closed window and
* we're still getting acks
*/
- if((tcb->flags&RETRAN) && tcb->snd.wnd == 0){
+ if((tcb->flags&RETRAN) && tcb->snd.wnd == 0)
tcb->backedoff = MAXBACKMS/4;
- }
return;
}
- /*
- * any positive ack turns off fast rxt,
- * (should we do new-reno on partial acks?)
- */
- if(!tcb->snd.recovery || seq_ge(seg->ack, tcb->snd.rxt)) {
- tcb->snd.dupacks = 0;
- tcb->snd.recovery = 0;
- } else
- netlog(s->p->f, Logtcp, "rxt next %lud, cwin %ud\n", seg->ack, tcb->cwind);
-
/* Compute the new send window size */
acked = seg->ack - tcb->snd.una;
@@ -1843,24 +1966,41 @@
goto done;
}
- /* slow start as long as we're not recovering from lost packets */
- if(tcb->cwind < tcb->snd.wnd && !tcb->snd.recovery) {
- if(tcb->cwind < tcb->ssthresh) {
- expand = tcb->mss;
- if(acked < expand)
- expand = acked;
+ /*
+ * congestion control
+ */
+ if(tcb->snd.recovery){
+ if(seq_ge(seg->ack, tcb->snd.rxt)){
+ /* recovery finished; deflate window */
+ tpriv->stats[RecoveryDone]++;
+ tcb->snd.dupacks = 0;
+ tcb->snd.recovery = 0;
+ tcb->cwind = (tcb->snd.nxt - tcb->snd.una) + tcb->mss;
+ if(tcb->ssthresh < tcb->cwind)
+ tcb->cwind = tcb->ssthresh;
+ netlog(s->p->f, Logtcpwin, "recovery deflate %ld %ld\n",
+ tcb->cwind, tcb->ssthresh);
+ } else {
+ /* partial ack; we lost more than one segment */
+ tpriv->stats[RecoveryPA]++;
+ if(tcb->cwind > acked)
+ tcb->cwind -= acked;
+ else{
+ netlog(s->p->f, Logtcpwin, "partial ack neg\n");
+ tcb->cwind = tcb->mss;
+ }
+ netlog(s->p->f, Logtcpwin, "partial ack %ld left %ld cwind %ld\n",
+ acked, tcb->snd.rxt - seg->ack, tcb->cwind);
+
+ if(acked >= tcb->mss)
+ tcb->cwind += tcb->mss;
+ tcb->snd.partialack++;
}
- else
- expand = ((int)tcb->mss * tcb->mss) / tcb->cwind;
+ } else
+ tcpabcincr(tcb, acked);
- if(tcb->cwind + expand < tcb->cwind)
- expand = tcb->snd.wnd - tcb->cwind;
- if(tcb->cwind + expand > tcb->snd.wnd)
- expand = tcb->snd.wnd - tcb->cwind;
- tcb->cwind += expand;
- }
-
/* Adjust the timers according to the round trip time */
+ /* todo: fix sloppy treatment of overflow cases here. */
if(tcb->rtt_timer.state == TcptimerON && seq_ge(seg->ack, tcb->rttseq)) {
tcphalt(tpriv, &tcb->rtt_timer);
if((tcb->flags&RETRAN) == 0) {
@@ -1891,13 +2031,23 @@
done:
if(qdiscard(s->wq, acked) < acked)
tcb->flgcnt--;
-
tcb->snd.una = seg->ack;
+
+ /* newreno fast recovery */
+ if(tcb->snd.recovery)
+ tcprxmit(s);
+
if(seq_gt(seg->ack, tcb->snd.urg))
tcb->snd.urg = seg->ack;
- if(tcb->snd.una != tcb->snd.nxt)
- tcpgo(tpriv, &tcb->timer);
+ if(tcb->snd.una != tcb->snd.nxt){
+ /* “impatient” variant */
+ if(!tcb->snd.recovery || tcb->snd.partialack == 1){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
+ tcpgo(tpriv, &tcb->timer);
+ }
+ }
else
tcphalt(tpriv, &tcb->timer);
@@ -1904,12 +2054,13 @@
if(seq_lt(tcb->snd.ptr, tcb->snd.una))
tcb->snd.ptr = tcb->snd.una;
- tcb->flags &= ~RETRAN;
+ if(!tcb->snd.recovery)
+ tcb->flags &= ~RETRAN;
tcb->backoff = 0;
tcb->backedoff = 0;
}
-void
+static void
tcpiput(Proto *tcp, Ipifc*, Block *bp)
{
Tcp seg;
@@ -1917,7 +2068,7 @@
Tcp6hdr *h6;
int hdrlen;
Tcpctl *tcb;
- ushort length;
+ ushort length, csum;
uchar source[IPaddrlen], dest[IPaddrlen];
Conv *s;
Fs *f;
@@ -1980,10 +2131,12 @@
h6->ttl = proto;
hnputl(h6->vcf, length);
if((h6->tcpcksum[0] || h6->tcpcksum[1]) &&
- ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) {
+ (csum = ptclcsum(bp, TCP6_IPLEN, length+TCP6_PHDRSIZE)) != 0) {
tpriv->stats[CsumErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp proto cksum\n");
+ netlog(f, Logtcp,
+ "bad tcpv6 proto cksum: got %#ux, computed %#ux\n",
+ h6->tcpcksum[0]<<8 | h6->tcpcksum[1], csum);
freeblist(bp);
return;
}
@@ -1995,7 +2148,7 @@
if(hdrlen < 0){
tpriv->stats[HlenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "bad tcp hdr len\n");
+ netlog(f, Logtcp, "bad tcpv6 hdr len\n");
return;
}
@@ -2005,7 +2158,7 @@
if(bp == nil){
tpriv->stats[LenErrs]++;
tpriv->stats[InErrs]++;
- netlog(f, Logtcp, "tcp len < 0 after trim\n");
+ netlog(f, Logtcp, "tcpv6 len < 0 after trim\n");
return;
}
}
@@ -2016,7 +2169,8 @@
/* Look for a matching conversation */
s = iphtlook(&tpriv->ht, source, seg.source, dest, seg.dest);
if(s == nil){
- netlog(f, Logtcp, "iphtlook failed");
+ netlog(f, Logtcp, "iphtlook(src %I!%d, dst %I!%d) failed\n",
+ source, seg.source, dest, seg.dest);
reset:
qunlock(tcp);
sndrst(tcp, source, dest, length, &seg, version, "no conversation");
@@ -2136,8 +2290,12 @@
}
/* Cut the data to fit the receive window */
+ tcprcvwin(s);
if(tcptrim(tcb, &seg, &bp, &length) == -1) {
- netlog(f, Logtcp, "tcp len < 0, %lud %d\n", seg.seq, length);
+ if(seg.seq+1 != tcb->rcv.nxt || length != 1)
+ netlog(f, Logtcp, "tcp: trim: !inwind: seq %lud-%lud win %lud-%lud l %d from %I\n",
+ seg.seq, seg.seq + length - 1,
+ tcb->rcv.nxt, tcb->rcv.nxt + tcb->rcv.wnd-1, length, s->raddr);
update(s, &seg);
if(qlen(s->wq)+tcb->flgcnt == 0 && tcb->state == Closing) {
tcphalt(tpriv, &tcb->rtt_timer);
@@ -2168,12 +2326,15 @@
if(seg.seq != tcb->rcv.nxt)
if(length != 0 || (seg.flags & (SYN|FIN))) {
update(s, &seg);
- if(addreseq(tcb, tpriv, &seg, bp, length) < 0)
+ if(addreseq(f, tcb, tpriv, &seg, bp, length) < 0)
print("reseq %I.%d -> %I.%d\n", s->raddr, s->rport, s->laddr, s->lport);
- tcb->flags |= FORCE;
+ tcb->flags |= FORCE; /* force duplicate ack; RFC 5681 §3.2 */
goto output;
}
+ if(tcb->nreseq > 0)
+ tcb->flags |= FORCE; /* filled hole in sequence space; RFC 5681 §3.2 */
+
/*
* keep looping till we've processed this packet plus any
* adjacent packets in the resequence queue
@@ -2238,7 +2399,8 @@
goto raise;
}
case Time_wait:
- tcb->flags |= FORCE;
+ if(seg.flags & FIN)
+ tcb->flags |= FORCE;
if(tcb->timer.state != TcptimerON)
tcpgo(tpriv, &tcb->timer);
}
@@ -2272,34 +2434,12 @@
* receive queue
*/
if(bp) {
- bp = packblock(bp);
- if(bp == nil)
- panic("tcp packblock");
- qpassnolim(s->rq, bp);
+ qpassnolim(s->rq, packblock(bp));
bp = nil;
-
- /*
- * Force an ack every 2 data messages. This is
- * a hack for rob to make his home system run
- * faster.
- *
- * this also keeps the standard TCP congestion
- * control working since it needs an ack every
- * 2 max segs worth. This is not quite that,
- * but under a real stream is equivalent since
- * every packet has a max seg in it.
- */
- if(++(tcb->rcv.una) >= 2)
- tcb->flags |= FORCE;
}
tcb->rcv.nxt += length;
/*
- * update our rcv window
- */
- tcprcvwin(s);
-
- /*
* turn on the acktimer if there's something
* to ack
*/
@@ -2373,8 +2513,11 @@
getreseq(tcb, &seg, &bp, &length);
- if(tcptrim(tcb, &seg, &bp, &length) == 0)
+ tcprcvwin(s);
+ if(tcptrim(tcb, &seg, &bp, &length) == 0){
+ tcb->flags |= FORCE;
break;
+ }
}
}
output:
@@ -2394,15 +2537,15 @@
* the lock to ipoput the packet so some care has to be
* taken by callers.
*/
-void
+static void
tcpoutput(Conv *s)
{
Tcp seg;
- int msgs;
+ uint msgs;
Tcpctl *tcb;
Block *hbp, *bp;
- int sndcnt, n;
- ulong ssize, dsize, usable, sent;
+ int sndcnt;
+ ulong ssize, dsize, sent;
Fs *f;
Tcppriv *tpriv;
uchar version;
@@ -2411,9 +2554,26 @@
tpriv = s->p->priv;
version = s->ipversion;
- for(msgs = 0; msgs < 100; msgs++) {
- tcb = (Tcpctl*)s->ptcl;
+ tcb = (Tcpctl*)s->ptcl;
+ /* force ack every 2*mss */
+ if((tcb->flags & FORCE) == 0)
+ if(tcb->rcv.nxt - tcb->rcv.ackptr >= 2*tcb->mss){
+ tpriv->stats[Delayack]++;
+ tcb->flags |= FORCE;
+ }
+
+ /* force ack if window opening */
+ if(0)
+ if((tcb->flags & FORCE) == 0){
+ tcprcvwin(s);
+ if((int)(tcb->rcv.wptr - tcb->rcv.wsnt) >= 2*tcb->mss){
+ tpriv->stats[Wopenack]++;
+ tcb->flags |= FORCE;
+ }
+ }
+
+ for(msgs = 0; msgs < 100; msgs++) {
switch(tcb->state) {
case Listen:
case Closed:
@@ -2421,7 +2581,12 @@
return;
}
+ /* Don't send anything else until our SYN has been acked */
+ if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
+ break;
+
/* force an ack when a window has opened up */
+ tcprcvwin(s);
if(tcb->rcv.blocked && tcb->rcv.wnd > 0){
tcb->rcv.blocked = 0;
tcb->flags |= FORCE;
@@ -2429,54 +2594,57 @@
sndcnt = qlen(s->wq)+tcb->flgcnt;
sent = tcb->snd.ptr - tcb->snd.una;
-
- /* Don't send anything else until our SYN has been acked */
- if(tcb->snd.ptr != tcb->iss && (tcb->flags & SYNACK) == 0)
- break;
-
- /* Compute usable segment based on offered window and limit
- * window probes to one
- */
+ ssize = sndcnt;
if(tcb->snd.wnd == 0){
- if(sent != 0) {
- if((tcb->flags&FORCE) == 0)
- break;
-// tcb->snd.ptr = tcb->snd.una;
+ /* zero window probe */
+ if(sent > 0)
+ if(!(tcb->flags & FORCE))
+ break; /* already probing, rto re-probes */
+ if(ssize < sent)
+ ssize = 0;
+ else{
+ ssize -= sent;
+ if(ssize > 0)
+ ssize = 1;
}
- usable = 1;
+ } else {
+ /* calculate usable segment size */
+ if(ssize > tcb->cwind)
+ ssize = tcb->cwind;
+ if(ssize > tcb->snd.wnd)
+ ssize = tcb->snd.wnd;
+
+ if(ssize < sent)
+ ssize = 0;
+ else {
+ ssize -= sent;
+ if(ssize > tcb->mss)
+ ssize = tcb->mss;
+ }
}
- else {
- usable = tcb->cwind;
- if(tcb->snd.wnd < usable)
- usable = tcb->snd.wnd;
- usable -= sent;
- }
- ssize = sndcnt-sent;
- if(ssize && usable < 2)
- netlog(s->p->f, Logtcp, "throttled snd.wnd %lud cwind %lud\n",
- tcb->snd.wnd, tcb->cwind);
- if(usable < ssize)
- ssize = usable;
- if(tcb->mss < ssize)
- ssize = tcb->mss;
+
dsize = ssize;
seg.urg = 0;
- if(ssize == 0)
- if((tcb->flags&FORCE) == 0)
- break;
+ if(!(tcb->flags & FORCE)){
+ if(ssize == 0)
+ break;
+ if(ssize < tcb->mss)
+ if(tcb->snd.nxt == tcb->snd.ptr)
+ if(sent > TCPREXMTTHRESH*tcb->mss)
+ break;
+ }
tcb->flags &= ~FORCE;
- tcprcvwin(s);
/* By default we will generate an ack */
tcphalt(tpriv, &tcb->acktimer);
- tcb->rcv.una = 0;
seg.source = s->lport;
seg.dest = s->rport;
seg.flags = ACK;
seg.mss = 0;
seg.ws = 0;
+ seg.update = 0;
switch(tcb->state){
case Syn_sent:
seg.flags = 0;
@@ -2516,20 +2684,9 @@
}
}
- if(sent+dsize == sndcnt)
+ if(sent+dsize == sndcnt && dsize)
seg.flags |= PSH;
- /* keep track of balance of resent data */
- if(seq_lt(tcb->snd.ptr, tcb->snd.nxt)) {
- n = tcb->snd.nxt - tcb->snd.ptr;
- if(ssize < n)
- n = ssize;
- tcb->resent += n;
- netlog(f, Logtcp, "rexmit: %I.%d -> %I.%d ptr %lux nxt %lux\n",
- s->raddr, s->rport, s->laddr, s->lport, tcb->snd.ptr, tcb->snd.nxt);
- tpriv->stats[RetransSegs]++;
- }
-
tcb->snd.ptr += ssize;
/* Pull up the send pointer so we can accept acks
@@ -2565,13 +2722,17 @@
* expect acknowledges
*/
if(ssize != 0){
- if(tcb->timer.state != TcptimerON)
+ if(tcb->timer.state != TcptimerON){
+ tcb->time = NOW;
+ tcb->timeuna = tcb->snd.una;
tcpgo(tpriv, &tcb->timer);
+ }
/* If round trip timer isn't running, start it.
* measure the longest packet only in case the
* transmission time dominates RTT
*/
+ if(tcb->snd.retransmit == 0)
if(tcb->rtt_timer.state != TcptimerON)
if(ssize == tcb->mss) {
tcpgo(tpriv, &tcb->rtt_timer);
@@ -2580,6 +2741,10 @@
}
tpriv->stats[OutSegs]++;
+ if(tcb->snd.retransmit)
+ tpriv->stats[RetransSegsSent]++;
+ tcb->rcv.ackptr = seg.ack;
+ tcb->rcv.wsnt = tcb->rcv.wptr;
/* put off the next keep alive */
tcpgo(tpriv, &tcb->katimer);
@@ -2600,9 +2765,8 @@
default:
panic("tcpoutput2: version %d", version);
}
- if((msgs%4) == 1){
+ if((msgs%4) == 3){
qunlock(s);
- sched();
qlock(s);
}
}
@@ -2611,7 +2775,7 @@
/*
* the BSD convention (hack?) for keep alives. resend last uchar acked.
*/
-void
+static void
tcpsendka(Conv *s)
{
Tcp seg;
@@ -2621,6 +2785,7 @@
tcb = (Tcpctl*)s->ptcl;
dbp = nil;
+ memset(&seg, 0, sizeof seg);
seg.urg = 0;
seg.source = s->lport;
seg.dest = s->rport;
@@ -2632,7 +2797,8 @@
else
seg.seq = tcb->snd.una-1;
seg.ack = tcb->rcv.nxt;
- tcb->rcv.una = 0;
+ tcb->rcv.ackptr = seg.ack;
+ tcprcvwin(s);
seg.wnd = tcb->rcv.wnd;
if(tcb->state == Finwait2){
seg.flags |= FIN;
@@ -2666,7 +2832,7 @@
/*
* set connection to time out after 12 minutes
*/
-void
+static void
tcpsetkacounter(Tcpctl *tcb)
{
tcb->kacounter = (12 * 60 * 1000) / (tcb->katimer.start*MSPTICK);
@@ -2678,7 +2844,7 @@
* if we've timed out, close the connection
* otherwise, send a keepalive and restart the timer
*/
-void
+static void
tcpkeepalive(void *v)
{
Tcpctl *tcb;
@@ -2706,7 +2872,7 @@
/*
* start keepalive timer
*/
-char*
+static char*
tcpstartka(Conv *s, char **f, int n)
{
Tcpctl *tcb;
@@ -2729,7 +2895,7 @@
/*
* turn checksums on/off
*/
-char*
+static char*
tcpsetchecksum(Conv *s, char **f, int)
{
Tcpctl *tcb;
@@ -2740,30 +2906,38 @@
return nil;
}
-void
+/*
+ * retransmit (at most) one segment at snd.una.
+ * preserve cwind & snd.ptr
+ */
+static void
tcprxmit(Conv *s)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
+ ulong tcwind, tptr;
tcb = (Tcpctl*)s->ptcl;
-
tcb->flags |= RETRAN|FORCE;
- tcb->snd.ptr = tcb->snd.una;
- /*
- * We should be halving the slow start threshhold (down to one
- * mss) but leaving it at mss seems to work well enough
- */
- tcb->ssthresh = tcb->mss;
-
- /*
- * pull window down to a single packet
- */
+ tptr = tcb->snd.ptr;
+ tcwind = tcb->cwind;
+ tcb->snd.ptr = tcb->snd.una;
tcb->cwind = tcb->mss;
+ tcb->snd.retransmit = 1;
tcpoutput(s);
+ tcb->snd.retransmit = 0;
+ tcb->cwind = tcwind;
+ tcb->snd.ptr = tptr;
+
+ tpriv = s->p->priv;
+ tpriv->stats[RetransSegs]++;
}
-void
+/*
+ * todo: RFC 4138 F-RTO
+ */
+static void
tcptimeout(void *arg)
{
Conv *s;
@@ -2792,11 +2966,29 @@
localclose(s, Etimedout);
break;
}
- netlog(s->p->f, Logtcprxmt, "timeout rexmit 0x%lux %d/%d\n", tcb->snd.una, tcb->timer.start, NOW);
+ netlog(s->p->f, Logtcprxmt, "rxm %d/%d %ldms %lud rto %d %lud %s\n",
+ tcb->srtt, tcb->mdev, NOW-tcb->time,
+ tcb->snd.una-tcb->timeuna, tcb->snd.rto, tcb->snd.ptr,
+ tcpstates[s->state]);
tcpsettimer(tcb);
+ if(tcb->snd.rto == 0)
+ tcpcongestion(tcb);
tcprxmit(s);
+ tcb->snd.ptr = tcb->snd.una;
+ tcb->cwind = tcb->mss;
+ tcb->snd.rto = 1;
tpriv->stats[RetransTimeouts]++;
- tcb->snd.dupacks = 0;
+
+ if(tcb->snd.recovery){
+ tcb->snd.dupacks = 0; /* reno rto */
+ tcb->snd.recovery = 0;
+ tpriv->stats[RecoveryRTO]++;
+ tcb->snd.rxt = tcb->snd.nxt;
+ netlog(s->p->f, Logtcpwin,
+ "rto recovery rxt @%lud\n", tcb->snd.nxt);
+ }
+
+ tcb->abcbytes = 0;
break;
case Time_wait:
localclose(s, nil);
@@ -2808,7 +3000,7 @@
poperror();
}
-int
+static int
inwindow(Tcpctl *tcb, int seq)
{
return seq_within(seq, tcb->rcv.nxt, tcb->rcv.nxt+tcb->rcv.wnd-1);
@@ -2817,36 +3009,83 @@
/*
* set up state for a received SYN (or SYN ACK) packet
*/
-void
+static void
procsyn(Conv *s, Tcp *seg)
{
Tcpctl *tcb;
+ Tcppriv *tpriv;
tcb = (Tcpctl*)s->ptcl;
tcb->flags |= FORCE;
tcb->rcv.nxt = seg->seq + 1;
+ tcb->rcv.wptr = tcb->rcv.nxt;
+ tcb->rcv.wsnt = 0;
tcb->rcv.urg = tcb->rcv.nxt;
tcb->irs = seg->seq;
/* our sending max segment size cannot be bigger than what he asked for */
- if(seg->mss != 0 && seg->mss < tcb->mss)
+ if(seg->mss != 0 && seg->mss < tcb->mss) {
tcb->mss = seg->mss;
+ tpriv = s->p->priv;
+ tpriv->stats[Mss] = tcb->mss;
+ }
- /* the congestion window always starts out as a single segment */
+ /* if the server does not support ws option, disable window scaling */
+ if(seg->ws == 0){
+ tcb->scale = 0;
+ tcb->snd.scale = 0;
+ }
+
tcb->snd.wnd = seg->wnd;
- tcb->cwind = tcb->mss;
+ initialwindow(tcb);
}
-int
-addreseq(Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+static int
+dumpreseq(Tcpctl *tcb)
{
- Reseq *rp, *rp1;
- int i, rqlen, qmax;
+ Reseq *r, *next;
+ for(r = tcb->reseq; r != nil; r = next){
+ next = r->next;
+ freeblist(r->bp);
+ free(r);
+ }
+ tcb->reseq = nil;
+ tcb->nreseq = 0;
+ tcb->reseqlen = 0;
+ return -1;
+}
+
+static void
+logreseq(Fs *f, Reseq *r, ulong n)
+{
+ char *s;
+
+ for(; r != nil; r = r->next){
+ s = nil;
+ if(r->next == nil && r->seg.seq != n)
+ s = "hole/end";
+ else if(r->next == nil)
+ s = "end";
+ else if(r->seg.seq != n)
+ s = "hole";
+ if(s != nil)
+ netlog(f, Logtcp, "%s %lud-%lud (%ld) %#ux\n", s,
+ n, r->seg.seq, r->seg.seq-n, r->seg.flags);
+ n = r->seg.seq + r->seg.len;
+ }
+}
+
+static int
+addreseq(Fs *f, Tcpctl *tcb, Tcppriv *tpriv, Tcp *seg, Block *bp, ushort length)
+{
+ Reseq *rp, **rr;
+ int qmax;
+
rp = malloc(sizeof(Reseq));
if(rp == nil){
- freeblist(bp); /* bp always consumed by add_reseq */
+ freeblist(bp); /* bp always consumed by addreseq */
return 0;
}
@@ -2854,56 +3093,39 @@
rp->bp = bp;
rp->length = length;
- /* Place on reassembly list sorting by starting seq number */
- rp1 = tcb->reseq;
- if(rp1 == nil || seq_lt(seg->seq, rp1->seg.seq)) {
- rp->next = rp1;
- tcb->reseq = rp;
- if(rp->next != nil)
- tpriv->stats[OutOfOrder]++;
- return 0;
- }
+ tcb->reseqlen += length;
+ tcb->nreseq++;
- rqlen = 0;
- for(i = 0;; i++) {
- rqlen += rp1->length;
- if(rp1->next == nil || seq_lt(seg->seq, rp1->next->seg.seq)) {
- rp->next = rp1->next;
- rp1->next = rp;
+ /* Place on reassembly list sorting by starting seq number */
+ for(rr = &tcb->reseq;; rr = &(*rr)->next)
+ if(*rr == nil || seq_lt(seg->seq, (*rr)->seg.seq)){
+ rp->next = *rr;
+ *rr = rp;
+ tpriv->stats[Resequenced]++;
if(rp->next != nil)
tpriv->stats[OutOfOrder]++;
break;
}
- rp1 = rp1->next;
- }
- qmax = QMAX<<tcb->rcv.scale;
- if(rqlen > qmax){
- print("resequence queue > window: %d > %d\n", rqlen, qmax);
- i = 0;
- for(rp1 = tcb->reseq; rp1 != nil; rp1 = rp1->next){
- print("%#lux %#lux %#ux\n", rp1->seg.seq,
- rp1->seg.ack, rp1->seg.flags);
- if(i++ > 10){
- print("...\n");
- break;
- }
- }
- // delete entire reassembly queue; wait for retransmit.
- // - should we be smarter and only delete the tail?
- for(rp = tcb->reseq; rp != nil; rp = rp1){
- rp1 = rp->next;
- freeblist(rp->bp);
- free(rp);
- }
- tcb->reseq = nil;
-
- return -1;
+ qmax = tcb->window;
+ if(tcb->reseqlen > qmax){
+ netlog(f, Logtcp, "tcp: reseq: queue > window: %d > %d; %d packets\n", tcb->reseqlen, qmax, tcb->nreseq);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqBytelim]++;
+ return dumpreseq(tcb);
}
+ qmax = tcb->window / tcb->mss; /* ~190 for qscale==2, 390 for qscale=3 */
+ if(tcb->nreseq > qmax){
+ netlog(f, Logtcp, "resequence queue > packets: %d %d; %d bytes\n", tcb->nreseq, qmax, tcb->reseqlen);
+ logreseq(f, tcb->reseq, tcb->rcv.nxt);
+ tpriv->stats[ReseqPktlim]++;
+ return dumpreseq(tcb);
+ }
+
return 0;
}
-void
+static void
getreseq(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
Reseq *rp;
@@ -2918,10 +3140,13 @@
*bp = rp->bp;
*length = rp->length;
+ tcb->nreseq--;
+ tcb->reseqlen -= rp->length;
+
free(rp);
}
-int
+static int
tcptrim(Tcpctl *tcb, Tcp *seg, Block **bp, ushort *length)
{
ushort len;
@@ -2992,7 +3217,7 @@
return 0;
}
-void
+static void
tcpadvise(Proto *tcp, Block *bp, char *msg)
{
Tcp4hdr *h4;
@@ -3011,8 +3236,7 @@
v4tov6(source, h4->tcpsrc);
psource = nhgets(h4->tcpsport);
pdest = nhgets(h4->tcpdport);
- }
- else {
+ } else {
ipmove(dest, h6->tcpdst);
ipmove(source, h6->tcpsrc);
psource = nhgets(h6->tcpsport);
@@ -3021,8 +3245,7 @@
/* Look for a connection */
qlock(tcp);
- for(p = tcp->conv; *p; p++) {
- s = *p;
+ for(p = tcp->conv; (s = *p) != nil; p++) {
tcb = (Tcpctl*)s->ptcl;
if(s->rport == pdest)
if(s->lport == psource)
@@ -3029,6 +3252,8 @@
if(tcb->state != Closed)
if(ipcmp(s->raddr, dest) == 0)
if(ipcmp(s->laddr, source) == 0){
+ if(s->ignoreadvice)
+ break;
qlock(s);
qunlock(tcp);
switch(tcb->state){
@@ -3058,9 +3283,11 @@
}
/* called with c qlocked */
-char*
+static char*
tcpctl(Conv* c, char** f, int n)
{
+ if(n == 1 && strcmp(f[0], "close") == 0)
+ return tcpclose(c), nil;
if(n == 1 && strcmp(f[0], "hangup") == 0)
return tcphangup(c);
if(n >= 1 && strcmp(f[0], "keepalive") == 0)
@@ -3072,7 +3299,7 @@
return "unknown control request";
}
-int
+static int
tcpstats(Proto *tcp, char *buf, int len)
{
Tcppriv *priv;
@@ -3083,7 +3310,7 @@
p = buf;
e = p+len;
for(i = 0; i < Nstats; i++)
- p = seprint(p, e, "%s: %lud\n", statnames[i], priv->stats[i]);
+ p = seprint(p, e, "%s: %llud\n", statnames[i], priv->stats[i]);
return p - buf;
}
@@ -3096,7 +3323,7 @@
* of questionable validity so we try to use them only when we're
* up against the wall.
*/
-int
+static int
tcpgc(Proto *tcp)
{
Conv *c, **pp, **ep;
@@ -3104,7 +3331,7 @@
Tcpctl *tcb;
- n = natgc(tcp->ipproto);
+ n = 0;
ep = &tcp->conv[tcp->nc];
for(pp = tcp->conv; pp < ep; pp++) {
c = *pp;
@@ -3116,13 +3343,13 @@
switch(tcb->state){
case Syn_received:
if(NOW - tcb->time > 5000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
case Finwait2:
if(NOW - tcb->time > 5*60*1000){
- localclose(c, "timed out");
+ localclose(c, Etimedout);
n++;
}
break;
@@ -3132,7 +3359,7 @@
return n;
}
-void
+static void
tcpsettimer(Tcpctl *tcb)
{
int x;
@@ -3141,9 +3368,9 @@
x = backoff(tcb->backoff) *
(tcb->mdev + (tcb->srtt>>LOGAGAIN) + MSPTICK) / MSPTICK;
- /* bounded twixt 1/2 and 64 seconds */
- if(x < 500/MSPTICK)
- x = 500/MSPTICK;
+ /* bounded twixt 0.3 and 64 seconds */
+ if(x < 300/MSPTICK)
+ x = 300/MSPTICK;
else if(x > (64000/MSPTICK))
x = 64000/MSPTICK;
tcb->timer.start = x;
@@ -3177,18 +3404,37 @@
Fsproto(fs, tcp);
}
-void
+static void
tcpsetscale(Conv *s, Tcpctl *tcb, ushort rcvscale, ushort sndscale)
{
- if(rcvscale){
- tcb->rcv.scale = rcvscale & 0xff;
- tcb->snd.scale = sndscale & 0xff;
- tcb->window = QMAX<<tcb->snd.scale;
- qsetlimit(s->rq, tcb->window);
- } else {
- tcb->rcv.scale = 0;
- tcb->snd.scale = 0;
- tcb->window = QMAX;
- qsetlimit(s->rq, tcb->window);
- }
+ /*
+ * guess at reasonable queue sizes. there's no current way
+ * to know how many nic receive buffers we can safely tie up in the
+ * tcp stack, and we don't adjust our queues to maximize throughput
+ * and minimize bufferbloat. n.b. the offer (rcvscale) needs to be
+ * respected, but we still control our own buffer commitment by
+ * keeping a separate qscale.
+ */
+ tcb->rcv.scale = rcvscale & 0xff;
+ tcb->snd.scale = sndscale & 0xff;
+ tcb->qscale = rcvscale & 0xff;
+ if(rcvscale > Maxqscale)
+ tcb->qscale = Maxqscale;
+
+ if(rcvscale != tcb->rcv.scale)
+ netlog(s->p->f, Logtcp, "tcpsetscale: window %lud qlen %d >> window %ud lport %d\n",
+ tcb->window, qlen(s->rq), QMAX<<tcb->qscale, s->lport);
+ tcb->window = QMAX<<tcb->qscale;
+ tcb->ssthresh = tcb->window;
+
+ /*
+ * it's important to set wq large enough to cover the full
+ * bandwidth-delay product. it's possible to be in loss
+ * recovery with a big window, and we need to keep sending
+ * into the inflated window. the difference can be huge
+ * for even modest (70ms) ping times.
+ */
+ qsetlimit(s->rq, QMAX<<tcb->qscale);
+ qsetlimit(s->wq, QMAX<<tcb->qscale);
+ tcprcvwin(s);
}
diff -u a/os/ip//udp.c b/os/ip//udp.c
--- a/os/ip//udp.c
+++ b/os/ip//udp.c
@@ -24,7 +24,6 @@
IP_UDPPROTO = 17,
UDP_USEAD7 = 52,
- UDP_USEAD6 = 36,
Udprxms = 200,
Udptickms = 100,
@@ -40,7 +39,7 @@
uchar length[2]; /* packet length */
uchar id[2]; /* Identification */
uchar frag[2]; /* Fragment information */
- uchar Unused;
+ uchar Unused;
uchar udpproto; /* Protocol */
uchar udpplen[2]; /* Header plus data length */
uchar udpsrc[IPv4addrlen]; /* Ip source */
@@ -73,10 +72,10 @@
typedef struct Udpstats Udpstats;
struct Udpstats
{
- ulong udpInDatagrams;
+ uvlong udpInDatagrams;
ulong udpNoPorts;
ulong udpInErrors;
- ulong udpOutDatagrams;
+ uvlong udpOutDatagrams;
};
typedef struct Udppriv Udppriv;
@@ -101,7 +100,6 @@
typedef struct Udpcb Udpcb;
struct Udpcb
{
- QLock;
uchar headers;
};
@@ -125,7 +123,7 @@
static int
udpstate(Conv *c, char *state, int n)
{
- return snprint(state, n, "%s qin %d qout %d",
+ return snprint(state, n, "%s qin %d qout %d\n",
c->inuse ? "Open" : "Closed",
c->rq ? qlen(c->rq) : 0,
c->wq ? qlen(c->wq) : 0
@@ -151,7 +149,7 @@
static void
udpcreate(Conv *c)
{
- c->rq = qopen(64*1024, Qmsg, 0, 0);
+ c->rq = qopen(512*1024, Qmsg, 0, 0);
c->wq = qbypass(udpkick, c);
}
@@ -175,8 +173,6 @@
ucb = (Udpcb*)c->ptcl;
ucb->headers = 0;
-
- qunlock(c);
}
void
@@ -192,12 +188,13 @@
Udppriv *upriv;
Fs *f;
int version;
- Conv *rc;
+ Routehint *rh;
+ ushort csum;
upriv = c->p->priv;
f = c->p->f;
- netlog(c->p->f, Logudp, "udp: kick\n");
+// netlog(c->p->f, Logudp, "udp: kick\n"); /* frequent and uninteresting */
if(bp == nil)
return;
@@ -219,21 +216,6 @@
rport = nhgets(bp->rp);
bp->rp += 2+2; /* Ignore local port */
break;
- case 6:
- /* get user specified addresses */
- bp = pullupblock(bp, UDP_USEAD6);
- if(bp == nil)
- return;
- ipmove(raddr, bp->rp);
- bp->rp += IPaddrlen;
- ipmove(laddr, bp->rp);
- bp->rp += IPaddrlen;
- /* pick interface closest to dest */
- if(ipforme(f, laddr) != Runi)
- findlocalip(f, laddr, raddr);
- rport = nhgets(bp->rp);
- bp->rp += 2+2; /* Ignore local port */
- break;
default:
rport = 0;
break;
@@ -240,18 +222,12 @@
}
if(ucb->headers) {
- if(memcmp(laddr, v4prefix, IPv4off) == 0 ||
- ipcmp(laddr, IPnoaddr) == 0)
+ if(isv4(laddr) || ipcmp(laddr, IPnoaddr) == 0)
version = V4;
else
version = V6;
} else {
- if( (memcmp(c->raddr, v4prefix, IPv4off) == 0 &&
- memcmp(c->laddr, v4prefix, IPv4off) == 0)
- || ipcmp(c->raddr, IPnoaddr) == 0)
- version = V4;
- else
- version = V6;
+ version = convipvers(c);
}
dlen = blocklen(bp);
@@ -260,9 +236,6 @@
switch(version){
case V4:
bp = padblock(bp, UDP4_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
uh4 = (Udp4hdr *)(bp->rp);
ptcllen = dlen + UDP_UDPHDR_SZ;
uh4->Unused = 0;
@@ -274,7 +247,7 @@
v6tov4(uh4->udpdst, raddr);
hnputs(uh4->udpdport, rport);
v6tov4(uh4->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
v6tov4(uh4->udpdst, c->raddr);
hnputs(uh4->udpdport, c->rport);
@@ -281,25 +254,26 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
v6tov4(uh4->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh4->udpsport, c->lport);
hnputs(uh4->udplen, ptcllen);
uh4->udpcksum[0] = 0;
uh4->udpcksum[1] = 0;
- hnputs(uh4->udpcksum,
- ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ));
+ csum = ptclcsum(bp, UDP4_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP4_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh4->udpcksum, csum);
uh4->vihl = IP_VER4;
- ipoput4(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput4(f, bp, 0, c->ttl, c->tos, rh);
break;
case V6:
+ /*
+ * using the v6 ip header to create pseudo header
+ * first then reset it to the normal ip header
+ */
bp = padblock(bp, UDP6_IPHDR_SZ+UDP_UDPHDR_SZ);
- if(bp == nil)
- return;
-
- // using the v6 ip header to create pseudo header
- // first then reset it to the normal ip header
uh6 = (Udp6hdr *)(bp->rp);
memset(uh6, 0, 8);
ptcllen = dlen + UDP_UDPHDR_SZ;
@@ -309,7 +283,7 @@
ipmove(uh6->udpdst, raddr);
hnputs(uh6->udpdport, rport);
ipmove(uh6->udpsrc, laddr);
- rc = nil;
+ rh = nil;
} else {
ipmove(uh6->udpdst, c->raddr);
hnputs(uh6->udpdport, c->rport);
@@ -316,19 +290,21 @@
if(ipcmp(c->laddr, IPnoaddr) == 0)
findlocalip(f, c->laddr, c->raddr);
ipmove(uh6->udpsrc, c->laddr);
- rc = c;
+ rh = c;
}
hnputs(uh6->udpsport, c->lport);
hnputs(uh6->udplen, ptcllen);
uh6->udpcksum[0] = 0;
uh6->udpcksum[1] = 0;
- hnputs(uh6->udpcksum,
- ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ));
+ csum = ptclcsum(bp, UDP6_PHDR_OFF, dlen+UDP_UDPHDR_SZ+UDP6_PHDR_SZ);
+ if(csum == 0)
+ csum = 0xffff; /* -0 */
+ hnputs(uh6->udpcksum, csum);
memset(uh6, 0, 8);
uh6->viclfl[0] = IP_VER6;
hnputs(uh6->len, ptcllen);
uh6->nextheader = IP_UDPPROTO;
- ipoput6(f, bp, 0, c->ttl, c->tos, rc);
+ ipoput6(f, bp, 0, c->ttl, c->tos, rh);
break;
default:
@@ -360,10 +336,8 @@
uh4 = (Udp4hdr*)(bp->rp);
version = ((uh4->vihl&0xF0)==IP_VER6) ? V6 : V4;
- /*
- * Put back pseudo header for checksum
- * (remember old values for icmpnoconv())
- */
+ /* Put back pseudo header for checksum
+ * (remember old values for icmpnoconv()) */
switch(version) {
case V4:
ottl = uh4->Unused;
@@ -423,7 +397,7 @@
c = iphtlook(&upriv->ht, raddr, rport, laddr, lport);
if(c == nil){
- /* no converstation found */
+ /* no conversation found */
upriv->ustats.udpNoPorts++;
qunlock(udp);
netlog(f, Logudp, "udp: no conv %I!%d -> %I!%d\n", raddr, rport,
@@ -434,7 +408,7 @@
icmpnoconv(f, bp);
break;
case V6:
- icmphostunr(f, ifc, bp, icmp6_port_unreach, 0);
+ icmphostunr6(f, ifc, bp, Icmp6_port_unreach, 0);
break;
default:
panic("udpiput2: version %d", version);
@@ -448,18 +422,8 @@
if(c->state == Announced){
if(ucb->headers == 0){
/* create a new conversation */
- if(ipforme(f, laddr) != Runi) {
- switch(version){
- case V4:
- v4tov6(laddr, ifc->lifc->local);
- break;
- case V6:
- ipmove(laddr, ifc->lifc->local);
- break;
- default:
- panic("udpiput3: version %d", version);
- }
- }
+ if(ipforme(f, laddr) != Runi)
+ ipv6local(ifc, laddr, 0, raddr);
c = Fsnewcall(c, raddr, rport, laddr, lport, version);
if(c == nil){
qunlock(udp);
@@ -507,33 +471,21 @@
p = bp->rp;
ipmove(p, raddr); p += IPaddrlen;
ipmove(p, laddr); p += IPaddrlen;
- ipmove(p, ifc->lifc->local); p += IPaddrlen;
+ if(!ipv6local(ifc, p, 0, raddr))
+ ipmove(p, ifc->lifc != nil ? ifc->lifc->local : IPnoaddr);
+ p += IPaddrlen;
hnputs(p, rport); p += 2;
hnputs(p, lport);
break;
- case 6:
- /* pass the src address */
- bp = padblock(bp, UDP_USEAD6);
- p = bp->rp;
- ipmove(p, raddr); p += IPaddrlen;
- ipmove(p, ipforme(f, laddr)==Runi ? laddr : ifc->lifc->local); p += IPaddrlen;
- hnputs(p, rport); p += 2;
- hnputs(p, lport);
- break;
}
- if(bp->next)
- bp = concatblock(bp);
-
if(qfull(c->rq)){
- qunlock(c);
- netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n", raddr, rport,
- laddr, lport);
+ netlog(f, Logudp, "udp: qfull %I.%d -> %I.%d\n",
+ raddr, rport, laddr, lport);
freeblist(bp);
- return;
+ } else {
+ qpass(c->rq, concatblock(bp));
}
-
- qpass(c->rq, bp);
qunlock(c);
}
@@ -545,11 +497,13 @@
ucb = (Udpcb*)c->ptcl;
if(n == 1){
- if(strcmp(f[0], "oldheaders") == 0){
- ucb->headers = 6;
+ if(strcmp(f[0], "hangup") == 0){
+ qhangup(c->rq, nil);
+ qhangup(c->wq, nil);
return nil;
- } else if(strcmp(f[0], "headers") == 0){
- ucb->headers = 7;
+ }
+ if(strcmp(f[0], "headers") == 0){
+ ucb->headers = 7; /* new headers format */
return nil;
}
}
@@ -564,34 +518,25 @@
uchar source[IPaddrlen], dest[IPaddrlen];
ushort psource, pdest;
Conv *s, **p;
- int version;
h4 = (Udp4hdr*)(bp->rp);
- version = ((h4->vihl&0xF0)==IP_VER6) ? V6 : V4;
+ h6 = (Udp6hdr*)(bp->rp);
- switch(version) {
- case V4:
+ if((h4->vihl&0xF0)==IP_VER4) {
v4tov6(dest, h4->udpdst);
v4tov6(source, h4->udpsrc);
psource = nhgets(h4->udpsport);
pdest = nhgets(h4->udpdport);
- break;
- case V6:
- h6 = (Udp6hdr*)(bp->rp);
+ } else {
ipmove(dest, h6->udpdst);
ipmove(source, h6->udpsrc);
psource = nhgets(h6->udpsport);
pdest = nhgets(h6->udpdport);
- break;
- default:
- panic("udpadvise: version %d", version);
- return; /* to avoid a warning */
}
/* Look for a connection */
qlock(udp);
- for(p = udp->conv; *p; p++) {
- s = *p;
+ for(p = udp->conv; (s = *p) != nil; p++) {
if(s->rport == pdest)
if(s->lport == psource)
if(ipcmp(s->raddr, dest) == 0)
@@ -617,7 +562,8 @@
Udppriv *upriv;
upriv = udp->priv;
- return snprint(buf, len, "InDatagrams: %lud\nNoPorts: %lud\nInErrors: %lud\nOutDatagrams: %lud\n",
+ return snprint(buf, len, "InDatagrams: %llud\nNoPorts: %lud\n"
+ "InErrors: %lud\nOutDatagrams: %llud\n",
upriv->ustats.udpInDatagrams,
upriv->ustats.udpNoPorts,
upriv->ustats.udpInErrors,
@@ -624,12 +570,6 @@
upriv->ustats.udpOutDatagrams);
}
-int
-udpgc(Proto *udp)
-{
- return natgc(udp->ipproto);
-}
-
void
udpinit(Fs *fs)
{
@@ -647,7 +587,6 @@
udp->rcv = udpiput;
udp->advise = udpadvise;
udp->stats = udpstats;
- udp->gc = udpgc;
udp->ipproto = IP_UDPPROTO;
udp->nc = Nchans;
udp->ptclsize = sizeof(Udpcb);