code: plan9front

Download patch

ref: 5a1c1b590d319d58a178c6d299e99142bf7d4433
parent: 05b08f6665b8b0c14c0cb4caa42929db8d26c6c9
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Sep 17 09:32:58 EDT 2022

devip: do tcp mss clamping when forwarding packets

when forwarding packets (gating), unconditionally
check tcp-syn packets for the mss-size option and
reduce it to fit the mtu of the outgoing interface.

this is done by exporting a new tcpmssclamp() function
from ip/tcp.c that takes an ip packet, its buffer size
and the effective mtu of the interface, and adjusts
the mss value of tcp syn options.

this function is now also used by devbridge, enforcing
a tcp mss below the tunnel mtu.

--- a/sys/src/9/ip/ip.c
+++ b/sys/src/9/ip/ip.c
@@ -134,7 +134,10 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	if(!gating){
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(gating)
+		tcpmssclamp((uchar*)eh, len, medialen);
+	else {
 		eh->vihl = IP_VER4|IP_HLEN4;
 		eh->tos = tos;
 	}
@@ -141,7 +144,6 @@
 	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
-	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
 		hnputs(eh->length, len);
 		if(!gating){
--- a/sys/src/9/ip/ip.h
+++ b/sys/src/9/ip/ip.h
@@ -755,6 +755,11 @@
 extern void	ip_init_6(Fs*);
 
 /*
+ *  tcp.c
+ */
+extern void	tcpmssclamp(uchar*, int, int);
+
+/*
  * bootp.c
  */
 extern int	bootpread(char*, ulong, int);
--- a/sys/src/9/ip/ipaux.c
+++ b/sys/src/9/ip/ipaux.c
@@ -557,8 +557,11 @@
 {
 	ulong csum;
 
-	assert((((uchar*)p - pcsum) & 1) == 0);
-
+	if(((uchar*)p - pcsum) & 1){
+		hnputs_csum((uchar*)p-1, (nhgets((uchar*)p-1) & 0xFF00) | v>>8, pcsum);
+		hnputs_csum((uchar*)p+1, (nhgets((uchar*)p+1) & 0x00FF) | v<<8, pcsum);
+		return;
+	}
 	csum = nhgets(pcsum)^0xFFFF;
 	csum += nhgets(p)^0xFFFF;
 	csum += v;
--- a/sys/src/9/ip/ipv6.c
+++ b/sys/src/9/ip/ipv6.c
@@ -88,7 +88,10 @@
 	if(ifc->m == nil)
 		goto raise;
 
-	if(!gating){
+	medialen = ifc->maxtu - ifc->m->hsize;
+	if(gating)
+		tcpmssclamp((uchar*)eh, len, medialen);
+	else {
 		eh->vcf[0] = IP_VER6;
 		eh->vcf[0] |= tos >> 4;
 		eh->vcf[1]  = tos << 4;
@@ -96,7 +99,6 @@
 	eh->ttl = ttl;
 
 	/* If we dont need to fragment just send it */
-	medialen = ifc->maxtu - ifc->m->hsize;
 	if(len <= medialen) {
 		hnputs(eh->ploadlen, len - IP6HDR);
 		ipifcoput(ifc, bp, V6, gate, rh);
--- a/sys/src/9/ip/tcp.c
+++ b/sys/src/9/ip/tcp.c
@@ -3307,6 +3307,94 @@
 	return bp;
 }
 
+/*
+ *  clamp the mss option of a tcp syn packet that is being
+ *  forwarded, so that full sized segments fit into mtu.
+ *
+ *  p points to the ip (v4 or v6) header of the packet, n is
+ *  the number of bytes available at p and mtu is the effective
+ *  mtu of the outgoing interface.  the mss is only ever reduced;
+ *  it is rewritten in place with hnputs_csum(), which is handed
+ *  the packet's tcp checksum field.  packets that are too short,
+ *  not tcp, not syn or that carry ipv4 options are left alone.
+ */
+void
+tcpmssclamp(uchar *p, int n, int mtu)
+{
+	Tcp4hdr *h4;
+	Tcp6hdr *h6;
+	uchar *pcksum;
+	int hdrlen, optlen, newmss, oldmss;
+
+	if(n < TCP4_PKT)
+		return;
+	h4 = (Tcp4hdr*)p;
+	h6 = (Tcp6hdr*)p;
+	if((h4->vihl&0xF0)==IP_VER4) {
+		/*
+		 *  the tcp header is expected at the fixed offset
+		 *  TCP4_PKT; ip options would move it, so skip those.
+		 */
+		if(((h4->vihl&0x0F)<<2) != TCP4_PKT)
+			return;
+		if(h4->proto != IP_TCPPROTO)
+			return;
+		/* need the fixed tcp header before reading its flags */
+		if(n < TCP4_PKT + TCP4_HDRSIZE)
+			return;
+		if(!(h4->tcpflag[1] & SYN))
+			return;
+		/* upper nibble of tcpflag[0] times four */
+		hdrlen = (h4->tcpflag[0] >> 2) & ~3;
+		if(hdrlen > (n - TCP4_PKT))
+			return;
+		n = hdrlen - TCP4_HDRSIZE;	/* bytes of options */
+		p = h4->tcpopt;
+		pcksum = h4->tcpcksum;
+		newmss = mtu - (TCP4_PKT + TCP4_HDRSIZE);
+	} else {
+		/* need the fixed tcp header before reading its flags */
+		if(n < TCP6_PKT + TCP6_HDRSIZE)
+			return;
+		if(h6->proto != IP_TCPPROTO)
+			return;
+		if(!(h6->tcpflag[1] & SYN))
+			return;
+		hdrlen = (h6->tcpflag[0] >> 2) & ~3;
+		if(hdrlen > (n - TCP6_PKT))
+			return;
+		n = hdrlen - TCP6_HDRSIZE;	/* bytes of options */
+		p = h6->tcpopt;
+		pcksum = h6->tcpcksum;
+		newmss = mtu - (TCP6_PKT + TCP6_HDRSIZE);
+	}
+	/* walk the options, stopping at the first mss option */
+	while(n > 0 && *p != EOLOPT) {
+		if(*p == NOOPOPT) {
+			n--;
+			p++;
+			continue;
+		}
+		/* the length byte p[1] has to be inside the options */
+		if(n < 2)
+			break;
+		optlen = p[1];
+		if(optlen < 2 || optlen > n)
+			break;
+		if(*p == MSSOPT){
+			if(optlen != MSS_LENGTH)
+				break;
+			oldmss = nhgets(p+2);
+			if(newmss >= oldmss)
+				break;
+			hnputs_csum(p+2, newmss, pcksum);
+			break;
+		}
+		n -= optlen;
+		p += optlen;
+	}
+}
+
 static char*
 tcpporthogdefensectl(char *val)
 {
--- a/sys/src/9/port/devbridge.c
+++ b/sys/src/9/port/devbridge.c
@@ -40,7 +40,6 @@
 	CacheTimeout=	5*60,		/* timeout for cache entry in seconds */
 	MaxMTU=		IP_MAX,		/* allow for jumbo frames and large UDP */
 
-	TcpMssMax = 1300,		/* max desirable Tcp MSS value */
 	TunnelMtu = 1400,
 };
 
@@ -153,27 +152,6 @@
 	uchar	member[0x1000/8];
 };
 
-enum {
-	EOLOPT		= 0,
-	NOOPOPT		= 1,
-	MSSOPT		= 2,
-	MSS_LENGTH	= 4,		/* Mean segment size */
-	SYN		= 0x02,		/* Pkt. is synchronise */
-	TCPHDR		= 20,
-};
-
-struct Tcphdr
-{
-	uchar	sport[2];
-	uchar	dport[2];
-	uchar	seq[4];
-	uchar	ack[4];
-	uchar	flag[2];
-	uchar	win[2];
-	uchar	cksum[2];
-	uchar	urg[2];
-};
-
 static Bridge *bridgetab[Maxbridge];
 
 static int bridgegen(Chan *c, char*, Dirtab*, int, int s, Dir *dp);
@@ -1011,89 +989,13 @@
 static void
 tcpmsshack(Etherpkt *epkt, int n)
 {
-	int hl, optlen;
-	Tcphdr *tcphdr;
-	ulong mss, cksum;
-	uchar *optr;
-
 	/* ignore non-ip packets */
 	switch(nhgets(epkt->type)){
 	case ETIP4:
 	case ETIP6:
+		tcpmssclamp(epkt->data, n-ETHERHDRSIZE, TunnelMtu-ETHERHDRSIZE);	/* ip packet, its size and the mtu, all less the ether header */
 		break;
-	default:
-		return;
 	}
-	n -= ETHERHDRSIZE;
-	if(n < 1)
-		return;
-	switch(epkt->data[0]&0xF0){
-	case IP_VER4:
-		hl = (epkt->data[0]&15)<<2;
-		if(n < hl+TCPHDR || hl < IP4HDR || epkt->data[9] != TCP)
-			return;
-		n -= hl;
-		tcphdr = (Tcphdr*)(epkt->data + hl);
-		break;
-	case IP_VER6:
-		if(n < IP6HDR+TCPHDR || epkt->data[6] != TCP)
-			return;
-		n -= IP6HDR;
-		tcphdr = (Tcphdr*)(epkt->data + IP6HDR);
-		break;
-	default:
-		return;
-	}
-
-	/* MSS can only appear in SYN packet */
-	if(!(tcphdr->flag[1] & SYN))
-		return;
-	hl = (tcphdr->flag[0] & 0xf0)>>2;
-	if(n < hl)
-		return;
-
-	/* check for MSS option */
-	optr = (uchar*)tcphdr + TCPHDR;
-	n = hl - TCPHDR;
-	for(;;) {
-		if(n <= 0 || *optr == EOLOPT)
-			return;
-		if(*optr == NOOPOPT) {
-			n--;
-			optr++;
-			continue;
-		}
-		optlen = optr[1];
-		if(optlen < 2 || optlen > n)
-			return;
-		if(*optr == MSSOPT && optlen == MSS_LENGTH)
-			break;
-		n -= optlen;
-		optr += optlen;
-	}
-
-	mss = nhgets(optr+2);
-	if(mss <= TcpMssMax)
-		return;
-
-	/* fix checksum */
-	cksum = nhgets(tcphdr->cksum);
-	if(optr-(uchar*)tcphdr & 1) {
-		// odd alignments are a pain
-		cksum += nhgets(optr+1);
-		cksum -= (optr[1]<<8)|(TcpMssMax>>8);
-		cksum += (cksum>>16);
-		cksum &= 0xffff;
-		cksum += nhgets(optr+3);
-		cksum -= ((TcpMssMax&0xff)<<8)|optr[4];
-		cksum += (cksum>>16);
-	} else {
-		cksum += mss;
-		cksum -= TcpMssMax;
-		cksum += (cksum>>16);
-	}
-	hnputs(tcphdr->cksum, cksum);
-	hnputs(optr+2, TcpMssMax);
 }
 
 /*