code: plan9front

Download patch

ref: 78eff200d85e0fc2f92622d221c0e3d81aaf9522
parent: 466cf20d3524b8e42edc333a6d2df2a01e99a95b
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Mon Oct 30 17:43:00 EDT 2023

ndb/dns: implement EDNS(0) extension (rfc6891)

To properly handle TCP fallback for servers,
we have to avoid sending responses too big
for the client to accept.

We used to accept up to 8K of UDP requests
(and responses when resolving).

Instead, we now advertise a UDP response size
of 1232 (assuming 1280 MTU) to the client
and take even smaller values into account
from clients (though not smaller than 512).

This makes sure we truncate packets, signaling
the client that it must retry with TCP.

Note that we still accept up to 8K of
UDP data regardless (for lucky clients).

--- a/sys/src/cmd/ip/snoopy/dns.c
+++ b/sys/src/cmd/ip/snoopy/dns.c
@@ -92,7 +92,13 @@
 	if(rr == nil)
 		return;
 	*rrp = rr->next;
-
+	if(rr->type == Topt){
+		m->p = seprint(m->p, m->e, "opt eflags=%#lux udpsize=%d data=%.*H",
+			rr->eflags, rr->udpsize,
+			rr->opt->dlen, rr->opt->data);
+		rrfree(rr);
+		return;
+	}
 	m->p = seprint(m->p, m->e, "%s name=%s ttl=%lud",
 		rrtypestr(rr->type),
 		rr->owner->name, rr->ttl);
@@ -469,6 +475,10 @@
 		rp->null = emalloc(sizeof(*rp->null));
 		setmalloctag(rp->null, rp->pc);
 		break;
+	case Topt:
+		rp->opt = emalloc(sizeof(*rp->opt));
+		setmalloctag(rp->opt, rp->pc);
+		break;
 	default:
 		if(rrsupported(rp->type))
 			break;
@@ -542,6 +552,11 @@
 			memset(t, 0, sizeof *t);	/* cause trouble */
 			free(t);
 		}
+		break;
+	case Topt:
+		free(rp->opt->data);
+		memset(rp->opt, 0, sizeof *rp->opt);	/* cause trouble */
+		free(rp->opt);
 		break;
 	default:
 		if(rrsupported(rp->type))
--- a/sys/src/cmd/ndb/convDNS2M.c
+++ b/sys/src/cmd/ndb/convDNS2M.c
@@ -198,14 +198,17 @@
 
 	NAME(rp->owner->name);
 	USHORT(rp->type);
-	USHORT(rp->owner->class);
-
-	if(rp->db || (ttl = (long)(rp->expire - now)) > rp->ttl)
-		ttl = rp->ttl;
-	if(ttl < 0)
-		ttl = 0;
-	ULONG(ttl);
-
+	if(rp->type == Topt) {
+		USHORT(rp->udpsize);
+		ULONG(rp->eflags);
+	} else {
+		if(rp->db || (ttl = (long)(rp->expire - now)) > rp->ttl)
+			ttl = rp->ttl;
+		if(ttl < 0)
+			ttl = 0;
+		USHORT(rp->owner->class);
+		ULONG(ttl);
+	}
 	lp = p;			/* leave room for the rdata length */
 	p += 2;
 	data = p;
@@ -301,6 +304,13 @@
 		SYMBOL(rp->caa->tag->name);
 		BYTES(rp->caa->data, rp->caa->dlen);
 		break;
+	case Topt:
+		BYTES(rp->opt->data, rp->opt->dlen);
+		break;
+	default:
+		if(rrsupported(rp->type))
+			break;
+		BYTES(rp->unknown->data, rp->unknown->dlen);
 	}
 
 	/* stuff in the rdata section length */
@@ -361,7 +371,17 @@
 	p = rrloop(m->qd, &m->qdcount, p, ep, &d, 1);
 	p = rrloop(m->an, &m->ancount, p, ep, &d, 0);
 	p = rrloop(m->ns, &m->nscount, p, ep, &d, 0);
+	if(m->edns) {
+		assert(m->edns->next == nil);
+		m->edns->next = m->ar;
+		m->ar = m->edns;
+	}
 	p = rrloop(m->ar, &m->arcount, p, ep, &d, 0);
+	if(m->edns) {
+		assert(m->edns == m->ar);
+		m->ar = m->edns->next;
+		m->edns->next = nil;
+	}
 	if(p > ep) {
 		trunc = Ftrunc;
 		dnslog("udp packet full; truncating my reply");
--- a/sys/src/cmd/ndb/convM2DNS.c
+++ b/sys/src/cmd/ndb/convM2DNS.c
@@ -338,10 +338,14 @@
 
 	type = mstypehack(sp, type, "convM2RR");
 	rp = rralloc(type);
-	rp->owner = dnlookup(dname, class, 1);
-	rp->type = type;
-
-	ULONG(rp->ttl);
+	if(type == Topt) {
+		rp->owner = dnlookup(dname, Cin, 1);
+		rp->udpsize = class;
+		ULONG(rp->eflags);
+	} else {
+		rp->owner = dnlookup(dname, class, 1);
+		ULONG(rp->ttl);
+	}
 	USHORT(len);			/* length of data following */
 	data = sp->p;
 	assert(data != nil);
@@ -465,6 +469,9 @@
 		SYMBOL(rp->caa->tag);
 		BYTES(rp->caa->data, rp->caa->dlen);
 		break;
+	case Topt:
+		BYTES(rp->opt->data, rp->opt->dlen);
+		break;
 	default:
 		if(rrsupported(type)){
 			sp->p = data + len;
@@ -592,6 +599,7 @@
 	if (sp->err)
 		err = strdup(sp->err);		/* live with bad ar's */
 	m->ar = rrloop(sp, "hints",	m->arcount, 0);
+	m->edns = nil;
 	if (sp->trunc)
 		m->flags |= Ftrunc;
 	if (sp->stop)
--- a/sys/src/cmd/ndb/dblookup.c
+++ b/sys/src/cmd/ndb/dblookup.c
@@ -112,7 +112,7 @@
 
 	/* so far only internet lookups are implemented */
 	if(class != Cin)
-		return 0;
+		return nil;
 
 	err = Rname;
 	rp = nil;
@@ -131,7 +131,7 @@
 	if(opendatabase() < 0)
 		goto out;
 	if(dp->rr)
-		err = 0;
+		err = Rok;
 
 	/* first try the given name */
 	if(cfg.cachedb)
@@ -146,7 +146,7 @@
 		snprint(buf, sizeof buf, "*%s", wild);
 		ndp = idnlookup(buf, class, 1);
 		if(ndp->rr)
-			err = 0;
+			err = Rok;
 		if(cfg.cachedb)
 			rp = rrlookup(ndp, type, NOneg);
 		else
--- a/sys/src/cmd/ndb/dn.c
+++ b/sys/src/cmd/ndb/dn.c
@@ -718,9 +718,10 @@
 	for(; rp; rp = next){
 		next = rp->next;
 		rp->next = nil;
-		/* avoid any outside spoofing */
-		if(cfg.cachedb && !rp->db && inmyarea(rp->owner->name)
-		|| !rrsupported(rp->type))
+		if(rp->type == Tall
+		|| rp->type == Topt
+		|| !rrsupported(rp->type)
+		|| cfg.cachedb && !rp->db && inmyarea(rp->owner->name))
 			rrfree(rp);
 		else
 			rrattach1(rp, auth);
@@ -1284,6 +1285,10 @@
 				rp->caa->flags, dnname(rp->caa->tag),
 				rp->caa->dlen, rp->caa->data);
 		break;
+	case Topt:
+		fmtprint(&fstr, "\t%#lux %d %.*H", rp->eflags, rp->udpsize,
+			rp->opt->dlen, rp->opt->data);
+		break;
 	default:
 		if(rrsupported(rp->type))
 			break;
@@ -1910,6 +1915,10 @@
 		rp->null = emalloc(sizeof(*rp->null));
 		setmalloctag(rp->null, rp->pc);
 		break;
+	case Topt:
+		rp->opt = emalloc(sizeof(*rp->opt));
+		setmalloctag(rp->opt, rp->pc);
+		break;
 	default:
 		if(rrsupported(type))
 			break;
@@ -1975,6 +1984,11 @@
 			memset(t, 0, sizeof *t);	/* cause trouble */
 			free(t);
 		}
+		break;
+	case Topt:
+		free(rp->opt->data);
+		memset(rp->opt, 0, sizeof *rp->opt);	/* cause trouble */
+		free(rp->opt);
 		break;
 	default:
 		if(rrsupported(rp->type))
--- a/sys/src/cmd/ndb/dnresolve.c
+++ b/sys/src/cmd/ndb/dnresolve.c
@@ -499,6 +499,41 @@
 		mp->qdcount = 1;
 }
 
+RR*
+getednsopt(DNSmsg *mp)	/* fetch the Topt rr from mp->ar via rrremtype; nil if there is none */
+{
+	RR *rp;
+
+	rp = rrremtype(&mp->ar, Topt);	/* presumably unlinks the Topt rr from mp->ar -- confirm in rrremtype */
+	if(rp == nil)
+		return nil;
+	mp->arcount--;	/* NOTE(review): counts one rr; assumes rrremtype hands back at most one -- confirm */
+	if(rp->udpsize < 512)
+		rp->udpsize = 512;	/* never honor an advertised size below 512 */
+	return rp;
+}
+
+RR*
+mkednsopt(void)	/* allocate the Topt rr we send; callers in this change release it with rrfreelist */
+{
+	RR *rp;
+
+	rp = rralloc(Topt);
+	rp->owner = dnlookup("", Cin, 1);	/* owner is the empty name */
+	rp->eflags = 0;	/* Ercode, Evers and Ednssecok all clear */
+
+	/*
+	 * Advertise a safe UDP response size
+	 * instead of Maxudp, as Maxudp is just
+	 * the worst case we can accept.
+	 *
+	 * 1232 = MTU(1280)-IPv6(40)-UDP(8).
+	 */
+	rp->udpsize = 1232;
+
+	return rp;
+}
+
 /* generate a DNS UDP query packet, return size of request (without Udphdr) */
 int
 mkreq(DN *dp, int type, uchar *pkt, int flags, ushort id)
@@ -516,7 +551,9 @@
 	rp = rralloc(type);
 	rp->owner = dp;
 	initdnsmsg(&m, rp, flags, id);
+	m.edns = mkednsopt();
 	len = convDNS2M(&m, &pkt[Udphdrsize], Maxudp);
+	rrfreelist(m.edns);
 	rrfreelist(rp);
 	return len;
 }
@@ -925,13 +962,22 @@
 	Query nq;
 	DN *ndp;
 	RR *tp, *soarr;
-	int rv;
+	int rv, rcode;
 
 	if(mp->an == nil)
 		stats.negans++;
 
+	/* get the rcode */
+	rcode = mp->flags & Rmask;
+
+	/* get extended rcode from edns */
+	if((tp = getednsopt(mp)) != nil){
+		rcode = (rcode & 15) | (tp->eflags & Ercode) >> 20;
+		rrfreelist(tp);
+	}
+
 	/* ignore any error replies */
-	switch(mp->flags & Rmask){
+	switch(rcode){
 	case Rrefused:
 	case Rserver:
 		stats.negserver++;
@@ -1023,7 +1069,7 @@
 		 *  they can legitimately come from a cache.
 		 */
 		if( /* (mp->flags & Fauth) && */ mp->an == nil)
-			cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
+			cacheneg(qp->dp, qp->type, rcode, soarr);
 		else
 			rrfreelist(soarr);
 		return 1;
@@ -1034,7 +1080,7 @@
 		 *  negative responses need not be authoritative:
 		 *  they can legitimately come from a cache.
 		 */
-		cacheneg(qp->dp, qp->type, (mp->flags & Rmask), soarr);
+		cacheneg(qp->dp, qp->type, rcode, soarr);
 		return 1;
 	}
 	stats.negnorname++;
@@ -1203,11 +1249,11 @@
 			/* exponential backoff of requests */
 			if((1UL<<p->nx) > ndest)
 				continue;
-			if(writenet(qp, Udp, fd, pkt, len, p) == 0)
-				n++;
 			p->nx++;
+			if(writenet(qp, Udp, fd, pkt, len, p) < 0)
+				continue;
+			n++;
 		}
-
 		/* nothing left to send to */
 		if (n == 0)
 			break;
--- a/sys/src/cmd/ndb/dns.h
+++ b/sys/src/cmd/ndb/dns.h
@@ -115,6 +115,11 @@
 	Frecurse=	1<<8,	/* request recursion */
 	Fcanrec=	1<<7,	/* server can recurse */
 
+	/* EDNS flags (eflags) */
+	Ercode=		0xff<<24,
+	Evers=		0xff<<16,
+	Ednssecok=	1<<15,
+
 	Domlen=		256,	/* max domain name length (with NULL) */
 	Labellen=	64,	/* max domain label length (with NULL) */
 	Strlen=		256,	/* max string length (with NULL) */
@@ -163,6 +168,7 @@
 typedef struct Txt	Txt;
 typedef struct Caa	Caa;
 typedef struct Unknown	Unknown;
+typedef struct Opt	Opt;
 
 /*
  *  a structure to track a request and any slave process handling it
@@ -237,6 +243,10 @@
 {
 	Block;
 };
+struct Opt	/* what RR.opt points at for Topt records */
+{
+	Block;	/* option bytes; read elsewhere in this change as opt->data and opt->dlen */
+};
 
 /*
  *  text strings
@@ -272,6 +282,7 @@
 		DN	*mb;	/* mailbox - mg, minfo */
 		DN	*ip;	/* ip address - a, aaaa */
 		DN	*rp;	/* rp arg - rp */
+		ulong	eflags;	/* EDNS(0) flags - opt */
 		uintptr	arg0;	/* arg[01] are compared to find dups in dn.c */
 	};
 	union {			/* discriminated by negative & type */
@@ -282,6 +293,7 @@
 		ulong	pref;	/* preference value - mx */
 		ulong	local;	/* ns served from local database - ns */
 		ushort	port;	/* - srv */
+		ushort	udpsize;/* requester's UDP payload size - opt */
 		uintptr	arg1;	/* arg[01] are compared to find dups in dn.c */
 	};
 	union {			/* discriminated by type */
@@ -294,6 +306,7 @@
 		Null	*null;
 		Txt	*txt;
 		Unknown	*unknown;
+		Opt	*opt;
 	};
 };
 
@@ -330,13 +343,6 @@
 	ushort	weight;
 };
 
-typedef struct Rrlist Rrlist;
-struct Rrlist
-{
-	int	count;
-	RR	*rrs;
-};
-
 /*
  *  domain messages
  */
@@ -352,6 +358,7 @@
 	RR	*ns;
 	int	arcount;	/* hints */
 	RR	*ar;
+	RR	*edns;		/* edns option */
 };
 
 /*
@@ -503,7 +510,9 @@
 /* dnresolve.c */
 RR*	dnresolve(char*, int, int, Request*, RR**, int, int, int, int*);
 int	udpport(char *);
-int	mkreq(DN *dp, int type, uchar *pkt, int flags, ushort reqno);
+int	mkreq(DN*, int type, uchar *pkt, int flags, ushort);
+RR*	mkednsopt(void);
+RR*	getednsopt(DNSmsg*);
 
 /* dnserver.c */
 void	dnserver(DNSmsg*, DNSmsg*, Request*, uchar *, int);
--- a/sys/src/cmd/ndb/dnserver.c
+++ b/sys/src/cmd/ndb/dnserver.c
@@ -6,6 +6,18 @@
 static RR*	doextquery(DNSmsg*, Request*, int);
 static void	hint(RR**, RR*);
 
+static void
+setflags(DNSmsg *repp, int rcode, int flags)	/* or rcode and flags into repp->flags; with edns, rcode is split across header and OPT */
+{
+	if(repp->edns){
+		repp->edns->eflags = (rcode >> 4) << 24;	/* bits above the low 4 land in the Ercode byte; the rest of eflags is reset */
+		rcode &= 15;	/* header keeps only the low 4 bits */
+	}
+	rcode &= Rmask;
+	flags &= ~Rmask;	/* the response code comes from rcode alone */
+	repp->flags |= rcode | flags;	/* accumulates: bits already set in repp->flags are kept */
+}
+
 /*
  *  answer a dns request
  */
@@ -20,7 +32,6 @@
 	RR *tp, *neg, *rp;
 
 	recursionflag = cfg.nonrecursive? 0: Fcanrec;
-	memset(repp, 0, sizeof(*repp));
 	repp->id = reqp->id;
 	repp->flags = Fresp | recursionflag | Oquery;
 
@@ -37,14 +48,14 @@
 		dnslog("%d: server: response code 0%o (%s), req from %I",
 			req->id, rcode, errmsg, srcip);
 		/* provide feedback to clients who send us trash */
-		repp->flags = (rcode&Rmask) | Fresp | Fcanrec | Oquery;
+		setflags(repp, rcode, Fresp | Fcanrec | Oquery);
 		return;
 	}
-	if(!rrsupported(repp->qd->type)){
+	if(repp->qd->type == Topt || !rrsupported(repp->qd->type)){
 		if(debug)
 			dnslog("%d: server: unsupported request %s from %I",
 				req->id, rrname(repp->qd->type, tname, sizeof tname), srcip);
-		repp->flags = Runimplimented | Fresp | Fcanrec | Oquery;
+		setflags(repp, Runimplimented, Fresp | Fcanrec | Oquery);
 		return;
 	}
 
@@ -52,7 +63,7 @@
 		if(debug)
 			dnslog("%d: server: unsupported class %d from %I",
 				req->id, repp->qd->owner->class, srcip);
-		repp->flags = Runimplimented | Fresp | Fcanrec | Oquery;
+		setflags(repp, Runimplimented, Fresp | Fcanrec | Oquery);
 		return;
 	}
 
@@ -63,13 +74,13 @@
 				dnslog("%d: server: unsupported xfr request %s for %s from %I",
 					req->id, rrname(repp->qd->type, tname, sizeof tname),
 					repp->qd->owner->name, srcip);
-			repp->flags = Runimplimented | Fresp | recursionflag | Oquery;
+			setflags(repp, Runimplimented, Fresp | recursionflag | Oquery);
 			return;
 		}
 	}
 	if(myarea == nil && cfg.nonrecursive) {
 		/* we don't recurse and we're not authoritative */
-		repp->flags = Rok | Fresp | Oquery;
+		setflags(repp, Rok, Fresp | Oquery);
 		neg = nil;
 	} else {
 		/*
@@ -89,7 +100,7 @@
 			dp = dnlookup(repp->qd->owner->name, repp->qd->owner->class, 0);
 			if(dp->rr == nil)
 				if(reqp->flags & Frecurse)
-					repp->flags |= dp->respcode | Fauth;
+					setflags(repp, dp->respcode, Fauth);
 		}
 	}
 
@@ -145,7 +156,7 @@
 				tp = rrlookup(neg->negsoaowner, Tsoa, NOneg);
 				rrcat(&repp->ns, tp);
 			}
-			repp->flags |= neg->negrcode;
+			setflags(repp, neg->negrcode, repp->flags);
 		}
 	}
 
--- a/sys/src/cmd/ndb/dntcpserver.c
+++ b/sys/src/cmd/ndb/dntcpserver.c
@@ -23,6 +23,7 @@
 	volatile uchar pkt[Maxpkt], callip[IPaddrlen];
 	volatile DNSmsg reqmsg, repmsg;
 	volatile Request req;
+	volatile RR *edns;
 	char *volatile err;
 
 	/*
@@ -54,10 +55,11 @@
 	/* loop on requests */
 	for(;; putactivity(&req)){
 		memset(&reqmsg, 0, sizeof reqmsg);
+		edns = nil;
 
 		ms = (long)(req.aborttime - nowms);
 		if(ms < Minreqtm){
-		noreq:
+		hangup:
 			close(fd);
 			_exits(0);
 		}
@@ -64,12 +66,12 @@
 		alarm(ms);
 		if(readn(fd, pkt, 2) != 2){
 			alarm(0);
-			goto noreq;
+			goto hangup;
 		}
 		len = pkt[0]<<8 | pkt[1];
 		if(len <= 0 || len > Maxtcp || readn(fd, pkt+2, len) != len){
 			alarm(0);
-			goto noreq;
+			goto hangup;
 		}
 		alarm(0);
 
@@ -111,10 +113,17 @@
 		logrequest(req.id, 0, "rcvd", callip, caller,
 			reqmsg.qd->owner->name, reqmsg.qd->type);
 
+		if((reqmsg.edns = getednsopt(&reqmsg)) != nil){
+			if(reqmsg.edns->eflags & Evers)
+				rcode = Rbadvers;
+			edns = mkednsopt();
+		}
+
 		/* loop through each question */
 		while(reqmsg.qd){
 			memset(&repmsg, 0, sizeof(repmsg));
-			if(reqmsg.qd->type == Taxfr)
+			repmsg.edns = edns;
+			if(rcode == Rok && reqmsg.qd->type == Taxfr)
 				rv = dnzone(fd, pkt, &reqmsg, &repmsg, &req, callip);
 			else {
 				dnserver(&reqmsg, &repmsg, &req, callip, rcode);
@@ -124,10 +133,14 @@
 			if(rv < 0)
 				goto out;
 		}
+		rrfreelist(edns);
+		rrfreelist(reqmsg.edns);
 		freeanswers(&reqmsg);
 	}
 out:
 	close(fd);
+	rrfreelist(edns);
+	rrfreelist(reqmsg.edns);
 	freeanswers(&reqmsg);
 	putactivity(&req);
 	_exits(0);
--- a/sys/src/cmd/ndb/dnudpserver.c
+++ b/sys/src/cmd/ndb/dnudpserver.c
@@ -4,7 +4,7 @@
 #include "dns.h"
 
 static int	udpannounce(char*, char*);
-static void	reply(int, uchar*, DNSmsg*, Request*);
+static void	reply(int, uchar*, int, DNSmsg*, Request*);
 
 typedef struct Inprogress Inprogress;
 struct Inprogress
@@ -65,6 +65,7 @@
 	volatile uchar pkt[Udphdrsize + Maxudp];
 	volatile DNSmsg reqmsg, repmsg;
 	Inprogress *volatile p;
+	volatile RR *edns;
 	volatile Request req;
 	Udphdr *volatile uh;
 
@@ -98,6 +99,8 @@
 	/* loop on requests */
 	for(;; putactivity(&req)){
 		memset(&reqmsg, 0, sizeof reqmsg);
+		edns = nil;
+
 		procsetname("%s: udp server %s: served %d", mntpt, addr, served);
 
 		len = read(fd, pkt, sizeof pkt);
@@ -156,24 +159,35 @@
 		logrequest(req.id, 0, "rcvd", uh->raddr, caller,
 			reqmsg.qd->owner->name, reqmsg.qd->type);
 
+		/* determine response size */
+		len = 512;	/* default */
+		if((reqmsg.edns = getednsopt(&reqmsg)) != nil){
+			if(reqmsg.edns->eflags & Evers)
+				rcode = Rbadvers;
+			edns = mkednsopt();
+			len = Maxudp;
+			if(edns->udpsize < len)
+				len = edns->udpsize;
+			if(reqmsg.edns->udpsize < len)
+				len = reqmsg.edns->udpsize;
+		}
+
 		/* loop through each question */
 		while(reqmsg.qd){
 			memset(&repmsg, 0, sizeof repmsg);
-			switch(op){
-			case Oquery:
-				dnserver(&reqmsg, &repmsg, &req, uh->raddr, rcode);
-				break;
-			case Onotify:
+			repmsg.edns = edns;
+			if(rcode == Rok && op == Onotify)
 				dnnotify(&reqmsg, &repmsg, &req);
-				break;
-			}
-			/* send reply on fd to address in pkt's udp hdr */
-			reply(fd, pkt, &repmsg, &req);
+			else
+				dnserver(&reqmsg, &repmsg, &req, uh->raddr, rcode);
+			reply(fd, pkt, len, &repmsg, &req);
 			freeanswers(&repmsg);
 		}
+		rrfreelist(edns);
 
 		p->inuse = 0;
 freereq:
+		rrfreelist(reqmsg.edns);
 		freeanswers(&reqmsg);
 		if(req.isslave){
 			putactivity(&req);
@@ -183,13 +197,11 @@
 }
 
 static void
-reply(int fd, uchar *pkt, DNSmsg *rep, Request *req)
+reply(int fd, uchar *pkt, int len, DNSmsg *rep, Request *req)
 {
-	int len;
-
 	logreply(req->id, "send", pkt, rep);
 
-	len = convDNS2M(rep, &pkt[Udphdrsize], Maxudp);
+	len = convDNS2M(rep, &pkt[Udphdrsize], len);
 	len += Udphdrsize;
 	if(write(fd, pkt, len) != len)
 		dnslog("%d: error sending reply to %I: %r",