code: plan9front

Download patch

ref: 69e9efa847c03924caef9db2a0a4db586ddcc2f9
parent: 6fbf81a98f8f747444cfe0e7a0591f6de511e510
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Oct 15 17:20:30 EDT 2023

ndb/dns: avoid blocking 9p on refresh, use shorter 15second query timeout, cleanup

When writing "refresh" control message to trigger
garbage collection and there are other (query) processes active,
the 9p loop would block until these processes complete.

Instead, defer the garbage collection operation to these
outstanding processes once any of them completes,
keeping the 9p loop un-contended for longer.

When theres no other processes, we can garbage collect
ourselfs immediately (just as before).

Only acquire the dnvars lock on Twrite, so we can handle
other 9p requests while garbage collection is going on.

Also reduce the 60 second timeout to 15 second for queries.
Other dns servers usually use something between 10 and 30
seconds.

Avoid excessive nsec() and time() calls by having timems()
function that only uses nsec() and then updates global
nowms (millisecons) and now (seconds) variable.

After the end of readnet(), timems() needs to be called
anyway to update the query statistics so we can use that
as the new "now".

--- a/sys/src/cmd/ndb/dn.c
+++ b/sys/src/cmd/ndb/dn.c
@@ -29,8 +29,8 @@
 	Lock;
 	ulong	names;		/* names allocated */
 	ulong	oldest;		/* longest we'll leave a name around */
-	int	active;
-	int	mutex;
+	int	active;		/* number of active processes */
+	int	mutex;		/* 0 or pid of process doing garbage collection */
 	ushort	id;		/* same size as in packet */
 } dnvars;
 
@@ -149,6 +149,15 @@
 		noted(NDFLT);		/* die */
 }
 
+uvlong
+timems(void)
+{
+	uvlong ms = nsec()/1000000L;
+	now = ms/1000L;
+	nowms = ms;
+	return ms;
+}
+
 void
 dninit(void)
 {
@@ -164,6 +173,8 @@
 	dnvars.names = 0;
 	dnvars.id = truerand();	/* don't start with same id every time */
 
+	timems();
+
 	notify(ding);
 }
 
@@ -665,11 +676,12 @@
 	if(traceactivity)
 		dnslog("get: %d active by pid %d from %p",
 			dnvars.active, getpid(), getcallerpc(&req));
-	lock(&dnvars);
+
 	/*
 	 * can't block here if we're already holding one
 	 * of the dnvars.active (recursive).  will deadlock.
 	 */
+	lock(&dnvars);
 	while(!recursive && dnvars.mutex){
 		unlock(&dnvars);
 		sleep(100);			/* tune; was 200 */
@@ -676,8 +688,6 @@
 		lock(&dnvars);
 	}
 	rv = ++dnvars.active;
-	now = time(nil);
-	nowns = nsec();
 	req->id = ++dnvars.id;
 	req->aux = nil;
 	unlock(&dnvars);
@@ -687,9 +697,11 @@
 void
 putactivity(int recursive)
 {
+	int pid = getpid();
+
 	if(traceactivity)
-		dnslog("put: %d active by pid %d",
-			dnvars.active, getpid());
+		dnslog("put: %d active by pid %d", dnvars.active, pid);
+
 	lock(&dnvars);
 	dnvars.active--;
 	assert(dnvars.active >= 0); /* "dnvars.active %d", dnvars.active */
@@ -698,15 +710,16 @@
 	 *  clean out old entries and check for new db periodicly
 	 *  can't block here if being called to let go a "recursive" lock
 	 *  or we'll deadlock waiting for ourselves to give up the dnvars.active.
+	 *  also don't block if we are the 9p process (needrefresh == pid).
 	 */
-	if (recursive || dnvars.mutex ||
-	    (needrefresh == 0 && dnvars.active > 0)){
+	if(recursive || dnvars.mutex
+	|| dnvars.active > 0 && (needrefresh == 0 || needrefresh == pid)){
 		unlock(&dnvars);
 		return;
 	}
 
 	/* wait till we're alone */
-	dnvars.mutex = 1;
+	dnvars.mutex = pid;
 	while(dnvars.active > 0){
 		unlock(&dnvars);
 		sleep(100);		/* tune; was 100 */
@@ -714,13 +727,14 @@
 	}
 	unlock(&dnvars);
 
-	db2cache(needrefresh);
-
+	db2cache(needrefresh != 0);
 	dnageall(0);
 
 	/* let others back in */
+	lock(&dnvars);
 	needrefresh = 0;
 	dnvars.mutex = 0;
+	unlock(&dnvars);
 }
 
 int
--- a/sys/src/cmd/ndb/dnresolve.c
+++ b/sys/src/cmd/ndb/dnresolve.c
@@ -235,7 +235,7 @@
  * thus we don't wait very long for them.
  */
 static void
-notestats(vlong start, int tmout, int type)
+notestats(long ms, int tmout, int type)
 {
 	qlock(&stats);
 	if (tmout) {
@@ -245,7 +245,7 @@
 		else if (type == Tcname)
 			stats.tmoutcname++;
 	} else {
-		long wait10ths = NS2MS(nsec() - start) / 100;
+		long wait10ths = ms / 100;
 
 		if (wait10ths <= 0)
 			stats.under10ths[0]++;
@@ -559,31 +559,31 @@
 
 /* timed read of reply.  sets srcip.  ibuf must be 64K to handle tcp answers. */
 static int
-readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp,
-	uchar *srcip)
+readnet(Query *qp, int medium, uchar *ibuf, uvlong endms, uchar **replyp, uchar *srcip)
 {
 	int len, fd;
 	long ms;
-	vlong startns = nsec();
+	uvlong startms;
 	uchar *reply;
 	uchar lenbuf[2];
 
-	len = -1;			/* pessimism */
 	*replyp = nil;
 	memset(srcip, 0, IPaddrlen);
-	ms = endms - NS2MS(startns);
-	if (ms <= 0)
+
+	startms = nowms;
+	ms = (long)(endms - startms);
+	if (ms < 1)
 		return -1;		/* taking too long */
 
+	len = -1;			/* pessimism */
 	reply = ibuf;
-	alarm(ms);
 	if (medium == Udp)
 		if (qp->udpfd < 0)
 			dnslog("readnet: qp->udpfd closed");
 		else {
+			alarm(ms);
 			len = read(qp->udpfd, ibuf, Udphdrsize+Maxudpin);
 			alarm(0);
-			notestats(startns, len < 0, qp->type);
 			if (len >= IPaddrlen)
 				memmove(srcip, ibuf, IPaddrlen);
 			if (len >= Udphdrsize) {
@@ -598,24 +598,28 @@
 		if (fd < 0)
 			dnslog("readnet: %s: tcp fd unset for dest %I",
 				qp->dp->name, qp->tcpip);
-		else if (readn(fd, lenbuf, 2) != 2) {
-			dnslog("readnet: short read of 2-byte tcp msg size from %I",
-				qp->tcpip);
-			/* probably a time-out */
-			notestats(startns, 1, qp->type);
-		} else {
-			len = lenbuf[0]<<8 | lenbuf[1];
-			if (readn(fd, ibuf, len) != len) {
-				dnslog("readnet: short read of tcp data from %I",
-					qp->tcpip);
-				/* probably a time-out */
-				notestats(startns, 1, qp->type);
-				len = -1;
+		else {
+			alarm(ms);
+			if (readn(fd, lenbuf, 2) != 2) {
+				alarm(0);
+				dnslog("readnet: short read of 2-byte tcp msg size from %I", qp->tcpip);
+			} else {
+				len = lenbuf[0]<<8 | lenbuf[1];
+				if (readn(fd, ibuf, len) != len) {
+					alarm(0);
+					dnslog("readnet: short read of tcp data from %I", qp->tcpip);
+					len = -1;
+				} else {
+					alarm(0);
+				}
 			}
 		}
 		memmove(srcip, qp->tcpip, IPaddrlen);
 	}
-	alarm(0);
+
+	ms = (long)(timems() - startms);
+	notestats(ms, len < 0, qp->type);
+
 	*replyp = reply;
 	return len;
 }
@@ -631,14 +635,14 @@
 {
 	int len;
 	char *err;
-	char tbuf[32];
 	uchar *reply;
 	uchar srcip[IPaddrlen];
 	RR *rp;
 
-	for (; timems() < endms &&
-	    (len = readnet(qp, medium, ibuf, endms, &reply, srcip)) >= 0;
-	    freeanswers(mp)){
+	for (;; freeanswers(mp)) {
+		len = readnet(qp, medium, ibuf, endms, &reply, srcip);
+		if (len < 0)
+			break;
 		/* convert into internal format  */
 		memset(mp, 0, sizeof *mp);
 		err = convM2DNS(reply, len, mp, nil);
@@ -675,20 +679,6 @@
 			return 0;
 		}
 	}
-	if (timems() >= endms) {
-		;				/* query expired */
-	} else if (0) {
-		/* this happens routinely when a read times out */
-		dnslog("readreply: %s type %s: ns %I read error or eof "
-			"(returned %d): %r", qp->dp->name, rrname(qp->type,
-			tbuf, sizeof tbuf), srcip, len);
-		if (medium == Udp)
-			for (rp = qp->nsrp; rp != nil; rp = rp->next)
-				if (rp->type == Tns)
-					dnslog("readreply: %s: query sent to "
-						"ns %s", qp->dp->name,
-						rp->host->name);
-	}
 	memset(mp, 0, sizeof *mp);
 	return -1;
 }
@@ -985,9 +975,6 @@
 	char buf[32];
 	Dest *p;
 
-	if(timems() >= qp->req->aborttime)
-		return -1;
-
 	/*
 	 * if we send tcp query, we just take the dest ip address from
 	 * the udp header placed there by tcpquery().
@@ -1272,15 +1259,10 @@
  */
 static int
 tcpquery(Query *qp, DNSmsg *mp, int depth, uchar *ibuf, uchar *obuf, int len,
-	ulong waitms, int inns, ushort req)
+	uvlong endms, int inns, ushort req)
 {
 	int rv = 0;
-	uvlong endms;
 
-	endms = timems() + waitms;
-	if(endms > qp->req->aborttime)
-		endms = qp->req->aborttime;
-
 	if (0)
 		dnslog("%s: udp reply truncated; retrying query via tcp to %I",
 			qp->dp->name, qp->tcpip);
@@ -1338,10 +1320,13 @@
 	for(ndest = 2; ndest < Maxdest; ndest += 2){
 		qp->ndest = ndest;
 		qp->tcpset = 0;
+
+		endms = nowms;
+		if(endms >= qp->req->aborttime)
+			break;
 		if (xmitquery(qp, Udp, depth, obuf, inns, len) < 0)
 			break;
-
-		endms = timems() + waitms;
+		endms += waitms;
 		if(endms > qp->req->aborttime)
 			endms = qp->req->aborttime;
 
@@ -1361,8 +1346,10 @@
 			} else {
 				/* whoops, it was truncated! ask again via tcp */
 				freeanswers(&m);
+				if(nowms >= endms)
+					break;
 				rv = tcpquery(qp, &m, depth, ibuf, obuf, len,
-					waitms, inns, req);  /* answer in m */
+					endms, inns, req);  /* answer in m */
 				if (rv < 0) {
 					freeanswers(&m);
 					break;		/* failed via tcp too */
@@ -1516,8 +1503,8 @@
 	Query q;
 
 	memset(&req, 0, sizeof req);
-	req.isslave = 1;
 	req.aborttime = timems() + Maxreqtm;
+	req.isslave = 1;
 	req.from = "internal";
 	queryinit(&q, dnlookup(root, Cin, 1), Tns, &req);
 	nsrp = randomize(dblookup(root, Cin, Tns, 0, 0));
--- a/sys/src/cmd/ndb/dns.c
+++ b/sys/src/cmd/ndb/dns.c
@@ -64,7 +64,7 @@
 int	mfd[2];
 int	needrefresh;
 ulong	now;
-vlong	nowns;
+uvlong	nowms;
 int	sendnotifies;
 char	*trace;
 int	traceactivity;
@@ -186,8 +186,6 @@
 		(cfg.resolver? "resolver ": ""), mntpt);
 
 	opendatabase();
-	now = time(nil);		/* open time files before we fork */
-	nowns = nsec();
 	dnsuser = estrdup(getuser());
 
 	snprint(servefile, sizeof servefile, "/srv/dns%s", ext);
@@ -421,10 +419,6 @@
 		if(debug)
 			dnslog("%F", &job->request);
 
-		getactivity(&req, 0);
-		req.aborttime = timems() + Maxreqtm;
-		req.from = "9p";
-
 		switch(job->request.type){
 		default:
 			warning("unknown request type %d", job->request.type);
@@ -454,9 +448,17 @@
 			rread(job, mf);
 			break;
 		case Twrite:
-			/* &req is handed to dnresolve() */
+			getactivity(&req, 0);
+			req.aborttime = timems() + Maxreqtm;
+			req.from = "9p";
 			rwrite(job, mf, &req);
-			break;
+			freejob(job);
+			if(req.isslave){
+				putactivity(0);
+				_exits(0);
+			}
+			putactivity(0);
+			continue;
 		case Tclunk:
 			rclunk(job, mf);
 			break;
@@ -470,18 +472,7 @@
 			rwstat(job, mf);
 			break;
 		}
-
 		freejob(job);
-
-		/*
-		 *  slave processes die after replying
-		 */
-		if(req.isslave){
-			putactivity(0);
-			_exits(0);
-		}
-
-		putactivity(0);
 	}
 }
 
@@ -702,7 +693,7 @@
 	else if(strcmp(job->request.data, "dump")==0)
 		dndump("/lib/ndb/dnsdump");
 	else if(strcmp(job->request.data, "refresh")==0)
-		needrefresh = 1;
+		needrefresh = getpid();
 	else if(strcmp(job->request.data, "stats")==0)
 		dnstats("/lib/ndb/dnsstats");
 	else if(strncmp(job->request.data, "target ", 7)==0){
--- a/sys/src/cmd/ndb/dns.h
+++ b/sys/src/cmd/ndb/dns.h
@@ -1,8 +1,3 @@
-#define NS2MS(ns) ((ns) / 1000000L)
-#define S2MS(s)   ((s)  * 1000LL)
-
-#define timems()	NS2MS(nsec())
-
 typedef struct Ndbtuple Ndbtuple;
 
 enum
@@ -152,7 +147,7 @@
 	Maxactive=	250,
 
 	/* tune; was 8*1000; that was too short */
-	Maxreqtm=	60*1000,	/* max. ms to process a request */
+	Maxreqtm=	15*1000,	/* max. ms to process a request */
 
 	Notauthoritative = 0,
 	Authoritative,
@@ -424,10 +419,10 @@
 extern char	*logfile;
 extern int	maxage;		/* age of oldest entry in cache (secs) */
 extern char	mntpt[];
-extern int	needrefresh;
+extern int	needrefresh;	/* set to pid of the process requesting flush */
 extern int	norecursion;
 extern ulong	now;		/* time base */
-extern vlong	nowns;
+extern uvlong	nowms;
 extern Area	*owned;
 extern int	sendnotifies;
 extern ulong	target;
@@ -491,6 +486,7 @@
 int	rrtype(char*);
 void	slave(Request*);
 int	subsume(char*, char*);
+uvlong	timems(void);
 int	tsame(int, int);
 void	unique(RR*);
 void	warning(char*, ...);
--- a/sys/src/cmd/ndb/dnsdebug.c
+++ b/sys/src/cmd/ndb/dnsdebug.c
@@ -22,7 +22,7 @@
 char	mntpt[Maxpath];
 int	needrefresh;
 ulong	now;
-vlong	nowns;
+uvlong	nowms;
 char	*trace;
 int	traceactivity;
 char	*zonerefreshprogram;
@@ -76,8 +76,6 @@
 		usage();
 	}ARGEND
 
-	now = time(nil);
-	nowns = nsec();
 	dninit();
 	fmtinstall('P', prettyrrfmt);
 	opendatabase();
@@ -258,7 +256,7 @@
 		}
 		memset(&req, 0, sizeof req);
 		req.isslave = 1;
-		req.aborttime = NS2MS(nowns) + Maxreqtm;
+		req.aborttime = timems() + Maxreqtm;
 		*l = dnresolve(rp->host->name, Cin, Ta, &req, nil, 0, Recurse, 0, nil);
 		if(*l == nil)
 			*l = dnresolve(rp->host->name, Cin, Taaaa, &req,
@@ -345,7 +343,7 @@
 	memset(&req, 0, sizeof req);
 	getactivity(&req, 0);
 	req.isslave = 1;
-	req.aborttime = NS2MS(nowns) + Maxreqtm;
+	req.aborttime = timems() + Maxreqtm;
 	rr = dnresolve(buf, Cin, type, &req, nil, 0, Recurse, rooted, nil);
 	if(rr){
 		print("----------------------------\n");
--- a/sys/src/cmd/ndb/dnsgetip.c
+++ b/sys/src/cmd/ndb/dnsgetip.c
@@ -13,7 +13,7 @@
 char mntpt[Maxpath];
 int	needrefresh	= 0;
 ulong	now		= 0;
-vlong	nowns		= 0;
+uvlong	nowms		= 0;
 int	traceactivity	= 0;
 char	*zonerefreshprogram;
 
@@ -36,7 +36,7 @@
 	memset(&req, 0, sizeof req);
 	getactivity(&req, 0);
 	req.isslave = 1;
-	req.aborttime = NS2MS(nowns) + Maxreqtm;
+	req.aborttime = timems() + Maxreqtm;
 
 	rr = dnresolve(name, Cin, type, &req, nil, 0, Recurse, 0, &status);
 	neg = rrremneg(&rr);
--- a/sys/src/cmd/ndb/dnstcp.c
+++ b/sys/src/cmd/ndb/dnstcp.c
@@ -19,7 +19,7 @@
 char	mntpt[Maxpath];
 int	needrefresh;
 ulong	now;
-vlong	nowns;
+uvlong	nowms;
 int	traceactivity;
 char	*zonerefreshprogram;
 
@@ -98,7 +98,6 @@
 
 	/* loop on requests */
 	for(;; putactivity(0)){
-		now = time(nil);
 		memset(&repmsg, 0, sizeof repmsg);
 		len = readmsg(0, buf, sizeof buf);
 		if(len <= 0)
@@ -105,7 +104,7 @@
 			break;
 
 		getactivity(&req, 0);
-		req.aborttime = timems() + S2MS(15*Min);
+		req.aborttime = timems() + 15*Min*1000;
 		rcode = 0;
 		memset(&reqmsg, 0, sizeof reqmsg);
 		err = convM2DNS(buf, len, &reqmsg, &rcode);
--- a/sys/src/cmd/ndb/dnudpserver.c
+++ b/sys/src/cmd/ndb/dnudpserver.c
@@ -131,7 +131,7 @@
 {
 	volatile int fd, len, op, rcode, served;
 	char *volatile err;
-	volatile char tname[32];
+	volatile char tname[32], ipstr[64];
 	volatile uchar buf[Udphdrsize + Maxudp + 1024];
 	volatile DNSmsg reqmsg, repmsg;
 	Inprogress *volatile p;
@@ -165,8 +165,6 @@
 	if(setjmp(req.mret))
 		putactivity(0);
 	req.isslave = 0;
-	req.id = 0;
-	req.aborttime = 0;
 
 	/* loop on requests */
 	for(;; putactivity(0)){
@@ -188,16 +186,16 @@
 		uh = (Udphdr*)buf;
 		len -= Udphdrsize;
 
-		// dnslog("read received UDP from %I to %I",
-		//	((Udphdr*)buf)->raddr, ((Udphdr*)buf)->laddr);
+		// dnslog("read received UDP from %I to %I", uh->raddr, uh->laddr);
+		snprint(ipstr, sizeof(ipstr), "%I", uh->raddr);
 		getactivity(&req, 0);
 		req.aborttime = timems() + Maxreqtm;
-		req.from = smprint("%I", buf);
-		rcode = 0;
+		req.from = ipstr;
 
 		served++;
 		stats.qrecvdudp++;
 
+		rcode = 0;
 		err = convM2DNS(&buf[Udphdrsize], len, &reqmsg, &rcode);
 		if(err){
 			/* first bytes in buf are source IP addr */
@@ -215,8 +213,7 @@
 			}
 		op = reqmsg.flags & Omask;
 		if(op != Oquery && op != Onotify){
-			dnslog("server: op %d from %I", reqmsg.flags & Omask,
-				buf);
+			dnslog("server: op %d from %I", reqmsg.flags & Omask, buf);
 			goto freereq;
 		}
 
@@ -267,8 +264,6 @@
 
 		p->inuse = 0;
 freereq:
-		free(req.from);
-		req.from = nil;
 		freeanswers(&reqmsg);
 		if(req.isslave){
 			putactivity(0);