git: 9front

Download patch

ref: cdf5a548eb3bf9a371053ba6efa932e18afec63a
parent: dde671477e5981924e8b07e3342307fc28f090c5
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun Feb 22 20:38:10 EST 2026

ether82563: add a watchdog handling rx dma stuck condition

On 82579LM (Lenovo x230), after continuous 1Gbit line-rate
operation for 5-12 hours, the receive dma fifo gets stuck
with the descriptor ring fully populated (it has buffers)
but Rdh no longer advancing.

Statistics counters still indicate that packets are
being received fine, but the mac is unable to transfer
them to the host.

Resetting the whole device does get it unstuck, so
implement a watchdog process that checks the Rdh register
and the "Missed Packets Count" and resets the device
if we get missed packets for 5 seconds while Rdh is not
advancing.

--- a/sys/src/9/pc/ether82563.c
+++ b/sys/src/9/pc/ether82563.c
@@ -100,6 +100,7 @@
 	/* Statistics */
 
 	Statistics	= 0x4000,	/* Start of Statistics Area */
+	Mpc		= 0x10/4,	/* Missed packets Count */
 	Gorcl		= 0x88/4,	/* Good Octets Received Count */
 	Gotcl		= 0x90/4,	/* Good Octets Transmitted Count */
 	Torl		= 0xC0/4,	/* Total Octets Received */
@@ -524,6 +525,7 @@
 	Lock	imlock;
 	int	im;			/* interrupt mask */
 
+	Proc	*lproc;
 	Rendez	lrendez;
 	int	lim;
 
@@ -544,6 +546,7 @@
 	uchar	ra[Eaddrlen];		/* receive address */
 	u32int	mta[128];		/* multicast table array */
 
+	Proc	*rproc;
 	Rendez	rrendez;
 	int	rim;
 	int	rdfree;
@@ -554,6 +557,7 @@
 	int	rdtr;			/* receive delay timer ring value */
 	int	radv;			/* receive interrupt absolute delay timer */
 
+	Proc	*tproc;
 	Rendez	trendez;
 	QLock	tlock;
 	int	tbusy;
@@ -566,6 +570,8 @@
 	int	fcrth;
 
 	u32int	pba;			/* packet buffer allocation */
+
+	Proc	*wproc;			/* watchdog */
 };
 
 #define csr32r(c, r)	(*((c)->nic+((r)/4)))
@@ -650,6 +656,8 @@
 	"Interrupt Rx Overrun",
 };
 
+static void i82563recover(Ctlr*);
+
 static char*
 cname(Ctlr *c)
 {
@@ -796,6 +804,16 @@
 }
 
 static void
+procerror(Ctlr *ctlr, Proc **p)	/* error exit for a driver kproc; p is the Ctlr slot that names the calling proc */
+{	
+	print("#l%d: %s: %s: %s\n", ctlr->edev->ctlrno, cname(ctlr), up->text, up->errstr);
+
+	*p = nil;	/* clear the slot; i82563recover() polls these slots until all are nil */
+
+	pexit("", 1);	/* the calling kproc ends here */
+}
+
+static void
 i82563im(Ctlr *ctlr, int im)
 {
 	ilock(&ctlr->imlock);
@@ -880,11 +898,14 @@
 
 	edev = v;
 	ctlr = edev->ctlr;
+	ctlr->tproc = up;
+
 	i82563txinit(ctlr);
 
 	tdt = ctlr->tdt;
 	while(waserror())
-		;
+		procerror(ctlr, &ctlr->tproc);
+
 	for(;;){
 		n = NEXT(tdt, ctlr->ntd);
 		if(n == i82563cleanup(ctlr)){
@@ -939,9 +960,12 @@
 static void
 i82563rxinit(Ctlr *ctlr)
 {
+	Ether *edev;
 	int i;
 	Block *bp;
 
+	edev = ctlr->edev;
+
 	if(ctlr->rbsz <= 2048)
 		csr32w(ctlr, Rctl, Dpf|Bsize2048|Bam|RdtmsHALF);
 	else{
@@ -975,6 +999,7 @@
 	csr32w(ctlr, Rdtr, ctlr->rdtr);
 	csr32w(ctlr, Radv, ctlr->radv);
 
+	ctlr->rdfree = 0;
 	for(i = 0; i < ctlr->nrd; i++)
 		if((bp = ctlr->rb[i]) != nil){
 			ctlr->rb[i] = nil;
@@ -996,6 +1021,13 @@
 	 * Enable checksum offload.
 	 */
 	csr32w(ctlr, Rxcsum, Tuofl | Ipofl | ETHERHDRSIZE);
+
+	i82563promiscuous(edev, edev->prom);
+
+	csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
+
+	if(cttab[ctlr->type].flag & F75)
+		csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable);
 }
 
 static int
@@ -1007,7 +1039,7 @@
 static void
 i82563rproc(void *arg)
 {
-	uint rdh, rim, im;
+	uint rdh, rim;
 	Block *bp;
 	Ctlr *ctlr;
 	Ether *edev;
@@ -1015,26 +1047,21 @@
 
 	edev = arg;
 	ctlr = edev->ctlr;
+	ctlr->rproc = up;
 
 	i82563rxinit(ctlr);
 
-	csr32w(ctlr, Rctl, csr32r(ctlr, Rctl) | Ren);
-	if(cttab[ctlr->type].flag & F75){
-		csr32w(ctlr, Rxdctl, csr32r(ctlr, Rxdctl) | Enable);
-		im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack;
-	}else
-		im = Rxt0|Rxo|Rxdmt0|Rxseq|Ack;
-
 	while(waserror())
-		;
+		procerror(ctlr, &ctlr->rproc);
+
 	for(;;){
-		i82563im(ctlr, im);
+		i82563im(ctlr, Rxt0|Rxo|Rxdmt0|Rxseq|Ack);
 		ctlr->rsleep++;
 		i82563replenish(ctlr);
 		sleep(&ctlr->rrendez, i82563rim, ctlr);
 
 		rdh = ctlr->rdh;
-		for(;;){
+		while(rdh != ctlr->rdt){
 			rim = ctlr->rim;
 			ctlr->rim = 0;
 			rd = &ctlr->rdba[rdh];
@@ -1049,6 +1076,10 @@
 			 * calculated and valid.
 			 */
 			bp = ctlr->rb[rdh];
+			ctlr->rb[rdh] = nil;
+			ctlr->rdfree--;
+			ctlr->rdh = rdh = NEXT(rdh, ctlr->nrd);
+
 			if((rd->status & Reop) && rd->errors == 0){
 				bp->wp += rd->length;
 				if(!(rd->status & Ixsm)){
@@ -1075,9 +1106,7 @@
 				etheriq(edev, bp);
 			} else
 				freeb(bp);
-			ctlr->rb[rdh] = nil;
-			ctlr->rdfree--;
-			ctlr->rdh = rdh = NEXT(rdh, ctlr->nrd);
+
 			if(ctlr->nrd-ctlr->rdfree >= 32 || (rim & Rxdmt0))
 				i82563replenish(ctlr);
 		}
@@ -1211,8 +1240,10 @@
 
 	e = v;
 	c = e->ctlr;
+	c->lproc = up;
+
 	while(waserror())
-		;
+		procerror(c, &c->lproc);
 
 	while((phyno = phyprobe(c, 3<<1)) == ~0)
 		lsleep(c, Lsc);
@@ -1254,8 +1285,10 @@
 
 	e = v;
 	c = e->ctlr;
+	c->lproc = up;
+
 	while(waserror())
-		;
+		procerror(c, &c->lproc);
 
 	while((phyno = phyprobe(c, 3<<1)) == ~0)
 		lsleep(c, Lsc);
@@ -1315,8 +1348,10 @@
 
 	e = v;
 	c = e->ctlr;
+	c->lproc = up;
+
 	while(waserror())
-		;
+		procerror(c, &c->lproc);
 
 	if(c->type == i82575 || c->type == i82576)
 		csr32w(c, Connsw, Enrgirq);
@@ -1346,8 +1381,10 @@
 
 	e = v;
 	c = e->ctlr;
+	c->lproc = up;
 	while(waserror())
-		;
+		procerror(c, &c->lproc);
+
 	for(;;){
 		rx = csr32r(c, Rxcw);
 		tx = csr32r(c, Txcw);
@@ -1365,7 +1402,67 @@
 	}
 }
 
+static uint
+missedpackets(Ctlr *ctlr)	/* add the hardware Mpc register to ctlr->statistics[Mpc] and return the running total */
+{
+	uint r;
+
+	r = csr32r(ctlr, Statistics + Mpc*4);	/* Mpc is a word index into the statistics area; *4 gives the byte offset */
+	ctlr->statistics[Mpc] += r;	/* NOTE(review): accumulating assumes the register clears on read -- confirm */
+
+	return ctlr->statistics[Mpc];
+}
+
 static void
+i82563wproc(void *v)	/* watchdog kproc: recovers the device when packets are missed while Rdh stands still */
+{
+	Ctlr *ctlr;
+	Ether *edev;
+	uint mpc, rdh, stuck;
+
+	edev = v;
+	ctlr = edev->ctlr;
+
+	ctlr->wproc = up;	/* publish this proc so i82563recover() can post a note to it */
+	while(waserror())
+		procerror(ctlr, &ctlr->wproc);
+
+Again:	/* take a fresh baseline: missed-packet total, Rdh, and no stuck polls */
+	mpc = missedpackets(ctlr);
+	rdh = csr32r(ctlr, Rdh);
+	stuck = 0;
+	for(;;){
+		tsleep(&up->sleep, return0, 0, 1000);	/* one poll per 1000 tsleep units */
+		if(missedpackets(ctlr) == mpc)
+			continue;	/* total unchanged since the baseline: nothing missed */
+		if(csr32r(ctlr, Rdh) != rdh)
+			goto Again;	/* Rdh moved since the baseline: rebaseline */
+		if(++stuck >= 5)
+			break;	/* fifth poll with the total above baseline and Rdh unchanged */
+	}
+
+	print("#l%d: %s: %s: rx stuck, recovering...\n", ctlr->edev->ctlrno, cname(ctlr), up->text);
+
+	ctlr->wproc = nil;	/* cleared first so i82563recover() neither notes nor waits on this proc */
+	i82563recover(ctlr);
+	pexit("", 1);
+}
+
+static void
+i82563dealloc(Ctlr *ctlr)	/* free the descriptor area and the block pointer arrays; leave every pointer nil */
+{
+	ctlr->rdba = nil;	/* rdba and tdba point into ctlr->alloc (set in i82563attach), so they are only cleared */
+	ctlr->tdba = nil;
+
+	free(ctlr->tb);
+	ctlr->tb = nil;
+	free(ctlr->rb);	/* NOTE(review): frees the pointer array only, not Blocks still held in rb[]/tb[] -- confirm they are released elsewhere */
+	ctlr->rb = nil;
+	free(ctlr->alloc);
+	ctlr->alloc = nil;
+}
+
+static void
 i82563attach(Ether *edev)
 {
 	char name[KNAMELEN];
@@ -1384,30 +1481,17 @@
 	ctlr->alloc = malloc(ctlr->nrd*sizeof(Rd)+ctlr->ntd*sizeof(Td) + 255);
 	ctlr->rb = malloc(ctlr->nrd * sizeof(Block*));
 	ctlr->tb = malloc(ctlr->ntd * sizeof(Block*));
-	if(ctlr->alloc == nil || ctlr->rb == nil || ctlr->tb == nil){
-		free(ctlr->rb);
-		ctlr->rb = nil;
-		free(ctlr->tb);
-		ctlr->tb = nil;
-		free(ctlr->alloc);
-		ctlr->alloc = nil;
-		qunlock(&ctlr->alock);
-		error(Enomem);
-	}
-	ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 256);
-	ctlr->tdba = (Td*)(ctlr->rdba + ctlr->nrd);
-
 	if(waserror()){
-		free(ctlr->tb);
-		ctlr->tb = nil;
-		free(ctlr->rb);
-		ctlr->rb = nil;
-		free(ctlr->alloc);
-		ctlr->alloc = nil;
+		i82563dealloc(ctlr);
 		qunlock(&ctlr->alock);
 		nexterror();
 	}
+	if(ctlr->alloc == nil || ctlr->rb == nil || ctlr->tb == nil)
+		error(Enomem);
 
+	ctlr->rdba = (Rd*)ROUNDUP((uintptr)ctlr->alloc, 256);
+	ctlr->tdba = (Td*)(ctlr->rdba + ctlr->nrd);
+
 	/* set link up */
 	r = csr32r(ctlr, Ctrl);
 	r &= ~(Frcspd|Frcdplx);	/* dont force */
@@ -1429,6 +1513,9 @@
 	snprint(name, sizeof name, "#l%dt", edev->ctlrno);
 	kproc(name, i82563tproc, edev);
 
+	snprint(name, sizeof name, "#l%dw", edev->ctlrno);
+	kproc(name, i82563wproc, edev);
+
 	qunlock(&ctlr->alock);
 	poperror();
 }
@@ -1815,7 +1902,6 @@
 		csr32w(ctlr, Ral+i*8, 0);
 		csr32w(ctlr, Rah+i*8, 0);
 	}
-	memset(ctlr->mta, 0, sizeof(ctlr->mta));
 	for(i = 0; i < 128; i++)
 		csr32w(ctlr, Mta + i*4, 0);
 	if((flag & Fnofca) == 0){
@@ -1837,6 +1923,7 @@
 	CMradv,
 	CMpause,
 	CMan,
+	CMrecover,
 };
 
 static Cmdtab i82563ctlmsg[] = {
@@ -1844,6 +1931,7 @@
 	CMradv,	"radv",	2,
 	CMpause, "pause", 1,
 	CMan,	"an",	1,
+	CMrecover, "recover", 1,
 };
 
 static long
@@ -1886,6 +1974,9 @@
 	case CMan:
 		csr32w(ctlr, Ctrl, csr32r(ctlr, Ctrl) | Lrst | Phyrst);
 		break;
+	case CMrecover:
+		i82563recover(ctlr);
+		break;
 	}
 	free(cb);
 	poperror();
@@ -1893,6 +1984,39 @@
 	return n;
 }
 
+static void
+i82563recover(Ctlr *ctlr)	/* stop the driver kprocs, reset the device, drop its buffers and attach again */
+{
+	Ether *edev;
+	Proc *p;
+
+	edev = ctlr->edev;
+
+	if((p = ctlr->wproc) != nil)	/* each slot is read once into p; presumably because the proc may nil it concurrently */
+		postnote(p, 1, "recover", 0);
+	if((p = ctlr->tproc) != nil)
+		postnote(p, 1, "recover", 0);
+	if((p = ctlr->rproc) != nil)
+		postnote(p, 1, "recover", 0);
+	if((p = ctlr->lproc) != nil)
+		postnote(p, 1, "recover", 0);
+
+	while(ctlr->wproc != nil	/* poll until every slot is nil; presumably the note sends each kproc into procerror() */
+	|| ctlr->tproc != nil
+	|| ctlr->rproc != nil
+	|| ctlr->lproc != nil)
+		tsleep(&up->sleep, return0, 0, 10);
+
+	qlock(&ctlr->alock);	/* the same lock i82563attach releases on its way out */
+	splhi();
+	i82563reset(ctlr);
+	i82563dealloc(ctlr);
+	spllo();
+	qunlock(&ctlr->alock);
+
+	i82563attach(edev);	/* allocates the rings again and starts new kprocs, including the watchdog */
+}
+
 static int
 didtype(int d)
 {
@@ -2148,7 +2272,6 @@
 	 * Linkage to the generic ethernet driver.
 	 */
 	edev->attach = i82563attach;
-//	edev->transmit = i82563transmit;
 	edev->ifstat = i82563ifstat;
 	edev->ctl = i82563ctl;
 
--