code: plan9front

Download patch

ref: eaffa1ef55825c34e138246fe5db5bbf996a8dbb
parent: 39321d74d876356004c5314fbeb58fdaa36545aa
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Thu Jan 4 21:12:42 EST 2024

kernel: fix EDF scheduler double ready() and more robust double-ready detection

Move the "double-ready" check into the queueproc() function,
doing it while holding the runq lock, meaning
all transitions to Ready state are serialized.

We do not just check for double-ready but for any
"illegal" transitions:

ready() on Dead, Moribund, Ready, Running and Waitrelease
is not allowed.

ready() on New, Queueing*, Wakeme, Broken, Stopped and Rendezvous
is only valid when done from another process.

For rescheduling, we have to go to Scheding state
before calling ready(). (rebalance(), schedinit()...)

The EDF scheduler had a bug where it could call ready() multiple
times, as it was staying in Waitrelease state after releasing
the edflock. Now it transitions through Scheding, avoiding
the issue.

--- a/sys/src/9/port/edf.c
+++ b/sys/src/9/port/edf.c
@@ -209,7 +209,6 @@
 releaseintr(Ureg *u, Timer *t)
 {
 	Proc *p;
-	Schedq *rq;
 
 	if(panicking || active.exiting)
 		return;
@@ -224,21 +223,17 @@
 		return;
 	case Ready:
 		/* remove proc from current runq */
-		rq = &runq[p->priority];
-		if(dequeueproc(rq, p) != p){
+		if(dequeueproc(&runq[p->priority], p) != p){
 			DPRINT("releaseintr: can't find proc or lock race\n");
 			release(p);	/* It'll start best effort */
 			edfunlock();
 			return;
 		}
-		p->state = Waitrelease;
 		/* fall through */
 	case Waitrelease:
+		p->state = Scheding;
 		release(p);
 		edfunlock();
-		if(p->state == Wakeme){
-			iprint("releaseintr: wakeme\n");
-		}
 		ready(p);
 		if(up){
 			up->delaysched++;
@@ -412,13 +407,13 @@
 			DPRINT("%lud edfadmit other %lud[%s], release at %lud\n",
 				now, p->pid, statename[p->state], e->t);
 			if(e->tt == nil){
-				e->tf = releaseintr;
-				e->ta = p;
 				tns = e->t - now;
 				if(tns < 20)
 					tns = 20;
 				e->tns = 1000LL * tns;
 				e->tmode = Trelative;
+				e->tf = releaseintr;
+				e->ta = p;
 				timeradd(e);
 			}
 		}
@@ -476,8 +471,8 @@
 	if(n < 20)
 		n = 20;
 	up->tns = 1000LL * n;
-	up->tf = releaseintr;
 	up->tmode = Trelative;
+	up->tf = releaseintr;
 	up->ta = up;
 	up->trend = &up->sleep;
 	timeradd(up);
@@ -488,6 +483,8 @@
 		nexterror();
 	}
 	sleep(&up->sleep, yfn, nil);
+	up->trend = nil;
+	timerdel(up);
 	poperror();
 }
 
@@ -495,17 +492,10 @@
 edfready(Proc *p)
 {
 	Edf *e;
-	Schedq *rq;
-	Proc *l, *pp;
-	void (*pt)(Proc*, int, vlong);
 	long n;
 
 	if((e = edflock(p)) == nil)
 		return 0;
-
-	if(p->state == Wakeme && p->r){
-		iprint("edfready: wakeme\n");
-	}
 	if(e->d - now <= 0){
 		/* past deadline, arrange for next release */
 		if((e->flags & Sporadic) == 0){
@@ -550,7 +540,7 @@
 				now, p->pid, statename[p->state], e->t);
 			p->state = Waitrelease;
 			edfunlock();
-			return 1;	/* Make runnable later */
+			return -1;	/* Make runnable later */
 		}
 		DPRINT("%lud edfready %lud %s release now\n", now, p->pid, statename[p->state]);
 		/* release now */
@@ -558,31 +548,6 @@
 	}
 	edfunlock();
 	DPRINT("^");
-	rq = &runq[PriEdf];
-	/* insert in queue in earliest deadline order */
-	lock(runq);
-	l = nil;
-	for(pp = rq->head; pp; pp = pp->rnext){
-		if(pp->edf->d > e->d)
-			break;
-		l = pp;
-	}
-	p->rnext = pp;
-	if (l == nil)
-		rq->head = p;
-	else
-		l->rnext = p;
-	if(pp == nil)
-		rq->tail = p;
-	rq->n++;
-	nrdy++;
-	runvec |= 1 << PriEdf;
-	p->priority = PriEdf;
-	p->readytime = m->ticks;
-	p->state = Ready;
-	unlock(runq);
-	if(p->trace && (pt = proctrace))
-		pt(p, SReady, 0);
 	return 1;
 }
 
--- a/sys/src/9/port/portclock.c
+++ b/sys/src/9/port/portclock.c
@@ -136,7 +136,7 @@
 
 	/* rare, but tf can still be active on another cpu */
 	while(dt->tactive == mp && dt->tt == nil)
-		if(up->nlocks == 0 && islo())
+		if(up->state == Running && up->nlocks == 0 && islo())
 			sched();
 }
 
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -769,7 +769,6 @@
 	ulong	cpu;		/* cpu average */
 	ulong	lastupdate;
 	uchar	yield;		/* non-zero if the process just did a sleep(0) */
-	ulong	readytime;	/* time process came ready */
 	int	preempted;	/* true if this process hasn't finished the interrupt
 				 *  that last preempted it
 				 */
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -12,8 +12,8 @@
 int	schedgain = 30;	/* units in seconds */
 int	nrdy;
 
-void updatecpu(Proc*);
-int reprioritize(Proc*);
+static void updatecpu(Proc*);
+static int reprioritize(Proc*);
 
 ulong	delayedscheds;	/* statistics */
 ulong	skipscheds;
@@ -78,17 +78,13 @@
 			updatecpu(up);
 			break;
 		case Running:
+			up->state = Scheding;
 			ready(up);
 			break;
 		case Moribund:
 			mmurelease(up);
-			up->state = Dead;
-			edfstop(up);
-			if(up->edf != nil){
-				free(up->edf);
-				up->edf = nil;
-			}
 			lock(&procalloc);
+			up->state = Dead;
 			up->mach = nil;
 			up->qnext = procalloc.free;
 			procalloc.free = up;
@@ -95,14 +91,15 @@
 			/* proc is free now, make sure unlock() wont touch it */
 			up = procalloc.Lock.p = nil;
 			unlock(&procalloc);
-
-			sched();
+			goto out;
 		}
 		coherence();
 		up->mach = nil;
 		up = nil;
 	}
+out:
 	sched();
+	panic("schedinit");
 }
 
 int
@@ -164,15 +161,12 @@
 void
 sched(void)
 {
-	Proc *p;
-
 	if(m->ilockdepth)
-		panic("cpu%d: ilockdepth %d, last lock %#p at %#p, sched called from %#p",
+		panic("cpu%d: ilockdepth %d, last lock %#p at %#p",
 			m->machno,
 			m->ilockdepth,
 			up != nil ? up->lastilock: nil,
-			(up != nil && up->lastilock != nil) ? up->lastilock->pc: 0,
-			getcallerpc(&p+2));
+			(up != nil && up->lastilock != nil) ? up->lastilock->pc: 0);
 	if(up != nil) {
 		/*
 		 * Delay the sched until the process gives up the locks
@@ -204,18 +198,15 @@
 		spllo();
 		return;
 	}
-	p = runproc();
-	if(p->edf == nil){
-		updatecpu(p);
-		p->priority = reprioritize(p);
-	}
-	if(p != m->readied)
+	up = runproc();
+	if(up->edf == nil)
+		up->priority = reprioritize(up);
+	if(up != m->readied)
 		m->schedticks = m->ticks + HZ/10;
 	m->readied = nil;
-	up = p;
-	up->state = Running;
-	up->mach = MACHP(m->machno);
 	m->proc = up;
+	up->mach = up->mp = MACHP(m->machno);
+	up->state = Running;
 	mmuswitch(up);
 	gotolabel(&up->sched);
 }
@@ -310,7 +301,7 @@
  * to maintain accurate cpu usage statistics.  It can be called
  * at any time to bring the stats for a given proc up-to-date.
  */
-void
+static void
 updatecpu(Proc *p)
 {
 	ulong t, ocpu, n, D;
@@ -348,11 +339,12 @@
  * of 3 means you're just right.  Having a higher priority (up to p->basepri) 
  * means you're not using as much as you could.
  */
-int
+static int
 reprioritize(Proc *p)
 {
 	int fairshare, n, load, ratio;
 
+	updatecpu(p);
 	load = MACHP(0)->load;
 	if(load == 0)
 		return p->basepri;
@@ -378,27 +370,101 @@
 /*
  * add a process to a scheduling queue
  */
-void
+static int
 queueproc(Schedq *rq, Proc *p)
 {
-	int pri;
+	int pri = rq - runq;
 
-	pri = rq - runq;
 	lock(runq);
+	switch(p->state){
+	case New:
+	case Queueing:
+	case QueueingR:
+	case QueueingW:
+	case Wakeme:
+	case Broken:
+	case Stopped:
+	case Rendezvous:
+		if(p != up)
+			break;
+		/* wet floor */
+	case Dead:
+	case Moribund:
+	case Ready:
+	case Running:
+	case Waitrelease:
+		unlock(runq);
+		return -1;
+	}
+	p->state = Ready;
 	p->priority = pri;
-	p->rnext = nil;
-	if(rq->tail != nil)
-		rq->tail->rnext = p;
-	else
-		rq->head = p;
-	rq->tail = p;
+	if(pri == PriEdf){
+		Proc *pp, *l;
+
+		/* insert in queue in earliest deadline order */
+		l = nil;
+		for(pp = rq->head; pp != nil; pp = pp->rnext){
+			if(pp->edf->d > p->edf->d)
+				break;
+			l = pp;
+		}
+		p->rnext = pp;
+		if(l == nil)
+			rq->head = p;
+		else
+			l->rnext = p;
+		if(pp == nil)
+			rq->tail = p;
+	} else {
+		p->rnext = nil;
+		if(rq->tail != nil)
+			rq->tail->rnext = p;
+		else
+			rq->head = p;
+		rq->tail = p;
+	}
 	rq->n++;
 	nrdy++;
 	runvec |= 1<<pri;
 	unlock(runq);
+	return 0;
 }
 
 /*
+ *  ready(p) picks a new priority for a process and sticks it in the
+ *  runq for that priority.
+ */
+void
+ready(Proc *p)
+{
+	int s, pri;
+
+	s = splhi();
+	switch(edfready(p)){
+	default:
+		splx(s);
+		return;
+	case 0:
+		pri = reprioritize(p);
+		break;
+	case 1:
+		pri = PriEdf;
+		break;
+	}
+	if(queueproc(&runq[pri], p) < 0){
+		iprint("ready %s %lud %s pc %p\n",
+			p->text, p->pid, statename[p->state], getcallerpc(&p));
+	} else {
+		void (*pt)(Proc*, int, vlong);
+		pt = proctrace;
+		if(pt != nil)
+			pt(p, SReady, 0);
+	}
+	splx(s);
+}
+
+
+/*
  *  try to remove a process from a scheduling queue (called splhi)
  */
 Proc*
@@ -437,63 +503,16 @@
 		runvec &= ~(1<<(rq-runq));
 	rq->n--;
 	nrdy--;
-	if(p->state != Ready)
-		print("dequeueproc %s %lud %s\n", p->text, p->pid, statename[p->state]);
-
+	if(p->state != Ready){
+		iprint("dequeueproc %s %lud %s pc %p\n",
+			p->text, p->pid, statename[p->state], getcallerpc(&rq));
+		p = nil;
+	}
 	unlock(runq);
 	return p;
 }
 
 /*
- *  ready(p) picks a new priority for a process and sticks it in the
- *  runq for that priority.
- */
-void
-ready(Proc *p)
-{
-	int s, pri;
-	Schedq *rq;
-	void (*pt)(Proc*, int, vlong);
-
-	switch(p->state){
-	case Running:
-		if(p == up)
-			break;
-		/* wet floor */
-	case Dead:
-	case Moribund:
-	case Scheding:
-		print("ready %s %s %lud pc %p\n", statename[p->state],
-			p->text, p->pid, getcallerpc(&p));
-		return;
-	case Ready:
-		print("double ready %s %lud pc %p\n",
-			p->text, p->pid, getcallerpc(&p));
-		return;
-	}
-
-	s = splhi();
-	if(edfready(p)){
-		splx(s);
-		return;
-	}
-
-	if(up != p && (p->wired == nil || p->wired == MACHP(m->machno)))
-		m->readied = p;	/* group scheduling */
-
-	updatecpu(p);
-	pri = reprioritize(p);
-	p->priority = pri;
-	rq = &runq[pri];
-	p->state = Ready;
-	queueproc(rq, p);
-	pt = proctrace;
-	if(pt != nil)
-		pt(p, SReady, 0);
-	splx(s);
-}
-
-/*
  *  yield the processor and drop our priority
  */
 void
@@ -516,7 +535,7 @@
 static void
 rebalance(void)
 {
-	int pri, npri, x;
+	int pri, npri;
 	Schedq *rq;
 	Proc *p;
 	ulong t;
@@ -526,6 +545,8 @@
 		return;
 	balancetime = t;
 
+	assert(!islo());
+
 	for(pri=0, rq=runq; pri<Npriq; pri++, rq++){
 another:
 		p = rq->head;
@@ -533,15 +554,16 @@
 			continue;
 		if(pri == p->basepri)
 			continue;
-		updatecpu(p);
 		npri = reprioritize(p);
 		if(npri != pri){
-			x = splhi();
 			p = dequeueproc(rq, p);
-			if(p != nil)
-				queueproc(&runq[npri], p);
-			splx(x);
-			goto another;
+			if(p != nil){
+				p->state = Scheding;
+				if(queueproc(&runq[npri], p) < 0)
+					iprint("rebalance: queueproc %lud %s %s\n",
+						p->pid, p->text, statename[p->state]);
+				goto another;
+			}
 		}
 	}
 }
@@ -606,10 +628,6 @@
 	p = dequeueproc(rq, p);
 	if(p == nil)
 		goto loop;
-
-	p->state = Scheding;
-	p->mp = MACHP(m->machno);
-
 	if(edflock(p)){
 		edfrun(p, rq == &runq[PriEdf]);	/* start deadline timer and do admin */
 		edfunlock();
@@ -831,12 +849,6 @@
 	error(Eintr);
 }
 
-static int
-tfn(void *arg)
-{
-	return up->trend == nil || up->tfn(arg);
-}
-
 void
 twakeup(Ureg*, Timer *t)
 {
@@ -851,6 +863,12 @@
 	}
 }
 
+static int
+tfn(void *arg)
+{
+	return up->trend == nil || up->tfn(arg);
+}
+
 void
 tsleep(Rendez *r, int (*fn)(void*), void *arg, ulong ms)
 {
@@ -860,8 +878,8 @@
 		timerdel(up);
 	}
 	up->tns = MS2NS(ms);
-	up->tf = twakeup;
 	up->tmode = Trelative;
+	up->tf = twakeup;
 	up->ta = up;
 	up->trend = r;
 	up->tfn = fn;
@@ -1342,6 +1360,10 @@
 	qunlock(&up->seglock);
 
 	edfstop(up);
+	if(up->edf != nil){
+		free(up->edf);
+		up->edf = nil;
+	}
 	up->state = Moribund;
 	sched();
 	panic("pexit");