code: plan9front

Download patch

ref: b3a26fb633f4649fc202b77c0184184b756960e7
parent: a6ccb66d9c0748d44cd622d6a700d1ad6447cada
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Mon Apr 8 12:45:23 EDT 2024

kernel: fix the semacquire stack corruption on interrupt

The semacquire allocates a Rendez struct on its stack,
and publishes it on the semaphore linked list in the
segment.

Before returning, it removes it again, properly taking
the locks protecting the linked list, so whats the issue?

The issue happens when procinterrupt() does the wakeup,
which does not care about the spinlock of the segment
lined list, and it does it in the following way:

 			p->r = nil;
 			r->p = nil;
			ready(p);
 			unlock(r);

Note that the unlock happens *after* the ready.
So the process could'v already run on another core, remove
itself from the segment list and get out of semacquire()
alltogether, but we still have one line to execute here,
which is the unlock() of the now free'd Rendez.

And that was causing the stack corruption!

So wakeup() and procinterrupt() always had this issue.
If the Rendez memory stays valid after the wakeup,
here is no issue. Most code just uses &up->sleep,
which will stay valid as Proc's are never freed.

The solution for now is to do the ready() as the
last step, not touching the resource after the final
unlock.

--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -167,10 +167,6 @@
 	up->pcycles -= t;
 }
 
-/*
- *  If changing this routine, look also at sleep().  It
- *  contains a copy of the guts of sched().
- */
 void
 sched(void)
 {
@@ -929,8 +925,9 @@
 
 	lock(r);
 	p = r->p;
-
-	if(p != nil){
+	if(p == nil)
+		unlock(r);
+	else {
 		lock(&p->rlock);
 		if(p->state != Wakeme || p->r != r){
 			iprint("%p %p %d\n", p->r, r, p->state);
@@ -938,11 +935,11 @@
 		}
 		r->p = nil;
 		p->r = nil;
-		ready(p);
 		unlock(&p->rlock);
+		unlock(r);
+		/* hands off r */
+		ready(p);
 	}
-	unlock(r);
-
 	splx(s);
 
 	return p;
@@ -982,14 +979,18 @@
 					r->p != p, p->r != r, p->state);
 			p->r = nil;
 			r->p = nil;
-			ready(p);
+			unlock(&p->rlock);
 			unlock(r);
-			break;
+			/* hands off r */
+			ready(p);
+			splx(s);
+			return;
 		}
 
 		/* give other process time to get out of critical section and try again */
 		unlock(&p->rlock);
 		splx(s);
+
 		sched();
 	}
 	unlock(&p->rlock);
@@ -1013,8 +1014,10 @@
 							q->tail = l;
 						p->qnext = nil;
 						p->eql = nil;	/* not taken */
+						unlock(&q->use);
+						/* hands off q */
 						ready(p);
-						break;
+						return;
 					}
 				}
 			}
@@ -1032,8 +1035,10 @@
 				if(d == p) {
 					*l = p->rendhash;
 					p->rendval = ~0;
+					unlock(p->rgrp);
+					/* hands off p->rgrp */
 					ready(p);
-					break;
+					return;
 				}
 				l = &d->rendhash;
 			}