ref: fc01d1577a7af7c9a885d0900ad914a64eab5cfc
parent: ec1831fbc071870d3bf2ba16098cde0c3c1114eb
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Mon Apr 8 12:45:23 EDT 2024
kernel: fix the semacquire stack corruption on interrupt The semacquire allocates a Rendez struct on its stack, and publishes it on the semaphore linked list in the segment. Before returning, it removes it again, properly taking the locks protecting the linked list, so whats the issue? The issue happens when procinterrupt() does the wakeup, which does not care about the spinlock of the segment lined list, and it does it in the following way: p->r = nil; r->p = nil; ready(p); unlock(r); Note that the unlock happens *after* the ready. So the process could'v already run on another core, remove itself from the segment list and get out of semacquire() alltogether, but we still have one line to execute here, which is the unlock() of the now free'd Rendez. And that was causing the stack corruption! So wakeup() and procinterrupt() always had this issue. If the Rendez memory stays valid after the wakeup, here is no issue. Most code just uses &up->sleep, which will stay valid as Proc's are never freed. The solution for now is to do the ready() as the last step, not touching the resource after the final unlock.
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -167,10 +167,6 @@
up->pcycles -= t;
}
-/*
- * If changing this routine, look also at sleep(). It
- * contains a copy of the guts of sched().
- */
void
sched(void)
{
@@ -929,8 +925,9 @@
lock(r);
p = r->p;
-
- if(p != nil){
+ if(p == nil)
+ unlock(r);
+ else {
lock(&p->rlock);
if(p->state != Wakeme || p->r != r){
iprint("%p %p %d\n", p->r, r, p->state);
@@ -938,11 +935,11 @@
}
r->p = nil;
p->r = nil;
- ready(p);
unlock(&p->rlock);
+ unlock(r);
+ /* hands off r */
+ ready(p);
}
- unlock(r);
-
splx(s);
return p;
@@ -982,14 +979,18 @@
r->p != p, p->r != r, p->state);
p->r = nil;
r->p = nil;
- ready(p);
+ unlock(&p->rlock);
unlock(r);
- break;
+ /* hands off r */
+ ready(p);
+ splx(s);
+ return;
}
/* give other process time to get out of critical section and try again */
unlock(&p->rlock);
splx(s);
+
sched();
}
unlock(&p->rlock);
@@ -1013,8 +1014,10 @@
q->tail = l;
p->qnext = nil;
p->eql = nil; /* not taken */
+ unlock(&q->use);
+ /* hands off q */
ready(p);
- break;
+ return;
}
}
}
@@ -1032,8 +1035,10 @@
if(d == p) {
*l = p->rendhash;
p->rendval = ~0;
+ unlock(p->rgrp);
+ /* hands off p->rgrp */
ready(p);
- break;
+ return;
}
l = &d->rendhash;
}
--
⑨