git: 9front

Download patch

ref: a22de50890245d5e264be820d52141e288c4b00d
parent: eecf205f57b31bfeabbc3658e24ed51b72ac6cd1
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Fri Nov 23 15:27:09 EST 2012

ratrace: fix race conditions and range check

the syscallno check in syscallfmt() was wrong. the unsigned
syscall number was cast to an signed integer. so negative
values would pass the check provoking bad memory access from
kernel. the check also has an off by one. one has to check
syscallno >= nsyscalls instead of syscallno > nsyscalls.

access to the p->syscalltrace string was not protected
from modification in devproc. you could awake the process
and cause it to free the string giving an opportunity for
the kernel to access bad memory. or someone could kill the
process (pexit would just free it).

now the string is protected by the usual p->debug qlock. we
also keep the string arround until it is overwritten again
or the process exists. this has the nice side effect that
one can inspect it after the process crashed.

another problem was that our validaddr() would error() instead
of pexiting the current process. the code was changed to only
access up->s.args after it was validated and copied instead of
accessing the user stack directly. this also prevents a sneaky
multithreaded process from chaning the arguments under us.

in case our validaddr() errors, we cannot assume valid user
stack after the waserror() if block. use up->s.arg[0] for the
noted() call to avoid bad access.

--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -161,8 +161,6 @@
 void	(*screenputs)(char*, int);
 void*	sigsearch(char*);
 void	syncclock(void);
-void	syscallfmt(int syscallno, ulong pc, va_list list);
-void	sysretfmt(int syscallno, va_list list, long ret, uvlong start, uvlong stop);
 void*	tmpmap(Page*);
 void	tmpunmap(void*);
 void	touser(void*);
--- a/sys/src/9/pc/trap.c
+++ b/sys/src/9/pc/trap.c
@@ -734,26 +734,6 @@
 	scallnr = ureg->ax;
 	up->scallnr = scallnr;
 
-	if(up->procctl == Proc_tracesyscall){
-		/*
-		 * Redundant validaddr.  Do we care?
-		 * Tracing syscalls is not exactly a fast path...
-		 * Beware, validaddr currently does a pexit rather
-		 * than an error if there's a problem; that might
-		 * change in the future.
-		 */
-		if(sp < (USTKTOP-BY2PG) || sp > (USTKTOP-sizeof(Sargs)-BY2WD))
-			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
-
-		syscallfmt(scallnr, ureg->pc, (va_list)(sp+BY2WD));
-		up->procctl = Proc_stopme;
-		procctl(up);
-		if(up->syscalltrace)
-			free(up->syscalltrace);
-		up->syscalltrace = nil;
-		startns = todget(nil);
-	}
-
 	if(scallnr == RFORK && up->fpstate == FPactive){
 		fpsave(&up->fpsave);
 		up->fpstate = FPinactive;
@@ -763,6 +743,20 @@
 	up->nerrlab = 0;
 	ret = -1;
 	if(!waserror()){
+		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+
+		up->s = *((Sargs*)(sp+BY2WD));
+
+		if(up->procctl == Proc_tracesyscall){
+			syscallfmt(scallnr, ureg->pc, (va_list)up->s.args);
+			s = splhi();
+			up->procctl = Proc_stopme;
+			procctl(up);
+			splx(s);
+			startns = todget(nil);
+		}
+
 		if(scallnr >= nsyscall || systab[scallnr] == 0){
 			pprint("bad sys call number %lud pc %lux\n",
 				scallnr, ureg->pc);
@@ -769,11 +763,6 @@
 			postnote(up, 1, "sys: bad sys call", NDebug);
 			error(Ebadarg);
 		}
-
-		if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
-			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
-
-		up->s = *((Sargs*)(sp+BY2WD));
 		up->psstate = sysctab[scallnr];
 
 		ret = systab[scallnr](up->s.args);
@@ -804,14 +793,11 @@
 
 	if(up->procctl == Proc_tracesyscall){
 		stopns = todget(nil);
-		up->procctl = Proc_stopme;
-		sysretfmt(scallnr, (va_list)(sp+BY2WD), ret, startns, stopns);
+		sysretfmt(scallnr, (va_list)up->s.args, ret, startns, stopns);
 		s = splhi();
+		up->procctl = Proc_stopme;
 		procctl(up);
 		splx(s);
-		if(up->syscalltrace)
-			free(up->syscalltrace);
-		up->syscalltrace = nil;
 	}
 
 	up->insyscall = 0;
@@ -818,7 +804,7 @@
 	up->psstate = 0;
 
 	if(scallnr == NOTED)
-		noted(ureg, *(ulong*)(sp+BY2WD));
+		noted(ureg, up->s.args[0]);
 
 	if(scallnr!=RFORK && (up->procctl || up->nnote)){
 		splhi();
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -737,9 +737,12 @@
 		memmove(a, &up->genbuf[offset], n);
 		return n;
 	case Qsyscall:
-		if(!p->syscalltrace)
-			return 0;
-		n = readstr(offset, a, n, p->syscalltrace);
+		eqlock(&p->debug);
+		if(p->syscalltrace)
+			n = readstr(offset, a, n, p->syscalltrace);
+		else
+			n = 0;
+		qunlock(&p->debug);
 		return n;
 
 	case Qmem:
--- a/sys/src/9/port/portfns.h
+++ b/sys/src/9/port/portfns.h
@@ -326,6 +326,8 @@
 int		swapcount(ulong);
 int		swapfull(void);
 void		swapinit(void);
+void		syscallfmt(ulong syscallno, ulong pc, va_list list);
+void		sysretfmt(ulong syscallno, va_list list, long ret, uvlong start, uvlong stop);
 void		timeradd(Timer*);
 void		timerdel(Timer*);
 void		timersinit(void);
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -622,10 +622,7 @@
 	p->pdbg = 0;
 	p->fpstate = FPinit;
 	p->kp = 0;
-	if(up && up->procctl == Proc_tracesyscall)
-		p->procctl = Proc_tracesyscall;
-	else
-		p->procctl = 0;
+	p->procctl = 0;
 	p->syscalltrace = 0;	
 	p->notepending = 0;
 	p->ureg = 0;
@@ -1078,8 +1075,6 @@
 	Chan *dot;
 	void (*pt)(Proc*, int, vlong);
 
-	if(up->syscalltrace)
-		free(up->syscalltrace);
 	up->alarm = 0;
 	if (up->tt)
 		timerdel(up);
@@ -1193,6 +1188,10 @@
 	if(up->pdbg) {
 		wakeup(&up->pdbg->sleep);
 		up->pdbg = 0;
+	}
+	if(up->syscalltrace) {
+		free(up->syscalltrace);
+		up->syscalltrace = 0;
 	}
 	qunlock(&up->debug);
 
--- a/sys/src/9/port/syscallfmt.c
+++ b/sys/src/9/port/syscallfmt.c
@@ -22,6 +22,7 @@
 	}
 	validaddr((ulong)a, n, 0);
 	t = smalloc(n+1);
+	t[n] = 0;
 	for(i = 0; i < n; i++)
 		if(a[i] > 0x20 && a[i] < 0x7f)	/* printable ascii? */
 			t[i] = a[i];
@@ -52,7 +53,7 @@
 }
 
 void
-syscallfmt(int syscallno, ulong pc, va_list list)
+syscallfmt(ulong syscallno, ulong pc, va_list list)
 {
 	long l;
 	Fmt fmt;
@@ -65,16 +66,13 @@
 	fmtstrinit(&fmt);
 	fmtprint(&fmt, "%uld %s ", up->pid, up->text);
 
-	if(syscallno > nsyscall)
-		fmtprint(&fmt, " %d ", syscallno);
+	if(syscallno >= nsyscall)
+		fmtprint(&fmt, " %uld ", syscallno);
 	else
 		fmtprint(&fmt, "%s ", sysctab[syscallno]?
 			sysctab[syscallno]: "huh?");
 
 	fmtprint(&fmt, "%ulx ", pc);
-	if(up->syscalltrace != nil)
-		free(up->syscalltrace);
-
 	switch(syscallno){
 	case SYSR1:
 		p = va_arg(list, uintptr);
@@ -301,11 +299,15 @@
 		break;
 	}
 
-	up->syscalltrace = fmtstrflush(&fmt);
+	a = fmtstrflush(&fmt);
+	qlock(&up->debug);
+	free(up->syscalltrace);
+	up->syscalltrace = a;
+	qunlock(&up->debug);
 }
 
 void
-sysretfmt(int syscallno, va_list list, long ret, uvlong start, uvlong stop)
+sysretfmt(ulong syscallno, va_list list, long ret, uvlong start, uvlong stop)
 {
 	long l;
 	void* v;
@@ -316,9 +318,6 @@
 
 	fmtstrinit(&fmt);
 
-	if(up->syscalltrace)
-		free(up->syscalltrace);
-
 	errstr = "\"\"";
 	switch(syscallno){
 	default:
@@ -402,5 +401,10 @@
 		break;
 	}
 	fmtprint(&fmt, " %s %#llud %#llud\n", errstr, start, stop);
-	up->syscalltrace = fmtstrflush(&fmt);
+
+	a = fmtstrflush(&fmt);
+	qlock(&up->debug);
+	free(up->syscalltrace);
+	up->syscalltrace = a;
+	qunlock(&up->debug);
 }
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -162,6 +162,8 @@
 	}
 	p->hang = up->hang;
 	p->procmode = up->procmode;
+	if(up->procctl == Proc_tracesyscall)
+		p->procctl = Proc_tracesyscall;
 
 	/* Craft a return frame which will cause the child to pop out of
 	 * the scheduler in user mode with the return register zero
--