code: plan9front

Download patch

ref: 07e14e43baafbfe7855a2003faa15e70ec5640a1
parent: 4521ae32bb95562b4210c23bee02a9fe1b1e3137
author: Ori Bernstein <ori@eigenstate.org>
date: Wed Aug 10 08:27:15 EDT 2022

9/port: allow kiloprocs -- allocate procs lazily

Preallocate the small data structures around procs eagerly,
but use malloc to allocate the large proc data structures
when we need them, which allows us to scale to many more procs.

There are still many scalability bottlenecks, so we only crank
up the nproc limit by a little bit this time around; we will
crank it up further as we optimize more.

--- a/sys/src/9/bcm/main.c
+++ b/sys/src/9/bcm/main.c
@@ -222,8 +222,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nswap = conf.npage*3;
 	conf.nswppo = 4096;
 	conf.nimage = 200;
--- a/sys/src/9/bcm64/main.c
+++ b/sys/src/9/bcm64/main.c
@@ -93,8 +93,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nswap = conf.npage*3;
 	conf.nswppo = 4096;
 	conf.nimage = 200;
--- a/sys/src/9/imx8/main.c
+++ b/sys/src/9/imx8/main.c
@@ -191,8 +191,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nswap = conf.npage*3;
 	conf.nswppo = 4096;
 	conf.nimage = 200;
--- a/sys/src/9/kw/main.c
+++ b/sys/src/9/kw/main.c
@@ -433,8 +433,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nswap = conf.npage*3;
 	conf.nswppo = 4096;
 	conf.nimage = 200;
--- a/sys/src/9/mtx/main.c
+++ b/sys/src/9/mtx/main.c
@@ -183,8 +183,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nimage = 200;
 	conf.nswap = conf.nproc*80;
 	conf.nswppo = 4096;
--- a/sys/src/9/omap/main.c
+++ b/sys/src/9/omap/main.c
@@ -452,8 +452,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nswap = conf.npage*3;
 	conf.nswppo = 4096;
 	conf.nimage = 200;
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -158,8 +158,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nimage = 200;
 	conf.nswap = conf.nproc*80;
 	conf.nswppo = 4096;
--- a/sys/src/9/pc64/main.c
+++ b/sys/src/9/pc64/main.c
@@ -43,8 +43,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nimage = 200;
 	conf.nswap = conf.nproc*80;
 	conf.nswppo = 4096;
--- a/sys/src/9/port/devproc.c
+++ b/sys/src/9/port/devproc.c
@@ -149,7 +149,8 @@
 #define	QSHIFT	5	/* location in qid of proc slot # */
 
 #define	QID(q)		((((ulong)(q).path)&0x0000001F)>>0)
-#define	SLOT(q)		(((((ulong)(q).path)&0x07FFFFFE0)>>QSHIFT)-1)
+#define	SLOTMAX		0x4000000
+#define	SLOT(q)		(((((ulong)(q).path)>>QSHIFT)&(SLOTMAX-1))-1)
 #define	PID(q)		((q).vers)
 #define	NOTEID(q)	((q).vers)
 
@@ -202,6 +203,8 @@
 			return -1;
 
 		p = proctab(s);
+		if(p == nil)
+			return 0;
 		pid = p->pid;
 		if(pid == 0)
 			return 0;
@@ -227,7 +230,7 @@
 		panic("procgen");
 
 	tab = &procdir[s];
-	path = c->qid.path&~(((1<<QSHIFT)-1));	/* slot component */
+	path = c->qid.path&~((1<<QSHIFT)-1);	/* slot component */
 
 	/* p->procmode determines default mode for files in /proc */
 	p = proctab(SLOT(c->qid));
@@ -281,8 +284,9 @@
 static void
 procinit(void)
 {
-	if(conf.nproc >= (1<<(16-QSHIFT))-1)
-		print("warning: too many procs for devproc\n");
+	/* slot masks: lets see how big we can go */
+	if(conf.nproc > SLOTMAX)
+		panic("warning: too many procs for devproc\n");
 }
 
 static Chan*
@@ -335,8 +339,7 @@
 		setnoteid(p, noteid);
 		return;
 	}
-	for(i = 0; i < conf.nproc; i++){
-		pp = proctab(i);
+	for(i = 0; (pp = proctab(i)) != nil; i++){
 		if(pp->noteid != noteid || pp->kp)
 			continue;
 		if(strcmp(pp->user, p->user) == 0){
@@ -354,8 +357,7 @@
 	Proc *p;
 	int i;
 
-	for(i = 0; i < conf.nproc; i++){
-		p = proctab(i);
+	for(i = 0; (p = proctab(i)) != nil; i++){
 		if(p == up)
 			continue;
 		if(p->noteid != noteid || p->kp)
@@ -414,7 +416,7 @@
 		nexterror();
 	}
 	pid = PID(c->qid);
-	if(p->pid != pid)
+	if(p == nil || p->pid != pid)
 		error(Eprocdied);
 
 	omode = openmode(omode0);
--- a/sys/src/9/port/devswap.c
+++ b/sys/src/9/port/devswap.c
@@ -162,7 +162,7 @@
 	while(waserror())
 		;
 
-	x = -1;
+	x = 0;
 	for(;;){
 		up->psstate = "Reclaim";
 		if(reclaim()){
@@ -183,12 +183,13 @@
 
 		i = ageclock;
 		do {
-			if(++x >= conf.nproc){
+			p = proctab(x++);
+			if(p == nil){
 				if(++ageclock == i)
 					goto Killbig;
 				x = 0;
+				continue;
 			}
-			p = proctab(x);
 		} while(p->state == Dead || p->noswap || !canqlock(&p->seglock));
 		up->psstate = "Pageout";
 		for(i = 0; i < NSEG; i++) {
@@ -274,8 +275,7 @@
 	 * Now we must do hardwork to ensure all processes which have tlb
 	 * entries for this segment will be flushed if we succeed in paging it out
 	 */
-	for(x = 0; x < conf.nproc; x++){
-		p = proctab(x);
+	for(x = 0; (p = proctab(x)) != nil; x++){
 		if(p->state == Dead)
 			continue;
 		for(i = 0; i < NSEG; i++){
--- a/sys/src/9/port/edf.c
+++ b/sys/src/9/port/edf.c
@@ -373,17 +373,15 @@
 		pt(p, SAdmit, 0);
 
 	/* Look for another proc with the same period to synchronize to */
-	SET(r);
-	for(i=0; i<conf.nproc; i++) {
-		r = proctab(i);
+	for(i=0; (r = proctab(i)) != nil; i++) {
 		if(r->state == Dead || r == p)
 			continue;
-		if (r->edf == nil || (r->edf->flags & Admitted) == 0)
+		if(r->edf == nil || (r->edf->flags & Admitted) == 0)
 			continue;
-		if (r->edf->T == e->T)
-				break;
+		if(r->edf->T == e->T)
+			break;
 	}
-	if (i == conf.nproc){
+	if(r == nil){
 		/* Can't synchronize to another proc, release now */
 		e->t = now;
 		e->d = 0;
@@ -627,11 +625,10 @@
 	/* initialize */
 	DPRINT("schedulability test %lud\n", theproc->pid);
 	qschedulability = nil;
-	for(i=0; i<conf.nproc; i++) {
-		p = proctab(i);
+	for(i=0; (p = proctab(i)) != nil; i++) {
 		if(p->state == Dead)
 			continue;
-		if ((p->edf == nil || (p->edf->flags & Admitted) == 0) && p != theproc)
+		if((p->edf == nil || (p->edf->flags & Admitted) == 0) && p != theproc)
 			continue;
 		p->edf->testtype = Rl;
 		p->edf->testtime = 0;
--- a/sys/src/9/port/portdat.h
+++ b/sys/src/9/port/portdat.h
@@ -671,6 +671,7 @@
 	ulong	pid;
 	ulong	noteid;		/* Equivalent of note group */
 	ulong	parentpid;
+	ulong	index;
 
 	Proc	*parent;	/* Process to send wait record on exit */
 	Lock	exl;		/* Lock count and waitq */
@@ -760,7 +761,7 @@
 	ulong	delaysched;
 	ulong	priority;	/* priority level */
 	ulong	basepri;	/* base priority level */
-	uchar	fixedpri;	/* priority level deson't change */
+	uchar	fixedpri;	/* priority level doesn't change */
 	ulong	cpu;		/* cpu average */
 	ulong	lastupdate;
 	uchar	yield;		/* non-zero if the process just did a sleep(0) */
--- a/sys/src/9/port/proc.c
+++ b/sys/src/9/port/proc.c
@@ -23,8 +23,9 @@
 static struct Procalloc
 {
 	Lock;
-	Proc*	arena;
-	Proc*	free;
+	Proc	**tab;
+	Proc	*free;
+	int	nextindex;
 } procalloc;
 
 enum
@@ -630,13 +631,25 @@
 Proc*
 newproc(void)
 {
+	char *b;
 	Proc *p;
 
 	lock(&procalloc);
 	p = procalloc.free;
-	if(p == nil || (p->kstack == nil && (p->kstack = malloc(KSTACK)) == nil)){
-		unlock(&procalloc);
-		return nil;
+	if(p == nil){
+		if(procalloc.nextindex >= conf.nproc){
+			unlock(&procalloc);
+			return nil;
+		}
+		b = malloc(KSTACK+sizeof(Proc));
+		if(b == nil){
+			unlock(&procalloc);
+			return nil;
+		}
+		p = (Proc*)(b + KSTACK);
+		p->index = procalloc.nextindex++;
+		p->kstack = b;
+		procalloc.tab[p->index] = p;
 	}
 	procalloc.free = p->qnext;
 	p->qnext = nil;
@@ -682,8 +695,7 @@
 		/* pick a machine to wire to */
 		memset(nwired, 0, sizeof(nwired));
 		p->wired = nil;
-		for(i=0; i<conf.nproc; i++){
-			pp = proctab(i);
+		for(i=0; (pp = proctab(i)) != nil; i++){
 			wm = pp->wired;
 			if(wm != nil && pp->pid)
 				nwired[wm->machno]++;
@@ -720,20 +732,14 @@
 void
 procinit0(void)		/* bad planning - clashes with devproc.c */
 {
-	Proc *p;
-	int i;
-
-	p = xalloc(conf.nproc*sizeof(Proc));
-	if(p == nil){
+	procalloc.free = nil;
+	/* allocate 1 extra for a nil terminator */
+	procalloc.tab = xalloc((conf.nproc+1)*sizeof(Proc*));
+	if(procalloc.tab == nil){
 		xsummary();
 		panic("cannot allocate %lud procs (%ludMB)", conf.nproc, conf.nproc*sizeof(Proc)/(1024*1024));
 	}
-	procalloc.arena = p;
-	procalloc.free = p;
-	for(i=0; i<conf.nproc-1; i++, p++)
-		p->qnext = p+1;
-	p->qnext = nil;
-
+	memset(procalloc.tab, 0, (conf.nproc+1)*sizeof(Proc*));
 	pidinit();
 }
 
@@ -1263,7 +1269,7 @@
 Proc*
 proctab(int i)
 {
-#define proctab(x) (&procalloc.arena[(x)])
+#define proctab(x) (procalloc.tab[(x)])
 	return proctab(i);
 }
 
@@ -1304,8 +1310,7 @@
 	 */
 	memset(await, 0, conf.nmach*sizeof(await[0]));
 	nwait = 0;
-	for(i = 0; i < conf.nproc; i++){
-		p = proctab(i);
+	for(i = 0; (p = proctab(i)) != nil; i++){
 		if(p->state != Dead && (*match)(p, a)){
 			p->newtlb = 1;
 			for(nm = 0; nm < conf.nmach; nm++){
@@ -1572,8 +1577,7 @@
 
 	max = 0;
 	kp = nil;
-	for(i = 0; i < conf.nproc; i++) {
-		p = proctab(i);
+	for(i = 0; (p = proctab(i)) != nil; i++) {
 		if(p->state == Dead || p->kp || p->parentpid == 0)
 			continue;
 		if((p->noswap || (p->procmode & 0222) == 0) && strcmp(eve, p->user) == 0)
@@ -1588,8 +1592,7 @@
 		return;
 	print("%lud: %s killed: %s\n", kp->pid, kp->text, why);
 	qlock(&kp->seglock);
-	for(i = 0; i < conf.nproc; i++) {
-		p = proctab(i);
+	for(i = 0; (p = proctab(i)) != nil; i++) {
 		if(p->state == Dead || p->kp)
 			continue;
 		if(p != kp && p->seg[BSEG] != nil && p->seg[BSEG] == kp->seg[BSEG])
@@ -1624,8 +1627,7 @@
 	Proc *p;
 	int i;
 
-	for(i = 0; i < conf.nproc; i++){
-		p = proctab(i);
+	for(i = 0; (p = proctab(i)) != nil; i++){
 		qlock(&p->debug);
 		if(p->user != nil && strcmp(old, p->user) == 0)
 			kstrdup(&p->user, new);
@@ -1809,12 +1811,15 @@
 int
 procindex(ulong pid)
 {
+	Proc *p;
 	Pid *i;
+	int x;
 
 	i = pidlookup(pid);
 	if(i != nil){
-		int x = i->procindex;
-		if(proctab(x)->pid == pid)
+		x = i->procindex;
+		p = proctab(x);
+		if(p != nil && p->pid == pid)
 			return x;
 	}
 	return -1;
@@ -1869,7 +1874,7 @@
 		p->parentpid = 0;
 
 	i = pidadd(0);
-	i->procindex = (int)(p - procalloc.arena);
+	i->procindex = p->index;
 
 	if(p->noteid == 0){
 		incref(i);
--- a/sys/src/9/ppc/main.c
+++ b/sys/src/9/ppc/main.c
@@ -246,8 +246,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nimage = 200;
 	conf.nswap = conf.nproc*80;
 	conf.nswppo = 4096;
--- a/sys/src/9/xen/main.c
+++ b/sys/src/9/xen/main.c
@@ -252,8 +252,8 @@
 	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
 	if(cpuserver)
 		conf.nproc *= 3;
-	if(conf.nproc > 2000)
-		conf.nproc = 2000;
+	if(conf.nproc > 4000)
+		conf.nproc = 4000;
 	conf.nimage = 200;
 	conf.nswap = conf.nproc*80;
 	conf.nswppo = 4096;