shithub: 9ferno

Download patch

ref: 9946b8d9e571a85fd4c104086cc64591842e9bce
parent: 9661fb64092acfcf370688c5e56934e400965822
author: joe9 <joe9mail@gmail.com>
date: Thu Jul 8 00:20:26 EDT 2021

attempting inferno native amd64 build

diff: cannot open b/9front/386/include//null: 'b/9front/386/include//null' does not exist diff: cannot open b/9front/386//null: 'b/9front/386//null' does not exist diff: cannot open b/Inferno/amd64/include//null: 'b/Inferno/amd64/include//null' does not exist diff: cannot open b/Inferno/amd64//null: 'b/Inferno/amd64//null' does not exist diff: cannot open b/os/pc64//null: 'b/os/pc64//null' does not exist
--- /dev/null
+++ b/9front/386/include/emu.h
@@ -1,0 +1,23 @@
+/*
+ * system- and machine-specific declarations for emu:
+ * floating-point save and restore, signal handling primitive, and
+ * implementation of the current-process variable `up'.
+ */
+
+extern	Proc**	Xup;
+#define	up	(*Xup)
+
+typedef	struct	FPU	FPU;
+
+/*
+ * This structure must agree with FPsave and FPrestore asm routines
+ */
+struct FPU
+{
+	/* TODO check this 28 bytes. 28 -> 56? */
+	uchar	env[28];
+};
+
+typedef jmp_buf osjmpbuf;
+#define	ossetjmp(buf)	setjmp(buf)
+
--- /dev/null
+++ b/9front/386/include/lib9.h
@@ -1,0 +1,1 @@
+#include "../../include/lib9.h"
--- /dev/null
+++ b/9front/386/include/u.h
@@ -1,0 +1,3 @@
+#include "/amd64/include/u.h"
+typedef intptr WORD;
+typedef uintptr        UWORD;
--- /dev/null
+++ b/Inferno/amd64/include/lib9.h
@@ -1,0 +1,8 @@
+#include <u.h>
+#include <kern.h>
+
+/*
+ *	Extensions for Inferno to basic libc.h
+ */
+
+#define __LITTLE_ENDIAN	/* math/dtoa.c only */
--- /dev/null
+++ b/Inferno/amd64/include/u.h
@@ -1,0 +1,88 @@
+#define nil		((void*)0)
+typedef	unsigned short	ushort;
+typedef	unsigned char	uchar;
+typedef unsigned long	ulong;
+typedef unsigned int	uint;
+typedef signed char	schar;
+typedef	long long	vlong;
+typedef	unsigned long long uvlong;
+typedef long long	intptr;
+typedef unsigned long long uintptr;
+typedef unsigned long	usize;
+typedef	uint		Rune;
+typedef union FPdbleword FPdbleword;
+typedef uintptr		jmp_buf[2];
+#define	JMPBUFSP	0
+#define	JMPBUFPC	1
+#define	JMPBUFDPC	0
+typedef unsigned int	mpdigit;	/* for /sys/include/mp.h */
+typedef unsigned char	u8int;
+typedef unsigned short	u16int;
+typedef unsigned int	u32;
+typedef unsigned long long u64int;
+typedef signed char s8int;
+typedef signed short s16int;
+typedef signed int s32;
+typedef signed long long s64int;
+typedef unsigned char	u8;
+typedef unsigned short	u16;
+typedef unsigned int	u32;
+typedef unsigned long long u64;
+typedef signed char s8;
+typedef signed short s16;
+typedef signed int s32;
+typedef signed long long s64;
+
+/* MXCSR */
+/* fcr */
+#define	FPFTZ	(1<<15)	/* amd64 */
+#define	FPINEX	(1<<12)
+#define	FPUNFL	(1<<11)
+#define	FPOVFL	(1<<10)
+#define	FPZDIV	(1<<9)
+#define	FPDNRM	(1<<8)	/* amd64 */
+#define	FPINVAL	(1<<7)
+#define	FPDAZ	(1<<6)	/* amd64 */
+#define	FPRNR	(0<<13)
+#define	FPRZ	(3<<13)
+#define	FPRPINF	(2<<13)
+#define	FPRNINF	(1<<13)
+#define	FPRMASK	(3<<13)
+#define	FPPEXT	0
+#define	FPPSGL	0
+#define	FPPDBL	0
+#define	FPPMASK	0
+/* fsr */
+#define	FPAINEX	(1<<5)
+#define	FPAUNFL	(1<<4)
+#define	FPAOVFL	(1<<3)
+#define	FPAZDIV	(1<<2)
+#define	FPADNRM	(1<<1)	/* not in plan 9 */
+#define	FPAINVAL	(1<<0)
+union FPdbleword
+{
+	double	x;
+	struct {	/* little endian */
+		uint lo;
+		uint hi;
+	};
+};
+
+typedef	char*	va_list;
+#define va_start(list, start) list =\
+	(sizeof(start) < 8?\
+		(char*)((vlong*)&(start)+1):\
+		(char*)(&(start)+1))
+#define va_end(list)\
+	USED(list)
+#define va_arg(list, mode)\
+	((sizeof(mode) == 1)?\
+		((list += 8), (mode*)list)[-8]:\
+	(sizeof(mode) == 2)?\
+		((list += 8), (mode*)list)[-4]:\
+	(sizeof(mode) == 4)?\
+		((list += 8), (mode*)list)[-2]:\
+		((list += sizeof(mode)), (mode*)list)[-1])
+
+typedef intptr WORD;
+typedef uintptr        UWORD;
--- /dev/null
+++ b/Inferno/amd64/include/ureg.h
@@ -1,0 +1,33 @@
+struct Ureg {
+	u64	ax;
+	u64	bx;
+	u64	cx;
+	u64	dx;
+	u64	si;
+	u64	di;
+	u64	bp;
+	u64	r8;
+	u64	r9;
+	u64	r10;
+	u64	r11;
+	u64	r12;
+	u64	r13;
+	u64	r14;
+	u64	r15;
+
+	u16	ds;
+	u16	es;
+	u16	fs;
+	u16	gs;
+
+	u64	trap;				/* trap type */
+	u64	ecode;				/* error code (or zero) */
+	u64	pc;				/* pc */
+	u64	cs;				/* old context */
+	u64	flags;				/* old flags */
+	union {					/* sp */
+		u64	usp;
+		u64	sp;
+	};
+	u64	ss;				/* old stack segment */
+};
--- a/NetBSD/power/include/lib9.h
+++ b/NetBSD/power/include/lib9.h
@@ -43,7 +43,7 @@
 
 typedef uint8_t		u8int;
 typedef uint16_t	u16int;
-typedef uint32_t	u32int;
+typedef uint32_t	u32;
 typedef uvlong u64int;
 
 typedef uintptr_t	uintptr;
--- a/Nt/386/include/lib9.h
+++ b/Nt/386/include/lib9.h
@@ -64,7 +64,7 @@
 typedef unsigned int Rune;
 typedef __int64		vlong;
 typedef unsigned __int64		uvlong;
-typedef unsigned int u32int;
+typedef unsigned int u32;
 typedef uvlong u64int;
 
 typedef unsigned int	mpdigit;	/* for /sys/include/mp.h */
--- a/OpenBSD/386/include/lib9.h
+++ b/OpenBSD/386/include/lib9.h
@@ -46,7 +46,7 @@
 typedef unsigned int Rune;
 typedef long long int	vlong;
 typedef unsigned long long int	uvlong;
-typedef unsigned int u32int;
+typedef unsigned int u32;
 typedef uvlong u64int;
 
 typedef unsigned int	mpdigit;	/* for /sys/include/mp.h */
--- a/OpenBSD/amd64/include/lib9.h
+++ b/OpenBSD/amd64/include/lib9.h
@@ -167,7 +167,7 @@
 	int	r;			/* % format Rune */
 	int	width;
 	int	prec;
-	u32int	flags;
+	u32	flags;
 };
 
 enum{
@@ -308,8 +308,8 @@
  */
 typedef
 struct Lock {
-	u32int	val;
-	u32int	pid;
+	u32	val;
+	u32	pid;
 } Lock;
 
 extern int	_tas(int*);
@@ -330,7 +330,7 @@
 extern	void	qlock(QLock*);
 extern	void	qunlock(QLock*);
 extern	int	canqlock(QLock*);
-extern	void	_qlockinit(u32int (*)(u32int, u32int));	/* called only by the thread library */
+extern	void	_qlockinit(u32 (*)(u32, u32));	/* called only by the thread library */
 
 typedef
 struct RWLock
@@ -405,7 +405,7 @@
 struct Qid
 {
 	u64int	path;
-	u32int	vers;
+	u32	vers;
 	uchar	type;
 } Qid;
 
@@ -416,9 +416,9 @@
 	uint	dev;	/* server subtype */
 	/* file data */
 	Qid	qid;	/* unique id from server */
-	u32int	mode;	/* permissions */
-	u32int	atime;	/* last read time */
-	u32int	mtime;	/* last write time */
+	u32	mode;	/* permissions */
+	u32	atime;	/* last read time */
+	u32	mtime;	/* last write time */
 	s64int	length;	/* file length */
 	char	*name;	/* last element of path */
 	char	*uid;	/* owner name */
@@ -430,15 +430,15 @@
 extern	Dir*	dirfstat(int);
 extern	int	dirwstat(char*, Dir*);
 extern	int	dirfwstat(int, Dir*);
-extern	s32int	dirread(int, Dir**);
+extern	s32	dirread(int, Dir**);
 extern	void	nulldir(Dir*);
-extern	s32int	dirreadall(int, Dir**);
+extern	s32	dirreadall(int, Dir**);
 
 typedef
 struct Waitmsg
 {
 	int pid;	/* of loved one */
-	u32int time[3];	/* of loved one & descendants */
+	u32 time[3];	/* of loved one & descendants */
 	char	*msg;
 } Waitmsg;
 
@@ -505,7 +505,7 @@
 #define FPAZDIV FPZDIV
 #define FPAINVAL        FPINVAL
 
-extern  void    setfcr(u32int);
-extern  void    setfsr(u32int);
-extern  u32int   getfcr(void);
-extern  u32int   getfsr(void);
+extern  void    setfcr(u32);
+extern  void    setfsr(u32);
+extern  u32   getfcr(void);
+extern  u32   getfsr(void);
--- a/Solaris/sparc/include/lib9.h
+++ b/Solaris/sparc/include/lib9.h
@@ -32,7 +32,7 @@
 typedef unsigned int Rune;
 typedef long long int	vlong;
 typedef unsigned long long int	uvlong;
-typedef unsigned int u32int;
+typedef unsigned int u32;
 typedef uvlong u64int;
 
 typedef unsigned int	mpdigit;	/* for /sys/include/mp.h */
--- a/emu/9front/devfs.c
+++ b/emu/9front/devfs.c
@@ -122,7 +122,7 @@
 		}
 		n = name[j];
 		if(strcmp(n, ".") != 0 && !(isdotdot(n) && nc->qid.path == rootqid.path)){	/* TO DO: underlying qids aliased */
-			//print("** ufs walk '%s' -> %s\n", current->s, n);
+			// print("** ufs walk '%s' -> %s\n", current->s, n);
 			next = current;
 			incref(&next->r);
 			next = addelem(current, n);
@@ -141,7 +141,7 @@
 		}
 		wq->qid[wq->nqid++] = nc->qid;
 	}
-//	print("** ufs walk '%s'\n", current->s);
+	// print("** ufs walk '%s'\n", current->s);
 
 	poperror();
 	if(wq->nqid < nname){
--- a/emu/9front/win.c
+++ b/emu/9front/win.c
@@ -65,7 +65,7 @@
 	postnote(PNPROC, keybdpid, Eintr);
 }
 
-uchar*
+Memdata*
 attachscreen(Rectangle *r, ulong *chan, int *d, int *width, int *softscreen)
 {
 	int fd;
@@ -163,7 +163,7 @@
 	}
 
 	*softscreen = 1;
-	return (uchar*)data;
+	return (Memdata*)data;
 }
 
 static ulong*
@@ -188,7 +188,7 @@
 	 */
 	if(imageid > 0){
 		ubuf[0] = 'f';
-		BPLONG(ubuf+1, imageid);
+		BP32INT(ubuf+1, imageid);
 		if(write(datafd, ubuf, 1+4) != 1+4)
 			fprint(2, "attachwindow: cannot free old window: %r\n");
 	}
@@ -197,7 +197,7 @@
 	 */
 	ubuf[0] = 'n';
 	++imageid;
-	BPLONG(ubuf+1, imageid);
+	BP32INT(ubuf+1, imageid);
 	ubuf[5] = n;
 	memmove(ubuf+6, winname, n);
 	if(write(datafd, ubuf, 6+n) != 6+n){
@@ -274,11 +274,11 @@
 			dy = chunksize/bpl;
 		n = dy*bpl;
 		chunk[0] = 'y';
-		BPLONG(chunk+1, imageid);
-		BPLONG(chunk+5, r.min.x);
-		BPLONG(chunk+9, r.min.y);
-		BPLONG(chunk+13, r.max.x);
-		BPLONG(chunk+17, r.min.y+dy);
+		BP32INT(chunk+1, imageid);
+		BP32INT(chunk+5, r.min.x);
+		BP32INT(chunk+9, r.min.y);
+		BP32INT(chunk+13, r.max.x);
+		BP32INT(chunk+17, r.min.y+dy);
 		memmove(chunk+21, data, n);
 		ndata += n;
 		data += n;
@@ -345,8 +345,8 @@
 		return;
 	}
 
-	BPLONG(curs+0*4, c->hotx);
-	BPLONG(curs+1*4, c->hoty);
+	BP32INT(curs+0*4, c->hotx);
+	BP32INT(curs+1*4, c->hoty);
 
 	w = (c->maxx-c->minx);
 	h = (c->maxy-c->miny)/2;
--- a/emu/OpenBSD/mkfile
+++ b/emu/OpenBSD/mkfile
@@ -4,7 +4,7 @@
 
 <../../mkconfig
 SYSTARG=OpenBSD
-OBJTYPE=386
+# OBJTYPE=386
 
 INSTALLDIR=$ROOT/$SYSTARG/$OBJTYPE/bin	#path of directory where kernel is installed
 
--- a/emu/OpenBSD/os.c
+++ b/emu/OpenBSD/os.c
@@ -316,8 +316,8 @@
 	return 0;
 }
 
-int
-segflush(void *p, ulong n)
+s32
+segflush(void *p, u32 n)
 {
 	return mprotect(p, n, PROT_EXEC|PROT_READ|PROT_WRITE);
 }
--- a/emu/port/alloc.c
+++ b/emu/port/alloc.c
@@ -574,7 +574,7 @@
 }
 
 int
-poolread(char *va, int count, ulong offset)
+poolread(char *va, int count, uintptr offset)
 {
 	Pool *p;
 	int n, i, signed_off;
@@ -905,7 +905,7 @@
 static void
 corrupted(char *str, char *msg, Pool *p, Bhdr *b, void *v)
 {
-	print("%s(%p): pool %s CORRUPT: %s at %p'%lud(magic=%lux)\n",
+	print("%s(%p): pool %s CORRUPT: %s at %p'%ud(magic=%ux)\n",
 		str, v, p->name, msg, b, b->size, b->magic);
 	dumpvl("bad Bhdr", (uintptr *)((uintptr)b & ~3)-4, 10);
 }
--- a/emu/port/devcons.c
+++ b/emu/port/devcons.c
@@ -7,7 +7,7 @@
 #include	"keyboard.h"
 
 extern int cflag;
-int	exdebug;
+extern int exdebug;
 extern int keepbroken;
 
 enum
@@ -61,7 +61,7 @@
 };
 
 Queue*	gkscanq;		/* Graphics keyboard raw scancodes */
-char*	gkscanid;		/* name of raw scan format (if defined) */
+extern	char	gkscanid[];	/* name of raw scan format (if defined) */
 Queue*	gkbdq;			/* Graphics keyboard unprocessed input */
 Queue*	kbdq;			/* Console window unprocessed keyboard input */
 Queue*	lineq;			/* processed console input */
@@ -214,7 +214,7 @@
 
 	case Qscancode:
 		qlock(&kbd.gq);
-		if(gkscanq != nil || gkscanid == nil) {
+		if(gkscanq != nil || gkscanid[0] == '\0') {
 			qunlock(&kbd.q);
 			c->flag &= ~COPEN;
 			if(gkscanq)
@@ -625,19 +625,4 @@
 
 	randomread(&x, sizeof(x));
 	return x;
-}
-
-QLock grandomlk;
-
-void
-_genrandomqlock(void)
-{
-	qlock(&grandomlk);
-}
-
-
-void
-_genrandomqunlock(void)
-{
-	qunlock(&grandomlk);
 }
--- a/emu/port/devdraw.c
+++ b/emu/port/devdraw.c
@@ -27,8 +27,8 @@
  */
 #define	QSHIFT	4	/* location in qid of client # */
 
-#define	QID(q)		((((u32int)(q).path)&0x0000000F)>>0)
-#define	CLIENTPATH(q)	((((u32int)q)&0x7FFFFFF0)>>QSHIFT)
+#define	QID(q)		((((u32)(q).path)&0x0000000F)>>0)
+#define	CLIENTPATH(q)	((((u32)q)&0x7FFFFFF0)>>QSHIFT)
 #define	CLIENT(q)	CLIENTPATH((q).path)
 
 #define	NHASH		(1<<5)
@@ -45,21 +45,21 @@
 typedef struct Refx Refx;
 typedef struct DName DName;
 
-u32int blanktime = 30;	/* in minutes; a half hour */
+u32 blanktime = 30;	/* in minutes; a half hour */
 
 struct Draw
 {
 	QLock	q;
-	s32int		clientid;
-	s32int		nclient;
+	s32		clientid;
+	s32		nclient;
 	Client**	client;
-	s32int		nname;
+	s32		nname;
 	DName*	name;
-	s32int		vers;
-	s32int		softscreen;
-	s32int		blanked;	/* screen turned off */
-	u32int		blanktime;	/* time of last operation */
-	u32int		savemap[3*256];
+	s32		vers;
+	s32		softscreen;
+	s32		blanked;	/* screen turned off */
+	u32		blanktime;	/* time of last operation */
+	u32		savemap[3*256];
 };
 
 struct Client
@@ -70,13 +70,13 @@
 	Refresh*	refresh;
 	Rendez		refrend;
 	uchar*		readdata;
-	s32int		nreaddata;
-	s32int		busy;
-	s32int		clientid;
-	s32int		slot;
-	s32int		refreshme;
-	s32int		infoid;
-	s32int	op;	/* compositing operator - SoverD by default */
+	s32		nreaddata;
+	s32		busy;
+	s32		clientid;
+	s32		slot;
+	s32		refreshme;
+	s32		infoid;
+	s32	op;	/* compositing operator - SoverD by default */
 };
 
 struct Refresh
@@ -97,13 +97,13 @@
 	char			*name;
 	Client	*client;
 	DImage*		dimage;
-	s32int			vers;
+	s32			vers;
 };
 
 struct FChar
 {
-	s32int		minx;	/* left edge of bits */
-	s32int		maxx;	/* right edge of bits */
+	s32		minx;	/* left edge of bits */
+	s32		maxx;	/* right edge of bits */
 	uchar		miny;	/* first non-zero scan-line */
 	uchar		maxy;	/* last non-zero scan-line + 1 */
 	schar		left;	/* offset of baseline */
@@ -118,13 +118,13 @@
  */
 struct DImage
 {
-	s32int		id;
-	s32int		ref;
+	s32		id;
+	s32		ref;
 	char		*name;
-	s32int		vers;
+	s32		vers;
 	Memimage*	image;
-	s32int		ascent;
-	s32int		nfchar;
+	s32		ascent;
+	s32		nfchar;
 	FChar*		fchar;
 	DScreen*	dscreen;	/* 0 if not a window */
 	DImage*	fromname;	/* image this one is derived from, by name */
@@ -139,9 +139,9 @@
 
 struct DScreen
 {
-	s32int		id;
-	s32int		public;
-	s32int		ref;
+	s32		id;
+	s32		public;
+	s32		ref;
 	DImage	*dimage;
 	DImage	*dfill;
 	Memscreen*	screen;
@@ -1248,7 +1248,7 @@
 }
 
 uchar*
-drawcoord(uchar *p, uchar *maxp, s32int oldx, s32int *newx)
+drawcoord(uchar *p, uchar *maxp, s32 oldx, s32 *newx)
 {
 	int b, x;
 
@@ -1325,10 +1325,10 @@
 void
 drawmesg(Client *client, void *av, int n)
 {
-	s32int c, op, repl, m, y, dstid, scrnid, ni, ci, j, nw, e0, e1, ox, oy, esize, oesize, doflush;
+	s32 c, op, repl, m, y, dstid, scrnid, ni, ci, j, nw, e0, e1, ox, oy, esize, oesize, doflush;
 	uchar *u, *a, refresh;
 	char *fmt;
-	u32int value, chan;
+	u32 value, chan;
 	Rectangle r, clipr;
 	Point p, q, *pp, sp;
 	Memimage *i, *dst, *src, *mask;
--- a/emu/port/devpointer.c
+++ b/emu/port/devpointer.c
@@ -258,12 +258,12 @@
 		}
 		if(n < 8)
 			error(Eshort);
-		cur.hotx = BGLONG((uchar*)va+0*4);
-		cur.hoty = BGLONG((uchar*)va+1*4);
+		cur.hotx = BG32INT((uchar*)va+0*4);
+		cur.hoty = BG32INT((uchar*)va+1*4);
 		cur.minx = 0;
 		cur.miny = 0;
-		cur.maxx = BGLONG((uchar*)va+2*4);
-		cur.maxy = BGLONG((uchar*)va+3*4);
+		cur.maxx = BG32INT((uchar*)va+2*4);
+		cur.maxy = BG32INT((uchar*)va+3*4);
 		if(cur.maxx%8 != 0 || cur.maxy%2 != 0 || n-4*4 != (cur.maxx/8 * cur.maxy))
 			error(Ebadarg);
 		cur.data = (uchar*)va + 4*4;
--- a/emu/port/devssl.c
+++ b/emu/port/devssl.c
@@ -49,7 +49,7 @@
 	ushort	blocklen;	/* blocking length */
 
 	ushort	diglen;		/* length of digest */
-	DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*);	/* hash func */
+	DigestState *(*hf)(uchar*, u32, uchar*, DigestState*);	/* hash func */
 
 	/* for SSL format */
 	int	max;			/* maximum unpadded data per msg */
@@ -854,7 +854,7 @@
 {
 	char	*name;
 	int	diglen;
-	DigestState *(*hf)(uchar*, ulong, uchar*, DigestState*);
+	DigestState *(*hf)(uchar*, u32, uchar*, DigestState*);
 };
 
 Hashalg hashtab[] =
--- a/emu/port/exportfs.c
+++ b/emu/port/exportfs.c
@@ -105,7 +105,7 @@
 static char	Edupfid[]	= "fid in use";
 static char	Eaccess[] = "read/write -- not open in suitable mode";
 static char	Ecount[] = "read/write -- count too big";
-int	exdebug = 0;
+extern int	exdebug;
 
 int
 export(int fd, char *dir, int async)
@@ -257,7 +257,7 @@
 			if(q != nil || n > 6000)
 				break;
 			if(n%600 == 0)
-				print("exportproc %ld: waiting for memory (%d) for request\n", up->pid, msize);
+				print("exportproc %d: waiting for memory (%d) for request\n", up->pid, msize);
 			osenter();
 			osmillisleep(100);
 			osleave();
--- a/emu/port/fns.h
+++ b/emu/port/fns.h
@@ -20,7 +20,7 @@
 void		cinit(void);
 char*	clipread(void);
 int		clipwrite(char*);
-void	(*coherence)(void);
+/*void	(*coherence)(void); moved this to port/main.c to avoid the duplicate symbols error */
 void		copen(Chan*);
 void		cmderror(Cmdbuf*, char*);
 Block*	concatblock(Block*);
--- a/emu/port/lock.c
+++ b/emu/port/lock.c
@@ -2,6 +2,8 @@
 #include	"fns.h"
 #include	"error.h"
 
+extern void	(*coherence)(void);
+
 void
 lock(Lock *l)
 {
--- a/emu/port/main.c
+++ b/emu/port/main.c
@@ -6,15 +6,18 @@
 #include	"draw.h"
 #include	"version.h"
 
+#define DP if(1){}else print
+void	(*coherence)(void) = nil;	/* used by port/lock.c and port/win-x11a.c */
+int	exdebug = 0;
 int		rebootargc = 0;
 char**		rebootargv;
+char	gkscanid[32] = "";
 static	char	*imod = "/dis/emuinit.dis";
 extern	char*	hosttype;
-char*	tkfont;	/* for libtk/utils.c */
-int	tkstylus;	/* libinterp/tk.c */
+extern	char*	tkfont;	/* for libtk/utils.c */
+extern int	tkstylus;	/* libinterp/tk.c */
 extern	int	mflag;
 	int	dflag;
-	int vflag;
 	int	vflag;
 	Procs	procs;
 	char	*eve;
@@ -230,6 +233,8 @@
 void
 nofence(void)
 {
+	int i;
+	USED(i);
 }
 
 void
@@ -348,9 +353,16 @@
 void
 error(char *err)
 {
-	if(err != up->env->errstr && up->env->errstr != nil)
+	DP("error pid %d err %p %s up->nerr %d up->env->errstr %s getcallerpc %p\n",
+		up->pid, err, err, up->nerr, up->env->errstr, getcallerpc(&err));
+	DP("error pid %d err %p %s up->nerr %d getcallerpc %p\n",
+		up->pid, err, err, up->nerr, getcallerpc(&err));
+	if(err != up->env->errstr && up->env->errstr != nil){
 		kstrcpy(up->env->errstr, err, ERRMAX);
-//	ossetjmp(up->estack[NERR-1]);
+		DP("error after kstrcpy err %p %s up->nerr %d up->env->errstr %s\n",
+			err, err, up->nerr, up->env->errstr);
+	}
+	// ossetjmp(up->estack[NERR-1]);
 	nexterror();
 }
 
@@ -367,8 +379,24 @@
 }
 
 void
+showjmpbuf(char *str)
+{
+	DP("%p called %s pid %d up->nerr %d\n",
+		getcallerpc(&str), str, up->pid, up->nerr);
+	for(int i = 0; i<up->nerr; i++){
+		DP("	i %d: %p has %p",
+			i, up->estack[i], *(uintptr*)(up->estack[i]));
+		/* below segfaults on OpenBSD */
+		/* DP("has %p", **(uintptr**)(up->estack[i])); */
+		DP("\n");
+	}
+
+}
+
+void
 nexterror(void)
 {
+	showjmpbuf("nexterror");
 	oslongjmp(nil, up->estack[--up->nerr], 1);
 }
 
--- a/emu/port/master
+++ b/emu/port/master
@@ -3,7 +3,6 @@
 %	mem
 *	indir
 /	root
-A	audio
 C	cmd
 D	ssl
 F	tinyfs
@@ -17,6 +16,7 @@
 d	dup
 e	env
 i	draw
+k	ds
 m	pointer
 p	prog
 s	srv
--- a/emu/port/sysfile.c
+++ b/emu/port/sysfile.c
@@ -815,8 +815,9 @@
 	vlong off;
 	long m;
 
-	if(waserror())
+	if(waserror()){
 		return -1;
+	}
 	c.c = fdtochan(up->env->fgrp, fd, OWRITE, 1, 1);
 	if(waserror()){
 		cclose(c.c);
--- a/emu/port/win-x11a.c
+++ b/emu/port/win-x11a.c
@@ -56,8 +56,9 @@
 #include <sys/ipc.h>
 #include <sys/shm.h>
 
-static int displaydepth;
-extern ulong displaychan;
+extern void	(*coherence)(void);
+static int	displaydepth;
+extern ulong	displaychan;
 
 enum
 {
@@ -118,7 +119,7 @@
 static int              is_shm;
 
 static int putsnarf, assertsnarf;
-char *gkscanid = "emu_x11";
+extern char gkscanid[];
 
 /*
  * The documentation for the XSHM extension implies that if the server
@@ -294,13 +295,13 @@
 {
 	int dx, width;
 	uchar *p, *ep, *cp;
-	u32int v, w, *dp, *wp, *edp, *lp;
+	u32 v, w, *dp, *wp, *edp, *lp;
 
 	width = Dx(r);
 	dx = Xsize - width;
-	dp = (u32int*)(gscreendata + (r.min.y * Xsize + r.min.x) * 4);
-	wp = (u32int*)(xscreendata + (r.min.y * Xsize + r.min.x) * 4);
-	edp = (u32int*)(gscreendata + (r.max.y * Xsize + r.max.x) * 4);
+	dp = (u32*)(gscreendata + (r.min.y * Xsize + r.min.x) * 4);
+	wp = (u32*)(xscreendata + (r.min.y * Xsize + r.min.x) * 4);
+	edp = (u32*)(gscreendata + (r.max.y * Xsize + r.max.x) * 4);
 	while(dp < edp) {
 		lp = dp + width;
 		while(dp < lp){
@@ -345,12 +346,12 @@
 {
 	int dx, width;
 	uchar *p, *ep, *lp;
-	u32int *wp;
+	u32 *wp;
 
 	width = Dx(r);
 	dx = Xsize - width;
 	p = gscreendata + r.min.y * Xsize + r.min.x;
-	wp = (u32int *)(xscreendata + (r.min.y * Xsize + r.min.x) * 4);
+	wp = (u32 *)(xscreendata + (r.min.y * Xsize + r.min.x) * 4);
 	ep = gscreendata + r.max.y * Xsize + r.max.x;
 	while(p < ep) {
 		lp = p + width;
@@ -862,6 +863,7 @@
 
 	xdrawable = 0;
 
+	strncpy(gkscanid, "emu_x11", 32);
 	dispname = getenv("DISPLAY");
 	if(dispname == nil)
 		dispname = "not set";
--- a/include/draw.h
+++ b/include/draw.h
@@ -147,14 +147,14 @@
 	Ncomp	= 12,
 } Drawop;
 
-extern	char*	chantostr(char*, u32int);
-extern	u32int	strtochan(char*);
-extern	int		chantodepth(u32int);
+extern	char*	chantostr(char*, u32);
+extern	u32	strtochan(char*);
+extern	int		chantodepth(u32);
 
 struct	Point
 {
-	s32int	x;
-	s32int	y;
+	s32	x;
+	s32	y;
 };
 
 struct Rectangle
@@ -168,7 +168,7 @@
 struct Screen
 {
 	Display	*display;	/* display holding data */
-	s32int	id;		/* id of system-held Screen */
+	s32	id;		/* id of system-held Screen */
 	Image	*image;		/* unused; for reference only */
 	Image	*fill;		/* color to paint behind windows */
 };
@@ -184,20 +184,20 @@
 struct Display
 {
 	void*	qlock;
-	s32int		locking;	/*program is using lockdisplay */
-	s32int		dirno;
+	s32		locking;	/*program is using lockdisplay */
+	s32		dirno;
 	void	*datachan;
 	void	*refchan;
 	void	*ctlchan;
-	s32int		imageid;
-	s32int		local;
-	s32int		depth;
-	u32int	chan;
+	s32		imageid;
+	s32		local;
+	s32		depth;
+	u32	chan;
 	void		(*error)(Display*, char*);
 	char		*devdir;
 	char		*windir;
 	char		oldlabel[64];
-	u32int		dataqid;
+	u32		dataqid;
 	Image		*white;
 	Image		*black;
 	Image		*image;
@@ -204,7 +204,7 @@
 	Image		*opaque;
 	Image		*transparent;
 	uchar		buf[Displaybufsize+1];	/* +1 for flush message */
-	s32int			bufsize;
+	s32			bufsize;
 	uchar		*bufp;
 	Font		*defaultfont;
 	Subfont		*defaultsubfont;
@@ -217,12 +217,12 @@
 struct Image
 {
 	Display		*display;	/* display holding data */
-	s32int		id;		/* id of system-held Image */
+	s32		id;		/* id of system-held Image */
 	Rectangle	r;		/* rectangle in data area, local coords */
 	Rectangle 	clipr;		/* clipping region */
-	s32int		depth;		/* number of bits per pixel */
-	u32int	chan;
-	s32int		repl;		/* flag: data replicates to tile clipr */
+	s32		depth;		/* number of bits per pixel */
+	u32	chan;
+	s32		repl;		/* flag: data replicates to tile clipr */
 	Screen		*screen;	/* 0 if not a window */
 	Image		*next;	/* next in list of windows */
 	Reffn		reffn;
@@ -231,15 +231,15 @@
 
 struct RGB
 {
-	u32int	red;
-	u32int	green;
-	u32int	blue;
+	u32	red;
+	u32	green;
+	u32	blue;
 };
 
 /*
  * Subfonts
  *
- * given char c, Subfont *f, Fontchar *i, and Pos32int p, one says
+ * given char c, Subfont *f, Fontchar *i, and Pos32 p, one says
  *	i = f->info+c;
  *	draw(b, Rect(p.x+i->left, p.y+i->top,
  *		p.x+i->left+((i+1)->x-i->x), p.y+i->bottom),
@@ -250,7 +250,7 @@
 
 struct	Fontchar
 {
-	s32int		x;		/* left edge of bits */
+	s32		x;		/* left edge of bits */
 	uchar		top;		/* first non-zero scan-line */
 	uchar		bottom;		/* last non-zero scan-line + 1 */
 	char		left;		/* offset of baseline */
@@ -265,7 +265,7 @@
 	char		ascent;		/* top of image to baseline */
 	Fontchar 	*info;		/* n+1 character descriptors */
 	Image		*bits;		/* of font */
-	s32int		ref;
+	s32		ref;
 };
 
 enum
@@ -289,7 +289,7 @@
 {
 	Rune		min;	/* lowest rune value to be taken from subfont */
 	Rune		max;	/* highest rune value+1 to be taken from subfont */
-	s32int		offset;	/* position in subfont of character at min */
+	s32		offset;	/* position in subfont of character at min */
 	char		*name;			/* stored in font */
 	char		*subfontname;		/* to access subfont */
 };
@@ -305,7 +305,7 @@
 
 struct Cachesubf
 {
-	u32int		age;	/* for replacement */
+	u32		age;	/* for replacement */
 	Cachefont	*cf;	/* font info that owns us */
 	Subfont		*f;	/* attached subfont */
 };
@@ -318,10 +318,10 @@
 	s16int		ascent;	/* top of image to baseline */
 	s16int		width;	/* widest so far; used in caching only */	
 	s16int		nsub;	/* number of subfonts */
-	u32int		age;	/* increasing counter; used for LRU */
-	s32int		maxdepth;	/* maximum depth of all loaded subfonts */
-	s32int		ncache;	/* size of cache */
-	s32int		nsubf;	/* size of subfont list */
+	u32		age;	/* increasing counter; used for LRU */
+	s32		maxdepth;	/* maximum depth of all loaded subfonts */
+	s32		ncache;	/* size of cache */
+	s32		nsubf;	/* size of subfont list */
 	Cacheinfo	*cache;
 	Cachesubf	*subf;
 	Cachefont	**sub;	/* as read from file */
@@ -334,8 +334,8 @@
 /*
  * Image management
  */
-extern Image*	_allocimage(Image*, Display*, Rectangle, u32int, int, u32int, int, int);
-extern Image*	allocimage(Display*, Rectangle, u32int, int, u32int);
+extern Image*	_allocimage(Image*, Display*, Rectangle, u32, int, u32, int, int);
+extern Image*	allocimage(Display*, Rectangle, u32, int, u32);
 extern uchar*	bufimage(Display*, int);
 extern int	bytesperline(Rectangle, int);
 extern void	closedisplay(Display*);
@@ -358,7 +358,7 @@
 extern int	writeimage(int, Image*, int);
 extern Image*	namedimage(Display*, char*);
 extern int	nameimage(Image*, char*, int);
-extern Image* allocimagemix(Display*, u32int, u32int);
+extern Image* allocimagemix(Display*, u32, u32);
 
 /*
  * Colors
@@ -365,18 +365,18 @@
  */
 extern	void	readcolmap(Display*, RGB*);
 extern	void	writecolmap(Display*, RGB*);
-extern	u32int	setalpha(u32int, uchar);
+extern	u32	setalpha(u32, uchar);
 
 /*
  * Windows
  */
 extern Screen*	allocscreen(Image*, Image*, int);
-extern Image*	_allocwindow(Image*, Screen*, Rectangle, int, u32int);
-extern Image*	allocwindow(Screen*, Rectangle, int, u32int);
+extern Image*	_allocwindow(Image*, Screen*, Rectangle, int, u32);
+extern Image*	allocwindow(Screen*, Rectangle, int, u32);
 extern void	bottomnwindows(Image**, int);
 extern void	bottomwindow(Image*);
 extern int	freescreen(Screen*);
-extern Screen*	publicscreen(Display*, int, u32int);
+extern Screen*	publicscreen(Display*, int, u32);
 extern void	topnwindows(Image**, int);
 extern void	topwindow(Image*);
 extern int	originwindow(Image*, Point, Point);
@@ -384,8 +384,8 @@
 /*
  * Geometry
  */
-extern Point		Pt(s32int, s32int);
-extern Rectangle	Rect(s32int, s32int, s32int, s32int);
+extern Point		Pt(s32, s32);
+extern Rectangle	Rect(s32, s32, s32, s32);
 extern Rectangle	Rpt(Point, Point);
 extern Point		addpt(Point, Point);
 extern Point		subpt(Point, Point);
@@ -408,8 +408,8 @@
 extern int		rgb2cmap(int, int, int);
 extern int		cmap2rgb(int);
 extern int		cmap2rgba(int);
-extern void		icossin(int, s32int*, s32int*);
-extern void		icossin2(s32int, s32int, s32int*, s32int*);
+extern void		icossin(int, s32*, s32*);
+extern void		icossin2(s32, s32, s32*, s32*);
 
 /*
  * Graphics
@@ -513,9 +513,9 @@
 extern	int	_drawdebug;	/* set to 1 to see errors from flushimage */
 
 #define	BG16INT(p)		(((p)[0]<<0) | ((p)[1]<<8))
-#define	BG32INT(p)		((s32int)((BG16INT(p)<<0) | (BG16INT(p+2)<<16)))
+#define	BG32INT(p)		((s32)((BG16INT(p)<<0) | (BG16INT(p+2)<<16)))
 #define	BP16INT(p, v)		((p)[0]=(v), (p)[1]=((v)>>8))
-#define	BP32INT(p, v)		(BP16INT(p, ((s32int)v)), BP16INT(p+2, ((s32int)v)>>16))
+#define	BP32INT(p, v)		(BP16INT(p, ((s32)v)), BP16INT(p+2, ((s32)v)>>16))
 
 /*
  * Compressed image file parameters
@@ -529,7 +529,7 @@
 extern	int	_compblocksize(Rectangle, int);
 
 /* XXX backwards helps; should go */
-extern	u32int	drawld2chan[];
+extern	u32	drawld2chan[];
 extern	void		drawsetdebug(int);
 
 /*
@@ -549,7 +549,7 @@
 */
 #define	IRECT(r)	((Rectangle){IPOINT((r).min),IPOINT((r).max)})
 #define	DRECT(r)	((Draw_Rect){DPOINT((r).min),DPOINT((r).max)})
-#define	IPOINT(p)	((Point){(s32int)((p).x),(s32int)((p).y)})
+#define	IPOINT(p)	((Point){(s32)((p).x),(s32)((p).y)})
 #define	DPOINT(p)	((Draw_Point){(p).x,(p).y})
 
 #define P2P(p1, p2)	(p1).x = (p2).x, (p1).y = (p2).y
--- a/include/fcall.h
+++ b/include/fcall.h
@@ -9,11 +9,11 @@
 struct	Fcall
 {
 	uchar	type;
-	u32int	fid;
+	ulong	fid;
 	ushort	tag;
 	/* union { */
 		/* struct { */
-			u32int	msize;		/* Tversion, Rversion */
+			ulong	msize;		/* Tversion, Rversion */
 			char	*version;	/* Tversion, Rversion */
 		/* }; */
 		/* struct { */
@@ -24,23 +24,23 @@
 		/* }; */
 		/* struct { */
 			Qid	qid;		/* Rattach, Ropen, Rcreate */
-			u32int	iounit;		/* Ropen, Rcreate */
+			ulong	iounit;		/* Ropen, Rcreate */
 		/* }; */
 		/* struct { */
 			Qid	aqid;		/* Rauth */
 		/* }; */
 		/* struct { */
-			u32int	afid;		/* Tauth, Tattach */
+			ulong	afid;		/* Tauth, Tattach */
 			char	*uname;		/* Tauth, Tattach */
 			char	*aname;		/* Tauth, Tattach */
 		/* }; */
 		/* struct { */
-			u32int	perm;		/* Tcreate */ 
+			ulong	perm;		/* Tcreate */ 
 			char	*name;		/* Tcreate */
 			uchar	mode;		/* Tcreate, Topen */
 		/* }; */
 		/* struct { */
-			u32int	newfid;		/* Twalk */
+			ulong	newfid;		/* Twalk */
 			ushort	nwname;		/* Twalk */
 			char	*wname[MAXWELEM];	/* Twalk */
 		/* }; */
@@ -50,7 +50,7 @@
 		/* }; */
 		/* struct { */
 			vlong	offset;		/* Tread, Twrite */
-			u32int	count;		/* Tread, Twrite, Rread */
+			ulong	count;		/* Tread, Twrite, Rread */
 			char	*data;		/* Twrite, Rread */
 		/* }; */
 		/* struct { */
@@ -63,9 +63,9 @@
 
 #define	GBIT8(p)	((p)[0])
 #define	GBIT16(p)	((p)[0]|((p)[1]<<8))
-#define	GBIT32(p)	((u32int)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)))
-#define	GBIT64(p)	((u32int)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\
-				((vlong)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32))
+#define	GBIT32(p)	((u32)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)))
+#define	GBIT64(p)	((u32)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\
+				((s64)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32))
 
 #define	PBIT8(p,v)	(p)[0]=(v)
 #define	PBIT16(p,v)	(p)[0]=(v);(p)[1]=(v)>>8
@@ -83,8 +83,8 @@
 /* The count, however, excludes itself; total size is BIT16SZ+count */
 #define STATFIXLEN	(BIT16SZ+QIDSZ+5*BIT16SZ+4*BIT32SZ+1*BIT64SZ)	/* amount of fixed length data in a stat buffer */
 
-#define	NOTAG		(ushort)~0U	/* Dummy tag */
-#define	NOFID		(u32int)~0U	/* Dummy fid */
+#define	NOTAG		(u16)~0U	/* Dummy tag */
+#define	NOFID		(u32)~0U	/* Dummy fid */
 #define	IOHDRSZ		24	/* ample room for Twrite/Rread header (iounit) */
 
 enum
--- a/include/freetype/config/ftconfig.h.orig
+++ /dev/null
@@ -1,334 +1,0 @@
-/***************************************************************************/
-/*                                                                         */
-/*  ftconfig.h                                                             */
-/*                                                                         */
-/*    ANSI-specific configuration file (specification only).               */
-/*                                                                         */
-/*  Copyright 1996-2001, 2002 by                                           */
-/*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
-/*                                                                         */
-/*  This file is part of the FreeType project, and may only be used,       */
-/*  modified, and distributed under the terms of the FreeType project      */
-/*  license, LICENSE.TXT.  By continuing to use, modify, or distribute     */
-/*  this file you indicate that you have read the license and              */
-/*  understand and accept it fully.                                        */
-/*                                                                         */
-/***************************************************************************/
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /* This header file contains a number of macro definitions that are used */
-  /* by the rest of the engine.  Most of the macros here are automatically */
-  /* determined at compile time, and you should not need to change it to   */
-  /* port FreeType, except to compile the library with a non-ANSI          */
-  /* compiler.                                                             */
-  /*                                                                       */
-  /* Note however that if some specific modifications are needed, we       */
-  /* advise you to place a modified copy in your build directory.          */
-  /*                                                                       */
-  /* The build directory is usually `freetype/builds/<system>', and        */
-  /* contains system-specific files that are always included first when    */
-  /* building the library.                                                 */
-  /*                                                                       */
-  /* This ANSI version should stay in `include/freetype/config'.           */
-  /*                                                                       */
-  /*************************************************************************/
-
-
-#ifndef __FTCONFIG_H__
-#define __FTCONFIG_H__
-
-#include <ft2build.h>
-#include FT_CONFIG_OPTIONS_H
-#include FT_CONFIG_STANDARD_LIBRARY_H
-
-FT_BEGIN_HEADER
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /*               PLATFORM-SPECIFIC CONFIGURATION MACROS                  */
-  /*                                                                       */
-  /* These macros can be toggled to suit a specific system.  The current   */
-  /* ones are defaults used to compile FreeType in an ANSI C environment   */
-  /* (16bit compilers are also supported).  Copy this file to your own     */
-  /* `freetype/builds/<system>' directory, and edit it to port the engine. */
-  /*                                                                       */
-  /*************************************************************************/
-
-
-  /* The number of bytes in an `int' type.  */
-#if   FT_UINT_MAX == 0xFFFFFFFFUL
-#define FT_SIZEOF_INT  4
-#elif FT_UINT_MAX == 0xFFFFU
-#define FT_SIZEOF_INT  2
-#elif FT_UINT_MAX > 0xFFFFFFFFU && FT_UINT_MAX == 0xFFFFFFFFFFFFFFFFU
-#define FT_SIZEOF_INT  8
-#else
-#error "Unsupported number of bytes in `int' type!"
-#endif
-
-  /* The number of bytes in a `long' type.  */
-#if   FT_ULONG_MAX == 0xFFFFFFFFUL
-#define FT_SIZEOF_LONG  4
-#elif FT_ULONG_MAX > 0xFFFFFFFFU && FT_ULONG_MAX == 0xFFFFFFFFFFFFFFFFU
-#define FT_SIZEOF_LONG  8
-#else
-#error "Unsupported number of bytes in `long' type!"
-#endif
-
-
-  /* Preferred alignment of data */
-#define FT_ALIGNMENT  8
-
-
-  /* FT_UNUSED is a macro used to indicate that a given parameter is not  */
-  /* used -- this is only used to get rid of unpleasant compiler warnings */
-#ifndef FT_UNUSED
-#define FT_UNUSED( arg )  ( (arg) = (arg) )
-#endif
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /*                     AUTOMATIC CONFIGURATION MACROS                    */
-  /*                                                                       */
-  /* These macros are computed from the ones defined above.  Don't touch   */
-  /* their definition, unless you know precisely what you are doing.  No   */
-  /* porter should need to mess with them.                                 */
-  /*                                                                       */
-  /*************************************************************************/
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /* Mac support                                                           */
-  /*                                                                       */
-  /*   This is the only necessary change, so it is defined here instead    */
-  /*   providing a new configuration file.                                 */
-  /*                                                                       */
-#if defined( __APPLE__ ) || ( defined( __MWERKS__ ) && defined( macintosh ) )
-#define FT_MACINTOSH 1
-#endif
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /* IntN types                                                            */
-  /*                                                                       */
-  /*   Used to guarantee the size of some specific integers.               */
-  /*                                                                       */
-  typedef signed short    FT_Int16;
-  typedef unsigned short  FT_UInt16;
-
-#if FT_SIZEOF_INT == 4
-
-  typedef signed int      FT_Int32;
-  typedef unsigned int    FT_UInt32;
-
-#elif FT_SIZEOF_LONG == 4
-
-  typedef signed long     FT_Int32;
-  typedef unsigned long   FT_UInt32;
-
-#else
-#error "no 32bit type found -- please check your configuration files"
-#endif
-
-  /* now, lookup for an integer type that is at least 32 bits */
-#if FT_SIZEOF_INT >= 4
-
-  typedef int            FT_Fast;
-  typedef unsigned int   FT_UFast;
-
-#elif FT_SIZEOF_LONG >= 4
-
-  typedef long           FT_Fast;
-  typedef unsigned long  FT_UFast;
-
-#endif
-
-
-  /* determine whether we have a 64-bit int type for platforms without */
-  /* Autoconf                                                          */
-#if FT_SIZEOF_LONG == 8
-
-  /* FT_LONG64 must be defined if a 64-bit type is available */
-#define FT_LONG64
-#define FT_INT64  long
-
-#elif defined( _MSC_VER ) && _MSC_VER >= 900  /* Visual C++ (and Intel C++) */
-
-  /* this compiler provides the __int64 type */
-#define FT_LONG64
-#define FT_INT64  __int64
-
-#elif defined( __BORLANDC__ )  /* Borland C++ */
-
-  /* XXXX: We should probably check the value of __BORLANDC__ in order */
-  /*       to test the compiler version.                               */
-
-  /* this compiler provides the __int64 type */
-#define FT_LONG64
-#define FT_INT64  __int64
-
-#elif defined( __WATCOMC__ )   /* Watcom C++ */
-
-  /* Watcom doesn't provide 64-bit data types */
-
-#elif defined( __MWKS__ )      /* Metrowerks CodeWarrior */
-
-  /* I don't know if it provides 64-bit data types, any suggestion */
-  /* is welcome.                                                   */
-
-#elif defined( __GNUC__ )
-
-  /* GCC provides the "long long" type */
-#define FT_LONG64
-#define FT_INT64  long long int
-
-#endif /* FT_SIZEOF_LONG == 8 */
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /* A 64-bit data type will create compilation problems if you compile    */
-  /* in strict ANSI mode.  To avoid them, we disable their use if          */
-  /* __STDC__ is defined.  You can however ignore this rule by             */
-  /* defining the FT_CONFIG_OPTION_FORCE_INT64 configuration macro.        */
-  /*                                                                       */
-#if defined( FT_LONG64 ) && !defined( FT_CONFIG_OPTION_FORCE_INT64 )
-
-#ifdef __STDC__
-
-  /* undefine the 64-bit macros in strict ANSI compilation mode */
-#undef FT_LONG64
-#undef FT_INT64
-
-#endif /* __STDC__ */
-
-#endif /* FT_LONG64 && !FT_CONFIG_OPTION_FORCE_INT64 */
-
-
-#ifdef FT_MAKE_OPTION_SINGLE_OBJECT
-
-#define FT_LOCAL( x )      static  x
-#define FT_LOCAL_DEF( x )  static  x
-
-#else
-
-#ifdef __cplusplus
-#define FT_LOCAL( x )      extern "C"  x
-#define FT_LOCAL_DEF( x )  extern "C"  x
-#else
-#define FT_LOCAL( x )      extern  x
-#define FT_LOCAL_DEF( x )  x
-#endif
-
-#endif /* FT_MAKE_OPTION_SINGLE_OBJECT */
-
-
-#ifndef FT_BASE
-
-#ifdef __cplusplus
-#define FT_BASE( x )  extern "C"  x
-#else
-#define FT_BASE( x )  extern  x
-#endif
-
-#endif /* !FT_BASE */
-
-
-#ifndef FT_BASE_DEF
-
-#ifdef __cplusplus
-#define FT_BASE_DEF( x )  extern "C"  x
-#else
-#define FT_BASE_DEF( x )  extern  x
-#endif
-
-#endif /* !FT_BASE_DEF */
-
-
-#ifndef FT_EXPORT
-
-#ifdef __cplusplus
-#define FT_EXPORT( x )  extern "C"  x
-#else
-#define FT_EXPORT( x )  extern  x
-#endif
-
-#endif /* !FT_EXPORT */
-
-
-#ifndef FT_EXPORT_DEF
-
-#ifdef __cplusplus
-#define FT_EXPORT_DEF( x )  extern "C"  x
-#else
-#define FT_EXPORT_DEF( x )  extern  x
-#endif
-
-#endif /* !FT_EXPORT_DEF */
-
-
-#ifndef FT_EXPORT_VAR
-
-#ifdef __cplusplus
-#define FT_EXPORT_VAR( x )  extern "C"  x
-#else
-#define FT_EXPORT_VAR( x )  extern  x
-#endif
-
-#endif /* !FT_EXPORT_VAR */
-
-  /* The following macros are needed to compile the library with a   */
-  /* C++ compiler and with 16bit compilers.                          */
-  /*                                                                 */
-
-  /* This is special.  Within C++, you must specify `extern "C"' for */
-  /* functions which are used via function pointers, and you also    */
-  /* must do that for structures which contain function pointers to  */
-  /* assure C linkage -- it's not possible to have (local) anonymous */
-  /* functions which are accessed by (global) function pointers.     */
-  /*                                                                 */
-  /*                                                                 */
-  /* FT_CALLBACK_DEF is used to _define_ a callback function.        */
-  /*                                                                 */
-  /* FT_CALLBACK_TABLE is used to _declare_ a constant variable that */
-  /* contains pointers to callback functions.                        */
-  /*                                                                 */
-  /* FT_CALLBACK_TABLE_DEF is used to _define_ a constant variable   */
-  /* that contains pointers to callback functions.                   */
-  /*                                                                 */
-  /*                                                                 */
-  /* Some 16bit compilers have to redefine these macros to insert    */
-  /* the infamous `_cdecl' or `__fastcall' declarations.             */
-  /*                                                                 */
-#ifndef FT_CALLBACK_DEF
-#ifdef __cplusplus
-#define FT_CALLBACK_DEF( x )  extern "C"  x
-#else
-#define FT_CALLBACK_DEF( x )  static  x
-#endif
-#endif /* FT_CALLBACK_DEF */
-
-#ifndef FT_CALLBACK_TABLE
-#ifdef __cplusplus
-#define FT_CALLBACK_TABLE      extern "C"
-#define FT_CALLBACK_TABLE_DEF  extern "C"
-#else
-#define FT_CALLBACK_TABLE      extern
-#define FT_CALLBACK_TABLE_DEF  /* nothing */
-#endif
-#endif /* FT_CALLBACK_TABLE */
-
-
-FT_END_HEADER
-
-
-#endif /* __FTCONFIG_H__ */
-
-
-/* END */
--- a/include/freetype/config/ftmodule.h.orig
+++ /dev/null
@@ -1,19 +1,0 @@
-FT_USE_MODULE(autohint_module_class)
-FT_USE_MODULE(cff_driver_class)
-FT_USE_MODULE(t1cid_driver_class)
-FT_USE_MODULE(pcf_driver_class)
-FT_USE_MODULE(bdf_driver_class)
-FT_USE_MODULE(psaux_module_class)
-FT_USE_MODULE(psnames_module_class)
-FT_USE_MODULE(pshinter_module_class)
-FT_USE_MODULE(ft_raster1_renderer_class)
-FT_USE_MODULE(sfnt_module_class)
-FT_USE_MODULE(ft_smooth_renderer_class)
-FT_USE_MODULE(ft_smooth_lcd_renderer_class)
-FT_USE_MODULE(ft_smooth_lcdv_renderer_class)
-FT_USE_MODULE(tt_driver_class)
-FT_USE_MODULE(t1_driver_class)
-FT_USE_MODULE(t42_driver_class)
-FT_USE_MODULE(pfr_driver_class)
-FT_USE_MODULE(winfnt_driver_class)
-
--- a/include/freetype/config/ftstdlib.h.orig
+++ /dev/null
@@ -1,140 +1,0 @@
-/***************************************************************************/
-/*                                                                         */
-/*  ftstdlib.h                                                             */
-/*                                                                         */
-/*    ANSI-specific library and header configuration file (specification   */
-/*    only).                                                               */
-/*                                                                         */
-/*  Copyright 2002 by                                                      */
-/*  David Turner, Robert Wilhelm, and Werner Lemberg.                      */
-/*                                                                         */
-/*  This file is part of the FreeType project, and may only be used,       */
-/*  modified, and distributed under the terms of the FreeType project      */
-/*  license, LICENSE.TXT.  By continuing to use, modify, or distribute     */
-/*  this file you indicate that you have read the license and              */
-/*  understand and accept it fully.                                        */
-/*                                                                         */
-/***************************************************************************/
-
-
-  /*************************************************************************/
-  /*                                                                       */
-  /* This file is used to group all #includes to the ANSI C library that   */
-  /* FreeType normally requires.  It also defines macros to rename the     */
-  /* standard functions within the FreeType source code.                   */
-  /*                                                                       */
-  /* Load a file which defines __FTSTDLIB_H__ before this one to override  */
-  /* it.                                                                   */
-  /*                                                                       */
-  /*************************************************************************/
-
-
-#ifndef __FTSTDLIB_H__
-#define __FTSTDLIB_H__
-
-
-  /**********************************************************************/
-  /*                                                                    */
-  /*                           integer limits                           */
-  /*                                                                    */
-  /* UINT_MAX and ULONG_MAX are used to automatically compute the size  */
-  /* of `int' and `long' in bytes at compile-time.  So far, this works  */
-  /* for all platforms the library has been tested on.                  */
-  /*                                                                    */
-  /* Note that on the extremely rare platforms that do not provide      */
-  /* integer types that are _exactly_ 16 and 32 bits wide (e.g. some    */
-  /* old Crays where `int' is 36 bits), we do not make any guarantee    */
-  /* about the correct behaviour of FT2 with all fonts.                 */
-  /*                                                                    */
-  /* In these case, "ftconfig.h" will refuse to compile anyway with a   */
-  /* message like "couldn't find 32-bit type" or something similar.     */
-  /*                                                                    */
-  /* IMPORTANT NOTE: We do not define aliases for heap management and   */
-  /*                 i/o routines (i.e. malloc/free/fopen/fread/...)    */
-  /*                 since these functions should all be encapsulated   */
-  /*                 by platform-specific implementations of            */
-  /*                 "ftsystem.c".                                      */
-  /*                                                                    */
-  /**********************************************************************/
-
-
-#include <limits.h>
-
-#define FT_UINT_MAX   UINT_MAX
-#define FT_ULONG_MAX  ULONG_MAX
-
-
-  /**********************************************************************/
-  /*                                                                    */
-  /*                 character and string processing                    */
-  /*                                                                    */
-  /**********************************************************************/
-
-
-#include <ctype.h>
-
-#define ft_isalnum  isalnum
-#define ft_isupper  isupper
-#define ft_islower  islower
-#define ft_xdigit   isxdigit
-
-
-#include <string.h>
-
-#define ft_strlen   strlen
-#define ft_strcmp   strcmp
-#define ft_strncmp  strncmp
-#define ft_memcpy   memcpy
-#define ft_strcpy   strcpy
-#define ft_strncpy  strncpy
-#define ft_memset   memset
-#define ft_memmove  memmove
-#define ft_memcmp   memcmp
-
-#include <stdio.h>
-
-#define ft_sprintf  sprintf
-
-
-  /**********************************************************************/
-  /*                                                                    */
-  /*                             sorting                                */
-  /*                                                                    */
-  /**********************************************************************/
-
-
-#include <stdlib.h>
-
-#define ft_qsort  qsort
-#define ft_exit   exit    /* only used to exit from unhandled exceptions */
-
-#define ft_atoi   atoi
-
-
-  /**********************************************************************/
-  /*                                                                    */
-  /*                         execution control                          */
-  /*                                                                    */
-  /**********************************************************************/
-
-
-#include <setjmp.h>
-
-#define ft_jmp_buf  jmp_buf   /* note: this cannot be a typedef since */
-                              /*       jmp_buf is defined as a macro  */
-                              /*       on certain platforms           */
-
-#define ft_setjmp   setjmp    /* same thing here */
-#define ft_longjmp  longjmp   /* "               */
-
-
-  /* the following is only used for debugging purposes, i.e. when */
-  /* FT_DEBUG_LEVEL_ERROR or FT_DEBUG_LEVEL_TRACE are defined     */
-  /*                                                              */
-#include <stdarg.h>
-
-
-#endif /* __FTSTDLIB_H__ */
-
-
-/* END */
--- a/include/interp.h
+++ b/include/interp.h
@@ -31,7 +31,7 @@
 
 	/* STRUCTALIGN is the unit to which the compiler aligns structs. */
 	/* It really should be defined somewhere else */
-	STRUCTALIGN = sizeof(int)	/* must be >=2 because of Strings */
+	STRUCTALIGN = sizeof(intptr)	/* must be >=2 because of Strings */
 };
 
 enum
@@ -90,10 +90,10 @@
 {
 	uchar	stack[1];
 	struct {
-		Type*	TR;
-		uchar*	SP;
-		uchar*	TS;
-		uchar*	EX;
+		Type*	TR;	/* type register */
+		uchar*	SP;	/* nil or prior stack extent's pointer to next available space */
+		uchar*	TS;	/* nil or prior stack extent's pointer to the last space */
+		uchar*	EX;	/* nil or pointer to prior Stackext */
 		union {
 			uchar	fu[1];
 			Frame	fr[1];
@@ -193,8 +193,8 @@
 
 struct Alt
 {
-	int	nsend;
-	int	nrecv;
+	intptr	nsend;
+	intptr	nrecv;
 	Altc	ac[1];
 };
 
@@ -287,10 +287,13 @@
 	char*	name;	/* Implements type */
 	char*	path;		/* File module loaded from */
 	Module*	link;		/* Links */
-	Link*	ext;		/* External dynamic links */
-	Import**	ldt;	/* Internal linkage descriptor tables */
+	Link*	ext;		/* External dynamic links:
+				   list of functions exported by this module */
+	Import**	ldt;	/* Internal linkage descriptor tables:
+				   lists of functions imported by the current
+				   module, grouped by the module they come from */
 	Handler*	htab;	/* Exception handler table */
-	ulong*	pctab;	/* dis pc to code pc when compiled */
+	uintptr*	pctab;	/* dis pc to code pc when compiled */
 	void*	dlm;		/* dynamic C module */
 };
 
@@ -298,6 +301,12 @@
 {
 	Linkpc	u;		/* PC of Dynamic link */
 	Type*	frame;		/* Frame type for this entry */
+	char	*name;		/* name taken from the Link structure,
+					kept for debugging info.
+					TODO: could this end up as a
+					dangling pointer if
+					destroylinks() takes down the
+					underlying Link structure? */
 };
 
 struct Modlink
@@ -337,15 +346,15 @@
 struct Except
 {
 	char*	s;
-	ulong	pc;
+	uintptr	pc;
 };
 
 struct Handler
 {
-	ulong	pc1;
-	ulong	pc2;
-	ulong	eoff;
-	ulong	ne;
+	uintptr	pc1;
+	uintptr	pc2;
+	uintptr	eoff;
+	uintptr	ne;
 	Type*	t;
 	Except*	etab;
 };
@@ -481,7 +490,7 @@
 extern	void		noptrs(Type*, void*);
 extern	int		nprog(void);
 extern	void		opinit(void);
-extern	Module*		parsemod(char*, uchar*, ulong, Dir*);
+extern	Module*		parsemod(char*, uchar*, u32, Dir*);
 extern	Module*		parsedmod(char*, int, ulong, Qid);
 extern	void		prefabmodinit(void);
 extern	Prog*		progn(int);
@@ -498,7 +507,7 @@
 extern	void		rungc(Prog*);
 extern	void		runtime(Module*, Link*, char*, int, void(*)(void*), Type*);
 extern	void		safemem(void*, Type*, void (*)(void*));
-extern	int		segflush(void *, ulong);
+extern	s32		segflush(void *, u32);
 extern	void		isend(void);
 extern	void	setdbreg(uchar*);
 extern	uchar*	setdbloc(uchar*);
@@ -535,8 +544,8 @@
 extern	int		brpatch(Inst*, Module*);
 extern	void		readimagemodinit(void);
 
-#define	O(t,e)		((long)(&((t*)0)->e))
-#define	OA(t,e)		((long)(((t*)0)->e))
+#define	O(t,e)		((intptr)(&((t*)0)->e))
+#define	OA(t,e)		((intptr)(((t*)0)->e))
 
 #pragma	varargck	type	"D"	Inst*
 #pragma varargck argpos errorf 1
--- a/include/kern.h
+++ b/include/kern.h
@@ -1,7 +1,7 @@
 typedef unsigned long size_t;
 
 #define	nelem(x)	(sizeof(x)/sizeof((x)[0]))
-#define	offsetof(s, m)	(ulong)(&(((s*)0)->m))
+#define	offsetof(s, m)	(u32)(&(((s*)0)->m))
 #define	assert(x)	if(x){}else _assert("x")
 
 /*
@@ -185,6 +185,12 @@
 #pragma	varargck	type	"lx"	long
 #pragma	varargck	type	"ld"	ulong
 #pragma	varargck	type	"lx"	ulong
+#pragma varargck	type	"zd"	intptr
+#pragma varargck	type	"zo"	intptr
+#pragma varargck	type	"zx"	intptr
+#pragma varargck	type	"zd"	uintptr
+#pragma varargck	type	"zo"	uintptr
+#pragma varargck	type	"zx"	uintptr
 #pragma	varargck	type	"d"	int
 #pragma	varargck	type	"x"	int
 #pragma	varargck	type	"c"	int
@@ -311,7 +317,7 @@
 extern	double	cputime(void);
 extern	long	times(long*);
 extern	long	tm2sec(Tm*);
-extern	vlong	nsec(void);
+extern	s64	nsec(void);
 
 /*
  * one-of-a-kind
@@ -467,8 +473,8 @@
 typedef
 struct Qid
 {
-	uvlong	path;
-	ulong	vers;
+	u64	path;
+	u32	vers;
 	uchar	type;
 } Qid;
 
@@ -476,14 +482,14 @@
 struct Dir {
 
 	/* system-modified data */
-	ushort	type;	/* server type */
-	uint	dev;	/* server subtype */
+	u16	type;	/* server type */
+	u32	dev;	/* server subtype */
 	/* file data */
 	Qid	qid;	/* unique id from server */
-	ulong	mode;	/* permissions */
-	ulong	atime;	/* last read time */
-	ulong	mtime;	/* last write time */
-	vlong	length;	/* file length */
+	u32	mode;	/* permissions */
+	u32	atime;	/* last read time */
+	u32	mtime;	/* last write time */
+	s64	length;	/* file length */
 	char	*name;	/* last element of path */
 	char	*uid;	/* owner name */
 	char	*gid;	/* group name */
@@ -494,9 +500,9 @@
 extern	Dir*	dirfstat(int);
 extern	int	dirwstat(char*, Dir*);
 extern	int	dirfwstat(int, Dir*);
-extern	long	dirread(int, Dir**);
+extern	s32	dirread(int, Dir**);
 extern	void	nulldir(Dir*);
-extern	long	dirreadall(int, Dir**);
+extern	s32	dirreadall(int, Dir**);
 
 #define CHDIR		0x80000000	/* mode bit for directories */
 #define CHAPPEND	0x40000000	/* mode bit for append only files */
@@ -519,7 +525,7 @@
 struct IOchunk
 {
 	void	*addr;
-	ulong	len;
+	u32	len;
 } IOchunk;
 
 extern	void	_exits(char*);
@@ -561,7 +567,7 @@
 extern	long	segattach(int, char*, void*, ulong);
 extern	int	segbrk(void*, void*);
 extern	int	segdetach(void*);
-extern	int	segflush(void*, ulong);
+extern	s32	segflush(void*, u32);
 extern	int	segfree(void*, ulong);
 extern	int	sleep(long);
 extern	int	stat(char*, uchar*, int);
@@ -572,8 +578,8 @@
 extern	int	wstat(char*, char*);
 extern	ulong	rendezvous(ulong, ulong);
 
-extern	int	getpid(void);
-extern	int	getppid(void);
+extern	intptr	getpid(void);
+extern	intptr	getppid(void);
 extern	void	rerrstr(char*, uint);
 extern	char*	sysname(void);
 extern	void	werrstr(char*, ...);
--- a/include/libsec.h
+++ b/include/libsec.h
@@ -18,12 +18,12 @@
 typedef struct AESstate AESstate;
 struct AESstate
 {
-	ulong	setup;
+	u32	setup;
 	int	rounds;
 	int	keybytes;
 	uchar	key[AESmaxkey];		/* unexpanded key */
-	u32int	ekey[4*(AESmaxrounds + 1)];	/* encryption key */
-	u32int	dkey[4*(AESmaxrounds + 1)];	/* decryption key */
+	u32	ekey[4*(AESmaxrounds + 1)];	/* encryption key */
+	u32	dkey[4*(AESmaxrounds + 1)];	/* decryption key */
 	uchar	ivec[AESbsize];	/* initialization vector */
 };
 
@@ -45,13 +45,13 @@
 typedef struct BFstate BFstate;
 struct BFstate
 {
-	ulong	setup;
+	u32	setup;
 
 	uchar	key[56];
 	uchar	ivec[8];
 
-	u32int 	pbox[BFrounds+2];
-	u32int	sbox[1024];
+	u32 	pbox[BFrounds+2];
+	u32	sbox[1024];
 };
 
 void	setupBFstate(BFstate *s, uchar key[], int keybytes, uchar *ivec);
@@ -73,15 +73,15 @@
 typedef struct DESstate DESstate;
 struct DESstate
 {
-	ulong	setup;
+	u32	setup;
 	uchar	key[8];		/* unexpanded key */
-	ulong	expanded[32];	/* expanded key */
+	u32	expanded[32];	/* expanded key */
 	uchar	ivec[8];	/* initialization vector */
 };
 
 void	setupDESstate(DESstate *s, uchar key[8], uchar *ivec);
-void	des_key_setup(uchar[8], ulong[32]);
-void	block_cipher(ulong*, uchar*, int);
+void	des_key_setup(uchar[8], u32[32]);
+void	block_cipher(u32*, uchar*, int);
 void	desCBCencrypt(uchar*, int, DESstate*);
 void	desCBCdecrypt(uchar*, int, DESstate*);
 void	desECBencrypt(uchar*, int, DESstate*);
@@ -90,7 +90,7 @@
 /* for backward compatibility with 7 byte DES key format */
 void	des56to64(uchar *k56, uchar *k64);
 void	des64to56(uchar *k64, uchar *k56);
-void	key_setup(uchar[7], ulong[32]);
+void	key_setup(uchar[7], u32[32]);
 
 /* triple des encrypt/decrypt orderings */
 enum {
@@ -105,14 +105,14 @@
 typedef struct DES3state DES3state;
 struct DES3state
 {
-	ulong	setup;
+	u32	setup;
 	uchar	key[3][8];		/* unexpanded key */
-	ulong	expanded[3][32];	/* expanded key */
+	u32	expanded[3][32];	/* expanded key */
 	uchar	ivec[8];		/* initialization vector */
 };
 
 void	setupDES3state(DES3state *s, uchar key[3][8], uchar *ivec);
-void	triple_block_cipher(ulong keys[3][32], uchar*, int);
+void	triple_block_cipher(u32 keys[3][32], uchar*, int);
 void	des3CBCencrypt(uchar*, int, DES3state*);
 void	des3CBCdecrypt(uchar*, int, DES3state*);
 void	des3ECBencrypt(uchar*, int, DES3state*);
@@ -158,13 +158,13 @@
 typedef struct DigestState DigestState;
 struct DigestState
 {
-	u64int len;
-	u32int state[5];
+	u64 len;
+	u32 state[5];
 	uchar buf[Digestbsize];
 	int blen;
-	u64int nb128[2];
-	u64int h64[8];
-	u32int h32[8];
+	u64 nb128[2];
+	u64 h64[8];
+	u32 h32[8];
 	char malloced;
 	char seeded;
 };
@@ -175,15 +175,15 @@
 typedef struct DigestState SHA256state;
 typedef struct DigestState SHA512state;
 
-DigestState* md4(uchar*, ulong, uchar*, DigestState*);
-DigestState* md5(uchar*, ulong, uchar*, DigestState*);
-DigestState* sha1(uchar*, ulong, uchar*, DigestState*);
-DigestState* sha224(uchar*, ulong, uchar*, DigestState*);
-DigestState* sha256(uchar*, ulong, uchar*, DigestState*);
-DigestState* sha384(uchar*, ulong, uchar*, DigestState*);
-DigestState* sha512(uchar*, ulong, uchar*, DigestState*);
-DigestState* hmac_md5(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
-DigestState* hmac_sha1(uchar*, ulong, uchar*, ulong, uchar*, DigestState*);
+DigestState* md4(uchar*, u32, uchar*, DigestState*);
+DigestState* md5(uchar*, u32, uchar*, DigestState*);
+DigestState* sha1(uchar*, u32, uchar*, DigestState*);
+DigestState* sha224(uchar*, u32, uchar*, DigestState*);
+DigestState* sha256(uchar*, u32, uchar*, DigestState*);
+DigestState* sha384(uchar*, u32, uchar*, DigestState*);
+DigestState* sha512(uchar*, u32, uchar*, DigestState*);
+DigestState* hmac_md5(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState* hmac_sha1(uchar*, u32, uchar*, u32, uchar*, DigestState*);
 char* md5pickle(MD5state*);
 MD5state* md5unpickle(char*);
 char* sha1pickle(SHA1state*);
@@ -196,8 +196,8 @@
 void	_genrandomqlock(void);
 void	_genrandomqunlock(void);
 void	prng(uchar *buf, int nbytes);
-ulong	fastrand(void);
-ulong	nfastrand(ulong);
+u32	fastrand(void);
+u32	nfastrand(u32);
 
 /*/////////////////////////////////////////////////////// */
 /* primes */
@@ -274,7 +274,7 @@
 void		asn1dump(uchar *der, int len);
 uchar*		decodePEM(char *s, char *type, int *len, char **new_s);
 PEMChain*	decodepemchain(char *s, char *type);
-uchar*		X509gen(RSApriv *priv, char *subj, ulong valid[2], int *certlen);
+uchar*		X509gen(RSApriv *priv, char *subj, u32 valid[2], int *certlen);
 uchar*		X509req(RSApriv *priv, char *subj, int *certlen);
 char*		X509verify(uchar *cert, int ncert, RSApub *pk);
 void		X509dump(uchar *cert, int ncert);
--- a/include/logfs.h
+++ b/include/logfs.h
@@ -126,21 +126,21 @@
 long logfsbootgetsize(LogfsBoot*);
 void logfsboottrace(LogfsBoot*, int);
 
-char *logfsserverattach(LogfsServer*, u32int, char*, Qid*);
-char *logfsserverclunk(LogfsServer*, u32int);
-char *logfsservercreate(LogfsServer*, u32int, char*, u32int, uchar, Qid*);
+char *logfsserverattach(LogfsServer*, u32, char*, Qid*);
+char *logfsserverclunk(LogfsServer*, u32);
+char *logfsservercreate(LogfsServer*, u32, char*, u32, uchar, Qid*);
 char *logfsserverflush(LogfsServer*);
 char *logfsservernew(LogfsBoot*, LogfsLowLevel*, LogfsIdentityStore*, ulong, int, LogfsServer**);
-char *logfsserveropen(LogfsServer*, u32int, uchar mode, Qid*);
-char *logfsserverread(LogfsServer*, u32int, u32int, u32int, uchar*, u32int, u32int*);
-char *logfsserverremove(LogfsServer*, u32int);
-char *logfsserverstat(LogfsServer*, u32int, uchar*, u32int, ushort*);
-char *logfsserverwalk(LogfsServer*, u32int, u32int, ushort, char **, ushort*, Qid*);
-char *logfsserverwrite(LogfsServer*, u32int, u32int, u32int, uchar*, u32int*);
-char *logfsserverwstat(LogfsServer*, u32int, uchar*, ushort nstat);
+char *logfsserveropen(LogfsServer*, u32, uchar mode, Qid*);
+char *logfsserverread(LogfsServer*, u32, u32, u32, uchar*, u32, u32*);
+char *logfsserverremove(LogfsServer*, u32);
+char *logfsserverstat(LogfsServer*, u32, uchar*, u32, ushort*);
+char *logfsserverwalk(LogfsServer*, u32, u32, ushort, char **, ushort*, Qid*);
+char *logfsserverwrite(LogfsServer*, u32, u32, u32, uchar*, u32*);
+char *logfsserverwstat(LogfsServer*, u32, uchar*, ushort nstat);
 void logfsserverfree(LogfsServer **);
 char *logfsserverlogsweep(LogfsServer*, int, int*);
-char *logfsserverreadpathextent(LogfsServer*, u32int, int, u32int*, u32int*, long*, int*, int*);
+char *logfsserverreadpathextent(LogfsServer*, u32, int, u32*, u32*, long*, int*, int*);
 
 char *logfsservertestcmd(LogfsServer*, int, char **);
 void logfsservertrace(LogfsServer*, int);
--- a/include/memdraw.h
+++ b/include/memdraw.h
@@ -21,9 +21,9 @@
 {
 	uintptr	*base;	/* allocated data pointer */
 	uchar	*bdata;	/* pointer to first byte of actual data; word-aligned */
-	s32int		ref;		/* number of Memimages using this data */
+	s32		ref;		/* number of Memimages using this data */
 	void*	imref;
-	s32int		allocd;	/* is this malloc'd? */
+	s32		allocd;	/* is this malloc'd? */
 };
 
 enum {
@@ -39,20 +39,20 @@
 {
 	Rectangle	r;		/* rectangle in data area, local coords */
 	Rectangle	clipr;		/* clipping region */
-	s32int		depth;	/* number of bits of storage per pixel */
-	s32int		nchan;	/* number of channels */
-	u32int	chan;	/* channel descriptions */
+	s32		depth;	/* number of bits of storage per pixel */
+	s32		nchan;	/* number of channels */
+	u32	chan;	/* channel descriptions */
 	Memcmap	*cmap;
 
 	Memdata	*data;	/* pointer to data; shared by windows in this image */
-	s32int		zero;		/* data->bdata+zero==&byte containing (0,0) */
-	u32int	width;	/* width in words of a single scan line */
+	s32		zero;		/* data->bdata+zero==&byte containing (0,0) */
+	u32	width;	/* width in words of a single scan line */
 	Memlayer	*layer;	/* nil if not a layer*/
-	u32int	flags;
+	u32	flags;
 
-	s32int		shift[NChan];
-	s32int		mask[NChan];
-	s32int		nbits[NChan];
+	s32		shift[NChan];
+	s32		mask[NChan];
+	s32		nbits[NChan];
 };
 
 struct Memcmap
@@ -101,14 +101,14 @@
 	Rectangle sr;
 	Memimage *mask;
 	Rectangle mr;
-	s32int op;
+	s32 op;
 
-	u32int state;
-	u32int mval;	/* if Simplemask, the mask pixel in mask format */
-	u32int mrgba;	/* mval in rgba */
-	u32int sval;	/* if Simplesrc, the source pixel in src format */
-	u32int srgba;	/* sval in rgba */
-	u32int sdval;	/* sval in dst format */
+	u32 state;
+	u32 mval;	/* if Simplemask, the mask pixel in mask format */
+	u32 mrgba;	/* mval in rgba */
+	u32 sval;	/* if Simplesrc, the source pixel in src format */
+	u32 srgba;	/* sval in rgba */
+	u32 sdval;	/* sval in dst format */
 };
 
 /*
@@ -115,8 +115,8 @@
  * Memimage management
  */
 
-extern Memimage*	allocmemimage(Rectangle, u32int);
-extern Memimage*	allocmemimaged(Rectangle, u32int, Memdata*);
+extern Memimage*	allocmemimage(Rectangle, u32);
+extern Memimage*	allocmemimaged(Rectangle, u32, Memdata*);
 extern Memimage*	readmemimage(int);
 extern Memimage*	creadmemimage(int);
 extern int	writememimage(int, Memimage*);
@@ -124,11 +124,11 @@
 extern int		loadmemimage(Memimage*, Rectangle, uchar*, int);
 extern int		cloadmemimage(Memimage*, Rectangle, uchar*, int);
 extern int		unloadmemimage(Memimage*, Rectangle, uchar*, int);
-extern u32int*	u32addr(Memimage*, Point);
+extern u32*	u32addr(Memimage*, Point);
 extern uchar*	byteaddr(Memimage*, Point);
 extern int		drawclip(Memimage*, Rectangle*, Memimage*, Point*, Memimage*, Point*, Rectangle*, Rectangle*);
-extern void	memfillcolor(Memimage*, u32int);
-extern int		memsetchan(Memimage*, u32int);
+extern void	memfillcolor(Memimage*, u32);
+extern int		memsetchan(Memimage*, u32);
 
 /*
  * Graphics
--- a/include/memlayer.h
+++ b/include/memlayer.h
@@ -18,7 +18,7 @@
 	Memscreen	*screen;	/* screen this layer belongs to */
 	Memimage	*front;	/* window in front of this one */
 	Memimage	*rear;	/* window behind this one*/
-	s32int		clear;	/* layer is fully visible */
+	s32		clear;	/* layer is fully visible */
 	Memimage	*save;	/* save area for obscured parts */
 	Refreshfn	refreshfn;		/* function to call to refresh obscured parts if save==nil */
 	void		*refreshptr;	/* argument to refreshfn */
@@ -34,7 +34,7 @@
  * All these functions accept screen coordinates, not local ones.
  */
 void			_memlayerop(void (*fn)(Memimage*, Rectangle, Rectangle, void*, int), Memimage*, Rectangle, Rectangle, void*);
-Memimage*	memlalloc(Memscreen*, Rectangle, Refreshfn, void*, u32int);
+Memimage*	memlalloc(Memscreen*, Rectangle, Refreshfn, void*, u32);
 void			memldelete(Memimage*);
 void			memlfree(Memimage*);
 void			memltofront(Memimage*);
--- a/include/pool.h
+++ b/include/pool.h
@@ -16,8 +16,8 @@
 
 struct Bhdr
 {
-	ulong	magic;
-	ulong	size;
+	u32	magic;
+	u32	size;
 	union {
 		uchar data[1];
 		struct {
@@ -52,7 +52,7 @@
 
 #define B2LIMIT(b)	((Bhdr*)((uchar*)b + b->csize))
 
-#define BHDRSIZE	((int)(((Bhdr*)0)->u.data)+sizeof(Btail))
+#define BHDRSIZE	((u32)(((Bhdr*)0)->u.data)+sizeof(Btail))
 
 extern	void	(*poolfault)(void *, char *, ulong);
 extern	void	poolinit(void);
@@ -64,7 +64,7 @@
 extern	ulong	poolmsize(Pool*, void*);
 extern	void	poolmutable(void*);
 extern	char*	poolname(Pool*);
-extern	int	poolread(char*, int, ulong);
+extern	int	poolread(char*, int, uintptr);
 extern	void*	poolrealloc(Pool*, void*, ulong);
 extern	int	poolsetsize(char*, int);
 extern	void	poolsetcompact(Pool*, void (*)(void*, void*));
@@ -71,3 +71,4 @@
 extern	char*	poolaudit(char*(*)(int, Bhdr *));
 
 extern	void	(*poolmonitor)(int, ulong, Bhdr*, ulong);
+extern	void	showframe(void *f, void *t);
--- a/include/styx.h
+++ b/include/styx.h
@@ -9,11 +9,11 @@
 struct	Fcall
 {
 	uchar	type;
-	u32int	fid;
+	ulong	fid;
 	ushort	tag;
 	/* union { */
 		/* struct { */
-			u32int	msize;		/* Tversion, Rversion */
+			ulong	msize;		/* Tversion, Rversion */
 			char	*version;	/* Tversion, Rversion */
 		/* }; */
 		/* struct { */
@@ -24,23 +24,23 @@
 		/* }; */
 		/* struct { */
 			Qid	qid;		/* Rattach, Ropen, Rcreate */
-			u32int	iounit;		/* Ropen, Rcreate */
+			ulong	iounit;		/* Ropen, Rcreate */
 		/* }; */
 		/* struct { */
 			Qid	aqid;		/* Rauth */
 		/* }; */
 		/* struct { */
-			u32int	afid;		/* Tauth, Tattach */
+			ulong	afid;		/* Tauth, Tattach */
 			char	*uname;		/* Tauth, Tattach */
 			char	*aname;		/* Tauth, Tattach */
 		/* }; */
 		/* struct { */
-			u32int	perm;		/* Tcreate */ 
+			ulong	perm;		/* Tcreate */ 
 			char	*name;		/* Tcreate */
 			uchar	mode;		/* Tcreate, Topen */
 		/* }; */
 		/* struct { */
-			u32int	newfid;		/* Twalk */
+			ulong	newfid;		/* Twalk */
 			ushort	nwname;		/* Twalk */
 			char	*wname[MAXWELEM];	/* Twalk */
 		/* }; */
@@ -50,7 +50,7 @@
 		/* }; */
 		/* struct { */
 			vlong	offset;		/* Tread, Twrite */
-			u32int	count;		/* Tread, Twrite, Rread */
+			ulong	count;		/* Tread, Twrite, Rread */
 			char	*data;		/* Twrite, Rread */
 		/* }; */
 		/* struct { */
@@ -64,8 +64,8 @@
 #define	GBIT8(p)	((p)[0])
 #define	GBIT16(p)	((p)[0]|((p)[1]<<8))
 #define	GBIT32(p)	((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24))
-#define	GBIT64(p)	((vlong)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\
-				((vlong)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32))
+#define	GBIT64(p)	((s64)((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((p)[3]<<24)) |\
+				((s64)((p)[4]|((p)[5]<<8)|((p)[6]<<16)|((p)[7]<<24)) << 32))
 
 #define	PBIT8(p,v)	(p)[0]=(v)
 #define	PBIT16(p,v)	(p)[0]=(v);(p)[1]=(v)>>8
@@ -83,8 +83,8 @@
 /* The count, however, excludes itself; total size is BIT16SZ+count */
 #define STATFIXLEN	(BIT16SZ+QIDSZ+5*BIT16SZ+4*BIT32SZ+1*BIT64SZ)	/* amount of fixed length data in a stat buffer */
 
-#define	NOTAG		(ushort)~0U	/* Dummy tag */
-#define	NOFID		(u32int)~0U	/* Dummy fid */
+#define	NOTAG		(u16)~0U	/* Dummy tag */
+#define	NOFID		(u32)~0U	/* Dummy fid */
 #define	IOHDRSZ		24	/* ample room for Twrite/Rread header (iounit) */
 
 enum
--- a/lib9/fcallfmt.c
+++ b/lib9/fcallfmt.c
@@ -18,7 +18,7 @@
 static void fdirconv(char*, char*, Dir*);
 static char *qidtype(char*, uchar);
 
-#define	QIDFMT	"(%.16llux %lud %s)"
+#define	QIDFMT	"(%.16llux %ud %s)"
 
 int
 fcallfmt(Fmt *fmt)
--- a/libdraw/alloc.c
+++ b/libdraw/alloc.c
@@ -5,13 +5,13 @@
 #define DP if(1){}else print
 
 Image*
-allocimage(Display *d, Rectangle r, u32int chan, int repl, u32int val)
+allocimage(Display *d, Rectangle r, u32 chan, int repl, u32 val)
 {
 	return _allocimage(nil, d, r, chan, repl, val, 0, 0);
 }
 
 Image*
-_allocimage(Image *ai, Display *d, Rectangle r, u32int chan, int repl, u32int val, int screenid, int refresh)
+_allocimage(Image *ai, Display *d, Rectangle r, u32 chan, int repl, u32 val, int screenid, int refresh)
 {
 	uchar *a;
 	char *err;
@@ -104,7 +104,7 @@
 	char *err, buf[12*12+1];
 	Image *i;
 	int id, n;
-	u32int chan;
+	u32 chan;
 
 	err = 0;
 	i = 0;
--- a/libdraw/allocimagemix.c
+++ b/libdraw/allocimagemix.c
@@ -2,7 +2,7 @@
 #include "draw.h"
 
 Image*
-allocimagemix(Display *d, ulong color1, ulong color3)
+allocimagemix(Display *d, u32int color1, u32int color3)
 {
 	Image *t, *b;
 	static Image *qmask;
--- a/libdraw/arith.c
+++ b/libdraw/arith.c
@@ -2,7 +2,7 @@
 #include "draw.h"
 
 Point
-Pt(int x, int y)
+Pt(s32int x, s32int y)
 {
 	Point p;
 
@@ -12,7 +12,7 @@
 }
 
 Rectangle
-Rect(int x, int y, int bx, int by)
+Rect(s32int x, s32int y, s32int bx, s32int by)
 {
 	Rectangle r;
 
@@ -158,7 +158,7 @@
 		r1->max.y = r2.max.y;
 }
 
-ulong
+u32int
 drawld2chan[] = {
 	GREY1,
 	GREY2,
@@ -166,8 +166,8 @@
 	CMAP8,
 };
 
-ulong
-setalpha(ulong color, uchar alpha)
+u32int
+setalpha(u32int color, uchar alpha)
 {
 	int red, green, blue;
 
--- a/libdraw/chan.c
+++ b/libdraw/chan.c
@@ -3,9 +3,9 @@
 
 static char channames[] = "rgbkamx";
 char*
-chantostr(char *buf, ulong cc)
+chantostr(char *buf, u32int cc)
 {
-	ulong c, rc;
+	u32int c, rc;
 	char *p;
 
 	if(chantodepth(cc) == 0)
@@ -35,11 +35,11 @@
 	return c==' ' || c== '\t' || c=='\r' || c=='\n';
 }
 
-ulong
+u32int
 strtochan(char *s)
 {
 	char *p, *q;
-	ulong c;
+	u32int c;
 	int t, n;
 
 	c = 0;
@@ -61,7 +61,7 @@
 }
 
 int
-chantodepth(ulong c)
+chantodepth(u32int c)
 {
 	int n;
 
--- a/libdraw/cloadimage.c
+++ b/libdraw/cloadimage.c
@@ -34,11 +34,11 @@
 		if(a == nil)
 			return -1;
 		a[0] = 'Y';
-		BPLONG(a+1, i->id);
-		BPLONG(a+5, r.min.x);
-		BPLONG(a+9, miny);
-		BPLONG(a+13, r.max.x);
-		BPLONG(a+17, maxy);
+		BP32INT(a+1, i->id);
+		BP32INT(a+5, r.min.x);
+		BP32INT(a+9, miny);
+		BP32INT(a+13, r.max.x);
+		BP32INT(a+17, maxy);
 		memmove(a+21, data, nb);
 		miny = maxy;
 		data += nb;
--- a/libdraw/creadimage.c
+++ b/libdraw/creadimage.c
@@ -10,7 +10,7 @@
 	int m, nb, miny, maxy, new, ldepth, ncblock;
 	uchar *buf, *a;
 	Image *i;
-	ulong chan;
+	u32int chan;
 	int font;
 
 	font = dolock&2;
@@ -99,11 +99,11 @@
 		if(a == nil)
 			goto Erroutlock;
 		a[0] = 'Y';
-		BPLONG(a+1, i->id);
-		BPLONG(a+5, r.min.x);
-		BPLONG(a+9, miny);
-		BPLONG(a+13, r.max.x);
-		BPLONG(a+17, maxy);
+		BP32INT(a+1, i->id);
+		BP32INT(a+5, r.min.x);
+		BP32INT(a+9, miny);
+		BP32INT(a+13, r.max.x);
+		BP32INT(a+17, maxy);
 		if(!font&&!new)	/* old image: flip the data bits */
 			_twiddlecompressed(buf, nb);
 		memmove(a+21, buf, nb);
--- a/libdraw/draw.c
+++ b/libdraw/draw.c
@@ -30,17 +30,17 @@
 	if(mask == nil)
 		mask = dst->display->opaque;
 	a[0] = 'd';
-	BPLONG(a+1, dst->id);
-	BPLONG(a+5, src->id);
-	BPLONG(a+9, mask->id);
-	BPLONG(a+13, r->min.x);
-	BPLONG(a+17, r->min.y);
-	BPLONG(a+21, r->max.x);
-	BPLONG(a+25, r->max.y);
-	BPLONG(a+29, p0->x);
-	BPLONG(a+33, p0->y);
-	BPLONG(a+37, p1->x);
-	BPLONG(a+41, p1->y);
+	BP32INT(a+1, dst->id);
+	BP32INT(a+5, src->id);
+	BP32INT(a+9, mask->id);
+	BP32INT(a+13, r->min.x);
+	BP32INT(a+17, r->min.y);
+	BP32INT(a+21, r->max.x);
+	BP32INT(a+25, r->max.y);
+	BP32INT(a+29, p0->x);
+	BP32INT(a+33, p0->y);
+	BP32INT(a+37, p1->x);
+	BP32INT(a+41, p1->y);
 }
 
 void
--- a/libdraw/ellipse.c
+++ b/libdraw/ellipse.c
@@ -15,17 +15,17 @@
 		return;
 	}
 	a[0] = cmd;
-	BPLONG(a+1, dst->id);
-	BPLONG(a+5, src->id);
-	BPLONG(a+9, c->x);
-	BPLONG(a+13, c->y);
-	BPLONG(a+17, xr);
-	BPLONG(a+21, yr);
-	BPLONG(a+25, thick);
-	BPLONG(a+29, sp->x);
-	BPLONG(a+33, sp->y);
-	BPLONG(a+37, alpha);
-	BPLONG(a+41, phi);
+	BP32INT(a+1, dst->id);
+	BP32INT(a+5, src->id);
+	BP32INT(a+9, c->x);
+	BP32INT(a+13, c->y);
+	BP32INT(a+17, xr);
+	BP32INT(a+21, yr);
+	BP32INT(a+25, thick);
+	BP32INT(a+29, sp->x);
+	BP32INT(a+33, sp->y);
+	BP32INT(a+37, alpha);
+	BP32INT(a+41, phi);
 }
 
 void
--- a/libdraw/line.c
+++ b/libdraw/line.c
@@ -20,15 +20,15 @@
 		return;
 	}
 	a[0] = 'L';
-	BPLONG(a+1, dst->id);
-	BPLONG(a+5, p0.x);
-	BPLONG(a+9, p0.y);
-	BPLONG(a+13, p1.x);
-	BPLONG(a+17, p1.y);
-	BPLONG(a+21, end0);
-	BPLONG(a+25, end1);
-	BPLONG(a+29, radius);
-	BPLONG(a+33, src->id);
-	BPLONG(a+37, sp.x);
-	BPLONG(a+41, sp.y);
+	BP32INT(a+1, dst->id);
+	BP32INT(a+5, p0.x);
+	BP32INT(a+9, p0.y);
+	BP32INT(a+13, p1.x);
+	BP32INT(a+17, p1.y);
+	BP32INT(a+21, end0);
+	BP32INT(a+25, end1);
+	BP32INT(a+29, radius);
+	BP32INT(a+33, src->id);
+	BP32INT(a+37, sp.x);
+	BP32INT(a+41, sp.y);
 }
--- a/libdraw/loadimage.c
+++ b/libdraw/loadimage.c
@@ -5,7 +5,7 @@
 int
 loadimage(Image *i, Rectangle r, uchar *data, int ndata)
 {
-	long dy;
+	s32int dy;
 	int n, bpl, roff, dstroff, lskip, llen, y;
 	uchar *a;
 	int chunk;
@@ -49,11 +49,11 @@
 			return -1;
 		}
 		a[0] = 'y';
-		BPLONG(a+1, i->id);
-		BPLONG(a+5, dstr.min.x);
-		BPLONG(a+9, dstr.min.y);
-		BPLONG(a+13, dstr.max.x);
-		BPLONG(a+17, dstr.min.y+dy);
+		BP32INT(a+1, i->id);
+		BP32INT(a+5, dstr.min.x);
+		BP32INT(a+9, dstr.min.y);
+		BP32INT(a+13, dstr.max.x);
+		BP32INT(a+17, dstr.min.y+dy);
 		a += 21;
 		for (y = 0; y < dy; y++) {
 			memmove(a, data, llen);
--- a/libdraw/replclipr.c
+++ b/libdraw/replclipr.c
@@ -12,13 +12,13 @@
 		return;
 	}
 	b[0] = 'c';
-	BPLONG(b+1, i->id);
+	BP32INT(b+1, i->id);
 	repl = repl!=0;
 	b[5] = repl;
-	BPLONG(b+6, clipr.min.x);
-	BPLONG(b+10, clipr.min.y);
-	BPLONG(b+14, clipr.max.x);
-	BPLONG(b+18, clipr.max.y);
+	BP32INT(b+6, clipr.min.x);
+	BP32INT(b+10, clipr.min.y);
+	BP32INT(b+14, clipr.max.x);
+	BP32INT(b+18, clipr.max.y);
 	i->repl = repl;
 	i->clipr = clipr;
 }
--- a/libdraw/unloadimage.c
+++ b/libdraw/unloadimage.c
@@ -37,11 +37,11 @@
 		if(dy > Dy(r))
 			dy = Dy(r);
 		a[0] = 'r';
-		BPLONG(a+1, i->id);
-		BPLONG(a+5, r.min.x);
-		BPLONG(a+9, r.min.y);
-		BPLONG(a+13, r.max.x);
-		BPLONG(a+17, r.min.y+dy);
+		BP32INT(a+1, i->id);
+		BP32INT(a+5, r.min.x);
+		BP32INT(a+9, r.min.y);
+		BP32INT(a+13, r.max.x);
+		BP32INT(a+17, r.min.y+dy);
 		if(flushimage(d, 0) < 0)
 			return -1;
 		if(d->local == 0)
--- a/libdraw/window.c
+++ b/libdraw/window.c
@@ -49,7 +49,7 @@
 }
 
 Screen*
-publicscreen(Display *d, int id, u32int chan)
+publicscreen(Display *d, int id, u32 chan)
 {
 	uchar *a;
 	Screen *s;
@@ -101,13 +101,13 @@
 }
 
 Image*
-allocwindow(Screen *s, Rectangle r, int ref, u32int val)
+allocwindow(Screen *s, Rectangle r, int ref, u32 val)
 {
 	return _allocwindow(nil, s, r, ref, val);
 }
 
 Image*
-_allocwindow(Image *i, Screen *s, Rectangle r, int ref, u32int val)
+_allocwindow(Image *i, Screen *s, Rectangle r, int ref, u32 val)
 {
 	Display *d;
 
--- a/libinterp/crypt.c
+++ b/libinterp/crypt.c
@@ -11,6 +11,7 @@
 #include "ipint.h"
 
 #define	MPX(x)	checkIPint((void*)(x))
+#define DP if(1){}else print
 
 static Type*	TDigestState;
 static Type*	TAESstate;
@@ -190,7 +191,7 @@
 }
 
 static Crypt_DigestState*
-crypt_digest_x(Array *buf, int n, Array *digest, int dlen, Crypt_DigestState *state, DigestState* (*fn)(uchar*, ulong, uchar*, DigestState*))
+crypt_digest_x(Array *buf, u32 n, Array *digest, int dlen, Crypt_DigestState *state, DigestState* (*fn)(uchar*, u32, uchar*, DigestState*))
 {
 	Heap *h;
 	XDigestState *ds;
@@ -324,7 +325,7 @@
 }
 
 static Crypt_DigestState*
-crypt_hmac_x(Array *data, int n, Array *key, Array *digest, int dlen, Crypt_DigestState *state, DigestState* (*fn)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*))
+crypt_hmac_x(Array *data, u32 n, Array *key, Array *digest, int dlen, Crypt_DigestState *state, DigestState* (*fn)(uchar*, u32, uchar*, u32, uchar*, DigestState*))
 {
 	Heap *h;
 	XDigestState *ds;
@@ -403,14 +404,26 @@
 	f->ret->t1 = H;
 	destroy(v);
 
+	DP("Crypt_dhparams\n");
 	p = mpnew(0);
 	alpha = mpnew(0);
+	DP("Crypt_dhparams p 0x%p sign %d size %d top %d p 0x%p flags 0x%x\n"
+		"	alpha 0x%p sign %d size %d top %d p 0x%p flags 0x%x\n",
+			p, p->sign, p->size, p->top, p->p, p->flags,
+			alpha, alpha->sign, alpha->size, alpha->top, alpha->p, alpha->flags);
 	release();
-	if(f->nbits == 1024)
+	if(f->nbits == 1024){
+		DP("DSAprimes\n");
 		DSAprimes(alpha, p, nil);
-	else
+	}else{
+		DP("gensafeprime\n");
 		gensafeprime(p, alpha, f->nbits, 0);
+	}
 	acquire();
+	DP("Crypt_dhparams p 0x%p sign %d size %d top %d p 0x%p flags 0x%x\n"
+		"	alpha 0x%p sign %d size %d top %d p 0x%p flags 0x%x\n",
+			p, p->sign, p->size, p->top, p->p, p->flags,
+			alpha, alpha->sign, alpha->size, alpha->top, alpha->p, alpha->flags);
 	f->ret->t0 = newIPint(alpha);
 	f->ret->t1 = newIPint(p);
 }
--- a/libinterp/dlm-9front.c
+++ b/libinterp/dlm-9front.c
@@ -11,7 +11,7 @@
 extern Dynobj* dynld(int);
 extern char*	enverror(void);
 
-typedef struct{char *name; long sig; void (*fn)(void*); int size; int np; uchar map[16];} Runtab;
+typedef struct{char *name; s32 sig; void (*fn)(void*); int size; int np; uchar map[16];} Runtab;
 
 static void*
 addr(char *pre, char *suf, Dynobj *o, ulong sig)
--- a/libinterp/dlm-Inferno.c
+++ b/libinterp/dlm-Inferno.c
@@ -11,7 +11,7 @@
 extern Dynobj* dynld(int);
 extern char*	enverror(void);
 
-typedef struct{char *name; long sig; void (*fn)(void*); int size; int np; uchar map[16];} Runtab;
+typedef struct{char *name; s32 sig; void (*fn)(void*); int size; int np; uchar map[16];} Runtab;
 
 static void*
 addr(char *pre, char *suf, Dynobj *o, ulong sig)
--- a/libinterp/draw.c
+++ b/libinterp/draw.c
@@ -10,6 +10,7 @@
 #include "memdraw.h"
 #include "memlayer.h"
 
+#define DP if(1){}else print
 /*
  * When a Display is remote, it must be locked to synchronize the
  * outgoing message buffer with the refresh demon, which runs as a
@@ -552,16 +553,19 @@
 
 	f = fp;
 	d = checkimage(f->dst);
-	if(f->src == H)
+	if(f->src == H){
 		s = d->display->black;
-	else
+	}else{
 		s = checkimage(f->src);
+	}
 	if(f->matte == H)
 		m = d->display->white;	/* ones */
 	else
 		m = checkimage(f->matte);
-	if(d->display!=s->display || d->display!=m->display)
+	if(d->display!=s->display || d->display!=m->display){
+		DP("imagedraw d->display!=s->display || d->display!=m->display\n");
 		return;
+	}
 	locked = lockdisplay(d->display);
 	drawop(d, IRECT(f->r), s, m, IPOINT(f->p), op);
 	checkflush(f->dst);
@@ -1815,7 +1819,7 @@
 Draw_icossin(void *fp)
 {
 	F_Draw_icossin *f;
-	int s, c;
+	s32int s, c;
 
 	f = fp;
 	icossin(f->deg, &s, &c);
@@ -1827,7 +1831,7 @@
 Draw_icossin2(void *fp)
 {
 	F_Draw_icossin2 *f;
-	int s, c;
+	s32int s, c;
 
 	f = fp;
 	icossin2(f->p.x, f->p.y, &s, &c);
@@ -2174,11 +2178,11 @@
 		locked = lockdisplay(d);
 		p = buf;
 		for(i=0; i<n; i+=5*4,p+=5*4){
-			id = BGLONG(p+0*4);
-			r.min.x = BGLONG(p+1*4);
-			r.min.y = BGLONG(p+2*4);
-			r.max.x = BGLONG(p+3*4);
-			r.max.y = BGLONG(p+4*4);
+			id = BG32INT(p+0*4);
+			r.min.x = BG32INT(p+1*4);
+			r.min.y = BG32INT(p+2*4);
+			r.max.x = BG32INT(p+3*4);
+			r.max.y = BG32INT(p+4*4);
 			for(im=d->windows; im; im=im->next)
 				if(im->id == id)
 					break;
@@ -2216,7 +2220,7 @@
 			acquire();
 		kgerrstr(err, sizeof err);
 		if(_drawdebug || strcmp(err, "screen id in use") != 0 && strcmp(err, exImage) != 0){
-			print("flushimage fail: (%d not %d) d=%lux: %s\nbuffer: ", m, n, (ulong)d, err);
+			print("flushimage fail: (%d not %d) d=%zx: %s\nbuffer: ", m, n, (uintptr)d, err);
 			for(tp = d->buf; tp < d->bufp; tp++)
 				print("%.2x ", (int)*tp);
 			print("\n");
@@ -2224,6 +2228,15 @@
 		d->bufp = d->buf;	/* might as well; chance of continuing */
 		return -1;
 	}
+	/* debug: dump d->buf[0..n), i.e. what is being sent to memdraw(); prints nothing while DP is if(1){}else print */
+	DP("doflush sending d->buf[0] = %c\n", d->buf[0]);
+	for(int i = 1; i < n; i+=4){
+		DP("\td->buf[%d] = ", i);
+		for(int j = 0; j < 4 && i+j < n; j++){	/* stop at n: the last group may hold fewer than 4 bytes */
+			DP(" %x", d->buf[i+j]);
+		}
+		DP("\n");
+	}
 	d->bufp = d->buf;
 	if(d->local == 0)
 		acquire();
@@ -2325,7 +2338,7 @@
 	}
 	if(d->bufp+n > d->buf+Displaybufsize){
 		if(d->local==0 && currun()!=libqlowner(d->qlock)) {
-			print("bufimage: %lux %lux\n", (ulong)libqlowner(d->qlock), (ulong)currun());
+			print("bufimage: %zx %zx\n", (uintptr)libqlowner(d->qlock), (uintptr)currun());
 			abort();
 		}
 		if(doflush(d) < 0)
--- a/libinterp/gc.c
+++ b/libinterp/gc.c
@@ -2,6 +2,8 @@
 #include "interp.h"
 #include "pool.h"
 
+#define DP if(1){}else print
+
 enum
 {
 	Quanta		= 50,		/* Allocated blocks to sweep each time slice usually */
@@ -43,7 +45,7 @@
 static	Ptrhash	*ptrtab[PTRHASH];
 static	Ptrhash	*ptrfree;
 
-#define	HASHPTR(p)	(((ulong)(p) >> 6) & (PTRHASH - 1))
+#define	HASHPTR(p)	(((uintptr)(p) >> 6) & (PTRHASH - 1))
 
 void
 ptradd(Heap *v)
@@ -211,7 +213,39 @@
 	marker = (gccolor-1)%3;
 	sweeper = (gccolor-2)%3;
 
+/* for debugging
+	DP("rootset root=0x%p\n", root);
 	while(root != nil) {
+		DP("Prog state %d pid %d ticks %ld\n",
+			root->state, root->pid, root->ticks);
+		DP("\tpc 0x%p module %s %s\n",
+			root->R.PC, root->R.M->m->name, root->R.M->m->path);
+		sp = root->R.SP;
+		ex = root->R.EX;
+		while(ex != nil) {
+			sx = (Stkext*)ex;
+			fp = sx->reg.tos.fu;
+			DP("Stkext stack extent 0x%p sp stack pointer 0x%p TR type register 0x%p \n"
+				"\tEX previous stack extent 0x%p\n"
+				"\tSP 0x%p TS top of stack 0x%p fp 0x%p\n",
+				sx, sp, sx->reg.TR, sx->reg.EX,
+				sx->reg.SP, sx->reg.TS, fp);
+			while(fp != sp) {
+				f = (Frame*)fp;
+				t = f->t;
+				if(t == nil){
+					DP("t == nil\n");
+					t = sx->reg.TR;
+				}
+				fp += t->size;
+				showframe((void*)f, t);
+			}
+			ex = sx->reg.EX;
+			sp = sx->reg.SP;
+		}
+		root = root->next;
+	}*/
+	while(root != nil) {
 		ml = root->R.M;
 		h = D2H(ml);
 		Setmark(h);
@@ -229,8 +263,9 @@
 			while(fp != sp) {
 				f = (Frame*)fp;
 				t = f->t;
-				if(t == nil)
+				if(t == nil){
 					t = sx->reg.TR;
+				}
 				fp += t->size;
 				t->mark(t, f);
 				ml = f->mr;
@@ -282,7 +317,7 @@
 	int i;
 	Module *m;
 
-	print("sweep h=0x%lux t=0x%lux c=%d", (ulong)h, (ulong)h->t, h->color);
+	print("sweep h=0x%zx t=0x%zx c=%d", (uintptr)h, (uintptr)h->t, h->color);
 	for(m = modules; m != nil; m = m->link) {
 		for(i = 0; i < m->ntype; i++) {
 			if(m->type[i] == h->t) {
@@ -369,8 +404,9 @@
 	if(quanta > MaxQuanta)
 		quanta = MaxQuanta;
 
-	if(base != nil)		/* Completed this iteration ? */
+	if(base != nil){		/* Completed this iteration ? */
 		return;
+	}
 	if(nprop == 0) {	/* Completed the epoch ? */
 		gcepochs++;
 		gccolor++;
--- a/libinterp/ipint.c
+++ b/libinterp/ipint.c
@@ -88,7 +88,6 @@
 	uchar *p;
 	int n, o;
 	void *v;
-
 	f = fp;
 	v = *f->ret;
 	*f->ret = H;
@@ -668,6 +667,7 @@
 	F_IPint_and *f;
 	mpint *ret, *i1, *i2;
 	void *v;
+	if(1){}else print("IPint_and fp 0x%p\n", fp);	/* debug trace, off by default (same idiom as the DP macro) */
 
 	f = fp;
 	v = *f->ret;
--- a/libinterp/keyring.c
+++ b/libinterp/keyring.c
@@ -781,7 +781,7 @@
 	int n;
 
 	sa = checkSigAlg(c->x.sa);
-	n = snprint(buf, len, "%s\n%s\n%s\n%d\n", string2c(sa->x.name),
+	n = snprint(buf, len, "%s\n%s\n%s\n%zd\n", string2c(sa->x.name),
 		string2c(c->x.ha), string2c(c->x.signer), c->x.exp);
 	return n + (*sa->vec->sig2str)(c->signa, buf+n, len - n);
 }
@@ -824,7 +824,7 @@
 	if(strcmp(ha, "sha") == 0)
 		ha = "sha1";	/* normalise */
 	fmtstrinit(&o);
-	fmtprint(&o, "sigalg=%q-%q signer=%q expires=%ud", string2c(sa->x.name), ha,
+	fmtprint(&o, "sigalg=%q-%q signer=%q expires=%zd", string2c(sa->x.name), ha,
 		string2c(c->x.signer), c->x.exp);
 	val = bigs2attr(&o, buf, sa->vec->sigattr);
 	free(buf);
@@ -973,7 +973,7 @@
 	if(buf == nil)
 		return;
 	ds = (XDigestState*)f->state;
-	n = snprint(buf, Maxbuf, "%s %d", string2c(sk->x.owner), f->exp);
+	n = snprint(buf, Maxbuf, "%s %zd", string2c(sk->x.owner), f->exp);
 	if(strcmp(string2c(f->ha), "sha") == 0 || strcmp(string2c(f->ha), "sha1") == 0){
 		sha1((uchar*)buf, n, digest, &ds->state);
 		n = Keyring_SHA1dlen;
@@ -1052,7 +1052,7 @@
 	buf = malloc(Maxbuf);
 	if(buf == nil)
 		return 0;
-	n = snprint(buf, Maxbuf, "%s %d", string2c(c->x.signer), c->x.exp);
+	n = snprint(buf, Maxbuf, "%s %zd", string2c(c->x.signer), c->x.exp);
 	if(strcmp(string2c(c->x.ha), "sha") == 0 || strcmp(string2c(c->x.ha), "sha1") == 0){
 		ds = sha1((uchar*)a, len, 0, 0);
 		sha1((uchar*)buf, n, digest, ds);
@@ -1113,7 +1113,7 @@
 	buf = malloc(Maxbuf);
 	if(buf == nil)
 		return;
-	n = snprint(buf, Maxbuf, "%s %d", string2c(c->x.signer), c->x.exp);
+	n = snprint(buf, Maxbuf, "%s %zd", string2c(c->x.signer), c->x.exp);
 	ds = (XDigestState*)f->state;
 
 	if(strcmp(string2c(c->x.ha), "sha") == 0 || strcmp(string2c(c->x.ha), "sha1") == 0){
@@ -1196,7 +1196,7 @@
 }
 
 static Keyring_DigestState*
-keyring_digest_x(Array *buf, int n, Array *digest, int dlen, Keyring_DigestState *state, DigestState* (*fn)(uchar*, ulong, uchar*, DigestState*))
+keyring_digest_x(Array *buf, u32 n, Array *digest, int dlen, Keyring_DigestState *state, DigestState* (*fn)(uchar*, u32, uchar*, DigestState*))
 {
 	Heap *h;
 	XDigestState *ds;
@@ -1330,7 +1330,7 @@
 }
 
 static Keyring_DigestState*
-keyring_hmac_x(Array *data, int n, Array *key, Array *digest, int dlen, Keyring_DigestState *state, DigestState* (*fn)(uchar*, ulong, uchar*, ulong, uchar*, DigestState*))
+keyring_hmac_x(Array *data, u32 n, Array *key, Array *digest, int dlen, Keyring_DigestState *state, DigestState* (*fn)(uchar*, u32, uchar*, u32, uchar*, DigestState*))
 {
 	Heap *h;
 	XDigestState *ds;
--- a/libinterp/link.c
+++ b/libinterp/link.c
@@ -4,6 +4,8 @@
 #include "raise.h"
 #include <kernel.h>
 
+#define DP if(1){}else print
+
 static void
 newlink(Link *l, char *fn, int sig, Type *t)
 {
@@ -39,21 +41,25 @@
 
 	sig = ldt->sig;
 	for(l = m->ext; l->name; l++)
-		if(strcmp(ldt->name, l->name) == 0)
+		if(strcmp(ldt->name, l->name) == 0){
+			DP(" matched l->name %s l->sig 0x%ux\n", l->name, l->sig);
 			break;
+		}
 
 	if(l == nil) {
-		snprint(e, sizeof(e), "link failed fn %s->%s() not implemented", m->name, ldt->name);
+		snprint(e, sizeof(e), "link failed fn %s->%s() not implemented",
+			m->name, ldt->name);
 		goto bad;
 	}
 	if(l->sig != sig) {
 		snprint(e, sizeof(e), "link typecheck %s->%s() %ux/%ux",
-							m->name, ldt->name, l->sig, sig);
+			m->name, ldt->name, l->sig, sig);
 		goto bad;
 	}
 
 	ml->links[i].u = l->u;
 	ml->links[i].frame = l->frame;
+	ml->links[i].name = l->name;
 	return 0;
 bad:
 	kwerrstr(e);
@@ -81,7 +87,10 @@
 
 	return ml;
 }
-
+/* Create a Modlink which connects
+   the functions in the ldt with their code in Module m
+   Module m exports those functions through m->ext
+ */
 Modlink*
 linkmod(Module *m, Import *ldt, int mkmp)
 {
@@ -112,6 +121,8 @@
 	}
 
 	for(i = 0, l = ldt; l->name != nil; i++, l++) {
+		DP("linkmod connect i %d l->name %s l->sig 0x%ux",
+			i, l->name, l->sig);
 		if(linkm(m, ml, i, l) < 0){
 			destroy(ml);
 			return H;
--- a/libinterp/load.c
+++ b/libinterp/load.c
@@ -4,12 +4,14 @@
 #include "raise.h"
 #include <kernel.h>
 
+#define DP if(1){}else print
+#define DNP if(1){}else print
 #define	A(r)	*((Array**)(r))
 
 Module*	modules;
-int	dontcompile;
+int	dontcompile = 1; /* TODO: the compiler is broken on amd64, at least */
 
-static int
+static s32
 operand(uchar **p)
 {
 	int c;
@@ -42,10 +44,10 @@
 	return 0;	
 }
 
-static ulong
+static s32
 disw(uchar **p)
 {
-	ulong v;
+	s32 v;
 	uchar *c;
 
 	c = *p;
@@ -58,10 +60,12 @@
 }
 
 double
-canontod(ulong v[2])
+canontod(u32 v[2])
 {
-	union { double d; unsigned long ul[2]; } a;
+	union { double d; u32 ul[2]; } a;
+	/*print("| 0 0x%ux 1 0x%ux |", v[0], v[1]);*/
 	a.d = 1.;
+	/*print(".");*/
 	if(a.ul[0]) {
 		a.ul[0] = v[0];
 		a.ul[1] = v[1];
@@ -70,6 +74,7 @@
 		a.ul[1] = v[0];
 		a.ul[0] = v[1];
 	}
+	/*print(",");*/
 	return a.d;
 }
 
@@ -118,14 +123,39 @@
 	case ISPAWN:
 		if(ip->d.imm < 0 || ip->d.imm >= m->nprog)
 			return 0;
-		ip->d.imm = (WORD)&m->prog[ip->d.imm];
+		ip->d.imm = (intptr)&m->prog[ip->d.imm];
 		break;
 	}
 	return 1;
 }
 
+void	/* debug trace of one DEFS string constant; prints nothing while DP is if(1){}else print */
+asmstring(intptr offset, uchar* absoluteoffset, String* stored, int len, uchar *s)
+{
+	uchar *se;	/* one past the last byte of s */
+	int c;
+
+	USED(offset);	/* offset is otherwise referenced only as a DP argument */
+	DP("\tstring\t@mp+%zd=0x%p,len %d at 0x%p:\"", offset, absoluteoffset, len, stored);
+	se = s + len;
+	for(; s < se; s++){	/* emit each byte, escaping newline, NUL, double quote and backslash */
+		c = *s;
+		if(c == '\n')
+			DP("\\n");
+		else if(c == '\0')
+			DP("\\z");
+		else if(c == '"')
+			DP("\\\"");
+		else if(c == '\\')
+			DP("\\\\");
+		else
+			DP("%c", c);
+	}
+	DP("\"\n");
+}
+
 Module*
-parsemod(char *path, uchar *code, ulong length, Dir *dir)
+parsemod(char *path, uchar *code, u32 length, Dir *dir)
 {
 	Heap *h;
 	Inst *ip;
@@ -133,13 +163,15 @@
 	String *s;
 	Module *m;
 	Array *ary;
-	ulong ul[2];
+	u32 ul[2];
 	WORD lo, hi;
-	int lsize, id, v, entry, entryt, tnp, tsz, siglen;
-	int de, pc, i, n, isize, dsize, hsize, dasp;
-	uchar *mod, sm, *istream, **isp, *si, *addr, *dastack[DADEPTH];
+	int lsize, id, v, tnp, tsz, siglen;
+	int de, i, n, isize, dsize, hsize, dasp;
+	uchar *mod, sm, *istream, **isp, *si, *addr, *dastack[DADEPTH], *e, *b;
 	Link *l;
+	intptr pc, entry, entryt;
 
+	DP("\tsource\t\"%s\"\n", path);
 	istream = code;
 	isp = &istream;
 
@@ -193,6 +225,9 @@
 		goto bad;
 	}
 
+	DP("parsemod before instructions isize %d dsize %d hsize %d"
+		" lsize %d entry 0x%zx entryt 0x%zx\n",
+		isize, dsize, hsize, lsize, entry, entryt);
 	m->nprog = isize;
 	m->prog = mallocz(isize*sizeof(Inst), 0);
 	if(m->prog == nil) {
@@ -201,7 +236,6 @@
 	}
 
 	m->ref = 1;
-
 	ip = m->prog;
 	for(i = 0; i < isize; i++) {
 		ip->op = *istream++;
@@ -246,9 +280,13 @@
 			ip->d.i.s = operand(isp);
 			break;
 		}
+		if(i % 10 == 0)
+			DP("#%p\n", ip);
+		DP("	%d %zd %D\n", i, (intptr)ip, ip);
 		ip++;		
 	}
 
+	DP("\tentry\t0,%d\n",hsize);
 	m->ntype = hsize;
 	m->type = malloc(hsize*sizeof(Type*));
 	if(m->type == nil) {
@@ -272,6 +310,11 @@
 			kwerrstr(exNomem);
 			goto bad;
 		}
+		DP("\tdesc\t$%d 0x%p has 0x%p of size %d nptrs %d:\"",
+			id, m->type+id, pt, tsz, tnp);
+		for(e = istream; e < istream+tnp; e++)
+			DP("%.2x", *e);
+		DP("\"\n");
 		istream += tnp;
 		m->type[id] = pt;
 	}
@@ -284,9 +327,12 @@
 		}
 		h = heapz(pt);
 		m->origmp = H2D(uchar*, h);
+		DP("\tm->origmp 0x%p belongs to heap at 0x%p, uses type at 0x%p\n",
+			m->origmp, h, pt);
 	}
 	addr = m->origmp;
 	dasp = 0;
+	DP("\tvar\t@mp, size %d\n", dsize);
 	for(;;) {
 		sm = *istream++;
 		if(sm == 0)
@@ -302,43 +348,67 @@
 			goto bad;
 		case DEFS:
 			s = c2string((char*)istream, n);
+			asmstring(v, si, s, n, istream);
 			istream += n;
 			*(String**)si = s;
 			break;
 		case DEFB:
-			for(i = 0; i < n; i++)
+			DP("\tbyte\t@mp+%d", v);
+			for(i = 0; i < n; i++){
+				DP(",%d", *istream & 0xff);
 				*si++ = *istream++;
+			}
+			DP(" n=%d\n", n);
 			break;
 		case DEFW:
+			DP("\tword\t@mp+%d len %d:", v, n);
 			for(i = 0; i < n; i++) {
 				*(WORD*)si = disw(isp);
+				DP(" 0x%zx", *(WORD*)si);
 				si += sizeof(WORD);
 			}
+			DP("\n");
 			break;
 		case DEFL:
+			DP("\tlong\t@mp+%d", v);
 			for(i = 0; i < n; i++) {
 				hi = disw(isp);
 				lo = disw(isp);
-				*(LONG*)si = (LONG)hi << 32 | (LONG)(ulong)lo;
+				*(LONG*)si = (LONG)hi << 32 | (LONG)(u32)lo;
+				DP(",%lld 0x%zx", *(LONG*)si, *(LONG*)si);
 				si += sizeof(LONG);
 			}
+			DP("\n");
 			break;
 		case DEFF:
+			DP("\treal\t@mp+%d", v);
 			for(i = 0; i < n; i++) {
+				DP(" raw: ");	/* trace the next 8 stream bytes before they are decoded */
+				for(int j = 0; j<8; j++){
+					DP(" 0x%x", istream[j]);	/* isp == &istream: index the stream, not the pointer variable */
+				}
 				ul[0] = disw(isp);
 				ul[1] = disw(isp);
+				/*print("canontod ul[0] 0x%x ul[1] 0x%x ", ul[0], ul[1]);*/
 				*(REAL*)si = canontod(ul);
+				/*DP("__");
+				DP(",%g", *(REAL*)si);
+				DP("--");*/
 				si += sizeof(REAL);
 			}
+			DP("\n");
 			break;
 		case DEFA:			/* Array */
+			DP("\tarray\t@mp+%d", v);
 			v = disw(isp);
 			if(v < 0 || v > m->ntype) {
 				kwerrstr("bad array type");
 				goto bad;
 			}
+			DP(",$%d", v);
 			pt = m->type[v];
 			v = disw(isp);
+			DP(",%d", v);
 			h = nheap(sizeof(Array)+(pt->size*v));
 			h->t = &Tarray;
 			h->t->ref++;
@@ -348,6 +418,12 @@
 			ary->root = H;
 			ary->data = (uchar*)ary+sizeof(Array);
 			memset((void*)ary->data, 0, pt->size*v);
+			for(i = 0;	/* trace the element bytes just cleared by the memset above */
+				i < pt->size*v;
+				i++){
+				DP(",%d", ary->data[i]);
+			}
+			DP("\n");
 			initarray(pt, ary);
 			A(si) = ary;
 			break;			
@@ -357,6 +433,7 @@
 				kwerrstr("ind not array");
 				goto bad;
 			}
+			DP("\tindir\t@mp+%d", v);
 			v = disw(isp);
 			if(v > ary->len || v < 0 || dasp >= DADEPTH) {
 				kwerrstr("array init range");
@@ -364,6 +441,9 @@
 			}
 			dastack[dasp++] = addr;
 			addr = ary->data+v*ary->t->size;
+			DP(",%d,%zd 0x%zx\n",
+				v, (intptr)ary->data+v*ary->t->size,
+				(intptr)ary->data+v*ary->t->size);
 			break;
 		case DAPOP:
 			if(dasp == 0) {
@@ -370,10 +450,15 @@
 				kwerrstr("pop range");
 				goto bad;
 			}
+			DP("\tapop\n");
 			addr = dastack[--dasp];
 			break;
 		}
 	}
+	/*DP("		Initialized origmp\n");
+	for(int i = 0; i < m->type[0]->size/(sizeof(intptr)); i++){
+		DP("\t\t0x%p\t%zx\n", (intptr*)m->origmp+i, *((intptr*) m->origmp+i));
+	}*/
 	mod = istream;
 	if(memchr(mod, 0, 128) == 0) {
 		kwerrstr("bad module name");
@@ -384,6 +469,7 @@
 		kwerrstr(exNomem);
 		goto bad;
 	}
+	DP("\tmodule\t%s\n", m->name);
 	while(*istream++)
 		;
 
@@ -400,6 +486,20 @@
 		if(de != -1)
 			pt = m->type[de];
 		mlink(m, l, istream, v, pc, pt);
+		if(de != -1){
+			DP("\tlink\tidx %d, type %d size %d np %d ",
+				i, de, pt->size, pt->np);
+			if(pt->np > 0){
+				DP("map");
+				for(b = pt->map; b < pt->map+pt->np; b++)
+					DP(" %.2x", *b);
+				DP(" ");
+			}
+			DP(", pc %zd, sig 0x%ux,\"%s\"\n",
+				pc, v, (char*)istream);
+		}else
+			DP("\tlink\tidx %d type %d, pc %zd, sig 0x%ux,\"%s\"\n",
+				i, de, pc, v, (char*)istream);
 		while(*istream++)
 			;
 	}
@@ -420,8 +520,11 @@
 			kwerrstr(exNomem);
 			goto bad;
 		}
+		DP("\tldts\t@ldt,%d\n", nl);
 		for(i = 0; i < nl; i++, i2++){
 			n = operand(isp);
+			DP("\text\t@ldts+%d,%d,%zd\n",
+				i, n, (intptr)i2-(intptr)m->ldt);
 			i1 = *i2 = (Import*)malloc((n+1)*sizeof(Import));
 			if(i1 == nil){
 				kwerrstr(exNomem);
@@ -434,6 +537,9 @@
 					kwerrstr(exNomem);
 					goto bad;
 				}
+				DP("\text\t@ldt+%zd,idx %d, sig 0x%ux,\"%s\"\n",
+					(intptr)i1-(intptr)m->ldt,
+					j, i1->sig, (char*)istream);
 				while(*istream++)
 					;
 			}
@@ -442,7 +548,7 @@
 	}
 
 	if(m->rt & HASEXCEPT){
-		int j, nh;
+		s32 j, nh, descid;
 		Handler *h;
 		Except *e;
 
@@ -452,15 +558,16 @@
 			kwerrstr(exNomem);
 			goto bad;
 		}
+		DP("\texceptions\t%d\n", nh);
 		h = m->htab;
 		for(i = 0; i < nh; i++, h++){
 			h->eoff = operand(isp);
 			h->pc1 = operand(isp);
 			h->pc2 = operand(isp);
-			n = operand(isp);
-			if(n != -1)
-				h->t = m->type[n];
-			n = operand(isp);
+			descid = operand(isp);	/* type descriptor index; -1 means none */
+			if(descid != -1)
+				h->t = m->type[descid];
+			n = operand(isp); /* no of labels */
 			h->ne = n>>16;
 			n &= 0xffff;
 			h->etab = malloc((n+1)*sizeof(Except));
@@ -469,6 +576,10 @@
 				goto bad;
 			}
 			e = h->etab;
+			DP("\texception\t%d: offset %zd pc1 %zd pc2 %zd"
+				" desc %d nlab %d ne %zd\n",
+				i, h->eoff, h->pc1, h->pc2,
+				descid, n, h->ne);
 			for(j = 0; j < n; j++, e++){
 				e->s = strdup((char*)istream);
 				if(e->s == nil){
@@ -478,9 +589,11 @@
 				while(*istream++)
 					;
 				e->pc = operand(isp);
+				DP("\texctab\t\"%s\", %zd\n", e->s, e->pc);
 			}
 			e->s = nil;
 			e->pc = operand(isp);
+			DP("\texctab\t*, %zd\n", e->pc);
 		}
 		istream++;
 	}
@@ -487,7 +600,7 @@
 
 	m->entryt = nil;
 	m->entry = m->prog;
-	if((ulong)entry < isize && (ulong)entryt < hsize) {
+	if(entry >= 0 && entry < isize && entryt >= 0 && entryt < hsize) {	/* signed types: reject negatives explicitly */
 		m->entry = &m->prog[entry];
 		m->entryt = m->type[entryt];
 	}
@@ -509,6 +622,7 @@
 		kwerrstr(exNomem);
 		goto bad;
 	}
+	DP("\tsource\t\"%s\"\n", m->path);
 	m->link = modules;
 	modules = m;
 
--- a/libinterp/stack.c
+++ b/libinterp/stack.c
@@ -4,9 +4,36 @@
 #include "raise.h"
 #include <pool.h>
 
+#define DP if(1){}else print
 #define T(r)	*((void**)(R.r))
 
+/* debug aid (same as the one in xec.c): print type vt's pointer map, then each word of frame v */
 void
+showframe(void *v, void *vt)
+{
+	uchar *p;
+	int i, j;
+	Frame *f;
+	Type *t;
+
+	f = (Frame*) v;
+	t = (Type*) vt;
+	print("frame 0x%p type 0x%p t->size %d t->np %d t->map \"",
+		v, vt, t->size, t->np);
+	for(p = t->map; p < t->map+t->np; p++)
+		print(" %.2x", *p);
+	print("\"\n");
+	for(i = 0; i < t->size/(int)sizeof(intptr); i++){	/* one line per word of the frame */
+		print("\t%d %d 0x%p\t%zx\t",	/* word index, byte offset, address, value */
+			i, (int)(i*sizeof(intptr)), (intptr*)f+i, *((intptr*)f+i));
+		for(j = 0; j < (int)sizeof(intptr); j++){	/* then the same word byte by byte */
+			print(" %d=0x%02x", j, *((uchar*)f+i*sizeof(intptr)+j));
+		}
+		print("\n");
+	}
+}
+
+void
 newstack(Prog *p)
 {
 	int l;
@@ -24,9 +51,10 @@
 	f->mr = nil;
 	f->fp = nil;
 	l = p->R.M->m->ss;
-	/* 16 bytes for Stkext record keeping */
-	if(l < t->size+16)
-		l = t->size+16;
+	/* 16 bytes for Stkext record keeping
+		changed to 40 bytes for amd64 */
+	if(l < t->size+sizeof(Stkext))
+		l = t->size+sizeof(Stkext);
 	ns = mallocz(l, 0);
 	if(ns == nil)
 		error(exNomem);
@@ -40,6 +68,12 @@
 	p->R.SP = ns->reg.tos.fu + t->size;
 	p->R.FP = ns->reg.tos.fu;
 
+	DP("newstack R.EX stack extend 0x%p\n"
+		"\tTR type register 0x%p R.SP 0x%p R.TS top of stack 0x%p\n"
+		"\tR.FP=0x%p R.SP-R.FP=%zd t->size=%d called by 0x%p\n",
+		p->R.EX, ns->reg.TR,
+		p->R.SP, p->R.TS,
+		p->R.FP, p->R.SP - p->R.FP, t->size, getcallerpc(&p));
 	memmove(p->R.FP, f, t->size);
 	f = (Frame*)p->R.FP;
 	f->t = nil;
@@ -55,9 +89,10 @@
 
 	t = R.s;
 	l = R.M->m->ss;
-	/* 16 bytes for Stkext record keeping */
-	if(l < t->size+16)
-		l = 2*t->size+16;
+	/* 16 bytes for Stkext record keeping
+		changed to 40 bytes for amd64 */
+	if(l < t->size+sizeof(Stkext))
+		l = 2*t->size+sizeof(Stkext);
 	ns = mallocz(l, 0);
 	if(ns == nil)
 		error(exNomem);
@@ -73,7 +108,6 @@
 	R.EX = ns->stack;
 	R.TS = ns->stack + l;
 	R.SP = ns->reg.tos.fu + t->size;
-
 	if (t->np)
 		initmem(t, f);
 }
--- a/libinterp/xec.c
+++ b/libinterp/xec.c
@@ -7,6 +7,7 @@
 REG	R;			/* Virtual Machine registers */
 String	snil;			/* String known to be zero length */
 
+#define DP	if(1){}else print
 #define Stmp	*((WORD*)(R.FP+NREG*IBY2WD))
 #define Dtmp	*((WORD*)(R.FP+(NREG+2)*IBY2WD))
 
@@ -219,6 +220,7 @@
 
 	a = A(s);
 	i = W(d);
+	DP("indx a %p a->len %lld i %ld\n", a, a->len, i);
 	if(a == H || i >= a->len)
 		error(exBounds);
 	W(m) = (WORD)(a->data+i*a->t->size);
@@ -230,6 +232,7 @@
 
 	a = A(s);
 	i = W(d);
+	DP("indw a %p a->len %lld i %ld\n", a, a->len, i);
 	if(a == H || i >= a->len)
 		error(exBounds);
 	W(m) = (WORD)(a->data+i*sizeof(WORD));
@@ -241,6 +244,7 @@
 
 	a = A(s);
 	i = W(d);
+	DP("indf a %p a->len %lld i %ld\n", a, a->len, i);
 	if(a == H || i >= a->len)
 		error(exBounds);
 	W(m) = (WORD)(a->data+i*sizeof(REAL));
@@ -252,6 +256,7 @@
 
 	a = A(s);
 	i = W(d);
+	DP("indl a %p a->len %lld i %ld\n", a, a->len, i);
 	if(a == H || i >= a->len)
 		error(exBounds);
 	W(m) = (WORD)(a->data+i*sizeof(LONG));
@@ -263,6 +268,7 @@
 
 	a = A(s);
 	i = W(d);
+	DP("indb a %p a->len %lld i %ld\n", a, a->len, i);
 	if(a == H || i >= a->len)
 		error(exBounds);
 	W(m) = (WORD)(a->data+i*sizeof(BYTE));
@@ -348,10 +354,15 @@
 	R.SP  = nsp;
 	f->t  = t;
 	f->mr = nil;
+	DP("frame frame 0x%p t 0x%p t->size %d R.SP 0x%p\n",
+		f, t, t->size, R.SP);
 	if (t->np)
 		initmem(t, f);
 	T(d) = f;
 }
+/* from the module link loaded at src1 using the index src2
+   build the frame at dst
+ */
 OP(mframe)
 {
 	Type *t;
@@ -377,6 +388,7 @@
 		R.s = t;
 		extend();
 		T(d) = R.s;
+		DP("\t\textended frame at *R.d 0x%p\n", *(intptr**)R.d);
 		return;
 	}
 	f = (Frame*)R.SP;
@@ -383,9 +395,13 @@
 	R.SP = nsp;
 	f->t = t;
 	f->mr = nil;
+	DP("\t\tmframe frame 0x%p t 0x%p t->size %d R.SP 0x%p\n",
+		f, t, t->size, R.SP);
 	if (t->np)
 		initmem(t, f);
 	T(d) = f;
+	DP("\t\tframe at *R.d 0x%p is\n", *(intptr**)R.d);
+	if(0) showframe((void *)f, t);
 }
 void
 acheck(int tsz, int sz)
@@ -686,12 +702,26 @@
 	newstack(p);
 	unframe();
 }
+void
+showREG(void)	/* debug aid: trace the fields of the VM register set R through DP */
+{
+	DP("REG PC 0x%p MP 0x%p FP 0x%p SP 0x%p\n"
+		"\tTS 0x%p EX 0x%p M 0x%p IC %d\n"
+		"\txpc 0x%p s 0x%p d 0x%p m 0x%p\n",
+		R.PC, R.MP, R.FP, R.SP,
+		R.TS, R.EX, R.M, R.IC,
+		R.xpc, R.s, R.d, R.m);
+}
 OP(ret)
 {
 	Frame *f;
 	Modlink *m;
 
+	showREG();
 	f = (Frame*)R.FP;
+	DP("Frame at 0x%p lr 0x%p fp 0x%p mr 0x%p t 0x%p\n",
+		f, f->lr, f->fp, f->mr, f->t);
+	/* showframe((void*)f, f->t); */
 	R.FP = f->fp;
 	if(R.FP == nil) {
 		R.FP = (uchar*)f;
@@ -716,6 +746,11 @@
 		R.MP = m->MP;
 	}
 }
+/* load src1, src2, dst
+   src1 pathname to the file containing the object code for a module
+   src2 address of linkage descriptor table, list of functions used from that module
+   dst  Modlink, mechanism to call those functions
+ */
 OP(iload)
 {
 	char *n;
@@ -733,6 +768,7 @@
 		error("obsolete dis");
 	}
 
+	DP("\t\tiload module %s for the ldt index %zd\n", n, W(m));
 	if(strcmp(n, "$self") == 0) {
 		m->ref++;
 		ml = linkmod(m, ldt, 0);
@@ -775,11 +811,21 @@
 	h = D2H(ml);
 	h->ref++;
 
+	DP("\t\tmcall frame at *R.s 0x%p is\n", f);
+	if(0 && f->t != nil)
+		showframe((void *)f, f->t);
 	o = W(m);
-	if(o >= 0)
+	if(o >= 0){
 		l = &ml->links[o].u;
-	else
+		DP("\t\tlink o %d %s\n",
+			o, ml->links[o].name);
+		DP("\t\text o %d %s sig 0x%x\n",
+			o, ml->m->ext[o].name, ml->m->ext[o].sig);
+	}else{
 		l = &ml->m->ext[-o-1].u;
+		DP("\t\text o %d %s sig 0x%x\n",
+			-o-1, ml->m->ext[-o-1].name, ml->m->ext[-o-1].sig);
+	}
 	if(ml->prog == nil) {
 		l->runt(f);
 		h->ref--;
@@ -1320,10 +1366,11 @@
 	p->exval = v;
 	h = D2H(v);
 	h->ref++;
-	if(h->t == &Tstring)
-		error(string2c((String*)v));
-	else
+	if(h->t == &Tstring){
+		 error(string2c((String*)v));
+	}else{
 		error(string2c(*(String**)v));
+	}
 }
 OP(mulx)
 {
@@ -1668,6 +1715,37 @@
 }
 
 void
+showprog(Prog *p)	/* debug aid: trace p's state, pc and module, then each frame on its stack */
+{
+	Type *t;
+	Frame *f;
+	Stkext *sx;
+	uchar *fp, *sp, *ex;
+
+	DP("Prog state %d pid %d ticks %lud\n",
+		p->state, p->pid, p->ticks);
+	DP("\tpc 0x%p module %s %s\n",
+		p->R.PC, p->R.M->m->name, p->R.M->m->path);
+	sp = p->R.SP;	/* NOTE(review): the walk below still runs when DP is compiled out */
+	ex = p->R.EX;
+	while(ex != nil) {	/* one pass per stack extension, following the saved reg.EX links */
+		sx = (Stkext*)ex;
+		fp = sx->reg.tos.fu;	/* start of the frames held in this extension */
+		while(fp != sp) {	/* step frame by frame until the stack pointer is reached */
+			f = (Frame*)fp;
+			t = f->t;
+			if(t == nil)
+				t = sx->reg.TR;	/* frame carries no type: use the extension's TR */
+			fp += t->size;	/* the next frame starts t->size bytes further on */
+			DP("\tFrame 0x%p type 0x%p type size %d\n",
+				f, t, t->size);
+		}
+		ex = sx->reg.EX;	/* continue with the EX and SP saved in this extension */
+		sp = sx->reg.SP;
+	}
+}
+
+void
 xec(Prog *p)
 {
 	int op;
@@ -1683,15 +1761,26 @@
 		error(m);
 	}
 
-// print("%lux %lux %lux %lux %lux\n", (ulong)&R, R.xpc, R.FP, R.MP, R.PC);
-
+	// print("%lux %lux %lux %lux %lux\n", (uintptr)&R, R.xpc, R.FP, R.MP, R.PC);
+	showprog(p);
 	if(R.M->compiled)
 		comvec();
 	else do {
+		DP("step: %p: %s pid %d state %d %4zd %D:\tR.PC->op=0x%x R.PC->add=0x%x\n",
+			p, R.M->m->name, p->pid, p->state, R.PC-R.M->prog, R.PC, R.PC->op,
+			R.PC->add);
 		dec[R.PC->add]();
 		op = R.PC->op;
 		R.PC++;
 		optab[op]();
+		DP(" end: %p: ", p);
+		DP("%s ", R.M->m->name);
+		DP("pid %d ", p->pid);
+		DP("state %d", p->state);
+		DP(" %4zd", R.PC-R.M->prog);
+		DP(" %D:\t", R.PC);
+		DP("R.PC->op=0x%x ", R.PC->op);
+		DP("R.PC->add=0x%x\n", R.PC->add);
 	} while(--R.IC != 0);
 
 	p->R = R;
--- a/libkern/dofmt.c
+++ b/libkern/dofmt.c
@@ -512,6 +512,11 @@
 			f->flags |= FmtVLong;
 		f->flags |= FmtLong;
 		break;
+	case 'z':
+		f->flags |= FmtLong;
+		if(sizeof(intptr) == sizeof(vlong))
+			f->flags |= FmtVLong;
+		break;
 	}
 	return 1;
 }
--- a/libkern/fcallfmt.c
+++ b/libkern/fcallfmt.c
@@ -18,7 +18,7 @@
 static void fdirconv(char*, char*, Dir*);
 static char *qidtype(char*, uchar);
 
-#define	QIDFMT	"(%.16llux %lud %s)"
+#define	QIDFMT	"(%.16llux %ud %s)"
 
 int
 fcallfmt(Fmt *fmt)
@@ -191,8 +191,8 @@
 	char tmp[16];
 
 	seprint(buf, e, "'%s' '%s' '%s' '%s' "
-		"q " QIDFMT " m %#luo "
-		"at %ld mt %ld l %lld "
+		"q " QIDFMT " m %#uo "
+		"at %d mt %d l %lld "
 		"t %d d %d",
 			d->name, d->uid, d->gid, d->muid,
 			d->qid.path, d->qid.vers, qidtype(tmp, d->qid.type), d->mode,
--- a/libkern/fmt.c
+++ b/libkern/fmt.c
@@ -55,6 +55,7 @@
 	's',	_strfmt,
 	'u',	_flagfmt,
 	'x',	_ifmt,
+	'z',	_flagfmt,
 	0,	nil,
 };
 
--- /dev/null
+++ b/libkern/getfcr-amd64.s
@@ -1,0 +1,38 @@
+
+TEXT	setfcr(SB), $4	/* $4: 4-byte frame, used as the scratch slot at 0(SP) */
+	XORL	$(0x3F<<7),RARG	/* bits are cleared in csr to enable them */
+	ANDL	$0xFFC0, RARG	/* just the fcr bits */
+	WAIT	/* is this needed? */
+	STMXCSR	0(SP)	/* store the current MXCSR into the scratch slot */
+	MOVL	0(SP), AX
+	ANDL	$~0x3F, AX	/* NOTE(review): clears bits 0-5 and keeps the old 0xFFC0 bits, so the ORL below can only add bits - confirm intended */
+	ORL	RARG, AX
+	MOVL	AX, 0(SP)
+	LDMXCSR	0(SP)	/* load the merged value back into MXCSR */
+	RET
+
+TEXT	getfcr(SB), $4	/* $4: 4-byte frame, used as the scratch slot at 0(SP) */
+	WAIT
+	STMXCSR	0(SP)	/* store the current MXCSR into the scratch slot */
+	MOVWLZX	0(SP), AX	/* zero-extend its low 16 bits into AX */
+	ANDL	$0xFFC0, AX	/* keep only the 0xFFC0 bits */
+	XORL	$(0x3F<<7),AX	/* flip bits 7-12, the same flip setfcr applies on the way in */
+	RET
+
+TEXT	getfsr(SB), $4	/* $4: 4-byte frame, used as the scratch slot at 0(SP) */
+	WAIT
+	STMXCSR	0(SP)	/* store the current MXCSR into the scratch slot */
+	MOVL	0(SP), AX
+	ANDL	$0x3F, AX	/* result is just the low 6 bits */
+	RET
+
+TEXT	setfsr(SB), $4	/* $4: 4-byte frame, used as the scratch slot at 0(SP) */
+	ANDL	$0x3F, RARG	/* only the low 6 bits of the argument are used */
+	WAIT
+	STMXCSR	0(SP)	/* store the current MXCSR into the scratch slot */
+	MOVL	0(SP), AX
+	ANDL	$~0x3F, AX	/* drop the old low 6 bits, keep everything else */
+	ORL	RARG, AX	/* insert the new low 6 bits */
+	MOVL	AX, 0(SP)
+	LDMXCSR	0(SP)	/* load the merged value back into MXCSR */
+	RET
--- /dev/null
+++ b/libkern/memmove-amd64.s
@@ -1,0 +1,81 @@
+TEXT memmove(SB), $0
+	MOVQ	RARG, DI		/* DI = p1, the destination */
+	MOVQ	DI, AX			/* return value */
+	MOVQ	p2+8(FP), SI		/* SI = p2, the source */
+	MOVL	n+16(FP), BX		/* BX = n, loaded as a 32-bit value */
+	CMPL	BX, $0
+	JGT	_ok
+	JEQ	_return			/* nothing to do if n == 0 */
+	MOVL	$0, SI			/* fault if n < 0 */
+
+/*
+ * check and set for backwards:
+ *	(p2 < p1) && ((p2+n) > p1)
+ */
+_ok:
+	CMPQ	SI, DI
+	JGT	_forward
+	JEQ	_return			/* nothing to do if p2 == p1 */
+	MOVQ	SI, DX
+	ADDQ	BX, DX			/* DX = p2+n */
+	CMPQ	DX, DI
+	JGT	_back
+
+/*
+ * copy whole longs if aligned
+ */
+_forward:
+	CLD
+	MOVQ	SI, DX
+	ORQ	DI, DX
+	ANDL	$3, DX			/* low two bits of (p1|p2): nonzero if either is not 4-byte aligned */
+	JNE	c3f
+	MOVQ	BX, CX
+	SHRQ	$2, CX			/* CX = n/4, the number of whole longs */
+	ANDL	$3, BX			/* BX = n%4, the leftover bytes */
+	REP;	MOVSL
+
+/*
+ * copy the rest, by bytes
+ */
+	JEQ	_return			/* flags set by above ANDL */
+c3f:
+	MOVL	BX, CX
+	REP;	MOVSB
+
+	RET
+
+/*
+ * whole thing backwards has
+ * adjusted addresses
+ */
+_back:
+	ADDQ	BX, DI
+	ADDQ	BX, SI
+	STD
+	SUBQ	$4, DI			/* step back to the last long of each buffer */
+	SUBQ	$4, SI
+/*
+ * copy whole longs, if aligned
+ */
+	MOVQ	DI, DX
+	ORQ	SI, DX
+	ANDL	$3, DX			/* same alignment test as the forward case */
+	JNE	c3b
+	MOVL	BX, CX
+	SHRQ	$2, CX			/* CX = n/4, the number of whole longs */
+	ANDL	$3, BX			/* BX = n%4, the leftover bytes */
+	REP;	MOVSL
+/*
+ * copy the rest, by bytes
+ */
+	JEQ	_return			/* flags set by above ANDL */
+
+c3b:
+	ADDQ	$3, DI			/* move from the last long to the last byte */
+	ADDQ	$3, SI
+	MOVL	BX, CX
+	REP;	MOVSB
+
+_return:
+	RET
--- /dev/null
+++ b/libkern/memset-amd64.s
@@ -1,0 +1,41 @@
+	TEXT	memset(SB),$0
+
+	CLD
+	MOVQ	RARG, DI	/* DI = destination pointer */
+	MOVBLZX	c+8(FP), AX	/* AX = fill byte c, zero-extended */
+	MOVL	n+16(FP), BX	/* BX = n, loaded as a 32-bit value */
+/*
+ * if not enough bytes, just set bytes
+ */
+	CMPL	BX, $9
+	JLS	c3
+/*
+ * if not aligned, just set bytes
+ */
+	MOVQ	RARG, CX
+	ANDL	$3,CX	/* low two bits of the destination address */
+	JNE	c3
+/*
+ * build word in AX
+ */
+	MOVB	AL, AH	/* AX low 16 bits = c repeated twice */
+	MOVL	AX, CX
+	SHLL	$16, CX
+	ORL	CX, AX	/* AX = c repeated in all four bytes */
+/*
+ * set whole longs
+ */
+c1:
+	MOVQ	BX, CX
+	SHRQ	$2, CX	/* CX = n/4, the number of whole longs */
+	ANDL	$3, BX	/* BX = n%4, the leftover bytes */
+	REP;	STOSL
+/*
+ * set the rest, by bytes
+ */
+c3:
+	MOVL	BX, CX
+	REP;	STOSB
+ret:
+	MOVQ	RARG,AX	/* return the original destination pointer */
+	RET
--- /dev/null
+++ b/libkern/mkfile-amd64
@@ -1,0 +1,10 @@
+#
+#	amd64-specific files
+#
+TARGFILES=\
+	frexp-386.$O\
+	getfcr-amd64.$O\
+	memmove-amd64.$O\
+	memset-amd64.$O\
+	nan-386.$O\
+	strchr-amd64.$O\
--- /dev/null
+++ b/libkern/strchr-amd64.s
@@ -1,0 +1,38 @@
+	TEXT	strchr(SB), $0
+
+	MOVQ	RARG, DI	/* DI = string pointer */
+	MOVB	c+8(FP), AX	/* AX = character c to look for */
+	CMPB	AX, $0
+	JEQ	l2	/* c is the terminator: handled by the scan at l2 */
+
+/*
+ * char is not null
+ */
+l1:
+	MOVB	(DI), BX	/* BX = current byte of the string */
+	CMPB	BX, $0
+	JEQ	ret0	/* reached the terminator without a match */
+	ADDQ	$1, DI
+	CMPB	AX, BX
+	JNE	l1
+
+	MOVQ	DI, AX
+	SUBQ	$1, AX	/* DI was already advanced: the match is one byte back */
+	RET
+
+/*
+ * char is null
+ */
+l2:
+	MOVQ	$-1, CX	/* CX = -1: the scan count does not limit the search */
+	CLD
+
+	REPN;	SCASB	/* scan forward for the byte in AL, which is 0 here */
+
+	MOVQ	DI, AX
+	SUBQ	$1, AX	/* DI stops one past the match: step back onto the terminator */
+	RET
+
+ret0:
+	MOVQ	$0, AX	/* not found: return 0 */
+	RET
--- a/liblogfs/clunk.c
+++ b/liblogfs/clunk.c
@@ -4,7 +4,7 @@
 #include "local.h"
 
 char *
-logfsserverclunk(LogfsServer *server, u32int fid)
+logfsserverclunk(LogfsServer *server, u32 fid)
 {
 	Fid *f;
 	if(server->trace > 1)
--- a/liblogfs/create.c
+++ b/liblogfs/create.c
@@ -3,7 +3,7 @@
 #include "local.h"
 
 char *
-logfsservercreate(LogfsServer *server, u32int fid, char *name, u32int perm, uchar mode, Qid *qid)
+logfsservercreate(LogfsServer *server, u32 fid, char *name, u32 perm, uchar mode, Qid *qid)
 {
 	Fid *f;
 	char *uid;
--- a/liblogfs/dump.c
+++ b/liblogfs/dump.c
@@ -4,8 +4,8 @@
 #include "fcall.h"
 
 typedef struct WalkState {
-	u32int *flashaddrp;
-	u32int *lengthp;
+	u32 *flashaddrp;
+	u32 *lengthp;
 	int i;
 	int nth;
 } WalkState;
@@ -25,7 +25,7 @@
 }
 
 char *
-logfsserverreadpathextent(LogfsServer *server, u32int path, int nth, u32int *flashaddrp, u32int *lengthp,
+logfsserverreadpathextent(LogfsServer *server, u32 path, int nth, u32 *flashaddrp, u32 *lengthp,
 	long *blockp, int *pagep, int *offsetp)
 {
 	Entry *e;
--- a/liblogfs/extentlist.c
+++ b/liblogfs/extentlist.c
@@ -163,7 +163,7 @@
 logfsextentlistmatch(ExtentList *l, Extent *e)
 {
 	ExtentNode *m;
-	u32int flashmax;
+	u32 flashmax;
 
 	if(l == nil)
 		return nil;
@@ -171,7 +171,7 @@
 	flashmax = e->flashaddr + (e->max - e->min);
 
 	for(m = l->head; m; m = m->next) {
-		u32int l = m->e.max - m->e.min;
+		u32 l = m->e.max - m->e.min;
 		if(e->min < m->e.max && m->e.min < e->max	/* they intersect */
 			&& m->e.flashaddr < flashmax && e->flashaddr < m->e.flashaddr + l) /* the store intersects */
 			return &(m->e);
@@ -183,7 +183,7 @@
 logfsextentlistmatchall(ExtentList *l, int (*func)(void *magic, Extent *), void *magic, Extent *e)
 {
 	ExtentNode *m;
-	u32int flashmax;
+	u32 flashmax;
 
 	if(l == nil)
 		return 1;
@@ -191,7 +191,7 @@
 	flashmax = e->flashaddr + (e->max - e->min);
 
 	for(m = l->head; m; m = m->next) {
-		u32int l;
+		u32 l;
 		if(m->e.min >= e->max)
 			return 1;
 		l = m->e.max - m->e.min;
@@ -210,7 +210,7 @@
 logfsextentlistwalk(ExtentList *l, int (*func)(void *magic, Extent *, int hole), void *magic)
 {
 	ExtentNode *n;
-	u32int last = 0;
+	u32 last = 0;
 	if(l == nil)
 		return 1;
 	for(n = l->head; n; n = n->next) {
@@ -233,10 +233,10 @@
 }
 
 int
-logfsextentlistwalkrange(ExtentList *l, int (*func)(void *magic, u32int baseoffset, u32int limitoffset, Extent *, u32int extentoffset), void *magic, u32int base, u32int limit)
+logfsextentlistwalkrange(ExtentList *l, int (*func)(void *magic, u32 baseoffset, u32 limitoffset, Extent *, u32 extentoffset), void *magic, u32 base, u32 limit)
 {
 	ExtentNode *n;
-	u32int last = 0;
+	u32 last = 0;
 	if(l == nil)
 		return 1;
 	for(n = l->head; n; n = n->next) {
--- a/liblogfs/local.h
+++ b/liblogfs/local.h
@@ -34,8 +34,8 @@
 };
 
 struct Extent {
-	u32int min, max;
-	u32int flashaddr;		/* encode block index, page number, and offset within page to min */
+	u32 min, max;
+	u32 flashaddr;		/* encode block index, page number, and offset within page to min */
 };
 
 char *logfsextentlistnew(ExtentList **);
@@ -44,8 +44,8 @@
 int logfsextentlistwalk(ExtentList *, int (*)(void *, Extent *, int),void *);
 Extent *logfsextentlistmatch(ExtentList *, Extent *);
 int logfsextentlistwalkrange(ExtentList *,
-	int (*)(void *, u32int, u32int, Extent *, u32int),
-	void *, u32int, u32int);
+	int (*)(void *, u32, u32, Extent *, u32),
+	void *, u32, u32);
 int logfsextentlistmatchall(ExtentList *, int (*)(void *, Extent *), void *, Extent *);
 void logfsextentlistreset(ExtentList *);
 
@@ -59,7 +59,7 @@
 	char *gid;
 	ulong mtime;
 	char *muid;
-	u32int perm;
+	u32 perm;
 	struct Entry *next;
 	struct {
 		struct {
@@ -73,9 +73,9 @@
 	} u;
 };
 
-char *logfsentrynew(LogfsServer *, int, u32int, Entry *,
+char *logfsentrynew(LogfsServer *, int, u32, Entry *,
 	char *, char *, char *,
-	u32int, char *, u32int, ulong, ulong, Entry **);
+	u32, char *, u32, ulong, ulong, Entry **);
 void logfsentryclunk(Entry *);
 
 void logfsdrsfree(DirReadState **);
@@ -158,44 +158,44 @@
 
 struct LogMessage {
 	uchar type;
-	u32int path;
+	u32 path;
 	union {
 		struct {
-			u32int nerase;
+			u32 nerase;
 		} start;
 		struct {
-			u32int perm;
-			u32int newpath;
-			u32int mtime;
-			u32int cvers;
+			u32 perm;
+			u32 newpath;
+			u32 mtime;
+			u32 cvers;
 			char *name;
 			char *uid;
 			char *gid;
 		} create;
 		struct {
-			u32int mtime;
+			u32 mtime;
 			char *muid;
 		} remove;
 		struct {
-			u32int mtime;
-			u32int cvers;
+			u32 mtime;
+			u32 cvers;
 			char *muid;
 		} trunc;
 		struct {
-			u32int offset;
-			u32int count;
-			u32int mtime;
-			u32int cvers;
+			u32 offset;
+			u32 count;
+			u32 mtime;
+			u32 cvers;
 			char *muid;
-			u32int flashaddr;
+			u32 flashaddr;
 			uchar *data;
 		} write;
 		struct {
 			char *name;
-			u32int perm;
+			u32 perm;
 			char *uid;
 			char *gid;
-			u32int mtime;
+			u32 mtime;
 			char *muid;
 		} wstat;
 	} u;
@@ -221,10 +221,10 @@
 void logfslogsegmentfree(LogSegment **);
 char *logfslogbytes(LogfsServer *, int, uchar *, uint);
 char *logfslog(LogfsServer *, int, LogMessage *);
-char *logfslogwrite(LogfsServer *, int, u32int, u32int, int, u32int,
-	u32int, char *, uchar *, u32int *);
+char *logfslogwrite(LogfsServer *, int, u32, u32, int, u32,
+	u32, char *, uchar *, u32 *);
 char *logfslogsegmentflush(LogfsServer *, int);
-int lognicesizeforwrite(LogfsServer *, int, u32int, int);
+int lognicesizeforwrite(LogfsServer *, int, u32, int);
 char *logfsscan(LogfsServer *);
 
 struct DataBlock {
@@ -272,7 +272,7 @@
 int logfshashulong(void *, int);
 
 int logfsuserpermcheck(LogfsServer *, Entry *, Fid *, ulong);
-u32int logfsflattenentry(LogfsIdentityStore *, uchar *, u32int, Entry *);
+u32 logfsflattenentry(LogfsIdentityStore *, uchar *, u32, Entry *);
 char *logfsreplay(LogfsServer *, LogSegment *, int);
 void logfsreplayfinddata(LogfsServer *);
 
@@ -287,11 +287,11 @@
 #define loggensucc(g) gensucc((g), L2LogSweeps)
 
 int logfsunconditionallymarkfreeanddirty(void *, Extent *, int);
-void logfsflashaddr2spo(LogfsServer *, u32int, long *, int *, int *);
+void logfsflashaddr2spo(LogfsServer *, u32, long *, int *, int *);
 int logfsgn(uchar **, uchar *, char **);
-u32int logfsspo2flashaddr(LogfsServer *, long, int, int);
+u32 logfsspo2flashaddr(LogfsServer *, long, int, int);
 int logfsgn(uchar **, uchar *, char **);
-void logfsflashaddr2o(LogfsServer *, u32int, int *);
+void logfsflashaddr2o(LogfsServer *, u32, int *);
 void logfsfreedatapages(LogfsServer *, long, Pageset);
 void logfsfreeanddirtydatablockcheck(LogfsServer *, long);
 
--- a/liblogfs/log.c
+++ b/liblogfs/log.c
@@ -4,7 +4,7 @@
 #include "fcall.h"
 
 void
-logfsflashaddr2spo(LogfsServer *server, u32int flashaddr, long *seq, int *page, int *offset)
+logfsflashaddr2spo(LogfsServer *server, u32 flashaddr, long *seq, int *page, int *offset)
 {
 	LogfsLowLevel *ll = server->ll;
 	flashaddr &= ~LogAddr;
@@ -15,7 +15,7 @@
 	*seq = flashaddr;
 }
 
-u32int
+u32
 logfsspo2flashaddr(LogfsServer *server, long seq, int page, int offset)
 {
 //print("logfsspo2flashaddr(%ld, %d, %d)\n", seq, page, offset);
@@ -23,7 +23,7 @@
 }
 
 void
-logfsflashaddr2o(LogfsServer *server, u32int flashaddr, int *offset)
+logfsflashaddr2o(LogfsServer *server, u32 flashaddr, int *offset)
 {
 	LogfsLowLevel *ll = server->ll;
 	flashaddr &= ~LogAddr;
@@ -95,7 +95,7 @@
 }
 
 static char *
-logspace(LogfsServer *server, int active, int takearisk, int nbytes, uchar **where, u32int *flashaddr)
+logspace(LogfsServer *server, int active, int takearisk, int nbytes, uchar **where, u32 *flashaddr)
 {
 	char *errmsg;
 	LogfsLowLevel *ll = server->ll;
@@ -213,7 +213,7 @@
 }
 
 int
-lognicesizeforwrite(LogfsServer *server, int active, u32int count, int muidlen)
+lognicesizeforwrite(LogfsServer *server, int active, u32 count, int muidlen)
 {
 	int rawspace;
 	LogSegment *seg;
@@ -229,8 +229,8 @@
 }
 
 char *
-logfslogwrite(LogfsServer *server, int active, u32int path, u32int offset, int count, u32int mtime, u32int cvers,
-	char *muid, uchar *data, u32int *flashaddr)
+logfslogwrite(LogfsServer *server, int active, u32 path, u32 offset, int count, u32 mtime, u32 cvers,
+	char *muid, uchar *data, u32 *flashaddr)
 {
 	/* 'w' size[2] path[4] offset[4] count[2] mtime[4] cvers[4] muid[s] flashaddr[4] [data[n]] */
 	LogMessage s;
@@ -237,7 +237,7 @@
 	uint size;
 	char *errmsg;
 	uchar *p;
-	u32int faddr;
+	u32 faddr;
 	uint asize;
 
 	s.type = LogfsLogTwrite;
--- a/liblogfs/open.c
+++ b/liblogfs/open.c
@@ -3,7 +3,7 @@
 #include "local.h"
 
 char *
-logfsserveropen(LogfsServer *server, u32int fid, uchar mode, Qid *qid)
+logfsserveropen(LogfsServer *server, u32 fid, uchar mode, Qid *qid)
 {
 	Fid *f;
 	Entry *e;
--- a/liblogfs/read.c
+++ b/liblogfs/read.c
@@ -4,25 +4,25 @@
 #include "fcall.h"
 
 struct DirReadState {
-	u32int offset;
-	u32int lastoffset;
-	u32int limit;
+	u32 offset;
+	u32 lastoffset;
+	u32 limit;
 	uchar *data;
 };
 
 typedef struct ReaderState {
 	uchar *buf;
-	u32int maxoffset;
+	u32 maxoffset;
 	LogfsServer *server;
 	char *errmsg;
 } ReaderState;
 
 static DirReadState *
-drsinit(LogfsIdentityStore *is, Entry *list, uchar *buf, u32int buflen, u32int *rcount)
+drsinit(LogfsIdentityStore *is, Entry *list, uchar *buf, u32 buflen, u32 *rcount)
 {
 	Entry *p, *q;
 	DirReadState *drs;
-	u32int k;
+	u32 k;
 	/*
 	 * stash as many entries as will fit in the read buffer
 	 */
@@ -44,7 +44,7 @@
 	for(q = p; q; q = q->next)
 		k += logfsflattenentry(is, nil, 0, q);
 	if(k) {
-		u32int k2;
+		u32 k2;
 //		print("drsinit: %ud bytes extra\n", k);
 		drs->data = logfsrealloc(nil, k);
 		if(drs->data == nil) {
@@ -61,7 +61,7 @@
 }
 
 static void
-drsread(DirReadState *drs, uchar *buf, u32int buflen, u32int *rcount)
+drsread(DirReadState *drs, uchar *buf, u32 buflen, u32 *rcount)
 {
 	uchar *p;
 	*rcount = 0;
@@ -98,7 +98,7 @@
 }
 
 static int
-reader(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
+reader(void *magic, u32 baseoffset, u32 limitoffset, Extent *e, u32 extentoffset)
 {
 	ReaderState *s = magic;
 	LogfsServer *server;
@@ -175,7 +175,7 @@
 	pagesize = 1 << ll->l2pagesize;
 	replace = 0;
 	while(baseoffset < limitoffset) {
-		u32int thistime;
+		u32 thistime;
 		thistime = pagesize - offset;
 		if(thistime > (limitoffset - baseoffset))
 			thistime = limitoffset - baseoffset;
@@ -202,7 +202,7 @@
 }
 
 char *
-logfsserverread(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int buflen, u32int *rcount)
+logfsserverread(LogfsServer *server, u32 fid, u32 offset, u32 count, uchar *buf, u32 buflen, u32 *rcount)
 {
 	Fid *f;
 	Entry *e;
--- a/liblogfs/remove.c
+++ b/liblogfs/remove.c
@@ -69,7 +69,7 @@
 }
 
 char *
-logfsserverremove(LogfsServer *server, u32int fid)
+logfsserverremove(LogfsServer *server, u32 fid)
 {
 	Fid *f;
 	char *errmsg;
--- a/liblogfs/srv.c
+++ b/liblogfs/srv.c
@@ -50,8 +50,8 @@
 }
 
 char *
-logfsentrynew(LogfsServer *server, int inuse, u32int path, Entry *parent, char *name, char *uid, char *gid,
-u32int mtime, char *muid, u32int perm, ulong cvers, ulong length, Entry **ep)
+logfsentrynew(LogfsServer *server, int inuse, u32 path, Entry *parent, char *name, char *uid, char *gid,
+u32 mtime, char *muid, u32 perm, ulong cvers, ulong length, Entry **ep)
 {
 	Entry *e;
 	char *errmsg;
@@ -187,7 +187,7 @@
 }
 
 char *
-logfsserverattach(LogfsServer *server, u32int fid, char *uname, Qid *qid)
+logfsserverattach(LogfsServer *server, u32 fid, char *uname, Qid *qid)
 {
 	char *errmsg;
 	Fid *f;
@@ -224,8 +224,8 @@
 	*namep = name;
 }
 
-u32int
-logfsflattenentry(LogfsIdentityStore *is, uchar *buf, u32int limit, Entry *e)
+u32
+logfsflattenentry(LogfsIdentityStore *is, uchar *buf, u32 limit, Entry *e)
 {
 	int unamelen, gnamelen, munamelen, namelen;
 	uint len;
@@ -284,7 +284,7 @@
 }
 
 char *
-logfsserverstat(LogfsServer *server, u32int fid, uchar *buf, u32int bufsize, ushort *nstat)
+logfsserverstat(LogfsServer *server, u32 fid, uchar *buf, u32 bufsize, ushort *nstat)
 {
 	Fid *f;
 	if(server->trace > 1)
--- a/liblogfs/sweep.c
+++ b/liblogfs/sweep.c
@@ -94,7 +94,7 @@
 typedef struct FixupState {
 	LogfsServer *server;
 	int oldoffset;
-	u32int newflashaddr;
+	u32 newflashaddr;
 } FixupState;
 
 static int
--- a/liblogfs/walk.c
+++ b/liblogfs/walk.c
@@ -4,7 +4,7 @@
 #include "local.h"
 
 char *
-logfsserverwalk(LogfsServer *server, u32int fid, u32int newfid, ushort nwname, char **wname, ushort *nwqid, Qid *wqid)
+logfsserverwalk(LogfsServer *server, u32 fid, u32 newfid, ushort nwname, char **wname, ushort *nwqid, Qid *wqid)
 {
 	ushort i;
 	Entry *e;
--- a/liblogfs/write.c
+++ b/liblogfs/write.c
@@ -104,14 +104,14 @@
 }
 
 static char *
-allocdatapages(LogfsServer *server, u32int count, int *countp, long *blockindexp, int *pagep, u32int *flashaddr, AllocState *state)
+allocdatapages(LogfsServer *server, u32 count, int *countp, long *blockindexp, int *pagep, u32 *flashaddr, AllocState *state)
 {
 	LogfsLowLevel *ll = server->ll;
 	long b, blockindex;
 	DataBlock *db;
 	int pagebase;
-	u32int pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
-	u32int gapmask;
+	u32 pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
+	u32 gapmask;
 	long bestfreeblockindex;
 	int bestfree;
 	int pagesperblock = 1 << ll->l2pagesperblock;
@@ -255,7 +255,7 @@
 }
 
 typedef struct Page {
-	u32int pageaddr;
+	u32 pageaddr;
 	int ref;
 } Page;
 
@@ -267,7 +267,7 @@
 } DataStructure;
 
 static int
-deltapage(DataStructure *ds, u32int pageaddr, int add, int delta)
+deltapage(DataStructure *ds, u32 pageaddr, int add, int delta)
 {
 	int i;
 	for(i = 0; i < ds->nentries; i++)
@@ -299,12 +299,12 @@
  * only called for data addresses
  */
 static int
-deltapages(DataStructure *ds, LogfsLowLevel *ll, u32int baseflashaddr, int range, int add, int delta)
+deltapages(DataStructure *ds, LogfsLowLevel *ll, u32 baseflashaddr, int range, int add, int delta)
 {
 	long seq;
 	int page, offset;
 	int pages;
-	u32int pageaddr;
+	u32 pageaddr;
 	int x;
 
 //print("deltapages(%ud, %ud, %d, %d)\n", baseflashaddr, limitflashaddr, add, delta);
@@ -318,13 +318,13 @@
 }
 
 static int
-findpageset(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
+findpageset(void *magic, u32 baseoffset, u32 limitoffset, Extent *e, u32 extentoffset)
 {
 	DataStructure *ds = magic;
 	LogfsLowLevel *ll;
-	u32int flashaddr;
-	u32int range;
-	u32int residue;
+	u32 flashaddr;
+	u32 range;
+	u32 residue;
 
 	if(e == nil || (e->flashaddr & LogAddr) != 0)
 		return 1;
@@ -357,7 +357,7 @@
 }
 
 static char *
-zappages(LogfsServer *server, Entry *e, u32int min, u32int max)
+zappages(LogfsServer *server, Entry *e, u32 min, u32 max)
 {
 	DataStructure ds;
 	long seq;
@@ -433,11 +433,11 @@
 }
 
 char *
-logfsserverwrite(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int *rcount)
+logfsserverwrite(LogfsServer *server, u32 fid, u32 offset, u32 count, uchar *buf, u32 *rcount)
 {
 	Fid *f;
 	Entry *e;
-	u32int now;
+	u32 now;
 	char *muid;
 	int muidlen;
 	LogfsLowLevel *ll = server->ll;
@@ -469,7 +469,7 @@
 		thistime = lognicesizeforwrite(server, 1, count, muidlen);
 		if(thistime == 0) {
 			int p;
-			u32int n;
+			u32 n;
 			long blockindex;
 			int pagebase;
 			AllocState state;
@@ -481,7 +481,7 @@
 			if(thistime == 0)
 				return logfselogfull;
 			for(p = pagebase, n = 0; n < thistime; p++, n += pagesize) {
-				u32int mask;
+				u32 mask;
 				DataBlock *db = server->datablock + blockindex;
 				errmsg = (*ll->writepage)(ll, buf + n, db->block, p);
 				if(errmsg) {
--- a/liblogfs/wstat.c
+++ b/liblogfs/wstat.c
@@ -4,7 +4,7 @@
 #include "local.h"
 
 char *
-logfsserverwstat(LogfsServer *server, u32int fid, uchar *stat, ushort nstat)
+logfsserverwstat(LogfsServer *server, u32 fid, uchar *stat, ushort nstat)
 {
 	Fid *f;
 	uchar *p;
@@ -11,7 +11,7 @@
 	ushort len;
 	uchar *mep;
 	Qid qid;
-	u32int perm, mtime;
+	u32 perm, mtime;
 	uvlong length;
 	char *name, *uname, *gname, *muname;
 	int qiddonttouch, permdonttouch, mtimedonttouch, lengthdonttouch;
--- a/libmath/dtoa.c
+++ b/libmath/dtoa.c
@@ -36,11 +36,11 @@
 #define word1(x) ((FPdbleword*)&x)->lo
 #else
 #ifdef __LITTLE_ENDIAN
-#define word0(x) ((unsigned  long *)&x)[1]
-#define word1(x) ((unsigned  long *)&x)[0]
+#define word0(x) ((u32 *)&x)[1]
+#define word1(x) ((u32 *)&x)[0]
 #else
-#define word0(x) ((unsigned  long *)&x)[0]
-#define word1(x) ((unsigned  long *)&x)[1]
+#define word0(x) ((u32 *)&x)[0]
+#define word1(x) ((u32 *)&x)[1]
 #endif
 #endif
 
@@ -89,7 +89,7 @@
 Bigint {
 	struct Bigint *next;
 	int	k, maxwds, sign, wds;
-	unsigned  long x[1];
+	u32 x[1];
 };
 
 typedef struct Bigint Bigint;
@@ -107,7 +107,7 @@
 		freelist[k] = rv->next;
 	} else {
 		x = 1 << k;
-		rv = (Bigint * )malloc(sizeof(Bigint) + (x - 1) * sizeof(unsigned  long));
+		rv = (Bigint * )malloc(sizeof(Bigint) + (x - 1) * sizeof(u32));
 		if(rv == nil)
 			return nil;
 		rv->k = k;
@@ -129,15 +129,15 @@
 	}
 }
 
-#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
-y->wds*sizeof(long) + 2*sizeof(int))
+#define Bcopy(x,y) memmove((char *)&(x)->sign, (char *)&(y)->sign, \
+(y)->wds*sizeof(u32) + 2*sizeof(int))	/* copies sign, wds (two ints) plus wds u32 words of x[] */
 
 static Bigint *
 multadd(Bigint *b, int m, int a)	/* multiply by m and add a */
 {
 	int	i, wds;
-	unsigned  long * x, y;
-	unsigned  long xi, z;
+	u32 * x, y;
+	u32 xi, z;
 	Bigint * b1;
 
 	wds = b->wds;
@@ -164,11 +164,11 @@
 }
 
 static Bigint *
-s2b(const char *s, int nd0, int nd, unsigned  long y9)
+s2b(const char *s, int nd0, int nd, u32 y9)
 {
 	Bigint * b;
 	int	i, k;
-	long x, y;
+	s32 x, y;
 
 	x = (nd + 8) / 9;
 	for (k = 0, y = 1; x > y; y <<= 1, k++) 
@@ -192,7 +192,7 @@
 }
 
 static int	
-hi0bits(register unsigned  long x)
+hi0bits(register u32 x)
 {
 	register int	k = 0;
 
@@ -221,10 +221,10 @@
 }
 
 static int	
-lo0bits(unsigned  long *y)
+lo0bits(u32 *y)
 {
 	register int	k;
-	register unsigned  long x = *y;
+	register u32 x = *y;
 
 	if (x & 7) {
 		if (x & 1)
@@ -279,9 +279,9 @@
 {
 	Bigint * c;
 	int	k, wa, wb, wc;
-	unsigned  long carry, y, z;
-	unsigned  long * x, *xa, *xae, *xb, *xbe, *xc, *xc0;
-	unsigned  long z2;
+	u32 carry, y, z;
+	u32 * x, *xa, *xae, *xb, *xbe, *xc, *xc0;
+	u32 z2;
 
 	if (a->wds < b->wds) {
 		c = a;
@@ -387,7 +387,7 @@
 {
 	int	i, k1, n, n1;
 	Bigint * b1;
-	unsigned  long * x, *x1, *xe, z;
+	u32 * x, *x1, *xe, z;
 
 	n = k >> 5;
 	k1 = b->k;
@@ -421,7 +421,7 @@
 static int	
 cmp(Bigint *a, Bigint *b)
 {
-	unsigned  long * xa, *xa0, *xb, *xb0;
+	u32 * xa, *xa0, *xb, *xb0;
 	int	i, j;
 
 	i = a->wds;
@@ -446,9 +446,9 @@
 {
 	Bigint * c;
 	int	i, wa, wb;
-	long borrow, y;	/* We need signed shifts here. */
-	unsigned  long * xa, *xae, *xb, *xbe, *xc;
-	long z;
+	s32 borrow, y;	/* We need signed shifts here. */
+	u32 * xa, *xae, *xb, *xbe, *xc;
+	s32 z;
 
 	i = cmp(a, b);
 	if (!i) {
@@ -501,7 +501,7 @@
 static double	
 ulp(double x)
 {
-	register long L;
+	register s32 L;
 	double	a;
 
 	L = (word0(x) & Exp_mask) - (P - 1) * Exp_msk1;
@@ -529,7 +529,7 @@
 static double	
 b2d(Bigint *a, int *e)
 {
-	unsigned  long * xa, *xa0, w, y, z;
+	u32 * xa, *xa0, w, y, z;
 	int	k;
 	double	d;
 #define d0 word0(d)
@@ -566,7 +566,7 @@
 {
 	Bigint * b;
 	int	de, i, k;
-	unsigned  long * x, y, z;
+	u32 * x, y, z;
 #define d0 word0(d)
 #define d1 word1(d)
 
@@ -679,8 +679,8 @@
 	e, e1, esign, i, j, k, nd, nd0, nf, nz, nz0, sign;
 	const char * s, *s0, *s1;
 	double	aadj, aadj1, adj, rv, rv0;
-	long L;
-	unsigned  long y, z;
+	s32 L;
+	u32 y, z;
 	Bigint * bb, *bb1, *bd, *bd0, *bs, *delta;
 	sign = nz0 = nz = 0;
 	rv = 0.;
@@ -856,7 +856,6 @@
 	e1 += nd - k;
 
 	scale = 0;
-
 	/* Get starting approximation = rv * 10**e1 */
 
 	if (e1 > 0) {
@@ -1164,11 +1163,11 @@
 quorem(Bigint *b, Bigint *S)
 {
 	int	n;
-	long borrow, y;
-	unsigned  long carry, q, ys;
-	unsigned  long * bx, *bxe, *sx, *sxe;
-	long z;
-	unsigned  long si, zs;
+	s32 borrow, y;
+	u32 carry, q, ys;
+	u32 * bx, *bxe, *sx, *sxe;
+	s32 z;
+	u32 si, zs;
 
 	n = S->wds;
 	if (b->wds < n)
@@ -1236,9 +1235,9 @@
 {
 	int	j, k, *r;
 
-	j = sizeof(unsigned  long);
+	j = sizeof(u32);
 	for (k = 0; 
-	    sizeof(Bigint) - sizeof(unsigned  long) - sizeof(int) + j <= i; 
+	    sizeof(Bigint) - sizeof(u32) - sizeof(int) + j <= i; 
 	    j <<= 1)
 		k++;
 	r = (int * )Balloc(k);
@@ -1348,10 +1347,10 @@
 	int	bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1,
 	j, j1, k, k0, k_check, leftright, m2, m5, s2, s5,
 	spec_case, try_quick;
-	long L;
+	s32 L;
 #ifndef Sudden_Underflow
 	int	denorm;
-	unsigned  long x;
+	u32 x;
 #endif
 	Bigint * b, *b1, *delta, *mlo, *mhi, *S;
 	double	d2, ds, eps;
@@ -1363,7 +1362,6 @@
 		word0(d) &= ~Sign_bit;	/* clear sign bit */
 	} else
 		*sign = 0;
-
 	if ((word0(d) & Exp_mask) == Exp_mask) {
 		/* Infinity or NaN */
 		*decpt = 9999;
--- a/libmemdraw/alloc.c
+++ b/libmemdraw/alloc.c
@@ -22,10 +22,10 @@
 }
 
 Memimage*
-allocmemimaged(Rectangle r, u32int chan, Memdata *md)
+allocmemimaged(Rectangle r, u32 chan, Memdata *md)
 {
-	s32int d;
-	u32int l;
+	s32 d;
+	u32 l;
 	Memimage *i;
 
 	if((d = chantodepth(chan)) == 0) {
@@ -40,7 +40,7 @@
 		return nil;
 
 	i->data = md;
-	i->zero = sizeof(u32int)*l*r.min.y;
+	i->zero = sizeof(u32)*l*r.min.y;
 	
 	if(r.min.x >= 0)
 		i->zero += (r.min.x*d)/8;
@@ -61,10 +61,10 @@
 }
 
 Memimage*
-allocmemimage(Rectangle r, u32int chan)
+allocmemimage(Rectangle r, u32 chan)
 {
-	s32int d;
-	u32int l, nw;
+	s32 d;
+	u32 l, nw;
 	Memdata *md;
 	Memimage *i;
 
@@ -80,7 +80,7 @@
 		return nil;
 
 	md->ref = 1;
-	md->base = poolalloc(imagmem, 2*sizeof(intptr)+nw*sizeof(u32int));
+	md->base = poolalloc(imagmem, 2*sizeof(intptr)+nw*sizeof(u32));
 	if(md->base == nil){
 		free(md);
 		return nil;
@@ -120,10 +120,10 @@
 /*
  * Wordaddr is deprecated.
  */
-u32int*
+u32*
 u32addr(Memimage *i, Point p)
 {
-	return (u32int*) ((uintptr)byteaddr(i, p) & ~(sizeof(uintptr)-1));
+	return (u32*) ((uintptr)byteaddr(i, p) & ~(uintptr)(sizeof(u32)-1));	/* round down to a u32 boundary, not a pointer-sized one */
 }
 
 uchar*
@@ -131,7 +131,7 @@
 {
 	uchar *a;
 
-	a = i->data->bdata+i->zero+(s32int)(sizeof(u32int)*p.y*i->width);
+	a = i->data->bdata+i->zero+(s32)(sizeof(u32)*p.y*i->width);
 
 	if(i->depth < 8){
 		/*
@@ -138,7 +138,7 @@
 		 * We need to always round down,
 		 * but C rounds toward zero.
 		 */
-		s32int np;
+		s32 np;
 		np = 8/i->depth;
 		if(p.x < 0)
 			return a+(p.x-np+1)/np;
@@ -150,12 +150,12 @@
 }
 
 int
-memsetchan(Memimage *i, u32int chan)
+memsetchan(Memimage *i, u32 chan)
 {
-	s32int d;
-	s32int t, j, k;
-	u32int cc;
-	s32int bytes;
+	s32 d;
+	s32 t, j, k;
+	u32 cc;
+	s32 bytes;
 
 	if((d = chantodepth(chan)) == 0) {
 		werrstr("bad channel descriptor");
--- a/libmemdraw/draw.c
+++ b/libmemdraw/draw.c
@@ -86,9 +86,9 @@
 	return 0;
 }
 
-static u32int imgtorgba(Memimage*, u32int);
-static u32int rgbatoimg(Memimage*, u32int);
-static u32int pixelbits(Memimage*, Point);
+static u32 imgtorgba(Memimage*, u32);
+static u32 rgbatoimg(Memimage*, u32);
+static u32 pixelbits(Memimage*, Point);
 
 #define DBG if(drawdebug)
 void
@@ -388,7 +388,7 @@
 	uchar	*blu;
 	uchar	*alpha;
 	uchar	*grey;
-	u32int	*rgba;
+	u32	*rgba;
 	int	delta;	/* number of bytes to add to pointer to get next pixel to the right */
 
 	/* used by boolcalc* for mask data */
@@ -430,7 +430,7 @@
 
 	int	replcache;	/* if set, cache buffers */
 	Buffer	bcache[MAXBCACHE];
-	u32int	bfilled;
+	u32	bfilled;
 	uchar	*bufbase;
 	int	bufoff;
 	int	bufdelta;
@@ -533,7 +533,7 @@
 	p->bytey0s = byteaddr(img, Pt(img->r.min.x, img->r.min.y));
 	p->bytermin = byteaddr(img, Pt(r.min.x, img->r.min.y));
 	p->bytey0e = byteaddr(img, Pt(img->r.max.x, img->r.min.y));
-	p->bwidth = sizeof(u32int)*img->width;
+	p->bwidth = sizeof(u32)*img->width;
 
 	assert(p->bytey0s <= p->bytermin && p->bytermin <= p->bytey0e);
 
@@ -824,7 +824,7 @@
 	Buffer obdst;
 	int fd, sadelta;
 	int i, sa, ma;
-	u32int s, t;
+	u32 s, t;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
@@ -875,7 +875,7 @@
 	Buffer obdst;
 	int fs, sadelta;
 	int i, ma, da;
-	u32int s, t;
+	u32 s, t;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
@@ -928,7 +928,7 @@
 	Buffer obdst;
 	int fs, fd, sadelta;
 	int i, sa, ma, da;
-	u32int s, t, q1, q2;
+	u32 s, t, q1, q2;
 
 	obdst = bdst;
 	sadelta = bsrc.alpha == &ones ? 0 : bsrc.delta;
@@ -1002,7 +1002,7 @@
 	Buffer obdst;
 	int fd, sadelta;
 	int i, sa, ma;
-	u32int s, t, q1, q2;
+	u32 s, t, q1, q2;
 
 	USED(op);
 	obdst = bdst;
@@ -1095,7 +1095,7 @@
 	Buffer obdst;
 	int fd;
 	int i, ma;
-	u32int s, t;
+	u32 s, t;
 
 	// iprint("\t\talphacalcS");
 	USED(op);
@@ -1181,7 +1181,7 @@
 	Buffer obdst;
 	int fs, fd;
 	int i, ma, da, zero;
-	u32int s, t;
+	u32 s, t;
 
 	obdst = bdst;
 	zero = !(op&1);
@@ -1315,7 +1315,7 @@
 	uchar *repl, *r, *w, *ow, bits;
 	int i, n, sh, depth, x, dx, npack, nbits;
 
-	b.rgba = (u32int*)buf;
+	b.rgba = (u32*)buf;
 	b.grey = w = buf;
 	b.red = b.blu = b.grn = w;
 	b.alpha = &ones;
@@ -1405,7 +1405,7 @@
 writenbit(Param *p, uchar *w, Buffer src)
 {
 	uchar *r;
-	u32int bits;
+	u32 bits;
 	int i, sh, depth, npack, nbits, x, ex;
 
 	assert(src.grey != nil && src.delta == 1);
@@ -1493,7 +1493,7 @@
 		}
 	}
 
-	b.rgba = (u32int*)(buf-copyalpha);
+	b.rgba = (u32*)(buf-copyalpha);
 
 	if(convgrey){
 		b.grey = buf;
@@ -1536,7 +1536,7 @@
 	int dx, isgrey, convgrey, alphaonly, copyalpha, i, nb;
 	uchar *begin, *end, *r, *w, *rrepl, *grepl, *brepl, *arepl, *krepl;
 	uchar ured, ugrn, ublu;
-	u32int u;
+	u32 u;
 
 	img = p->img;
 	begin = p->bytey0s + y*p->bwidth;
@@ -1560,7 +1560,7 @@
 			memmove(buf, r, dx*nb);
 			r = buf;
 		}
-		b.rgba = (u32int*)r;
+		b.rgba = (u32*)r;
 		if(copyalpha)
 			b.alpha = r+img->shift[CAlpha]/8;
 		else
@@ -1613,7 +1613,7 @@
 	}
 	
 	b.alpha = copyalpha ? buf : &ones;
-	b.rgba = (u32int*)buf;
+	b.rgba = (u32*)buf;
 	if(alphaonly){
 		b.red = b.grn = b.blu = b.grey = nil;
 		if(!copyalpha)
@@ -1642,7 +1642,7 @@
 	Memimage *img;
 	int i, isalpha, isgrey, nb, delta, dx, adelta;
 	uchar ff, *red, *grn, *blu, *grey, *alpha;
-	u32int u, mask;
+	u32 u, mask;
 
 	img = p->img;
 
@@ -1748,7 +1748,7 @@
 	q = p->bytermin + y*p->bwidth;
 	b.red = q;	/* ptr to data */
 	b.grn = b.blu = b.grey = b.alpha = nil;
-	b.rgba = (u32int*)q;
+	b.rgba = (u32*)q;
 	b.delta = p->img->depth/8;
 	return b;
 }
@@ -1827,13 +1827,13 @@
 boolcopy32(Buffer bdst, Buffer bsrc, Buffer bmask, int dx, int i, int o)
 {
 	uchar *m;
-	u32int *r, *w, *ew;
+	u32 *r, *w, *ew;
 
 	USED(i);
 	USED(o);
 	m = bmask.grey;
-	w = (u32int*)bdst.red;
-	r = (u32int*)bsrc.red;
+	w = (u32*)bdst.red;
+	r = (u32*)bsrc.red;
 	ew = w+dx;
 	for(; w < ew; w++,r++)
 		if(*m++)
@@ -1865,7 +1865,7 @@
 
 	b.red = buf;
 	b.blu = b.grn = b.grey = b.alpha = nil;
-	b.rgba = (u32int*)buf;
+	b.rgba = (u32*)buf;
 	b.delta = 0;
 	
 	return b;
@@ -1902,11 +1902,11 @@
 	return genconv;
 }
 
-static u32int
+static u32
 pixelbits(Memimage *i, Point pt)
 {
 	uchar *p;
-	u32int val;
+	u32 val;
 	int off, bpp, npack;
 
 	val = 0;
@@ -1987,9 +1987,9 @@
 }
 
 static void
-memset32(void *vp, u32int val, int n)
+memset32(void *vp, u32 val, int n)
 {
-	u32int *p, *ep;
+	u32 *p, *ep;
 
 	p = vp;
 	ep = p+n;
@@ -1998,7 +1998,7 @@
 }
 
 static void
-memset24(void *vp, u32int val, int n)
+memset24(void *vp, u32 val, int n)
 {
 	uchar *p, *ep;
 	uchar a,b,c;
@@ -2015,12 +2015,12 @@
 	}
 }
 
-static u32int
-imgtorgba(Memimage *img, u32int val)
+static u32
+imgtorgba(Memimage *img, u32 val)
 {
 	uchar r, g, b, a;
 	int nb, ov, v;
-	u32int chan;
+	u32 chan;
 	uchar *p;
 
 	a = 0xFF;
@@ -2063,12 +2063,12 @@
 	return (r<<24)|(g<<16)|(b<<8)|a;	
 }
 
-static u32int
-rgbatoimg(Memimage *img, u32int rgba)
+static u32
+rgbatoimg(Memimage *img, u32 rgba)
 {
-	u32int chan;
+	u32 chan;
 	int d, nb;
-	u32int v;
+	u32 v;
 	uchar *p, r, g, b, a, m;
 
 	v = 0;
@@ -2112,8 +2112,8 @@
 static int
 memoptdraw(Memdrawparam *par)
 {
-	s32int m, y, dy, dx, op;
-	u32int v;
+	s32 m, y, dy, dx, op;
+	u32 v;
 	Memimage *src;
 	Memimage *dst;
 
@@ -2131,12 +2131,12 @@
 	m = Simplesrc|Simplemask|Fullmask;
 	if((par->state&m)==m && (par->srgba&0xFF) == 0xFF && (op ==S || op == SoverD)){
 		uchar *dp, p[4];
-		s32int d, dwid, ppb, np, nb;
+		s32 d, dwid, ppb, np, nb;
 		uchar lm, rm;
 
 DBG print("Simplesrc|Simplemask|Fullmask\n");
 DBG print("memopt, dst %p, dst->data->bdata %p\n", dst, dst->data->bdata);
-		dwid = dst->width*sizeof(u32int);
+		dwid = dst->width*sizeof(u32);
 		dp = byteaddr(dst, par->r.min);
 		v = par->sdval;
 DBG print("sdval %ud, depth %d\n", v, dst->depth);
@@ -2219,7 +2219,7 @@
 			p[1] = v>>8;
 			p[2] = v>>16;
 			p[3] = v>>24;
-			v = *(u32int*)p;
+			v = *(u32*)p;
 			for(y=0; y<dy; y++, dp+=dwid)
 				memset32(dp, v, dx);
 			return 1;
@@ -2237,8 +2237,8 @@
 	if((par->state&(m|Replsrc))==m && src->depth >= 8 
 	&& src->chan == dst->chan && (op == S || (op == SoverD && !(src->flags&Falpha)))){
 		uchar *sp, *dp;
-		s32int swid, dwid, nb;
-		u32int dir;
+		s32 swid, dwid, nb;
+		u32 dir;
 
 DBG print("Simplemask|Fullmask\n");
 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min))
@@ -2246,8 +2246,8 @@
 		else
 			dir = 1;
 
-		swid = src->width*sizeof(u32int);
-		dwid = dst->width*sizeof(u32int);
+		swid = src->width*sizeof(u32);
+		dwid = dst->width*sizeof(u32);
 		sp = byteaddr(src, par->sr.min);
 		dp = byteaddr(dst, par->r.min);
 		if(dir == -1){
@@ -2279,16 +2279,16 @@
 	&& (par->r.min.x&7)==(par->sr.min.x&7) && (par->r.min.x&7)==(par->mr.min.x&7)){
 		uchar *sp, *dp, *mp;
 		uchar lm, rm;
-		s32int swid, dwid, mwid;
-		s32int i, x, dir;
+		s32 swid, dwid, mwid;
+		s32 i, x, dir;
 
 DBG print("Simplemask|Simplesrc|Replmask|Replsrc");
 		sp = byteaddr(src, par->sr.min);
 		dp = byteaddr(dst, par->r.min);
 		mp = byteaddr(par->mask, par->mr.min);
-		swid = src->width*sizeof(u32int);
-		dwid = dst->width*sizeof(u32int);
-		mwid = par->mask->width*sizeof(u32int);
+		swid = src->width*sizeof(u32);
+		dwid = dst->width*sizeof(u32);
+		mwid = par->mask->width*sizeof(u32);
 
 		if(src->data == dst->data && byteaddr(dst, par->r.min) > byteaddr(src, par->sr.min)){
 			dir = -1;
@@ -2377,12 +2377,12 @@
 static int
 chardraw(Memdrawparam *par)
 {
-	u32int bits;
+	u32 bits;
 	int i, ddepth, dy, dx, x, bx, ex, y, npack, bsh, depth, op;
-	u32int v, maskwid, dstwid;
+	u32 v, maskwid, dstwid;
 	uchar *wp, *rp, *q, *wc;
 	ushort *ws;
-	u32int *wl;
+	u32 *wl;
 	uchar sp[4];
 	Rectangle r, mr;
 	Memimage *mask, *src, *dst;
@@ -2406,13 +2406,13 @@
 //if(drawdebug) iprint("chardraw...");
 
 	depth = mask->depth;
-	maskwid = mask->width*sizeof(u32int);
+	maskwid = mask->width*sizeof(u32);
 	rp = byteaddr(mask, mr.min);
 	npack = 8/depth;
 	bsh = (mr.min.x % npack) * depth;
 
 	wp = byteaddr(dst, r.min);
-	dstwid = dst->width*sizeof(u32int);
+	dstwid = dst->width*sizeof(u32);
 DBG print("bsh %d\n", bsh);
 	dy = Dy(r);
 	dx = Dx(r);
@@ -2484,8 +2484,8 @@
 			}
 			break;
 		case 32:
-			wl = (u32int*)wp;
-			v = *(u32int*)sp;
+			wl = (u32*)wp;
+			v = *(u32*)sp;
 			for(x=bx; x>ex; x--, wl++){
 				i = x&7;
 				if(i == 8-1)
@@ -2510,7 +2510,7 @@
  *
  * This code is just plain wrong for >8bpp.
  *
-u32int
+u32
 membyteval(Memimage *src)
 {
 	int i, val, bpp;
@@ -2530,9 +2530,9 @@
  */
 
 void
-memfillcolor(Memimage *i, u32int val)
+memfillcolor(Memimage *i, u32 val)
 {
-	u32int bits;
+	u32 bits;
 	int d, y;
 	uchar p[4];
 
@@ -2552,7 +2552,7 @@
 		p[1] = bits>>8;
 		p[2] = bits>>16;
 		p[3] = bits>>24;
-		bits = *(u32int*)p;
+		bits = *(u32*)p;
 		memset32(u32addr(i, i->r.min), bits, i->width*Dy(i->r));
 		break;
 	}
--- a/libmemdraw/drawtest.c
+++ b/libmemdraw/drawtest.c
@@ -306,9 +306,9 @@
 void
 checkline(Rectangle r, Point sp, Point mp, int y, Memimage *stmp, Memimage *mtmp)
 {
-	u32int *dp;
+	u32 *dp;
 	int nb;
-	u32int *saved;
+	u32 *saved;
 
 	dp = wordaddr(dst, Pt(0, y));
 	saved = savedstbits + y*dst->width;
--- a/libmemlayer/draw.c
+++ b/libmemlayer/draw.c
@@ -11,7 +11,7 @@
 	Memlayer		*dstlayer;
 	Memimage	*src;
 	Memimage	*mask;
-	s32int	op;
+	s32	op;
 };
 
 static
--- a/libmemlayer/lalloc.c
+++ b/libmemlayer/lalloc.c
@@ -4,7 +4,7 @@
 #include "memlayer.h"
 
 Memimage*
-memlalloc(Memscreen *s, Rectangle screenr, Refreshfn refreshfn, void *refreshptr, u32int val)
+memlalloc(Memscreen *s, Rectangle screenr, Refreshfn refreshfn, void *refreshptr, u32 val)
 {
 	Memlayer *l;
 	Memimage *n;
--- a/libsec/port/aes.c
+++ b/libsec/port/aes.c
@@ -32,7 +32,7 @@
 #include <libsec.h>
 
 typedef uchar	u8;
-typedef u32int	u32;
+/* u32 is already declared before this point; "typedef u32 u32;" would only redeclare it */
 #define FULL_UNROLL
 
 static const u32 Td0[256];
@@ -44,8 +44,8 @@
 static int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
 static int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
 static int rijndaelKeySetup(u32 erk[/*4*(Nr + 1)*/], u32 drk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
-static void	rijndaelEncrypt(const u32int rk[], int Nr, const uchar pt[16], uchar ct[16]);
-static void	rijndaelDecrypt(const u32int rk[], int Nr, const uchar ct[16], uchar pt[16]);
+static void	rijndaelEncrypt(const u32 rk[], int Nr, const uchar pt[16], uchar ct[16]);
+static void	rijndaelDecrypt(const u32 rk[], int Nr, const uchar ct[16], uchar pt[16]);
 
 void
 setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
--- a/libsec/port/blowfish.c
+++ b/libsec/port/blowfish.c
@@ -7,17 +7,17 @@
 // 	Fast Software Encryption
 // 	Cambridge Security Workshop, Cambridge, England (1993)
 
-static u32int sbox[1024];
-static u32int pbox[BFrounds+2];
+static u32 sbox[1024];
+static u32 pbox[BFrounds+2];
 
-static void bfencrypt(u32int *, BFstate *);
-static void bfdecrypt(u32int *, BFstate *);
+static void bfencrypt(u32 *, BFstate *);
+static void bfdecrypt(u32 *, BFstate *);
 
 void
 setupBFstate(BFstate *s, uchar key[], int keybytes, uchar *ivec)
 {
 	int i, j;
-	u32int n, buf[2];
+	u32 n, buf[2];
 
 	memset(s, 0, sizeof(*s));
 	memset(buf, 0, sizeof buf);
@@ -77,16 +77,16 @@
 {
 	int i;
 	uchar *p;
-	u32int bo[2], bi[2], b;
+	u32 bo[2], bi[2], b;
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32int) s->ivec[1]<<8) | ((u32int)s->ivec[2]<<16) | ((u32int)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32int) s->ivec[5]<<8) | ((u32int)s->ivec[6]<<16) | ((u32int)s->ivec[7]<<24);
+	bo[0] =  s->ivec[0] | ((u32) s->ivec[1]<<8) | ((u32)s->ivec[2]<<16) | ((u32)s->ivec[3]<<24);
+	bo[1] =  s->ivec[4] | ((u32) s->ivec[5]<<8) | ((u32)s->ivec[6]<<16) | ((u32)s->ivec[7]<<24);
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		bi[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
+		bi[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
 
 		bi[0] ^= bo[0];
 		bi[1] ^= bo[1];
@@ -134,16 +134,16 @@
 {
 	int i;
 	uchar *p;
-	u32int b, bo[2], bi[2], xr[2];
+	u32 b, bo[2], bi[2], xr[2];
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32int) s->ivec[1]<<8) | ((u32int)s->ivec[2]<<16) | ((u32int)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32int) s->ivec[5]<<8) | ((u32int)s->ivec[6]<<16) | ((u32int)s->ivec[7]<<24);
+	bo[0] =  s->ivec[0] | ((u32) s->ivec[1]<<8) | ((u32)s->ivec[2]<<16) | ((u32)s->ivec[3]<<24);
+	bo[1] =  s->ivec[4] | ((u32) s->ivec[5]<<8) | ((u32)s->ivec[6]<<16) | ((u32)s->ivec[7]<<24);
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		bi[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
+		bi[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
 
 		xr[0] = bi[0];
 		xr[1] = bi[1];
@@ -193,11 +193,11 @@
 bfECBencrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	u32int b[2];
+	u32 b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		b[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		b[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
+		b[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
 
 		bfencrypt(b, s);
 
@@ -219,11 +219,11 @@
 bfECBdecrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	u32int b[2];
+	u32 b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32int) buf[1]<<8) | ((u32int)buf[2]<<16) | ((u32int)buf[3]<<24);
-		b[1] =  buf[4] | ((u32int) buf[5]<<8) | ((u32int)buf[6]<<16) | ((u32int)buf[7]<<24);
+		b[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
+		b[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
 
 		bfdecrypt(b, s);
 
@@ -242,11 +242,11 @@
 }
 
 static void
-bfencrypt(u32int *b, BFstate *s)
+bfencrypt(u32 *b, BFstate *s)
 {
 	int i;
-	u32int l, r;
-	u32int *pb, *sb;
+	u32 l, r;
+	u32 *pb, *sb;
 
 	l = b[0];
 	r = b[1];
@@ -276,11 +276,11 @@
 }
 
 static void
-bfdecrypt(u32int *b, BFstate *s)
+bfdecrypt(u32 *b, BFstate *s)
 {
 	int i;
-	u32int l, r;
-	u32int *pb, *sb;
+	u32 l, r;
+	u32 *pb, *sb;
 
 	l = b[0];
 	r = b[1];
@@ -309,7 +309,7 @@
 	return;
 }
 
-static u32int pbox[BFrounds+2] = {
+static u32 pbox[BFrounds+2] = {
 	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 
 	0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, 
 	0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, 
@@ -317,7 +317,7 @@
 	0x9216d5d9, 0x8979fb1b
 };
 
-static u32int sbox[1024] = {
+static u32 sbox[1024] = {
 	0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L, 
 	0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L, 
 	0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L, 
--- a/libsec/port/des.c
+++ b/libsec/port/des.c
@@ -4,7 +4,7 @@
 /*
  * integrated sbox & p perm
  */
-static u32int spbox[] = {
+static u32 spbox[] = {
 
 0x00808200,0x00000000,0x00008000,0x00808202,0x00808002,0x00008202,0x00000002,0x00008000,
 0x00000200,0x00808200,0x00808202,0x00000200,0x00800202,0x00808002,0x00800000,0x00000002,
@@ -81,7 +81,7 @@
 
 /*
  * for manual index calculation
- * #define fetch(box, i, sh) (*((u32int*)((uchar*)spbox + (box << 8) + ((i >> (sh)) & 0xfc))))
+ * #define fetch(box, i, sh) (*((u32*)((uchar*)spbox + (box << 8) + ((i >> (sh)) & 0xfc))))
  */
 #define fetch(box, i, sh) ((spbox+(box << 6))[((i >> (sh + 2)) & 0x3f)])
 
@@ -89,16 +89,16 @@
  * DES electronic codebook encryption of one block
  */
 void
-block_cipher(ulong key[32], uchar text[8], int decrypting)
+block_cipher(u32 key[32], uchar text[8], int decrypting)
 {
-	u32int right, left, v0, v1;
+	u32 right, left, v0, v1;
 	int i, keystep;
 
 	/*
 	 * initial permutation
 	 */
-	v0 = text[0] | ((u32int)text[2]<<8) | ((u32int)text[4]<<16) | ((u32int)text[6]<<24);
-	left = text[1] | ((u32int)text[3]<<8) | ((u32int)text[5]<<16) | ((u32int)text[7]<<24);
+	v0 = text[0] | ((u32)text[2]<<8) | ((u32)text[4]<<16) | ((u32)text[6]<<24);
+	left = text[1] | ((u32)text[3]<<8) | ((u32)text[5]<<16) | ((u32)text[7]<<24);
 	right = (left & 0xaaaaaaaa) | ((v0 >> 1) & 0x55555555);
 	left = ((left << 1) & 0xaaaaaaaa) | (v0 & 0x55555555);
 	left = ((left << 6) & 0x33003300)
@@ -180,17 +180,17 @@
  * triple DES electronic codebook encryption of one block
  */
 void
-triple_block_cipher(ulong expanded_key[3][32], uchar text[8], int ende)
+triple_block_cipher(u32 expanded_key[3][32], uchar text[8], int ende)
 {
-	ulong *key;
-	u32int right, left, v0, v1;
+	u32 *key;
+	u32 right, left, v0, v1;
 	int i, j, keystep;
 
 	/*
 	 * initial permutation
 	 */
-	v0 = text[0] | ((u32int)text[2]<<8) | ((u32int)text[4]<<16) | ((u32int)text[6]<<24);
-	left = text[1] | ((u32int)text[3]<<8) | ((u32int)text[5]<<16) | ((u32int)text[7]<<24);
+	v0 = text[0] | ((u32)text[2]<<8) | ((u32)text[4]<<16) | ((u32)text[6]<<24);
+	left = text[1] | ((u32)text[3]<<8) | ((u32)text[5]<<16) | ((u32)text[7]<<24);
 	right = (left & 0xaaaaaaaa) | ((v0 >> 1) & 0x55555555);
 	left = ((left << 1) & 0xaaaaaaaa) | (v0 & 0x55555555);
 	left = ((left << 6) & 0x33003300)
@@ -281,7 +281,7 @@
 /*
  * key compression permutation, 4 bits at a time
  */
-static u32int comptab[] = {
+static u32 comptab[] = {
 
 0x000000,0x010000,0x000008,0x010008,0x000080,0x010080,0x000088,0x010088,
 0x000000,0x010000,0x000008,0x010008,0x000080,0x010080,0x000088,0x010088,
@@ -332,9 +332,9 @@
 };
 
 static void
-keycompperm(u32int left, u32int right, ulong *ek)
+keycompperm(u32 left, u32 right, u32 *ek)
 {
-	u32int v0, v1;
+	u32 v0, v1;
 	int i;
 
 	for(i = 0; i < 16; i++){
@@ -369,12 +369,12 @@
 }
 
 void
-des_key_setup(uchar key[8], ulong *ek)
+des_key_setup(uchar key[8], u32 *ek)
 {
-	u32int left, right, v0, v1;
+	u32 left, right, v0, v1;
 
-	v0 = key[0] | ((u32int)key[2] << 8) | ((u32int)key[4] << 16) | ((u32int)key[6] << 24);
-	v1 = key[1] | ((u32int)key[3] << 8) | ((u32int)key[5] << 16) | ((u32int)key[7] << 24);
+	v0 = key[0] | ((u32)key[2] << 8) | ((u32)key[4] << 16) | ((u32)key[6] << 24);
+	v1 = key[1] | ((u32)key[3] << 8) | ((u32)key[5] << 16) | ((u32)key[7] << 24);
 	left = ((v0 >> 1) & 0x40404040)
 		| ((v0 >> 2) & 0x10101010)
 		| ((v0 >> 3) & 0x04040404)
@@ -433,10 +433,10 @@
 void
 des56to64(uchar *k56, uchar *k64)
 {
-	u32int hi, lo;
+	u32 hi, lo;
 
-	hi = ((u32int)k56[0]<<24)|((u32int)k56[1]<<16)|((u32int)k56[2]<<8)|k56[3];
-	lo = ((u32int)k56[4]<<24)|((u32int)k56[5]<<16)|((u32int)k56[6]<<8);
+	hi = ((u32)k56[0]<<24)|((u32)k56[1]<<16)|((u32)k56[2]<<8)|k56[3];
+	lo = ((u32)k56[4]<<24)|((u32)k56[5]<<16)|((u32)k56[6]<<8);
 
 	k64[0] = parity[(hi>>25)&0x7f];
 	k64[1] = parity[(hi>>18)&0x7f];
@@ -454,12 +454,12 @@
 void
 des64to56(uchar *k64, uchar *k56)
 {
-	u32int hi, lo;
+	u32 hi, lo;
 
-	hi = (((u32int)k64[0]&0xfe)<<24)|(((u32int)k64[1]&0xfe)<<17)|(((u32int)k64[2]&0xfe)<<10)
+	hi = (((u32)k64[0]&0xfe)<<24)|(((u32)k64[1]&0xfe)<<17)|(((u32)k64[2]&0xfe)<<10)
 		|((k64[3]&0xfe)<<3)|(k64[4]>>4);
-	lo = (((u32int)k64[4]&0xfe)<<28)|(((u32int)k64[5]&0xfe)<<21)|(((u32int)k64[6]&0xfe)<<14)
-		|(((u32int)k64[7]&0xfe)<<7);
+	lo = (((u32)k64[4]&0xfe)<<28)|(((u32)k64[5]&0xfe)<<21)|(((u32)k64[6]&0xfe)<<14)
+		|(((u32)k64[7]&0xfe)<<7);
 
 	k56[0] = hi>>24;
 	k56[1] = hi>>16;
@@ -471,7 +471,7 @@
 }
 
 void
-key_setup(uchar key[7], ulong *ek)
+key_setup(uchar key[7], u32 *ek)
 {
 	uchar k64[8];
 
--- a/libsec/port/dsaprimes.c
+++ b/libsec/port/dsaprimes.c
@@ -12,7 +12,7 @@
 static void
 Hrand(uchar *s)
 {
-	ulong *u = (ulong*)s;
+	u32 *u = (u32*)s;
 	*u++ = fastrand();
 	*u++ = fastrand();
 	*u++ = fastrand();
@@ -48,7 +48,7 @@
 forever:
 	do{
 		Hrand(s);
-		memcpy(sj, s, 20);
+		memmove(sj, s, 20);
 		sha1(s, 20, Hs, 0);
 		Hincr(sj);
 		sha1(sj, 20, Hs1, 0);
@@ -65,7 +65,7 @@
 	Hincr(sj);
 	mpleft(q, 1, q2);
 	while(i<4096){
-		memcpy(sjk, sj, 20);
+		memmove(sjk, sj, 20);
 		for(k=0; k <= n; k++){
 			sha1(sjk, 20, Hs, 0);
 			letomp(Hs, 20, Vk);
--- a/libsec/port/fastrand.c
+++ b/libsec/port/fastrand.c
@@ -5,10 +5,10 @@
  *  use the X917 random number generator to create random
  *  numbers (faster than truerand() but not as random).
  */
-ulong
+u32
 fastrand(void)
 {
-	ulong x;
+	u32 x;
 	
 	genrandom((uchar*)&x, sizeof x);
 	return x;
--- a/libsec/port/genrandom.c
+++ b/libsec/port/genrandom.c
@@ -5,7 +5,7 @@
 
 typedef struct State{
 	int		seeded;
-	uvlong		seed;
+	u64		seed;
 	DES3state	des3;
 } State;
 static State x917state;
@@ -14,7 +14,7 @@
 X917(uchar *rand, int nrand)
 {
 	int i, m, n8;
-	uvlong I, x;
+	u64 I, x;
 
 	/* 1. Compute intermediate value I = Ek(time). */
 	I = nsec();
@@ -26,7 +26,7 @@
 		x = I ^ x917state.seed;
 		triple_block_cipher(x917state.des3.expanded, (uchar*)&x, 0);
 		n8 = (nrand>8) ? 8 : nrand;
-		memcpy(rand, (uchar*)&x, n8);
+		memmove(rand, (uchar*)&x, n8);
 		rand += 8;
 		nrand -= 8;
 		x ^= I;
@@ -41,10 +41,10 @@
 	int n;
 	uchar mix[128];
 	uchar key3[3][8];
-	ulong *ulp;
+	u32 *ulp;
 
-	ulp = (ulong*)key3;
-	for(n = 0; n < sizeof(key3)/sizeof(ulong); n++)
+	ulp = (u32*)key3;
+	for(n = 0; n < sizeof(key3)/sizeof(u32); n++)
 		ulp[n] = truerand();
 	setupDES3state(&x917state.des3, key3, nil);
 	X917(mix, sizeof mix);
@@ -59,4 +59,19 @@
 		X917init();
 	X917(p, n);
 	_genrandomqunlock();
+}
+
+QLock grandomlk;	/* the lock taken and released by the two helpers below */
+/* acquire grandomlk (blocks in qlock until it is available) */
+void
+_genrandomqlock(void)
+{
+	qlock(&grandomlk);
+}
+
+/* release grandomlk; called above in this file right after X917(p, n) */
+void
+_genrandomqunlock(void)
+{
+	qunlock(&grandomlk);
 }
--- a/libsec/port/hmac.c
+++ b/libsec/port/hmac.c
@@ -3,8 +3,8 @@
 
 /* rfc2104 */
 static DigestState*
-hmac_x(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s,
-	DigestState*(*x)(uchar*, ulong, uchar*, DigestState*), int xlen)
+hmac_x(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s,
+	DigestState*(*x)(uchar*, u32, uchar*, DigestState*), int xlen)
 {
 	int i;
 	uchar pad[65], innerdigest[256];
@@ -44,13 +44,13 @@
 }
 
 DigestState*
-hmac_sha1(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s)
+hmac_sha1(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s)
 {
 	return hmac_x(p, len, key, klen, digest, s, sha1, SHA1dlen);
 }
 
 DigestState*
-hmac_md5(uchar *p, ulong len, uchar *key, ulong klen, uchar *digest, DigestState *s)
+hmac_md5(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s)
 {
 	return hmac_x(p, len, key, klen, digest, s, md5, MD5dlen);
 }
--- a/libsec/port/md4.c
+++ b/libsec/port/md4.c
@@ -91,17 +91,17 @@
 	{ 15,	S34},	
 };
 
-static void encode(uchar*, u32int*, ulong);
-static void decode(u32int*, uchar*, ulong);
+static void encode(uchar*, u32*, u32);
+static void decode(u32*, uchar*, u32);
 
 static void
-md4block(uchar *p, ulong len, MD4state *s)
+md4block(uchar *p, u32 len, MD4state *s)
 {
 	int i;
-	u32int a, b, c, d, tmp;
+	u32 a, b, c, d, tmp;
 	MD4Table *t;
 	uchar *end;
-	u32int x[16];
+	u32 x[16];
 
 	for(end = p+len; p < end; p += 64){
 		a = s->state[0];
@@ -145,9 +145,9 @@
 }
 
 MD4state*
-md4(uchar *p, ulong len, uchar *digest, MD4state *s)
+md4(uchar *p, u32 len, uchar *digest, MD4state *s)
 {
-	u32int x[16];
+	u32 x[16];
 	uchar buf[128];
 	int i;
 	uchar *e;
@@ -238,13 +238,13 @@
 }
 
 /*
- *	encodes input (u32int) into output (uchar). Assumes len is
+ *	encodes input (u32) into output (uchar). Assumes len is
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32int *input, ulong len)
+encode(uchar *output, u32 *input, u32 len)
 {
-	u32int x;
+	u32 x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
@@ -257,11 +257,11 @@
 }
 
 /*
- *	decodes input (uchar) into output (u32int). Assumes len is
+ *	decodes input (uchar) into output (u32). Assumes len is
  *	a multiple of 4.
  */
 static void
-decode(u32int *output, uchar *input, ulong len)
+decode(u32 *output, uchar *input, u32 len)
 {
 	uchar *e;
 
--- a/libsec/port/md5.c
+++ b/libsec/port/md5.c
@@ -27,14 +27,14 @@
  *	documentation and/or software.
  */
 
-static void encode(uchar*, u32int*, ulong);
+static void encode(uchar*, u32*, u32);
 
-extern void _md5block(uchar*, ulong, u32int*);
+extern void _md5block(uchar*, u32, u32*);
 
 MD5state*
-md5(uchar *p, ulong len, uchar *digest, MD5state *s)
+md5(uchar *p, u32 len, uchar *digest, MD5state *s)
 {
-	u32int x[16];
+	u32 x[16];
 	uchar buf[128];
 	int i;
 	uchar *e;
@@ -128,13 +128,13 @@
 }
 
 /*
- *	encodes input (u32int) into output (uchar). Assumes len is
+ *	encodes input (u32) into output (uchar). Assumes len is
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32int *input, ulong len)
+encode(uchar *output, u32 *input, u32 len)
 {
-	u32int x;
+	u32 x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
--- a/libsec/port/md5block.c
+++ b/libsec/port/md5block.c
@@ -53,7 +53,7 @@
 	S44=	21
 };
 
-static u32int md5tab[] =
+static u32 md5tab[] =
 {
 	/* round 1 */
 /*[0]*/	0xd76aa478,	
@@ -128,16 +128,16 @@
 	0xeb86d391,	
 };
 
-static void decode(u32int*, uchar*, ulong);
-extern void _md5block(uchar *p, ulong len, u32int *s);
+static void decode(u32*, uchar*, u32);
+extern void _md5block(uchar *p, u32 len, u32 *s);
 
 void
-_md5block(uchar *p, ulong len, u32int *s)
+_md5block(uchar *p, u32 len, u32 *s)
 {
-	u32int a, b, c, d, sh;
-	u32int *t;
+	u32 a, b, c, d, sh;
+	u32 *t;
 	uchar *end;
-	u32int x[16];
+	u32 x[16];
 
 	for(end = p+len; p < end; p += 64){
 		a = s[0];
@@ -253,11 +253,11 @@
 }
 
 /*
- *	decodes input (uchar) into output (u32int). Assumes len is
+ *	decodes input (uchar) into output (u32). Assumes len is
  *	a multiple of 4.
  */
 static void
-decode(u32int *output, uchar *input, ulong len)
+decode(u32 *output, uchar *input, u32 len)
 {
 	uchar *e;
 
--- a/libsec/port/nfastrand.c
+++ b/libsec/port/nfastrand.c
@@ -3,10 +3,10 @@
 
 #define Maxrand	((1UL<<31)-1)
 
-ulong
-nfastrand(ulong n)
+u32
+nfastrand(u32 n)
 {
-	ulong m, r;
+	u32 m, r;
 	
 	/*
 	 * set m to the maximum multiple of n <= 2^31-1
--- a/libsec/port/sha1.c
+++ b/libsec/port/sha1.c
@@ -1,9 +1,9 @@
 #include "os.h"
 #include <libsec.h>
 
-static void encode(uchar*, u32int*, ulong);
+static void encode(uchar*, u32*, u32);
 
-extern void _sha1block(uchar*, ulong, u32int*);
+extern void _sha1block(uchar*, u32, u32*);
 
 /*
  *  we require len to be a multiple of 64 for all but
@@ -11,10 +11,10 @@
  *  to pad.
  */
 SHA1state*
-sha1(uchar *p, ulong len, uchar *digest, SHA1state *s)
+sha1(uchar *p, u32 len, uchar *digest, SHA1state *s)
 {
 	uchar buf[128];
-	u32int x[16];
+	u32 x[16];
 	int i;
 	uchar *e;
 
@@ -108,13 +108,13 @@
 }
 
 /*
- *	encodes input (ulong) into output (uchar). Assumes len is
+ *	encodes input (u32) into output (uchar). Assumes len is
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32int *input, ulong len)
+encode(uchar *output, u32 *input, u32 len)
 {
-	u32int x;
+	u32 x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
--- a/libsec/port/sha1block.c
+++ b/libsec/port/sha1block.c
@@ -1,12 +1,12 @@
 #include "os.h"
 
 void
-_sha1block(uchar *p, ulong len, u32int *s)
+_sha1block(uchar *p, u32 len, u32 *s)
 {
-	u32int a, b, c, d, e, x;
+	u32 a, b, c, d, e, x;
 	uchar *end;
-	u32int *wp, *wend;
-	u32int w[80];
+	u32 *wp, *wend;
+	u32 w[80];
 
 	/* at this point, we have a multiple of 64 bytes */
 	for(end = p+len; p < end;){
--- a/libsec/port/sha2.c
+++ b/libsec/port/sha2.c
@@ -4,11 +4,11 @@
 extern void _sha256block(SHA256state*, uchar*);
 extern void _sha512block(SHA512state*, uchar*);
 
-u32int sha224h0[] = {
+u32 sha224h0[] = {
 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
 };
-u32int sha256h0[] = {
+u32 sha256h0[] = {
 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 };
@@ -40,7 +40,7 @@
 }
 
 static void
-p32(u32int v, uchar *p)
+p32(u32 v, uchar *p)
 {
 	p[0] = v>>24;
 	p[1] = v>>16;
@@ -83,9 +83,9 @@
 
 
 static void
-_sha256(uchar *p, ulong len, SHA256state *s)
+_sha256(uchar *p, u32 len, SHA256state *s)
 {
-	u32int take;
+	u32 take;
 
 	/* complete possible partial block from last time */
 	if(s->blen > 0 && s->blen+len >= SHA256bsize) {
@@ -117,7 +117,7 @@
 {
 	int i;
 	uchar end[SHA256bsize+8];
-	u32int nzero, nb, nd;
+	u32 nzero, nb, nd;
 
 	nzero = (2*SHA256bsize - s->blen - 1 - 8) % SHA256bsize;
 	end[0] = 0x80;
@@ -134,7 +134,7 @@
 }
 
 static SHA256state*
-sha256x(uchar *p, ulong len, uchar *digest, SHA256state *s, int smaller)
+sha256x(uchar *p, u32 len, uchar *digest, SHA256state *s, int smaller)
 {
 	if(s == nil) {
 		s = sha256init();
@@ -159,13 +159,13 @@
 }
 
 SHA256state*
-sha224(uchar *p, ulong len, uchar *digest, SHA256state *s)
+sha224(uchar *p, u32 len, uchar *digest, SHA256state *s)
 {
 	return sha256x(p, len, digest, s, 1);
 }
 
 SHA256state*
-sha256(uchar *p, ulong len, uchar *digest, SHA256state *s)
+sha256(uchar *p, u32 len, uchar *digest, SHA256state *s)
 {
 	return sha256x(p, len, digest, s, 0);
 }
@@ -189,9 +189,9 @@
 }
 
 static void
-_sha512(uchar *p, ulong len, SHA512state *s)
+_sha512(uchar *p, u32 len, SHA512state *s)
 {
-	u32int take;
+	u32 take;
 
 	/* complete possible partial block from last time */
 	if(s->blen > 0 && s->blen+len >= SHA512bsize) {
@@ -223,7 +223,7 @@
 {
 	int i;
 	uchar end[SHA512bsize+16];
-	u32int nzero, n;
+	u32 nzero, n;
 	u64int nb[2];
 
 	nzero = (2*SHA512bsize - s->blen - 1 - 16) % SHA512bsize;
@@ -244,7 +244,7 @@
 }
 
 static SHA512state*
-sha512x(uchar *p, ulong len, uchar *digest, SHA512state *s, int smaller)
+sha512x(uchar *p, u32 len, uchar *digest, SHA512state *s, int smaller)
 {
 	if(s == nil) {
 		s = sha512init();
@@ -269,13 +269,13 @@
 }
 
 SHA512state*
-sha384(uchar *p, ulong len, uchar *digest, SHA512state *s)
+sha384(uchar *p, u32 len, uchar *digest, SHA512state *s)
 {
 	return sha512x(p, len, digest, s, 1);
 }
 
 SHA512state*
-sha512(uchar *p, ulong len, uchar *digest, SHA512state *s)
+sha512(uchar *p, u32 len, uchar *digest, SHA512state *s)
 {
 	return sha512x(p, len, digest, s, 0);
 }
--- a/libsec/port/sha256block.c
+++ b/libsec/port/sha256block.c
@@ -5,7 +5,7 @@
 	SHA256rounds =  64,
 };
 
-u32int sha256const[] = {
+u32 sha256const[] = {
 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
@@ -38,7 +38,7 @@
 #define G	v[6]
 #define H	v[7]
 
-static u32int
+static u32
 g32(uchar *p)
 {
 	return p[0]<<24|p[1]<<16|p[2]<<8|p[3]<<0;
@@ -47,10 +47,10 @@
 void
 _sha256block(SHA256state *s, uchar *buf)
 {
-	u32int w[2*SHA256bsize/4];
+	u32 w[2*SHA256bsize/4];
 	int i, t;
-	u32int t1, t2;
-	u32int v[8];
+	u32 t1, t2;
+	u32 v[8];
 
 	for(t = 0; t < nelem(w)/2; t++) {
 		if(t < 16) {
--- a/libsec/port/sha512block.c
+++ b/libsec/port/sha512block.c
@@ -30,7 +30,7 @@
 };
 
 
-static u32int
+static u32
 g32(uchar *p)
 {
 	return p[0]<<24|p[1]<<16|p[2]<<8|p[3]<<0;
--- a/libsec/port/smallprimes.c
+++ b/libsec/port/smallprimes.c
@@ -1,6 +1,6 @@
 #include "os.h"
 
-ulong smallprimes[1000] = {
+u32 smallprimes[1000] = {
 	2,
 	3,
 	5,
--- a/libsec/port/smallprimetest.c
+++ b/libsec/port/smallprimetest.c
@@ -2,7 +2,7 @@
 #include <mp.h>
 #include <libsec.h>
 
-static ulong smallprimes[] = {
+static u32 smallprimes[] = {
 	2,	3,	5,	7,	11,	13,	17,	19,	23,	29,
 	31,	37,	41,	43,	47,	53,	59,	61,	67,	71,
 	73,	79,	83,	89,	97,	101,	103,	107,	109,	113,
@@ -1007,7 +1007,7 @@
 
 //  return 1 if p is divisable by sp, 0 otherwise
 static int
-divides(mpint *dividend, ulong divisor)
+divides(mpint *dividend, u32 divisor)
 {
 	mpdigit d[2], q;
 	int i;
@@ -1026,7 +1026,7 @@
 smallprimetest(mpint *p)
 {
 	int i;
-	ulong sp;
+	u32 sp;
 
 	for(i = 0; i < nelem(smallprimes); i++){
 		sp = smallprimes[i];
--- a/limbo/dis.c
+++ b/limbo/dis.c
@@ -167,6 +167,7 @@
 	Sym *s;
 	Node *n;
 
+	USED(size);
 	if(0){
 		discon(size);
 		disvar(size, ds);
@@ -439,7 +440,7 @@
 static void
 disbig(long off, Long v)
 {
-	ulong iv;
+	u32int iv;
 
 	disflush(DEFL, off, IBY2LG);
 	iv = v >> 32;
@@ -458,8 +459,8 @@
 static void
 disreal(long off, Real v)
 {
-	ulong bv[2];
-	ulong iv;
+	u32int bv[2];
+	u32int iv;
 
 	disflush(DEFF, off, IBY2LG);
 	dtocanon(v, bv);
--- a/limbo/dtocanon.c
+++ b/limbo/dtocanon.c
@@ -1,9 +1,9 @@
 #include "limbo.h"
 
 void
-dtocanon(double f, ulong v[])
+dtocanon(double f, u32int v[])
 {
-	union { double d; ulong ul[2]; } a;
+	union { double d; u32int ul[2]; } a;
 
 	a.d = 1.;
 	if(a.ul[0]){
@@ -18,9 +18,9 @@
 }
 
 double
-canontod(ulong v[2])
+canontod(u32int v[2])
 {
-	union { double d; unsigned long ul[2]; } a;
+	union { double d; u32int ul[2]; } a;
 
 	a.d = 1.;
 	if(a.ul[0]) {
--- a/limbo/fns.h
+++ b/limbo/fns.h
@@ -37,7 +37,7 @@
 Ok	callcast(Node*, int, int);
 void	callcom(Src*, int, Node*, Node*);
 Type*	calltype(Type*, Node*, Type*);
-double	canontod(ulong v[2]);
+double	canontod(u32int v[2]);
 void	casecheck(Node *cn, Type *ret);
 int	casecmp(Type*, Node*, Node*);
 void	casecom(Node*);
@@ -86,7 +86,7 @@
 int	dotconv(Fmt*);
 char	*dotprint(char*, char*, Decl*, int);
 Type	*dottype(Type*, Decl*);
-void	dtocanon(double, ulong[2]);
+void	dtocanon(double, u32int[2]);
 Decl	*dupdecl(Decl*);
 Decl *dupdecls(Decl*);
 Node	*dupn(int, Src*, Node*);
@@ -313,7 +313,7 @@
 #pragma	varargck	argpos	seprint	3
 void	shareloc(Decl*);
 int	shiftchk(Node*);
-ulong	sign(Decl*);
+u32int	sign(Decl*);
 Node	*simplify(Node*);
 Szal	sizeids(Decl*, long);
 void	sizetype(Type*);
--- a/limbo/stubs.c
+++ b/limbo/stubs.c
@@ -169,7 +169,7 @@
 				print("\t\"");
 				if(id->dot != d)
 					print("%s.", id->dot->sym->name);
-				print("%s\",0x%lux,%s_%s,", id->sym->name, sign(id),
+				print("%s\",0x%ux,%s_%s,", id->sym->name, sign(id),
 					id->dot->sym->name, id->sym->name);
 				if(id->ty->varargs)
 					print("0,0,{0},");
@@ -536,7 +536,7 @@
 	for(tg = t->tags; tg != nil; tg = tg->next)
 		print("#define %s_%s %ld\n", buf, tg->sym->name, offset++);
 	print("struct %s\n{\n", buf);
-	print("	int	pick;\n");
+	print("	WORD	pick;\n");
 	offset = IBY2WD;
 	for(id = t->ids; id != nil; id = id->next){
 		if(id->store == Dfield){
--- a/limbo/types.c
+++ b/limbo/types.c
@@ -51,7 +51,7 @@
 	/* Tchan */	{ 1,	0,	0,	0,	1, },
 	/* Treal */	{ 0,	0,	1,	1,	1, },
 	/* Tfn */	{ 0,	1,	0,	0,	1, },
-	/* Tint */	{ 0,	0,	1,	0,	1, },
+	/* Tint */	{ 0,	0,	1,	1,	1, },
 	/* Tlist */	{ 1,	0,	0,	0,	1, },
 	/* Tmodule */	{ 1,	0,	0,	0,	1, },
 	/* Tref */	{ 1,	0,	0,	0,	1, },
@@ -3537,7 +3537,7 @@
  * sign the same information used
  * for testing type equality
  */
-ulong
+u32int
 sign(Decl *d)
 {
 	Type *t;
@@ -3579,7 +3579,7 @@
 	for(i = 0; i < MD5dlen; i += 4)
 		t->sig ^= md5sig[i+0] | (md5sig[i+1]<<8) | (md5sig[i+2]<<16) | (md5sig[i+3]<<24);
 	if(debug['S'])
-		print("signed %D type %T len %d sig %#lux\n", d, t, sigend, t->sig);
+		print("signed %D type %T len %d sig %#ux\n", d, t, sigend, t->sig);
 	free(sig);
 	return t->sig;
 }
--- a/man/10/c2l
+++ b/man/10/c2l
@@ -213,7 +213,6 @@
 .B /appl/lib/libc.b
 .TP
 .B /appl/lib/libc0.b
-.TP
 .SH "SEE ALSO"
 .IR 2c (10.1),
 .IR limbo (1)
--- a/man/10/styx
+++ b/man/10/styx
@@ -56,11 +56,11 @@
 struct Fcall
 {
 	uchar	type;
-	u32int	fid;
+	u32	fid;
 	ushort	tag;
 	union {
 		struct {
-			u32int	msize;	/* Tversion, Rversion */
+			u32	msize;	/* Tversion, Rversion */
 			char	*version;	/* Tversion, Rversion */
 		};
 		struct {
@@ -71,23 +71,23 @@
 		};
 		struct {
 			Qid	qid;	/* Rattach, Ropen, Rcreate */
-			u32int	iounit;	/* Ropen, Rcreate */
+			u32	iounit;	/* Ropen, Rcreate */
 		};
 		struct {
 			Qid	aqid;	/* Rauth */
 		};
 		struct {
-			u32int	afid;	/* Tauth, Tattach */
+			u32	afid;	/* Tauth, Tattach */
 			char	*uname;	/* Tauth, Tattach */
 			char	*aname;	/* Tauth, Tattach */
 		};
 		struct {
-			u32int	perm;	/* Tcreate */ 
+			u32	perm;	/* Tcreate */ 
 			char	*name;	/* Tcreate */
 			uchar	mode;	/* Tcreate, Topen */
 		};
 		struct {
-			u32int	newfid;	/* Twalk */
+			u32	newfid;	/* Twalk */
 			ushort	nwname;	/* Twalk */
 			char	*wname[MAXWELEM];	/* Twalk */
 		};
@@ -97,7 +97,7 @@
 		};
 		struct {
 			vlong	offset;	/* Tread, Twrite */
-			u32int	count;	/* Tread, Twrite, Rread */
+			u32	count;	/* Tread, Twrite, Rread */
 			char	*data;	/* Twrite, Rread */
 		};
 		struct {
--- a/man/5/attach
+++ b/man/5/attach
@@ -64,7 +64,7 @@
 message should be set to
 .BR NOFID ,
 defined as
-.B (u32int)~0
+.B (u32)~0
 in
 .BR <fcall.h> .
 If the client does wish to authenticate, it must acquire and validate an
--- a/man/fonts
+++ b/man/fonts
@@ -1,10 +1,10 @@
 # mkfile rules to get fonts in Lucida Sans.
 # if you want to use Times, change these next lines to
-# MAN=mantimes
-# FONTS=''
-MAN=mani
-FONTS='.fp 1 R LucidaSans
-.fp 2 I LucidaSansI
-.fp 3 B LucidaSansB
-.fp 5 L LucidaCW
-'
+MAN=mantimes
+FONTS=''
+#MAN=mani
+#FONTS='.fp 1 R LucidaSans
+#.fp 2 I LucidaSansI
+#.fp 3 B LucidaSansB
+#.fp 5 L LucidaCW
+#'
--- a/man/index
+++ b/man/index
@@ -62787,8 +62787,8 @@
 u'ffff	/man/2/json
 u.h	/man/10/0intro
 u.s	/man/9/1copyright
-u32int	/man/10/styx
-u32int	/man/5/attach
+u32	/man/10/styx
+u32	/man/5/attach
 uart	/man/10/plan9.ini
 uart0	/man/10/intrenable
 uarts	/man/10/plan9.ini
--- a/man/mkfile
+++ b/man/mkfile
@@ -23,9 +23,9 @@
 
 
 permind:V:
-	rm -f $LIB/permind/toc
+	test -f $LIB/permind/toc && rm -f $LIB/permind/toc
 	{
-		echo -n $FONTS
+		echo -n $"FONTS
 		echo .am TH
 		echo .tm '\\$1' '\\$2' '\\n%'
 		echo ..
@@ -87,7 +87,7 @@
 	mkindex
 
 
-print.out:V: permind
+print.out:V:
 	{
 		{echo -n $FONTS; cat $LIB/title} | troff
 		{echo -n $FONTS; cat $LIB/trademarks} | troff -ms
--- a/mkfile
+++ b/mkfile
@@ -27,6 +27,7 @@
 	utils/ndate\
 	emu\
 
+
 KERNEL_DIRS=\
 	os\
 	os/boot/pc\
@@ -205,3 +206,68 @@
 
 mkdirs-nt:V:
 	mkdir -p `{cmd /c type lib\emptydirs}
+
+# build iso using 9front's 9bootfat
+#	details in 9front's
+#		/sys/lib/dist/mkfile
+#		/sys/src/boot/{pc,efi}/mkfile
+# TODO
+#	inferno manual talks about plan9.ini being in / and not cfg/
+%inferno.amd64.iso:D: /root/386/9bootiso /root/386/mbr /root/386/pbs
+	ROOT=/mnt/term/home/j/local/plan9/custom/inferno-os
+	rm -fr $target 386 amd64 cfg
+	@{rfork n
+		mkdir 386
+		mkdir cfg
+		mkdir amd64
+		cp /root/386/^(9bootiso mbr pbs) 386
+		cp /root/386/9pc 386
+		cp /root/amd64/9pc64 amd64
+		cp $ROOT/Inferno/amd64/bin/ipc64 amd64
+		cp /sys/lib/dist/cfg/plan9.ini cfg/plan9.ini
+		echo '-----' cfg/plan9.ini '-----'
+		echo 'console=0 b115200' >>cfg/plan9.ini
+		echo 'bootfile=/amd64/ipc64' >>cfg/plan9.ini
+		# echo 'bootfile=/amd64/9pc64' >>cfg/plan9.ini
+		# echo wait >>cfg/plan9.ini
+		cat cfg/plan9.ini
+		echo '        ' '-----'
+		disk/mk9660 -c9j -B 386/9bootiso -p <{echo +} -s $ROOT -v 'Inferno amd64' $target
+		rm -fr 386 amd64 cfg
+	}
+	test -d /mnt/term/tmp && cp -x $target /mnt/term/$target
+
+%inferno.amd64.hybrid.iso:D: /root/386/9bootiso /root/386/9boothyb /root/386/9bootfat /root/386/9bootpxe /root/386/mbr /root/386/pbs /root/386/efiboot.fat /root/386/bootia32.efi /root/386/bootx64.efi
+	ROOT=/mnt/term/home/j/local/plan9/custom/inferno-os
+	rm -fr 386 cfg efi $target
+	@{rfork n
+		mkdir 386
+		mkdir cfg
+		mkdir efi
+		mkdir efi/boot
+		cp /root/386/^(9bootiso 9boothyb 9bootfat 9bootpxe mbr pbs efiboot.fat bootia32.efi bootx64.efi) 386
+		cp /root/386/^(bootia32.efi bootx64.efi) efi/boot
+		cp /root/386/9pc 386
+		cp /sys/lib/dist/cfg/plan9.ini cfg/plan9.ini
+		echo cfg/plan9.ini '-----'
+		echo 'console=0 b115200' >>cfg/plan9.ini
+		echo 'bootfile=Inferno/amd64/bin/ipc64' >>cfg/plan9.ini
+		echo wait >>cfg/plan9.ini
+		cat cfg/plan9.ini
+		echo '           ' '-----'
+		disk/mk9660 -c9j -B 386/9bootiso -E 386/efiboot.fat -p <{echo +} -s $ROOT -v 'Inferno amd64' $target
+		@{rfork n
+			bind /root/386/9boothyb /root/386/9bootfat
+			dd -if /dev/zero -bs 512 -count 4096 >> $target
+			disk/partfs -m /n/partfs $target
+			disk=/n/partfs/sdXX
+			disk/mbr -m /root/386/mbr $disk/data
+			@{echo a p1 '$-1' '$'
+				echo t p1 FAT16
+				echo A p1
+				echo w
+				echo q} | disk/fdisk -b $disk/data
+			disk/format -b /root/386/pbs -d -r 1 $disk/dos /root/386/9bootfat
+		}
+		rm -fr 386 cfg efi
+	}
--- /dev/null
+++ b/mkfiles/mkfile-Inferno-amd64
@@ -1,0 +1,24 @@
+TARGMODEL=	Inferno
+TARGSHTYPE=	rc
+CPUS=		arm mips 386 amd64
+
+O=		6
+OS=		v851ok0q2t6
+
+AR=		ar
+ARFLAGS=	vu
+
+AS=		6a
+ASFLAGS=
+
+CC=		6c
+CFLAGS=		-wFVT -I$ROOT/Inferno/$OBJTYPE/include -I$ROOT/Inferno/include -I$ROOT/include
+ANSICPP= -p
+
+LD=		6l
+LDFLAGS=
+
+SYSLIBS=	-lc
+
+YACC=		yacc
+YFLAGS=		-d
--- a/os/boot/pc/ether8169.c
+++ b/os/boot/pc/ether8169.c
@@ -176,10 +176,10 @@
 
 typedef struct D D;			/* Transmit/Receive Descriptor */
 struct D {
-	u32int	control;
-	u32int	vlan;
-	u32int	addrlo;
-	u32int	addrhi;
+	u32	control;
+	u32	vlan;
+	u32	addrlo;
+	u32	addrhi;
 };
 
 enum {					/* Transmit Descriptor control */
@@ -231,14 +231,14 @@
 	u64int	txok;
 	u64int	rxok;
 	u64int	txer;
-	u32int	rxer;
+	u32	rxer;
 	u16int	misspkt;
 	u16int	fae;
-	u32int	tx1col;
-	u32int	txmcol;
+	u32	tx1col;
+	u32	txmcol;
 	u64int	rxokph;
 	u64int	rxokbrd;
-	u32int	rxokmu;
+	u32	rxokmu;
 	u16int	txabt;
 	u16int	txundrn;
 };
@@ -415,7 +415,7 @@
 static int
 rtl8169reset(Ctlr* ctlr)
 {
-	u32int r;
+	u32 r;
 	int timeo;
 
 	/*
@@ -471,7 +471,7 @@
 static int
 rtl8169init(Ether* edev)
 {
-	u32int r;
+	u32 r;
 	Ctlr *ctlr;
 	u8int cplusc;
 
@@ -739,7 +739,7 @@
 	D *d;
 	int len, rdh;
 	Ctlr *ctlr;
-	u32int control;
+	u32 control;
 	RingBuf *ring;
 
 	ctlr = edev->ctlr;
@@ -784,7 +784,7 @@
 {
 	Ctlr *ctlr;
 	Ether *edev;
-	u32int isr;
+	u32 isr;
 
 	edev = arg;
 	ctlr = edev->ctlr;
@@ -831,7 +831,7 @@
 	Pcidev *p;
 	Ctlr *ctlr;
 	int i, port;
-	u32int bar;
+	u32 bar;
 
 	p = nil;
 	while(p = pcimatch(p, 0, 0)){
@@ -901,7 +901,7 @@
 int
 rtl8169pnp(Ether* edev)
 {
-	u32int r;
+	u32 r;
 	Ctlr *ctlr;
 
 	if(rtl8169ctlrhead == nil)
--- a/os/boot/pc/etherdp83820.c
+++ b/os/boot/pc/etherdp83820.c
@@ -280,13 +280,13 @@
 };
 
 typedef struct {
-	u32int	link;			/* Link to the next descriptor */
-	u32int	bufptr;			/* pointer to data Buffer */
+	u32	link;			/* Link to the next descriptor */
+	u32	bufptr;			/* pointer to data Buffer */
 	int	cmdsts;			/* Command/Status */
 	int	extsts;			/* optional Extended Status */
 
 	Block*	bp;			/* Block containing bufptr */
-	u32int	unused;			/* pad to 64-bit */
+	u32	unused;			/* pad to 64-bit */
 } Desc;
 
 enum {					/* Common cmdsts bits */
--- a/os/init/wminit.b
+++ b/os/init/wminit.b
@@ -85,66 +85,66 @@
 	bind("#I", "/net", sys->MAFTER);
 	bind("#c", "/dev", sys->MAFTER);
 	bind("#H", "/dev", sys->MAFTER);
-	nvramfd := sys->open("#H/hd0nvram", sys->ORDWR);
-	if(nvramfd != nil){
-		spec = "#Fhd0nvram";
-		if(bind(spec, "/nvfs", sys->MAFTER) < 0)
-			print("init: bind %s: %r\n", spec);
-	}
+#	nvramfd := sys->open("#H/hd0nvram", sys->ORDWR);
+#	if(nvramfd != nil){
+#		spec = "#Fhd0nvram";
+#		if(bind(spec, "/nvfs", sys->MAFTER) < 0)
+#			print("init: bind %s: %r\n", spec);
+#	}
 
 	setsysname();
 
-	sys->print("bootp...");
-
-	fd := open("/net/ipifc/clone", sys->OWRITE);
-	if(fd == nil) {
-		print("init: open /net/ipifc/clone: %r\n");
-		exit;
-	}
-	cfg := array of byte "bind ether ether0";
-	if(sys->write(fd, cfg, len cfg) != len cfg) {
-		sys->print("could not bind interface: %r\n");
-		exit;
-	}
-	cfg = array of byte "bootp";
-	if(sys->write(fd, cfg, len cfg) != len cfg) {
-		sys->print("could not bootp: %r\n");
-		exit;
-	}
-
-	fd = open("/net/bootp", sys->OREAD);
-	if(fd == nil) {
-		print("init: open /net/bootp: %r");
-		exit;
-	}
-
-	buf := array[Bootpreadlen] of byte;
-	nr := read(fd, buf, len buf);
-	fd = nil;
-	if(nr <= 0) {
-		print("init: read /net/bootp: %r");
-		exit;
-	}
-
-	(ntok, ls) := sys->tokenize(string buf, " \t\n");
-	while(ls != nil) {
-		if(hd ls == "fsip"){
-			ls = tl ls;
-			break;
-		}
-		ls = tl ls;
-	}
-	if(ls == nil) {
-		print("init: server address not in bootp read");
-		exit;
-	}
-
-	srv := hd ls;
-	sys->print("server %s\nConnect ...\n", srv);
-
-	retrycount := 0;
-	while(rootfs(srv) < 0 && retrycount++ < 5)
-		sleep(1000);
+#	sys->print("bootp...");
+#
+#	fd := open("/net/ipifc/clone", sys->OWRITE);
+#	if(fd == nil) {
+#		print("init: open /net/ipifc/clone: %r\n");
+#		exit;
+#	}
+#	cfg := array of byte "bind ether ether0";
+#	if(sys->write(fd, cfg, len cfg) != len cfg) {
+#		sys->print("could not bind interface: %r\n");
+#		exit;
+#	}
+#	cfg = array of byte "bootp";
+#	if(sys->write(fd, cfg, len cfg) != len cfg) {
+#		sys->print("could not bootp: %r\n");
+#		exit;
+#	}
+#
+#	fd = open("/net/bootp", sys->OREAD);
+#	if(fd == nil) {
+#		print("init: open /net/bootp: %r");
+#		exit;
+#	}
+#
+#	buf := array[Bootpreadlen] of byte;
+#	nr := read(fd, buf, len buf);
+#	fd = nil;
+#	if(nr <= 0) {
+#		print("init: read /net/bootp: %r");
+#		exit;
+#	}
+#
+#	(ntok, ls) := sys->tokenize(string buf, " \t\n");
+#	while(ls != nil) {
+#		if(hd ls == "fsip"){
+#			ls = tl ls;
+#			break;
+#		}
+#		ls = tl ls;
+#	}
+#	if(ls == nil) {
+#		print("init: server address not in bootp read");
+#		exit;
+#	}
+#
+#	srv := hd ls;
+#	sys->print("server %s\nConnect ...\n", srv);
+#
+#	retrycount := 0;
+#	while(rootfs(srv) < 0 && retrycount++ < 5)
+#		sleep(1000);
 
 	cfd := sys->open("/dev/cons", Sys->OWRITE);
 	if (cfd != nil) {
--- a/os/ip/devip.c
+++ b/os/ip/devip.c
@@ -45,9 +45,9 @@
 
 	Nfs=		32,
 };
-#define TYPE(x) 	( ((ulong)(x).path) & Masktype )
-#define CONV(x) 	( (((ulong)(x).path) >> Shiftconv) & Maskconv )
-#define PROTO(x) 	( (((ulong)(x).path) >> Shiftproto) & Maskproto )
+#define TYPE(x) 	( ((u32)(x).path) & Masktype )
+#define CONV(x) 	( (((u32)(x).path) >> Shiftconv) & Maskconv )
+#define PROTO(x) 	( (((u32)(x).path) >> Shiftproto) & Maskproto )
 #define QID(p, c, y) 	( ((p)<<(Shiftproto)) | ((c)<<Shiftconv) | (y) )
 
 static char network[] = "network";
@@ -188,7 +188,7 @@
 	case Qtopdir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%lud", c->dev);
+			sprint(up->genbuf, "#I%ud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -212,7 +212,7 @@
 	case Qprotodir:
 		if(s == DEVDOTDOT){
 			mkqid(&q, QID(0, 0, Qtopdir), 0, QTDIR);
-			sprint(up->genbuf, "#I%lud", c->dev);
+			sprint(up->genbuf, "#I%ud", c->dev);
 			devdir(c, q, up->genbuf, 0, network, 0555, dp);
 			return 1;
 		}
@@ -327,7 +327,7 @@
 }
 
 static Walkqid*
-ipwalk(Chan* c, Chan *nc, char **name, int nname)
+ipwalk(Chan* c, Chan *nc, char **name, s32 nname)
 {
 	IPaux *a = c->aux;
 	Walkqid* w;
@@ -338,8 +338,8 @@
 	return w;
 }
 
-static int
-ipstat(Chan* c, uchar* db, int n)
+static s32
+ipstat(Chan* c, uchar* db, s32 n)
 {
 	return devstat(c, db, n, nil, 0, ipgen);
 }
@@ -360,7 +360,7 @@
 };
 
 static Chan*
-ipopen(Chan* c, int omode)
+ipopen(Chan* c, u32 omode)
 {
 	Conv *cv, *nc;
 	Proto *p;
@@ -511,8 +511,8 @@
 	return c;
 }
 
-static int
-ipwstat(Chan *c, uchar *dp, int n)
+static s32
+ipwstat(Chan *c, uchar *dp, s32 n)
 {
 	Dir *d;
 	Conv *cv;
@@ -620,13 +620,13 @@
 	Statelen=	32*1024,
 };
 
-static long
-ipread(Chan *ch, void *a, long n, vlong off)
+static s32
+ipread(Chan *ch, void *a, s32 n, s64 off)
 {
 	Conv *c;
 	Proto *x;
 	char *buf, *p;
-	long rv;
+	s32 rv;
 	Fs *f;
 	ulong offset = off;
 
@@ -655,7 +655,7 @@
 	case Qlog:
 		return netlogread(f, a, offset, n);
 	case Qctl:
-		sprint(up->genbuf, "%lud", CONV(ch->qid));
+		sprint(up->genbuf, "%ud", CONV(ch->qid));
 		return readstr(offset, p, n, up->genbuf);
 	case Qremote:
 		buf = smalloc(Statelen);
@@ -711,7 +711,7 @@
 }
 
 static Block*
-ipbread(Chan* ch, long n, ulong offset)
+ipbread(Chan* ch, s32 n, u32 offset)
 {
 	Conv *c;
 	Proto *x;
@@ -1065,8 +1065,8 @@
 		c->ttl = atoi(cb->f[1]);
 }
 
-static long
-ipwrite(Chan* ch, void *v, long n, vlong off)
+static s32
+ipwrite(Chan* ch, void *v, s32 n, s64 off)
 {
 	Conv *c;
 	Proto *x;
@@ -1160,8 +1160,8 @@
 	return n;
 }
 
-static long
-ipbwrite(Chan* ch, Block* bp, ulong offset)
+static s32
+ipbwrite(Chan* ch, Block* bp, u32 offset)
 {
 	Conv *c;
 	Proto *x;
--- a/os/ip/ipv6.h
+++ b/os/ip/ipv6.h
@@ -1,3 +1,4 @@
+#undef MIN
 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
 
 /* rfc 3513 defines the address prefices */
--- a/os/ip/ptclbsum.c
+++ b/os/ip/ptclbsum.c
@@ -21,7 +21,7 @@
 	mdsum = 0;
 
 	x = 0;
-	if((ulong)addr & 1) {
+	if((uintptr)addr & 1) {
 		if(len) {
 			hisum += addr[0];
 			len--;
--- /dev/null
+++ b/os/pc/archgeneric.c
@@ -1,0 +1,95 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+extern int i8259assign(Vctl*);
+extern int i8259irqno(int, int);
+extern void i8259init(void);
+extern int i8259isr(int);
+extern int i8259enable(Vctl*);
+extern int i8259disable(int);
+extern int i8259vecno(int);
+
+void
+archreset(void)	/* try to reboot the machine; never returns to the caller */
+{
+	i8042reset();	/* first attempt; everything below runs only if this returns */
+
+	/*
+	 * Often the BIOS hangs during restart if a conventional 8042
+	 * warm-boot sequence is tried. The following is Intel specific and
+	 * seems to perform a cold-boot, but at least it comes back.
+	 * And sometimes there is no keyboard...
+	 *
+	 * The reset register (0xcf9) is usually in one of the bridge
+	 * chips. The actual location and sequence could be extracted from
+	 * ACPI but why bother, this is the end of the line anyway.
+	 */
+	print("Takes a licking and keeps on ticking...\n");
+	*(ushort*)KADDR(0x472) = 0x1234;	/* BIOS warm-boot flag */
+	outb(0xcf9, 0x02);	/* two-step write to the reset register described above */
+	outb(0xcf9, 0x06);
+
+	print("can't reset\n");	/* reached only if neither attempt reset the machine */
+	for(;;)
+		idle();
+}
+
+void
+delay(int millisecs)	/* wait by handing aamloop() a count of millisecs scaled by m->loopconst */
+{
+	millisecs *= m->loopconst;	/* presumably loopconst is loop iterations per millisecond -- confirm */
+	if(millisecs <= 0)	/* non-positive product (zero/negative argument, or overflow) */
+		millisecs = 1;	/* always pass aamloop() at least 1 */
+	aamloop(millisecs);
+}
+
+void
+microdelay(int microsecs)	/* like delay(), but the scaled count is divided by 1000 */
+{
+	microsecs *= m->loopconst;	/* NOTE(review): int multiply happens before the divide, so large arguments can overflow */
+	microsecs /= 1000;	/* integer division; short waits can round down to 0 */
+	if(microsecs <= 0)	/* rounded to zero, negative, or overflowed */
+		microsecs = 1;	/* always pass aamloop() at least 1 */
+	aamloop(microsecs);
+}
+
+/*  
+ *  performance measurement ticks.  must be low overhead.
+ *  doesn't have to count over a second.
+ */
+ulong
+perfticks(void)	/* value produced by cycles() narrowed to ulong, or 0 when m->havetsc is clear */
+{
+	uvlong x;
+
+	if(m->havetsc)
+		cycles(&x);	/* presumably stores the current cycle count in x -- confirm */
+	else
+		x = 0;	/* no counter available: callers always see 0 */
+	return x;	/* uvlong narrowed to ulong; the high bits are dropped */
+}
+
+PCArch archgeneric = {	/* PCArch entry named "generic": i8259 interrupt hooks and i8253 clock hooks */
+.id=		"generic",
+.ident=		0,	/* NOTE(review): presumably means "no probe routine" -- confirm against the users of PCArch */
+.reset=		archreset,	/* defined in this file */
+
+.intrinit=	i8259init,	/* the i8259* routines are declared extern at the top of this file */
+.intrassign=	i8259assign,
+.intrirqno=	i8259irqno,
+.intrvecno=	i8259vecno,
+.intrspurious=	i8259isr,
+.intrenable=	i8259enable,
+.intrdisable=	i8259disable,
+
+.clockinit=	i8253init,	/* the i8253* routines are not declared in this file; presumably from fns.h -- confirm */
+.clockenable=	i8253enable,
+.fastclock=	i8253read,
+.timerset=	i8253timerset,
+};
--- a/os/pc/audio.h
+++ b/os/pc/audio.h
@@ -9,7 +9,7 @@
 
 #define seteisadma(a, b)	dmainit(a, Bufsize);
 #define CACHELINESZ		8
-#define UNCACHED(type, v)	(type*)((ulong)(v))
+#define UNCACHED(type, v)	(type*)((uintptr)(v))
 
 #define Int0vec
 #define setvec(v, f, a)		intrenable(v, f, a, BUSUNKNOWN, "audio")
--- /dev/null
+++ b/os/pc/bios32.c
@@ -1,0 +1,121 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#define VFLAG(...)	if(vflag) print(__VA_ARGS__)
+
+#define UPTR2INT(p)	((uintptr)(p))
+
+#define l16get(p)	(((p)[1]<<8)|(p)[0])
+#define l32get(p)	(((u32)l16get(p+2)<<16)|l16get(p))
+
+static int vflag = 0;
+
+typedef struct BIOS32sdh {		/* BIOS32 Service Directory Header */
+	u8int	signature[4];		/* "_32_" */
+	u8int	physaddr[4];		/* physical address of entry point; read with l32get() */
+	u8int	revision;
+	u8int	length;			/* of header in paragraphs */
+	u8int	checksum;		/* not examined in this file; sigsearch() may verify it -- confirm */
+	u8int	reserved[5];
+} BIOS32sdh;
+
+typedef struct BIOS32si {		/* BIOS32 Service Interface */
+	u8int*	base;			/* base address of service; the vmap() result from bios32open */
+	int	length;			/* length of service; passed to vunmap() on close */
+	u32	offset;			/* service entry-point from base; NOTE(review): never assigned in this file */
+
+	u16int	ptr[3];			/* far pointer m16:32: [0] low 16 bits, [1] next 16 bits, [2] selector */
+} BIOS32si;
+
+static Lock bios32lock;
+static u16int bios32ptr[3];
+static void* bios32entry;
+
+int
+bios32ci(BIOS32si* si, BIOS32ci* ci)	/* call the service behind si, passing the register block ci */
+{
+	int r;
+
+	lock(&bios32lock);	/* bios32call() is only ever made while holding bios32lock */
+	r = bios32call(ci, si->ptr);	/* si->ptr is the far pointer built by bios32open */
+	unlock(&bios32lock);
+
+	return r;	/* bios32call's result, passed through unchanged */
+}
+
+static int
+bios32locate(void)	/* find the "_32_" header and set bios32entry/bios32ptr; 0 on success, -1 if not found */
+{
+	uintptr ptr;
+	BIOS32sdh *sdh;
+
+	VFLAG("bios32link\n");
+	if((sdh = sigsearch("_32_", sizeof(BIOS32sdh))) == nil)
+		return -1;	/* no header found; bios32ptr is left untouched */
+	VFLAG("sdh @ %#p, entry %#ux\n", sdh, l32get(sdh->physaddr));
+
+	bios32entry = vmap(l32get(sdh->physaddr), 4096+1);	/* NOTE(review): result is not checked for nil */
+	VFLAG("entry @ %#p\n", bios32entry);
+	ptr = UPTR2INT(bios32entry);
+	bios32ptr[0] = ptr & 0xffff;	/* bits 0-15 of the mapped address */
+	bios32ptr[1] = (ptr>>16) & 0xffff;	/* bits 16-31; any higher bits of ptr are dropped */
+	bios32ptr[2] = KESEL;	/* selector; bios32open also tests this word for 0 to decide whether to call here */
+	VFLAG("bios32link: ptr %ux %ux %ux\n",
+		bios32ptr[0], bios32ptr[1], bios32ptr[2]);
+
+	return 0;
+}
+
+void
+BIOS32close(BIOS32si* si)	/* release a BIOS32si obtained from bios32open; si must not be used afterwards */
+{
+	vunmap(si->base, si->length);	/* undo the vmap() made in bios32open */
+	free(si);
+}
+
+BIOS32si*
+bios32open(char* id)	/* ask the BIOS32 directory for service id; returns a malloc'd BIOS32si, or nil on any failure */
+{
+	uint ptr;	/* NOTE(review): receives a uintptr sum below; truncates if uintptr is wider than uint */
+	BIOS32ci ci;
+	BIOS32si *si;
+
+	lock(&bios32lock);
+	if(bios32ptr[2] == 0 && bios32locate() < 0){	/* directory not located yet: do it now, under the lock */
+		unlock(&bios32lock);
+		return nil;
+	}
+
+	VFLAG("bios32si: %s\n", id);
+	memset(&ci, 0, sizeof(BIOS32ci));
+	ci.eax = (id[3]<<24|(id[2]<<16)|(id[1]<<8)|id[0]);	/* id[0..3] packed with id[0] in the low byte; id must have 4 bytes */
+
+	bios32call(&ci, bios32ptr);	/* return value ignored; the outcome is read from ci.eax */
+	unlock(&bios32lock);
+
+	VFLAG("bios32si: eax %ux\n", ci.eax);
+	if(ci.eax & 0xff)	/* a non-zero low byte is treated as failure */
+		return nil;
+	VFLAG("bios32si: base %#ux length %#ux offset %#ux\n",
+		ci.ebx, ci.ecx, ci.edx);
+
+	if((si = malloc(sizeof(BIOS32si))) == nil)
+		return nil;
+	if((si->base = vmap(ci.ebx, ci.ecx)) == nil){	/* map ci.ecx bytes starting at physical ci.ebx */
+		free(si);
+		return nil;
+	}
+	si->length = ci.ecx;	/* NOTE(review): si->offset is never set; ci.edx is only folded into ptr below */
+
+	ptr = UPTR2INT(si->base)+ci.edx;	/* entry point = mapped base + offset returned in edx */
+	si->ptr[0] = ptr & 0xffff;
+	si->ptr[1] = (ptr>>16) & 0xffff;
+	si->ptr[2] = KESEL;
+	VFLAG("bios32si: eax entry %ux\n", ptr);
+
+	return si;	/* caller releases it with BIOS32close */
+}
--- /dev/null
+++ b/os/pc/bootargs.c
@@ -1,0 +1,194 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+
+#define	MAXCONF 64
+static char *confname[MAXCONF];
+static char *confval[MAXCONF];
+static int nconf;
+
+/* screen.c */
+extern char* rgbmask2chan(char *buf, int depth, u32 rm, u32 gm, u32 bm);
+
+/* vgavesa.c */
+extern char* vesabootscreenconf(char*, char*, uchar*);
+
+static void
+multibootargs(void)	/* render multiboot info (memory map, screen, plan9.ini module, command line) as text into BOOTARGS */
+{
+	extern ulong multibootptr;
+	ulong *multiboot;	/* NOTE(review): the word indices below assume ulong is 32 bits -- confirm for this target */
+	char *cp, *ep, *s;
+	ulong *m, l;
+	int i, n;
+
+	if(multibootptr == 0 || multibootptr >= MemMin)	/* nothing passed, or pointer not below MemMin: leave BOOTARGS alone */
+		return;
+
+	multiboot = (ulong*)KADDR(multibootptr);
+
+	cp = BOOTARGS;	/* output starts at the beginning of BOOTARGS */
+	ep = cp + BOOTARGSLEN-1;	/* limit handed to every seprint() below */
+
+	/* memory map */
+	if((multiboot[0] & (1<<6)) != 0 && (l = multiboot[11]) >= 24){	/* l = bytes of map left to walk */
+		cp = seprint(cp, ep, "*e820=");
+		m = KADDR(multiboot[12]);
+		while(m[0] >= 20 && m[0]+4 <= l){	/* m[0] is the entry's size word; stop on a short or overrunning entry */
+			uvlong base, size;
+			m++;	/* step past the size word; it stays reachable as m[-1] */
+			base = ((uvlong)m[0] | (uvlong)m[1]<<32);
+			size = ((uvlong)m[2] | (uvlong)m[3]<<32);
+			cp = seprint(cp, ep, "%.1lux %.16llux %.16llux ",
+				m[4] & 0xF, base, base+size);	/* printed as: type, start, end */
+			l -= m[-1]+4;	/* consumed: the entry plus its 4-byte size word */
+			m = (ulong*)((uintptr)m + m[-1]);	/* advance by the entry's own size */
+		}
+		cp[-1] = '\n';	/* overwrite the byte before cp (the space after the last entry) with a newline */
+	}
+
+	if((multiboot[0] & (1<<12)) != 0 && multiboot[22] != 0){	/* framebuffer */
+		uchar *p = (uchar*)multiboot + 112;	/* six bytes used below as (shift, width) pairs for the three masks */
+		int depth = multiboot[27] & 0xFF;
+		char chan[32];
+
+		switch((multiboot[27]>>8) & 0xFF){	/* any value other than 0 or 1 emits nothing */
+		case 0:
+			snprint(chan, sizeof chan, "m%d", depth);
+			if(0){	/* never entered from case 0; exists so case 1 can jump into the block */
+		case 1:
+			rgbmask2chan(chan, depth,
+				(1UL<<p[1])-1 << p[0],
+				(1UL<<p[3])-1 << p[2],
+				(1UL<<p[5])-1 << p[4]);
+			}
+			cp = seprint(cp, ep, "*bootscreen=%dx%dx%d %s %#lux\n",	/* shared tail for case 0 and case 1 */
+				(int)multiboot[24]*8 / depth,
+				(int)multiboot[26],
+				depth,
+				chan,
+				multiboot[22]);
+		}
+	} else
+	if((multiboot[0] & (1<<11)) != 0 && multiboot[19] != 0)		/* vbe mode info */
+		cp = vesabootscreenconf(cp, ep, KADDR(multiboot[19]));
+
+	/* plan9.ini passed as the first module */
+	if((multiboot[0] & (1<<3)) != 0 && multiboot[5] > 0 && multiboot[6] != 0){
+		m = KADDR(multiboot[6]);
+		s = (char*)KADDR(m[0]);	/* m[0], m[1] are used as the module's start and end addresses */
+		if(m[1] > m[0]){
+			n = utfnlen(s, m[1] - m[0]);	/* rune count, used as the %.*s precision */
+			if(n > 0)
+				cp = seprint(cp, ep, "%.*s\n", n, s);
+		}
+	}
+
+	/* command line */
+	if((multiboot[0] & (1<<2)) != 0 && multiboot[4] != 0){
+		n = tokenize(KADDR(multiboot[4]), confval, MAXCONF);	/* confval is only scratch here; bootargsinit refills it */
+		for(i=0; i<n; i++)
+			cp = seprint(cp, ep, "%s\n", confval[i]);	/* one token per output line */
+	}
+
+	*cp = 0;	/* terminate the generated text */
+}
+
+void
+bootargsinit(void)	/* split the text at BOOTARGS into the confname[]/confval[] tables */
+{
+	int i, j, n;
+	char *cp, *line[MAXCONF], *p, *q;
+
+	multibootargs();	/* may first rewrite BOOTARGS from multiboot information */
+
+	/*
+	 *  parse configuration args from dos file plan9.ini
+	 */
+	cp = BOOTARGS;	/* where b.com leaves its config */
+	cp[BOOTARGSLEN-1] = 0;	/* force termination inside the buffer */
+
+	/*
+	 * Strip out '\r', change '\t' -> ' '.
+	 */
+	p = cp;	/* p = write position, q = read position; compaction is done in place */
+	for(q = cp; *q; q++){
+		if(*q == '\r')
+			continue;
+		if(*q == '\t')
+			*q = ' ';
+		*p++ = *q;
+	}
+	*p = 0;
+
+	n = getfields(cp, line, MAXCONF, 1, "\n");	/* at most MAXCONF lines are considered */
+	for(i = 0; i < n; i++){
+		if(*line[i] == '#')	/* comment line */
+			continue;
+		cp = strchr(line[i], '=');
+		if(cp == nil)	/* lines without '=' are ignored */
+			continue;
+		*cp++ = '\0';	/* split in place: line[i] is the name, cp the value */
+		for(j = 0; j < nconf; j++){	/* look for an earlier entry with the same name, ignoring case */
+			if(cistrcmp(confname[j], line[i]) == 0)
+				break;
+		}
+		confname[j] = line[i];	/* a repeated name overwrites the earlier entry */
+		confval[j] = cp;
+		if(j == nconf)	/* no match found: this was a new entry */
+			nconf++;
+	}
+}
+
+char*
+getconf(char *name)	/* value recorded for name by bootargsinit, or 0 if there is none */
+{
+	int i;
+
+	for(i = 0; i < nconf; i++)
+		if(cistrcmp(confname[i], name) == 0)	/* names are compared ignoring case */
+			return confval[i];	/* pointer into the table's storage, not a copy */
+	return 0;
+}
+
+void
+setconfenv(void)	/* hand every parsed name/value pair to ksetenv() */
+{
+	int i;
+
+	for(i = 0; i < nconf; i++){
+		if(confname[i][0] != '*')	/* names starting with '*' skip the flag-0 call */
+			ksetenv(confname[i], confval[i], 0);
+		ksetenv(confname[i], confval[i], 1);	/* every name gets the flag-1 call; meaning of the flag: see ksetenv -- confirm */
+	}
+}
+
+void
+writeconf(void)	/* copy the getconfenv() data back into BOOTARGS as name=value lines and clear BOOTLINE */
+{
+	char *p, *q;
+	int n;
+
+	p = getconfenv();	/* assumes name\0value\0 pairs ending with an empty string, owned by us -- confirm */
+	if(waserror()) {	/* error path: release p, then pass the error on */
+		free(p);
+		nexterror();
+	}
+
+	/* convert to name=value\n format */
+	for(q=p; *q; q++) {	/* stops when a pair would begin with a NUL */
+		q += strlen(q);	/* to the NUL after the name */
+		*q = '=';
+		q += strlen(q);	/* to the NUL after the value */
+		*q = '\n';
+	}
+	n = q - p + 1;	/* converted text plus its terminating NUL */
+	if(n >= BOOTARGSLEN)
+		error("kernel configuration too large");
+	memmove(BOOTARGS, p, n);
+	memset(BOOTLINE, 0, BOOTLINELEN);
+	poperror();	/* normal path: drop the handler installed by waserror() */
+	free(p);
+}
--- a/os/pc/cga.c
+++ b/os/pc/cga.c
@@ -50,6 +50,11 @@
 	outb(0x3D4+1, data);
 }
 
+/* TODO BUG: the cursor is shown 2 characters beyond the text.
+ * Using pos = cgapos-2 would correct the offset,
+ * but then a character is skipped when 'd' is pressed.
+ * Is it a bug in the shell prompt instead?
+ */
 static void
 movecursor(void)
 {
@@ -109,10 +114,15 @@
 	else
 		lock(&cgascreenlock);
 
-	while(n-- > 0)
+	while(n-- > 0){
+		outb(0x3D6, *s);
+		// outb(0x3D6, '-');
 		cgascreenputc(*s++);
+		// outb(0x3D6, '_');
+	}
 
 	unlock(&cgascreenlock);
+		// outb(0x3D6, ',');
 }
 
 void
@@ -122,6 +132,11 @@
 	cgapos = cgaregr(0x0E)<<8;
 	cgapos |= cgaregr(0x0F);
 	cgapos *= 2;
+
+	if(cgapos >= Width*Height){
+		cgapos = 0;
+		movecursor();
+	}
 
 	screenputs = cgascreenputs;
 }
--- a/os/pc/devarch.c
+++ b/os/pc/devarch.c
@@ -7,48 +7,38 @@
 #include "ureg.h"
 #include "../port/error.h"
 
-typedef struct IOMap IOMap;
-struct IOMap
-{
-	IOMap	*next;
-	int	reserved;
-	char	tag[13];
-	ulong	start;
-	ulong	end;
-};
-
-static struct
-{
-	Lock;
-	IOMap	*m;
-	IOMap	*free;
-	IOMap	maps[32];		// some initial free maps
-
-	QLock	ql;			// lock for reading map
-} iomap;
-
 enum {
 	Qdir = 0,
-	Qioalloc = 1,
 	Qiob,
 	Qiow,
 	Qiol,
+	Qmsr,
 	Qbase,
 
-	Qmax = 16,
+	Qmax = 32,
 };
 
-typedef long Rdwrfn(Chan*, void*, long, vlong);
+enum {				/* cpuid standard function codes */
+	Highstdfunc = 0,	/* also returns vendor string */
+	Procsig,
+	Proctlbcache,
+	Procserial,
+	
+	Highextfunc = 0x80000000,
+	Procextfeat,
+};
 
+typedef s32 Rdwrfn(Chan*, void*, s32, s64);
+
 static Rdwrfn *readfn[Qmax];
 static Rdwrfn *writefn[Qmax];
 
 static Dirtab archdir[Qmax] = {
 	".",		{ Qdir, 0, QTDIR },	0,	0555,
-	"ioalloc",	{ Qioalloc, 0 },	0,	0444,
 	"iob",		{ Qiob, 0 },		0,	0660,
 	"iow",		{ Qiow, 0 },		0,	0660,
 	"iol",		{ Qiol, 0 },		0,	0660,
+	"msr",		{ Qmsr, 0 },		0,	0660,
 };
 Lock archwlock;	/* the lock is only for changing archdir */
 int narchdir = Qbase;
@@ -55,8 +45,6 @@
 int (*_pcmspecial)(char*, ISAConf*);
 void (*_pcmspecialclose)(int);
 
-static int doi8253set = 1;
-
 /*
  * Add a file to the #P listing.  Once added, you can't delete it.
  * You can't add a file with the same name as one already there,
@@ -64,7 +52,7 @@
  * like change the Qid version.  Changing the Qid path is disallowed.
  */
 Dirtab*
-addarchfile(char *name, int perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
+addarchfile(char *name, u32 perm, Rdwrfn *rdfn, Rdwrfn *wrfn)
 {
 	int i;
 	Dirtab d;
@@ -77,6 +65,7 @@
 	lock(&archwlock);
 	if(narchdir >= Qmax){
 		unlock(&archwlock);
+		print("addarchfile: out of entries for %s\n", name);
 		return nil;
 	}
 
@@ -100,19 +89,15 @@
 ioinit(void)
 {
 	char *excluded;
-	int i;
 
-	for(i = 0; i < nelem(iomap.maps)-1; i++)
-		iomap.maps[i].next = &iomap.maps[i+1];
-	iomap.maps[i].next = nil;
-	iomap.free = iomap.maps;
+	iomapinit(0xffff);
 
 	/*
 	 * This is necessary to make the IBM X20 boot.
 	 * Have not tracked down the reason.
+	 * i82557 is at 0x1000, the dummy entry is needed for swappable devs.
 	 */
-	ioalloc(0x0fff, 1, 0, "dummy");	// i82557 is at 0x1000, the dummy
-					// entry is needed for swappable devs.
+	ioalloc(0x0fff, 1, 0, "dummy");
 
 	if ((excluded = getconf("ioexclude")) != nil) {
 		char *s;
@@ -137,165 +122,14 @@
 			ioalloc(io_s, io_e - io_s + 1, 0, "pre-allocated");
 		}
 	}
-
 }
 
-// Reserve a range to be ioalloced later. 
-// This is in particular useful for exchangable cards, such
-// as pcmcia and cardbus cards.
-int
-ioreserve(int, int size, int align, char *tag)
-{
-	IOMap *m, **l;
-	int i, port;
-
-	lock(&iomap);
-	// find a free port above 0x400 and below 0x1000
-	port = 0x400;
-	for(l = &iomap.m; *l; l = &(*l)->next){
-		m = *l;
-		if (m->start < 0x400) continue;
-		i = m->start - port;
-		if(i > size)
-			break;
-		if(align > 0)
-			port = ((port+align-1)/align)*align;
-		else
-			port = m->end;
-	}
-	if(*l == nil){
-		unlock(&iomap);
-		return -1;
-	}
-	m = iomap.free;
-	if(m == nil){
-		print("ioalloc: out of maps");
-		unlock(&iomap);
-		return port;
-	}
-	iomap.free = m->next;
-	m->next = *l;
-	m->start = port;
-	m->end = port + size;
-	m->reserved = 1;
-	strncpy(m->tag, tag, sizeof(m->tag));
-	m->tag[sizeof(m->tag)-1] = 0;
-	*l = m;
-
-	archdir[0].qid.vers++;
-
-	unlock(&iomap);
-	return m->start;
-}
-
-//
-//	alloc some io port space and remember who it was
-//	alloced to.  if port < 0, find a free region.
-//
-int
-ioalloc(int port, int size, int align, char *tag)
-{
-	IOMap *m, **l;
-	int i;
-
-	lock(&iomap);
-	if(port < 0){
-		// find a free port above 0x400 and below 0x1000
-		port = 0x400;
-		for(l = &iomap.m; *l; l = &(*l)->next){
-			m = *l;
-			if (m->start < 0x400) continue;
-			i = m->start - port;
-			if(i > size)
-				break;
-			if(align > 0)
-				port = ((port+align-1)/align)*align;
-			else
-				port = m->end;
-		}
-		if(*l == nil){
-			unlock(&iomap);
-			return -1;
-		}
-	} else {
-		// Only 64KB I/O space on the x86.
-		if((port+size) > 0x10000){
-			unlock(&iomap);
-			return -1;
-		}
-		// see if the space clashes with previously allocated ports
-		for(l = &iomap.m; *l; l = &(*l)->next){
-			m = *l;
-			if(m->end <= port)
-				continue;
-			if(m->reserved && m->start == port && m->end == port + size) {
-				m->reserved = 0;
-				unlock(&iomap);
-				return m->start;
-			}
-			if(m->start >= port+size)
-				break;
-			unlock(&iomap);
-			return -1;
-		}
-	}
-	m = iomap.free;
-	if(m == nil){
-		print("ioalloc: out of maps");
-		unlock(&iomap);
-		return port;
-	}
-	iomap.free = m->next;
-	m->next = *l;
-	m->start = port;
-	m->end = port + size;
-	strncpy(m->tag, tag, sizeof(m->tag));
-	m->tag[sizeof(m->tag)-1] = 0;
-	*l = m;
-
-	archdir[0].qid.vers++;
-
-	unlock(&iomap);
-	return m->start;
-}
-
-void
-iofree(int port)
-{
-	IOMap *m, **l;
-
-	lock(&iomap);
-	for(l = &iomap.m; *l; l = &(*l)->next){
-		if((*l)->start == port){
-			m = *l;
-			*l = m->next;
-			m->next = iomap.free;
-			iomap.free = m;
-			break;
-		}
-		if((*l)->start > port)
-			break;
-	}
-	archdir[0].qid.vers++;
-	unlock(&iomap);
-}
-
-int
-iounused(int start, int end)
-{
-	IOMap *m;
-
-	for(m = iomap.m; m; m = m->next){
-		if(start >= m->start && start < m->end
-		|| start <= m->start && end > m->start)
-			return 0; 
-	}
-	return 1;
-}
-
 static void
-checkport(int start, int end)
+checkport(u32 start, u32 end)
 {
+	if(end < start || end > 0x10000)
+		error(Ebadarg);
+
 	/* standard vga regs are OK */
 	if(start >= 0x2b0 && end <= 0x2df+1)
 		return;
@@ -319,14 +153,14 @@
 	return devwalk(c, nc, name, nname, archdir, narchdir, devgen);
 }
 
-static int
-archstat(Chan* c, uchar* dp, int n)
+static s32
+archstat(Chan* c, uchar* dp, s32 n)
 {
 	return devstat(c, dp, n, archdir, narchdir, devgen);
 }
 
 static Chan*
-archopen(Chan* c, int omode)
+archopen(Chan* c, u32 omode)
 {
 	return devopen(c, omode, archdir, narchdir, devgen);
 }
@@ -336,39 +170,33 @@
 {
 }
 
-enum
+static s32
+archread(Chan *c, void *a, s32 n, s64 offset)
 {
-	Linelen= 31,
-};
-
-static long
-archread(Chan *c, void *a, long n, vlong offset)
-{
-	char *buf, *p;
-	int port;
-	ushort *sp;
-	ulong *lp;
-	IOMap *m;
+	u32 port, end;
+	uchar *cp;
+	u16 *sp;
+	u32 *lp;
+	s64 *vp;
 	Rdwrfn *fn;
 
+	port = offset;
+	end = port+n;
 	switch((ulong)c->qid.path){
-
 	case Qdir:
 		return devdirread(c, a, n, archdir, narchdir, devgen);
 
 	case Qiob:
-		port = offset;
-		checkport(offset, offset+n);
-		for(p = a; port < offset+n; port++)
-			*p++ = inb(port);
+		checkport(port, end);
+		for(cp = a; port < end; port++)
+			*cp++ = inb(port);
 		return n;
 
 	case Qiow:
 		if(n & 1)
 			error(Ebadarg);
-		checkport(offset, offset+n);
-		sp = a;
-		for(port = offset; port < offset+n; port += 2)
+		checkport(port, end);
+		for(sp = a; port < end; port += 2)
 			*sp++ = ins(port);
 		return n;
 
@@ -375,69 +203,54 @@
 	case Qiol:
 		if(n & 3)
 			error(Ebadarg);
-		checkport(offset, offset+n);
-		lp = a;
-		for(port = offset; port < offset+n; port += 4)
+		checkport(port, end);
+		for(lp = a; port < end; port += 4)
 			*lp++ = inl(port);
 		return n;
 
-	case Qioalloc:
-		break;
+	case Qmsr:
+		if(n & 7)
+			error(Ebadarg);
+		if((ulong)n/8 > -port)
+			error(Ebadarg);
+		end = port+(n/8);
+		for(vp = a; port != end; port++)
+			if(rdmsr(port, vp++) < 0)
+				error(Ebadarg);
+		return n;
 
 	default:
 		if(c->qid.path < narchdir && (fn = readfn[c->qid.path]))
 			return fn(c, a, n, offset);
 		error(Eperm);
-		break;
+		return 0;
 	}
-
-	if((buf = malloc(n)) == nil)
-		error(Enomem);
-	p = buf;
-	n = n/Linelen;
-	offset = offset/Linelen;
-
-	lock(&iomap);
-	for(m = iomap.m; n > 0 && m != nil; m = m->next){
-		if(offset-- > 0)
-			continue;
-		sprint(p, "%8lux %8lux %-12.12s\n", m->start, m->end-1, m->tag);
-		p += Linelen;
-		n--;
-	}
-	unlock(&iomap);
-
-	n = p - buf;
-	memmove(a, buf, n);
-	free(buf);
-
-	return n;
 }
 
-static long
-archwrite(Chan *c, void *a, long n, vlong offset)
+static s32
+archwrite(Chan *c, void *a, s32 n, s64 offset)
 {
-	char *p;
-	int port;
-	ushort *sp;
-	ulong *lp;
+	u32 port, end;
+	uchar *cp;
+	u16 *sp;
+	u32 *lp;
+	s64 *vp;
 	Rdwrfn *fn;
 
+	port = offset;
+	end = port+n;
 	switch((ulong)c->qid.path){
-
 	case Qiob:
-		p = a;
-		checkport(offset, offset+n);
-		for(port = offset; port < offset+n; port++)
-			outb(port, *p++);
+		checkport(port, end);
+		for(cp = a; port < end; port++)
+			outb(port, *cp++);
 		return n;
 
 	case Qiow:
 		if(n & 1)
 			error(Ebadarg);
-		checkport(offset, offset+n);
-		sp = a;
-		for(port = offset; port < offset+n; port += 2)
+		checkport(port, end);
+		for(sp = a; port < end; port += 2)
 			outs(port, *sp++);
 		return n;
 
@@ -444,14 +257,24 @@
 	case Qiol:
 		if(n & 3)
 			error(Ebadarg);
-		checkport(offset, offset+n);
-		lp = a;
-		for(port = offset; port < offset+n; port += 4)
+		checkport(port, end);
+		for(lp = a; port < end; port += 4)
 			outl(port, *lp++);
 		return n;
 
+	case Qmsr:
+		if(n & 7)
+			error(Ebadarg);
+		if((ulong)n/8 > -port)
+			error(Ebadarg);
+		end = port+(n/8);
+		for(vp = a; port != end; port++)
+			if(wrmsr(port, *vp++) < 0)
+				error(Ebadarg);
+		return n;
+
 	default:
-		if(c->qid.path < narchdir && (fn = writefn[c->qid.path]))
+		if(c->qid.path < narchdir && (fn = writefn[c->qid.path]) != nil)
 			return fn(c, a, n, offset);
 		error(Eperm);
 		break;
@@ -497,6 +320,22 @@
 }
 
 /*
+ * 386 has no compare-and-swap instruction.
+ * Run it with interrupts turned off instead.
+ */
+static s32
+cmpswap386(s32 *addr, s32 old, s32 new)
+{
+	int pl = splhi();		/* interrupts stay off across the test and the store */
+	int same = (*addr == old);
+
+	if(same)
+		*addr = new;
+	splx(pl);
+	return same;
+}
+
+/*
  * On a uniprocessor, you'd think that coherence could be nop,
  * but it can't.  We still need a barrier when using coherence() in
  * device drivers.
@@ -506,26 +345,11 @@
  */
 void (*coherence)(void) = nop;
 
+s32 (*cmpswap)(s32*, s32, s32) = cmpswap386;
+
 PCArch* arch;
 extern PCArch* knownarch[];
 
-PCArch archgeneric = {
-.id=		"generic",
-.ident=		0,
-.reset=		i8042reset,
-.serialpower=	unimplemented,
-.modempower=	unimplemented,
-
-.intrinit=	i8259init,
-.intrenable=	i8259enable,
-.intrvecno=	i8259vecno,
-.intrdisable=	i8259disable,
-
-.clockenable=	i8253enable,
-.fastclock=	i8253read,
-.timerset=	i8253timerset,
-};
-
 typedef struct X86type X86type;
 struct X86type {
 	int	family;
@@ -558,8 +382,15 @@
 	{ 6,	7,	16,	"PentiumIII/Xeon", },
 	{ 6,	8,	16,	"PentiumIII/Xeon", },
 	{ 6,	0xB,	16,	"PentiumIII/Xeon", },
+	{ 6,	0xF,	16,	"Xeon5000-series", },
+	{ 6,	0x16,	16,	"Celeron", },
+	{ 6,	0x17,	16,	"Core 2/Xeon", },
+	{ 6,	0x1A,	16,	"Core i7/Xeon", },
+	{ 6,	0x1C,	16,	"Atom", },
+	{ 6,	0x1D,	16,	"Xeon MP", },
 	{ 0xF,	1,	16,	"P4", },	/* P4 */
 	{ 0xF,	2,	16,	"PentiumIV/Xeon", },
+	{ 0xF,	6,	16,	"PentiumIV/Xeon", },
 
 	{ 3,	-1,	32,	"386", },	/* family defaults */
 	{ 4,	-1,	22,	"486", },
@@ -586,18 +417,25 @@
 	{ 5,	1,	23,	"AMD-K5", },	/* guesswork */
 	{ 5,	2,	23,	"AMD-K5", },	/* guesswork */
 	{ 5,	3,	23,	"AMD-K5", },	/* guesswork */
+	{ 5,	4,	23,	"AMD Geode GX1", },	/* guesswork */
+	{ 5,	5,	23,	"AMD Geode GX2", },	/* guesswork */
 	{ 5,	6,	11,	"AMD-K6", },	/* trial and error */
 	{ 5,	7,	11,	"AMD-K6", },	/* trial and error */
 	{ 5,	8,	11,	"AMD-K6-2", },	/* trial and error */
 	{ 5,	9,	11,	"AMD-K6-III", },/* trial and error */
+	{ 5,	0xa,	23,	"AMD Geode LX", },	/* guesswork */
 
 	{ 6,	1,	11,	"AMD-Athlon", },/* trial and error */
 	{ 6,	2,	11,	"AMD-Athlon", },/* trial and error */
 
+	{ 0x1F,	9,	11,	"AMD-K10 Opteron G34", },/* guesswork */
+
 	{ 4,	-1,	22,	"Am486", },	/* guesswork */
 	{ 5,	-1,	23,	"AMD-K5/K6", },	/* guesswork */
 	{ 6,	-1,	11,	"AMD-Athlon", },/* guesswork */
-	{ 0xF,	-1,	11,	"AMD64", },	/* guesswork */
+	{ 0xF,	-1,	11,	"AMD-K8", },	/* guesswork */
+	{ 0x1F,	-1,	11,	"AMD-K10", },	/* guesswork */
+	{ 23,	1,	13,	"AMD Ryzen" },
 
 	{ -1,	-1,	11,	"unknown", },	/* total default */
 };
@@ -610,6 +448,7 @@
 	{5,	4,	23,	"Winchip",},	/* guesswork */
 	{6,	7,	23,	"Via C3 Samuel 2 or Ezra",},
 	{6,	8,	23,	"Via C3 Ezra-T",},
+	{6,	9,	23,	"Via C3 Eden-N",},
 	{ -1,	-1,	23,	"unknown", },	/* total default */
 };
 
@@ -622,8 +461,6 @@
 	{ -1,	-1,	23,	"unknown", },	/* total default */
 };
 
-static X86type *cputype;
-
 static void	simplecycles(uvlong*);
 void	(*cycles)(uvlong*) = simplecycles;
 void	_cycles(uvlong*);	/* in l.s */
@@ -637,15 +474,9 @@
 void
 cpuidprint(void)
 {
-	int i;
-	char buf[128];
-
-	i = sprint(buf, "cpu%d: %dMHz ", m->machno, m->cpumhz);
-	if(m->cpuidid[0])
-		i += sprint(buf+i, "%12.12s ", m->cpuidid);
-	sprint(buf+i, "%s (cpuid: AX 0x%4.4uX DX 0x%4.4uX)\n",
-		m->cpuidtype, m->cpuidax, m->cpuiddx);
-	print(buf);
+	print("cpu%d: %dMHz %s %s (AX %8.8uX CX %8.8uX DX %8.8uX)\n",
+		m->machno, m->cpumhz, m->cpuidid, m->cpuidtype,
+		m->cpuidax, m->cpuidcx, m->cpuiddx);
 }
 
 /*
@@ -658,18 +489,46 @@
  *		(if so turn it on)
  *	- whether or not it supports the page global flag
  *		(if so turn it on)
+ *	- detect PAT feature and add write-combining entry
+ *	- detect MTRR support and synchronize state with cpu0
+ *	- detect NX support and enable it for AMD64
+ *	- detect watchpoint support
+ *	- detect FPU features and enable the FPU
  */
 int
 cpuidentify(void)
 {
-	char *p;
-	int family, model, nomce;
+	int family, model;
 	X86type *t, *tab;
-	ulong cr4;
-	vlong mca, mct;
+	u32 regs[4];
+	uintptr cr4;
 
-	cpuid(m->cpuidid, &m->cpuidax, &m->cpuiddx);
-	if(strncmp(m->cpuidid, "AuthenticAMD", 12) == 0)
+	cpuid(Highstdfunc, 0, regs);
+	memmove(m->cpuidid,   &regs[1], BY2WD);	/* bx */
+	memmove(m->cpuidid+4, &regs[3], BY2WD);	/* dx */
+	memmove(m->cpuidid+8, &regs[2], BY2WD);	/* cx */
+	m->cpuidid[12] = '\0';
+
+	cpuid(Procsig, 0, regs);
+	m->cpuidax = regs[0];
+	m->cpuidcx = regs[2];
+	m->cpuiddx = regs[3];
+	
+	m->cpuidfamily = m->cpuidax >> 8 & 0xf;
+	m->cpuidmodel = m->cpuidax >> 4 & 0xf;
+	m->cpuidstepping = m->cpuidax & 0xf;
+	switch(m->cpuidfamily){
+	case 15:
+		m->cpuidfamily += m->cpuidax >> 20 & 0xff;
+		m->cpuidmodel += m->cpuidax >> 16 & 0xf;
+		break;
+	case 6:
+		m->cpuidmodel += m->cpuidax >> 16 & 0xf;
+		break;
+	}
+
+	if(strncmp(m->cpuidid, "AuthenticAMD", 12) == 0 ||
+	   strncmp(m->cpuidid, "Geode by NSC", 12) == 0)
 		tab = x86amd;
 	else if(strncmp(m->cpuidid, "CentaurHauls", 12) == 0)
 		tab = x86winchip;
@@ -677,9 +536,9 @@
 		tab = x86sis;
 	else
 		tab = x86intel;
-	
-	family = X86FAMILY(m->cpuidax);
-	model = X86MODEL(m->cpuidax);
+
+	family = m->cpuidfamily;
+	model = m->cpuidmodel;
 	for(t=tab; t->name; t++)
 		if((t->family == family && t->model == model)
 		|| (t->family == family && t->model == -1)
@@ -686,50 +545,70 @@
 		|| (t->family == -1))
 			break;
 
+	m->aalcycles = t->aalcycles;
 	m->cpuidtype = t->name;
 
 	/*
 	 *  if there is one, set tsc to a known value
 	 */
-	if(m->cpuiddx & 0x10){
+	if(m->cpuiddx & Tsc){
 		m->havetsc = 1;
 		cycles = _cycles;
-		if(m->cpuiddx & 0x20)
+		if(m->cpuiddx & Cpumsr)
 			wrmsr(0x10, 0);
 	}
 
 	/*
- 	 *  use i8253 to guess our cpu speed
-	 */
-	guesscpuhz(t->aalcycles);
-
-	/*
 	 * If machine check exception, page size extensions or page global bit
 	 * are supported enable them in CR4 and clear any other set extensions.
 	 * If machine check was enabled clear out any lingering status.
 	 */
-	if(m->cpuiddx & 0x2088){
-		cr4 = 0;
-		if(m->cpuiddx & 0x08)
+	if(m->cpuiddx & (Pge|Mce|Pse)){
+		vlong mca, mct;
+
+		cr4 = getcr4();
+		if(m->cpuiddx & Pse)
 			cr4 |= 0x10;		/* page size extensions */
-		if(p = getconf("*nomce"))
-			nomce = strtoul(p, 0, 0);
-		else
-			nomce = 0;
-		if((m->cpuiddx & 0x80) && !nomce){
-			cr4 |= 0x40;		/* machine check enable */
-			if(family == 5){
+
+		if((m->cpuiddx & Mce) != 0 && getconf("*nomce") == nil){
+			if((m->cpuiddx & Mca) != 0){
+				vlong cap;
+				int bank;
+
+				cap = 0;
+				rdmsr(0x179, &cap);
+
+				if(cap & 0x100)
+					wrmsr(0x17B, ~0ULL);	/* enable all mca features */
+
+				bank = cap & 0xFF;
+				if(bank > 64)
+					bank = 64;
+
+				/* init MCi .. MC1 (except MC0) */
+				while(--bank > 0){
+					wrmsr(0x400 + bank*4, ~0ULL);
+					wrmsr(0x401 + bank*4, 0);
+				}
+
+				if(family != 6 || model >= 0x1A)
+					wrmsr(0x400, ~0ULL);
+
+				wrmsr(0x401, 0);
+			}
+			else if(family == 5){
 				rdmsr(0x00, &mca);
 				rdmsr(0x01, &mct);
 			}
+			cr4 |= 0x40;		/* machine check enable */
 		}
-	
+
 		/*
 		 * Detect whether the chip supports the global bit
 		 * in page directory and page table entries.  When set
 		 * in a particular entry, it means ``don't bother removing
-		 * this from the TLB when CR3 changes.''  
-		 * 
+		 * this from the TLB when CR3 changes.''
+		 *
 		 * We flag all kernel pages with this bit.  Doing so lessens the
 		 * overhead of switching processes on bare hardware,
 		 * even more so on VMware.  See mmu.c:/^memglobal.
@@ -739,54 +618,121 @@
 		 * the PGE bit in CR4, writing to CR3, and then
 		 * restoring the PGE bit.
 		 */
-		if(m->cpuiddx & 0x2000){
+		if(m->cpuiddx & Pge){
 			cr4 |= 0x80;		/* page global enable bit */
 			m->havepge = 1;
 		}
-
 		putcr4(cr4);
-		if(m->cpuiddx & 0x80)
+
+		if((m->cpuiddx & (Mca|Mce)) == Mce)
 			rdmsr(0x01, &mct);
 	}
 
-	cputype = t;
+#ifdef PATWC
+	/* IA32_PAT write combining */
+	if((m->cpuiddx & Pat) != 0){
+		vlong pat;
+
+		if(rdmsr(0x277, &pat) != -1){
+			pat &= ~(255LL<<(PATWC*8));
+			pat |= 1LL<<(PATWC*8);	/* WC */
+			wrmsr(0x277, pat);
+		}
+	}
+#endif
+
+	if((m->cpuiddx & Mtrr) != 0 && getconf("*nomtrr") == nil)
+		mtrrsync();
+
+	if(strcmp(m->cpuidid, "GenuineIntel") == 0 && (m->cpuidcx & Rdrnd) != 0)
+		hwrandbuf = rdrandbuf;
+	else
+		hwrandbuf = nil;
+	
+	if(sizeof(uintptr) == 8) {
+		/* 8-byte watchpoints are supported in Long Mode */
+		m->havewatchpt8 = 1;
+
+		/* check and enable NX bit */
+		cpuid(Highextfunc, 0, regs);
+		if(regs[0] >= Procextfeat){
+			cpuid(Procextfeat, 0, regs);
+			if((regs[3] & (1<<20)) != 0){
+				vlong efer;
+
+				/* enable no-execute feature */
+				if(rdmsr(Efer, &efer) != -1){
+					efer |= 1ull<<11;
+					if(wrmsr(Efer, efer) != -1)
+						m->havenx = 1;
+				}
+			}
+		}
+	} else if(strcmp(m->cpuidid, "GenuineIntel") == 0){
+		/* some random CPUs that support 8-byte watchpoints */
+		if(family == 15 && (model == 3 || model == 4 || model == 6)
+		|| family == 6 && (model == 15 || model == 23 || model == 28))
+			m->havewatchpt8 = 1;
+		/* Intel SDM claims amd64 support implies 8-byte watchpoint support */
+		cpuid(Highextfunc, 0, regs);
+		if(regs[0] >= Procextfeat){
+			cpuid(Procextfeat, 0, regs);
+			if((regs[3] & 1<<29) != 0)
+				m->havewatchpt8 = 1;
+		}
+	}
+
+	fpuinit();
+
 	return t->family;
 }
 
-static long
-cputyperead(Chan*, void *a, long n, vlong offset)
+static s32
+cputyperead(Chan*, void *a, s32 n, s64 offset)
 {
 	char str[32];
-	ulong mhz;
 
-	mhz = (m->cpuhz+999999)/1000000;
-
-	snprint(str, sizeof(str), "%s %lud\n", cputype->name, mhz);
+	snprint(str, sizeof(str), "%s %d\n", m->cpuidtype, m->cpumhz);
 	return readstr(offset, a, n, str);
 }
 
-static long
-archctlread(Chan*, void *a, long nn, vlong offset)
+static s32
+archctlread(Chan*, void *a, s32 nn, s64 offset)
 {
-	char buf[256];
 	int n;
-	
-	n = snprint(buf, sizeof buf, "cpu %s %lud%s\n",
-		cputype->name, (ulong)(m->cpuhz+999999)/1000000,
-		m->havepge ? " pge" : "");
-	n += snprint(buf+n, sizeof buf-n, "pge %s\n", getcr4()&0x80 ? "on" : "off");
-	n += snprint(buf+n, sizeof buf-n, "coherence ");
+	char *buf, *p, *ep;
+
+	p = buf = smalloc(READSTR);
+	ep = p + READSTR;
+	p = seprint(p, ep, "cpu %s %d%s\n",
+		m->cpuidtype, m->cpumhz, m->havepge ? " pge" : "");
+	p = seprint(p, ep, "pge %s\n", getcr4()&0x80 ? "on" : "off");
+	p = seprint(p, ep, "coherence ");
 	if(coherence == mb386)
-		n += snprint(buf+n, sizeof buf-n, "mb386\n");
+		p = seprint(p, ep, "mb386\n");
 	else if(coherence == mb586)
-		n += snprint(buf+n, sizeof buf-n, "mb586\n");
+		p = seprint(p, ep, "mb586\n");
+	else if(coherence == mfence)
+		p = seprint(p, ep, "mfence\n");
 	else if(coherence == nop)
-		n += snprint(buf+n, sizeof buf-n, "nop\n");
+		p = seprint(p, ep, "nop\n");
 	else
-		n += snprint(buf+n, sizeof buf-n, "0x%p\n", coherence);
-	n += snprint(buf+n, sizeof buf-n, "i8253set %s\n", doi8253set ? "on" : "off");
-	buf[n] = 0;
-	return readstr(offset, a, nn, buf);
+		p = seprint(p, ep, "0x%p\n", coherence);
+	p = seprint(p, ep, "cmpswap ");
+	if(cmpswap == cmpswap386)
+		p = seprint(p, ep, "cmpswap386\n");
+	else if(cmpswap == cmpswap486)
+		p = seprint(p, ep, "cmpswap486\n");
+	else
+		p = seprint(p, ep, "0x%p\n", cmpswap);
+	p = seprint(p, ep, "arch %s\n", arch->id);
+	n = p - buf;
+	n += mtrrprint(p, ep - p);
+	buf[n] = '\0';
+
+	n = readstr(offset, a, nn, buf);
+	free(buf);
+	return n;
 }
 
 enum
@@ -793,7 +739,7 @@
 {
 	CMpge,
 	CMcoherence,
-	CMi8253set,
+	CMcache,
 };
 
 static Cmdtab archctlmsg[] =
@@ -800,14 +746,16 @@
 {
 	CMpge,		"pge",		2,
 	CMcoherence,	"coherence",	2,
-	CMi8253set,	"i8253set",	2,
+	CMcache,	"cache",	4,
 };
 
-static long
-archctlwrite(Chan*, void *a, long n, vlong)
+static s32
+archctlwrite(Chan*, void *a, s32 n, s64)
 {
+	uvlong base, size;
 	Cmdbuf *cb;
 	Cmdtab *ct;
+	char *ep;
 
 	cb = parsecmd(a, n);
 	if(waserror()){
@@ -830,11 +778,14 @@
 		if(strcmp(cb->f[1], "mb386") == 0)
 			coherence = mb386;
 		else if(strcmp(cb->f[1], "mb586") == 0){
-			if(X86FAMILY(m->cpuidax) < 5)
+			if(m->cpuidfamily < 5)
 				error("invalid coherence ctl on this cpu family");
 			coherence = mb586;
-		}
-		else if(strcmp(cb->f[1], "nop") == 0){
+		}else if(strcmp(cb->f[1], "mfence") == 0){
+			if((m->cpuiddx & Sse2) == 0)
+				error("invalid coherence ctl on this cpu family");
+			coherence = mfence;
+		}else if(strcmp(cb->f[1], "nop") == 0){
 			/* only safe on vmware */
 			if(conf.nmach > 1)
 				error("cannot disable coherence on a multiprocessor");
@@ -842,14 +793,16 @@
 		}else
 			cmderror(cb, "invalid coherence ctl");
 		break;
-	case CMi8253set:
-		if(strcmp(cb->f[1], "on") == 0)
-			doi8253set = 1;
-		else if(strcmp(cb->f[1], "off") == 0){
-			doi8253set = 0;
-			(*arch->timerset)(0);
-		}else
-			cmderror(cb, "invalid i2853set ctl");
+	case CMcache:
+		base = strtoull(cb->f[1], &ep, 0);
+		if(*ep)
+			error("cache: parse error: base not a number?");
+		size = strtoull(cb->f[2], &ep, 0);
+		if(*ep)
+			error("cache: parse error: size not a number?");
+		ep = mtrr(base, size, cb->f[3]);
+		if(ep != nil)
+			error(ep);
 		break;
 	}
 	free(cb);
@@ -857,33 +810,64 @@
 	return n;
 }
 
+static long
+rmemrw(int isr, void *a, long n, vlong off)
+{
+	uintptr addr = off;
+
+	if(off < 0 || n < 0)
+		error("bad offset/count");
+	if(!isr){
+		/* writes are only allowed inside the vga framebuf window */
+		if(addr >= MB || addr+n > MB || addr < 0xA0000 || addr+n > 0xB0000+0x10000)
+			error("bad offset/count in write");
+		memmove(KADDR(addr), a, n);
+		return n;
+	}
+	/* reads: nothing at or beyond MB, and a read crossing MB is cut short */
+	if(addr >= MB)
+		return 0;
+	if(addr+n > MB)
+		n = MB - addr;
+	memmove(a, KADDR(addr), n);
+	return n;
+}
+
+static s32
+rmemread(Chan*, void *a, s32 n, s64 off)
+{
+	return rmemrw(1, a, n, off);	/* isr = 1: copy out of low memory */
+}
+
+static s32
+rmemwrite(Chan*, void *a, s32 n, s64 off)
+{
+	return rmemrw(0, a, n, off);	/* isr = 0: copy into low memory */
+}
+
 void
 archinit(void)
 {
 	PCArch **p;
 
-	arch = 0;
-	for(p = knownarch; *p; p++){
-		if((*p)->ident && (*p)->ident() == 0){
+	arch = knownarch[0];
+	for(p = knownarch; *p != nil; p++){
+		if((*p)->ident != nil && (*p)->ident() == 0){
 			arch = *p;
 			break;
 		}
 	}
-	if(arch == 0)
-		arch = &archgeneric;
-	else{
-		if(arch->id == 0)
-			arch->id = archgeneric.id;
-		if(arch->reset == 0)
-			arch->reset = archgeneric.reset;
-		if(arch->serialpower == 0)
-			arch->serialpower = archgeneric.serialpower;
-		if(arch->modempower == 0)
-			arch->modempower = archgeneric.modempower;
-		if(arch->intrinit == 0)
-			arch->intrinit = archgeneric.intrinit;
-		if(arch->intrenable == 0)
-			arch->intrenable = archgeneric.intrenable;
+	if(arch != knownarch[0]){
+		if(arch->id == nil)
+			arch->id = knownarch[0]->id;
+		if(arch->reset == nil)
+			arch->reset = knownarch[0]->reset;
+		if(arch->intrinit == nil)
+			arch->intrinit = knownarch[0]->intrinit;
+		if(arch->intrassign == nil)
+			arch->intrassign = knownarch[0]->intrassign;
+		if(arch->clockinit == nil)
+			arch->clockinit = knownarch[0]->clockinit;
 	}
 
 	/*
@@ -891,14 +875,21 @@
 	 *  We get another chance to set it in mpinit() for a
 	 *  multiprocessor.
 	 */
-	if(X86FAMILY(m->cpuidax) == 3)
+	if(m->cpuidfamily == 3)
 		conf.copymode = 1;
 
-	if(X86FAMILY(m->cpuidax) >= 5)
+	if(m->cpuidfamily >= 4)
+		cmpswap = cmpswap486;
+
+	if(m->cpuidfamily >= 5)
 		coherence = mb586;
 
+	if(m->cpuiddx & Sse2)
+		coherence = mfence;
+
 	addarchfile("cputype", 0444, cputyperead, nil);
 	addarchfile("archctl", 0664, archctlread, archctlwrite);
+	addarchfile("realmodemem", 0660, rmemread, rmemwrite);
 }
 
 /*
@@ -929,12 +920,203 @@
 	return (*arch->fastclock)(hz);
 }
 
+u64
+µs(void)
+{
+	return fastticks2us((*arch->fastclock)(nil));	/* arch fast clock (nil in the hz slot) converted by fastticks2us */
+}
+
 /*
  *  set next timer interrupt
  */
 void
-timerset(uvlong x)
+timerset(Tval x)
 {
-	if(doi8253set)
-		(*arch->timerset)(x);
+	(*arch->timerset)(x);
+}
+
+/*
+ *  put the processor in the halt state if we've no processes to run.
+ *  an interrupt will get us going again.
+ *
+ *  halting in an smp system can result in a startup latency for
+ *  processes that become ready.
+ *  if idle_spin is zero, we care more about saving energy
+ *  than reducing this latency.
+ *
+ *  the performance loss with idle_spin == 0 seems to be slight
+ *  and it reduces lock contention (thus system time and real time)
+ *  on many-core systems with large values of NPROC.
+ */
+void
+idlehands(void)
+{
+	extern int nrdy, idle_spin;
+
+	if(conf.nmach != 1 && (m->cpuidcx & Monitor) != 0){
+		mwait(&nrdy);	/* several cpus and the Monitor bit: wait on nrdy */
+		return;
+	}
+	if(conf.nmach == 1 || idle_spin == 0)	/* otherwise halt, unless told to spin */
+		halt();
+}
+
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+	char cc[32], *p, *x;
+	int i;
+
+	snprint(cc, sizeof cc, "%s%d", class, ctlrno);	/* the setting is named class followed by ctlrno */
+	p = getconf(cc);
+	if(p == nil)
+		return 0;	/* no such setting */
+
+	x = nil;
+	kstrdup(&x, p);	/* work on a copy; not freed here, isa->type (and presumably isa->opt[]) point into it */
+	p = x;
+
+	isa->type = "";
+	isa->nopt = tokenize(p, isa->opt, NISAOPT);
+	for(i = 0; i < isa->nopt; i++){
+		p = isa->opt[i];
+		if(cistrncmp(p, "type=", 5) == 0)	/* option names match case-insensitively */
+			isa->type = p + 5;
+		else if(cistrncmp(p, "port=", 5) == 0)
+			isa->port = strtoull(p+5, &p, 0);	/* base 0: the prefix of the number picks the radix */
+		else if(cistrncmp(p, "irq=", 4) == 0)
+			isa->irq = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "dma=", 4) == 0)
+			isa->dma = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "mem=", 4) == 0)
+			isa->mem = strtoul(p+4, &p, 0);
+		else if(cistrncmp(p, "size=", 5) == 0)
+			isa->size = strtoul(p+5, &p, 0);
+		else if(cistrncmp(p, "freq=", 5) == 0)
+			isa->freq = strtoul(p+5, &p, 0);
+	}
+	return 1;	/* found; fields with no matching option keep the caller's values */
+}
+
+void
+dumpmcregs(void)
+{
+	vlong v, w;
+	int bank;
+
+	if((m->cpuiddx & (Mce|Cpumsr)) != (Mce|Cpumsr))
+		return;	/* needs both the Mce and the Cpumsr feature bits */
+	if((m->cpuiddx & Mca) == 0){	/* no Mca bit: print MSRs 0 and 1 only */
+		rdmsr(0x00, &v);
+		rdmsr(0x01, &w);
+		iprint("MCA %8.8llux MCT %8.8llux\n", v, w);
+		return;
+	}
+	rdmsr(0x179, &v);
+	rdmsr(0x17A, &w);
+	iprint("MCG CAP %.16llux STATUS %.16llux\n", v, w);
+
+	bank = v & 0xFF;	/* low byte of MSR 0x179 is used as the bank count */
+	if(bank > 64)
+		bank = 64;	/* never walk more than 64 banks */
+	while(--bank >= 0){	/* highest bank first, down to bank 0 */
+		rdmsr(0x401 + bank*4, &v);
+		if((v & (1ull << 63)) == 0)
+			continue;	/* bit 63 clear: nothing is printed for this bank */
+		iprint("MC%d STATUS %.16llux", bank, v);
+		if(v & (1ull << 58)){	/* bit 58 set: also print MSR 0x402+bank*4 as ADDR */
+			rdmsr(0x402 + bank*4, &w);
+			iprint(" ADDR %.16llux", w);
+		}
+		if(v & (1ull << 59)){	/* bit 59 set: also print MSR 0x403+bank*4 as MISC */
+			rdmsr(0x403 + bank*4, &w);
+			iprint(" MISC %.16llux", w);
+		}
+		iprint("\n");
+	}
+}
+
+static void
+nmihandler(Ureg *ureg, void*)
+{
+	iprint("cpu%d: nmi PC %#p, status %ux\n",
+		m->machno, ureg->pc, inb(0x61));	/* the status printed is a read of port 0x61 */
+	while(m->machno != 0)	/* every cpu except cpu0 spins here and never returns */
+		;
+}
+
+void
+nmienable(void)
+{
+	int x;
+
+	trapenable(VectorNMI, nmihandler, nil, "nmi");	/* install nmihandler on VectorNMI before touching the ports */
+
+	/*
+	 * Hack: should be locked with NVRAM access.
+	 */
+	outb(0x70, 0x80);		/* NMI latch clear */
+	outb(0x70, 0);
+
+	x = inb(0x61) & 0x07;		/* Enable NMI */
+	outb(0x61, 0x0C|x);	/* saved low three bits, with bits 2 and 3 forced on... */
+	outb(0x61, x);	/* ...then the saved low three bits alone */
+}
+
+void
+setupwatchpts(Proc *pr, Watchpt *wp, int nwp)
+{
+	int i;
+	u8int cfg;
+	Watchpt *p;
+
+	if(nwp > 4)
+		error("there are four watchpoints.");
+	if(nwp == 0){	/* no watchpoints: clear pr->dr and stop */
+		memset(pr->dr, 0, sizeof(pr->dr));
+		return;
+	}
+	for(p = wp; p < wp + nwp; p++){	/* first pass: check every entry before pr->dr is rewritten */
+		switch(p->type){
+		case WATCHRD|WATCHWR: case WATCHWR:
+			break;
+		case WATCHEX:
+			if(p->len != 1)
+				error("length must be 1 on breakpoints");
+			break;
+		default:
+			error("type must be rw-, -w- or --x");
+		}
+		switch(p->len){
+		case 1: case 2: case 4:
+			break;
+		case 8:
+			if(m->havewatchpt8) break;	/* otherwise falls through to the error below */
+		default:
+			error(m->havewatchpt8 ? "length must be 1,2,4,8" : "length must be 1,2,4");
+		}
+		if((p->addr & p->len - 1) != 0)	/* - binds tighter than &: the mask is len-1 */
+			error("address must be aligned according to length");
+	}
+	
+	memset(pr->dr, 0, sizeof(pr->dr));
+	pr->dr[6] = 0xffff8ff0;
+	for(i = 0; i < nwp; i++){	/* second pass: address into dr[i], config bits into dr[7] */
+		pr->dr[i] = wp[i].addr;
+		switch(wp[i].type){
+			case WATCHRD|WATCHWR: cfg = 3; break;
+			case WATCHWR: cfg = 1; break;
+			case WATCHEX: cfg = 0; break;
+			default: continue;	/* NOTE(review): already rejected above if error() never returns -- confirm */
+		}
+		switch(wp[i].len){
+			case 1: break;
+			case 2: cfg |= 4; break;
+			case 4: cfg |= 12; break;
+			case 8: cfg |= 8; break;
+			default: continue;
+		}
+		pr->dr[7] |= cfg << 16 + 4 * i;	/* + binds tighter than <<: cfg lands at bit 16+4*i */
+		pr->dr[7] |= 1 << 2 * i + 1;	/* likewise: sets bit 2*i+1 */
+	}
+}
--- /dev/null
+++ b/os/pc/devkbd.c
@@ -1,0 +1,449 @@
+/*
+ * keyboard input
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+enum {
+	Data=		0x60,		/* data port */
+
+	Status=		0x64,		/* status port */
+	 Inready=	0x01,		/*  input character ready */
+	 Outbusy=	0x02,		/*  output busy */
+	 Sysflag=	0x04,		/*  system flag */
+	 Cmddata=	0x08,		/*  cmd==0, data==1 */
+	 Inhibit=	0x10,		/*  keyboard/mouse inhibited */
+	 Minready=	0x20,		/*  mouse character ready */
+	 Rtimeout=	0x40,		/*  general timeout */
+	 Parity=	0x80,
+
+	Cmd=		0x64,		/* command port (write only) */
+};
+
+enum
+{
+	/* controller command byte */
+	Cscs1=		(1<<6),		/* scan code set 1 */
+	Cauxdis=	(1<<5),		/* mouse disable */
+	Ckbddis=	(1<<4),		/* kbd disable */
+	Csf=		(1<<2),		/* system flag */
+	Cauxint=	(1<<1),		/* mouse interrupt enable */
+	Ckbdint=	(1<<0),		/* kbd interrupt enable */
+};
+
+enum {
+	Qdir,
+	Qscancode,
+	Qleds,
+};
+
+static Dirtab kbdtab[] = {
+	".",		{Qdir, 0, QTDIR},	0,	0555,
+	"scancode",	{Qscancode, 0},		0,	0440,
+	"leds",		{Qleds, 0},		0,	0220,
+};
+
+static Lock i8042lock;
+static uchar ccc;
+static void (*auxputc)(int, int);
+static int nokbd = 1;			/* flag: no PS/2 keyboard */
+
+static struct {
+	Ref ref;
+	Queue *q;
+} kbd;
+
+/*
+ *  poll Status until Outbusy clears; 0 on success, -1 if it stays set
+ */
+static int
+outready(void)
+{
+	int polls = 0;
+
+	while(inb(Status) & Outbusy){
+		if(polls++ > 500)	/* give up once more than 500 polls have been made */
+			return -1;
+		delay(2);
+	}
+	return 0;
+}
+
+/*
+ *  poll Status until Inready is set; 0 on success, -1 if it never is
+ */
+static int
+inready(void)
+{
+	int polls = 0;
+
+	while((inb(Status) & Inready) == 0){
+		if(polls++ > 500)	/* give up once more than 500 polls have been made */
+			return -1;
+		delay(2);
+	}
+	return 0;
+}
+
+/*
+ *  ask 8042 to reset the machine; does nothing while nokbd is set
+ */
+void
+i8042reset(void)
+{
+	int i, x;
+
+	if(nokbd)
+		return;
+
+	*((ushort*)KADDR(0x472)) = 0x1234;	/* BIOS warm-boot flag */
+
+	/*
+	 *  first try the newer "reset the machine" command
+	 */
+	outready();
+	outb(Cmd, 0xFE);
+	outready();
+
+	/*
+	 *  then pulse it by hand (older, somewhat reliable method)
+	 */
+	x = 0xDF;
+	for(i = 0; i < 5; i++){
+		x ^= 1;		/* flip bit 0 on every pass */
+		outready();
+		outb(Cmd, 0xD1);
+		outready();
+		outb(Data, x);	/* toggle reset */
+		delay(100);
+	}
+}
+
+int
+i8042auxcmd(int cmd)
+{
+	unsigned int c;
+	int tries;
+
+	c = 0;
+	tries = 0;
+
+	ilock(&i8042lock);
+	do{
+		if(tries++ > 2)		/* at most three attempts */
+			break;
+		if(outready() < 0)
+			break;
+		outb(Cmd, 0xD4);	/* controller command sent ahead of the data byte */
+		if(outready() < 0)
+			break;
+		outb(Data, cmd);
+		if(outready() < 0)
+			break;
+		if(inready() < 0)
+			break;
+		c = inb(Data);		/* reply byte */
+	} while(c == 0xFE || c == 0);	/* try again while the reply is 0xFE or 0 */
+	iunlock(&i8042lock);
+
+	if(c != 0xFA){			/* only a 0xFA reply counts as success */
+		print("i8042: %2.2ux returned to the %2.2ux command (pc=%#p)\n",
+			c, cmd, getcallerpc(&cmd));
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * set keyboard's leds for lock states (scroll, numeric, caps).
+ *
+ * at least one keyboard (from Qtronics) also sets its numeric-lock
+ * behaviour to match the led state, though it has no numeric keypad,
+ * and some BIOSes bring the system up with numeric-lock set and no
+ * setting to change that.  this combination steals the keys for these
+ * characters and makes it impossible to generate them: uiolkjm&*().
+ * thus we'd like to be able to force the numeric-lock led (and behaviour) off.
+ */
+static void
+setleds(int leds)
+{
+	static int old = -1;	/* last value successfully sent; -1 until the first write */
+
+	leds &= 7;		/* mask first, so the comparison below sees what would be sent */
+	if(nokbd || leds == old)
+		return;
+	ilock(&i8042lock);
+	for(;;){		/* runs once; the loop only provides a common exit via break */
+		if(outready() < 0)
+			break;
+		outb(Data, 0xed);		/* `reset keyboard lock states' */
+		if(outready() < 0)
+			break;
+		outb(Data, leds);
+		if(outready() < 0)
+			break;
+		old = leds;	/* remember only values that were fully written */
+		break;
+	}
+	iunlock(&i8042lock);
+}
+
+/*
+ *  keyboard interrupt (also called directly by kbdpoll)
+ */
+static void
+i8042intr(Ureg*, void*)
+{
+	int s, c;
+	uchar b;
+
+	/*
+	 *  get status; nothing to do unless Inready is set
+	 */
+	ilock(&i8042lock);
+	s = inb(Status);
+	if(!(s&Inready)){
+		iunlock(&i8042lock);
+		return;
+	}
+
+	/*
+	 *  get the character
+	 */
+	c = inb(Data);
+	iunlock(&i8042lock);
+
+	/*
+	 *  if it's the aux port, hand the byte to auxputc (if one is registered)
+	 */
+	if(s & Minready){
+		if(auxputc != nil)
+			auxputc(c, 0);
+		return;
+	}
+
+	b = c & 0xff;
+	qproduce(kbd.q, &b, 1);	/* append the byte to the queue read through "scancode" */
+}
+
+void
+i8042auxenable(void (*putc)(int, int))
+{
+	static char err[] = "i8042: aux init failed\n";
+
+	ilock(&i8042lock);
+
+	/* enable kbd/aux xfers and interrupts */
+	ccc &= ~Cauxdis;
+	ccc |= Cauxint;
+
+	if(outready() < 0)		/* the first three timeouts are only reported */
+		print(err);
+	outb(Cmd, 0x60);			/* write control register */
+	if(outready() < 0)
+		print(err);
+	outb(Data, ccc);
+	if(outready() < 0)
+		print(err);
+	outb(Cmd, 0xA8);			/* auxiliary device enable */
+	if(outready() < 0){		/* the last timeout aborts: putc is not installed */
+		print(err);
+		iunlock(&i8042lock);
+		return;
+	}
+	auxputc = putc;			/* i8042intr forwards aux bytes to this callback */
+	intrenable(IrqAUX, i8042intr, 0, BUSUNKNOWN, "kbdaux");
+
+	iunlock(&i8042lock);
+}
+
+static void
+kbdpoll(void)
+{
+	if(nokbd || qlen(kbd.q) > 0)	/* no keyboard, or data is already queued */
+		return;
+	i8042intr(0, 0);		/* run the interrupt routine by hand */
+}
+
+static void
+kbdshutdown(void)
+{
+	if(nokbd)
+		return;
+	/* disable kbd and aux xfers and interrupts */
+	ccc &= ~(Ckbdint|Cauxint);
+	ccc |= (Cauxdis|Ckbddis);
+	outready();
+	outb(Cmd, 0x60);
+	outready();
+	outb(Data, ccc);
+	outready();
+}
+
+static Chan *
+kbdattach(char *spec)
+{
+	return devattach(L'b', spec);
+}
+
+static Walkqid*
+kbdwalk(Chan *c, Chan *nc, char **name, int nname)
+{
+	return devwalk(c, nc, name, nname, kbdtab, nelem(kbdtab), devgen);
+}
+
+static int
+kbdstat(Chan *c, uchar *dp, int n)
+{
+	return devstat(c, dp, n, kbdtab, nelem(kbdtab), devgen);
+}
+
+static Chan*
+kbdopen(Chan *c, u32 omode)
+{
+	if(!iseve())
+		error(Eperm);
+	if(c->qid.path == Qscancode){	/* scancode: one opener at a time, counted in kbd.ref */
+		if(waserror()){
+			decref(&kbd.ref);	/* undo the incref below on any error, Einuse included */
+			nexterror();
+		}
+		if(incref(&kbd.ref) != 1)	/* a count other than 1 means it is already open */
+			error(Einuse);
+		c = devopen(c, omode, kbdtab, nelem(kbdtab), devgen);
+		poperror();
+		return c;
+	}
+	return devopen(c, omode, kbdtab, nelem(kbdtab), devgen);
+}
+
+static void
+kbdclose(Chan *c)
+{
+	if((c->flag & COPEN) && c->qid.path == Qscancode)
+		decref(&kbd.ref);
+}
+
+static Block*
+kbdbread(Chan *c, s32 n, u32 off)
+{
+	if(c->qid.path == Qscancode){
+		kbdpoll();
+		return qbread(kbd.q, n);
+	}
+	return devbread(c, n, off);
+}
+
+static s32
+kbdread(Chan *c, void *a, s32 n, s64)
+{
+	if(c->qid.path == Qscancode){
+		kbdpoll();		/* pick up a pending byte by hand if the queue is empty */
+		return qread(kbd.q, a, n);
+	}
+	if(c->qid.path == Qdir)
+		return devdirread(c, a, n, kbdtab, nelem(kbdtab), devgen);
+	error(Egreg);			/* any other file (i.e. leds) cannot be read */
+	return 0;
+}
+
+static s32
+kbdwrite(Chan *c, void *a, s32 n, s64)
+{
+	char tmp[8+1], *p;
+
+	if(c->qid.path != Qleds)	/* leds is the only file that accepts writes */
+		error(Egreg);
+
+	p = tmp + n;			/* p marks where the terminating NUL goes */
+	if(n >= sizeof(tmp))		/* clamp so at most sizeof(tmp)-1 bytes are copied */
+		p = tmp + sizeof(tmp)-1;
+	memmove(tmp, a, p - tmp);
+	*p = 0;
+
+	setleds(atoi(tmp));		/* the text is parsed as a decimal led mask */
+
+	return n;			/* the full count is reported even if the copy was clamped */
+}
+
+static void
+kbdreset(void)		/* reset hook: create the scancode queue, then program the 8042 */
+{
+	static char initfailed[] = "i8042: kbd init failed\n";
+	int c, try;
+
+	print("kbdreset\n");
+	kbd.q = qopen(1024, Qcoalesce, 0, 0);
+	if(kbd.q == nil)
+		panic("kbdreset");
+	qnoblock(kbd.q, 1);
+
+	/* wait for a quiescent controller */
+	try = 1000;
+	while(try-- > 0 && (c = inb(Status)) & (Outbusy | Inready)) {
+		if(c & Inready)
+			inb(Data);	/* discard stale input */
+		delay(1);
+	}
+	if (try < 0) {	/* -1 only when all 1000 polls were used; 0 means the last poll succeeded */
+		print(initfailed);
+		return;	/* nokbd stays 1, so the rest of the driver stays inert */
+	}
+
+	/* get current controller command byte */
+	outb(Cmd, 0x20);
+	if(inready() < 0){
+		print("i8042: can't read ccc\n");
+		ccc = 0;
+	} else
+		ccc = inb(Data);
+
+	/* enable kbd xfers and interrupts */
+	ccc &= ~Ckbddis;
+	ccc |= Csf | Ckbdint | Cscs1;
+
+	/* disable ps2 mouse */
+	ccc &= ~Cauxint;
+	ccc |= Cauxdis;
+
+	if(outready() < 0) {
+		print(initfailed);
+		return;
+	}
+	outb(Cmd, 0x60);	/* write the updated command byte back */
+	outready();
+	outb(Data, ccc);
+	outready();
+
+	nokbd = 0;	/* from here on the keyboard is considered present */
+	ioalloc(Cmd, 1, 0, "i8042.cs");
+	ioalloc(Data, 1, 0, "i8042.data");
+	intrenable(IrqKBD, i8042intr, 0, BUSUNKNOWN, "kbd");
+}
+
+Dev kbddevtab = {
+	L'b',
+	"kbd",
+
+	kbdreset,
+	devinit,
+	kbdshutdown,
+	kbdattach,
+	kbdwalk,
+	kbdstat,
+	kbdopen,
+	devcreate,
+	kbdclose,
+	kbdread,
+	kbdbread,
+	kbdwrite,
+	devbwrite,
+	devremove,
+	devwstat,
+};
+
--- a/os/pc/devrtc.c
+++ b/os/pc/devrtc.c
@@ -30,12 +30,12 @@
 typedef struct Rtc	Rtc;
 struct Rtc
 {
-	int	sec;
-	int	min;
-	int	hour;
-	int	mday;
-	int	mon;
-	int	year;
+	s32	sec;
+	s32	min;
+	s32	hour;
+	s32	mday;
+	s32	mon;
+	s32	year;
 };
 
 
@@ -51,8 +51,8 @@
 	"rtc",		{Qrtc, 0},	0,	0664,
 };
 
-static ulong rtc2sec(Rtc*);
-static void sec2rtc(ulong, Rtc*);
+static u32 rtc2sec(Rtc*);
+static void sec2rtc(u32, Rtc*);
 
 void
 rtcinit(void)
@@ -73,14 +73,14 @@
 	return devwalk(c, nc, name, nname, rtcdir, nelem(rtcdir), devgen);
 }
 
-static int	 
-rtcstat(Chan* c, uchar* dp, int n)
+static s32	 
+rtcstat(Chan* c, uchar* dp, s32 n)
 {
 	return devstat(c, dp, n, rtcdir, nelem(rtcdir), devgen);
 }
 
 static Chan*
-rtcopen(Chan* c, int omode)
+rtcopen(Chan* c, u32 omode)
 {
 	omode = openmode(omode);
 	switch((ulong)c->qid.path){
@@ -102,7 +102,7 @@
 
 #define GETBCD(o) ((bcdclock[o]&0xf) + 10*(bcdclock[o]>>4))
 
-static long	 
+static s32	 
 _rtctime(void)
 {
 	uchar bcdclock[Nbcd];
@@ -150,7 +150,7 @@
 
 static Lock nvrtlock;
 
-long
+s32
 rtctime(void)
 {
 	int i;
@@ -173,8 +173,8 @@
 	return t;
 }
 
-static long	 
-rtcread(Chan* c, void* buf, long n, vlong off)
+static s32	 
+rtcread(Chan* c, void* buf, s32 n, s64 off)
 {
 	ulong t;
 	char *a, *start;
@@ -220,8 +220,8 @@
 
 #define PUTBCD(n,o) bcdclock[o] = (n % 10) | (((n / 10) % 10)<<4)
 
-static long	 
-rtcwrite(Chan* c, void* buf, long n, vlong off)
+static s32	 
+rtcwrite(Chan* c, void* buf, s32 n, s64 off)
 {
 	int t;
 	char *a, *start;
@@ -330,11 +330,11 @@
 /*
  *  days per month plus days/year
  */
-static	int	dmsize[] =
+static	s32	dmsize[] =
 {
 	365, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
 };
-static	int	ldmsize[] =
+static	s32	ldmsize[] =
 {
 	366, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
 };
@@ -342,8 +342,8 @@
 /*
  *  return the days/month for the given year
  */
-static int*
-yrsize(int y)
+static s32*
+yrsize(s32 y)
 {
 	if((y%4) == 0 && ((y%100) != 0 || (y%400) == 0))
 		return ldmsize;
@@ -354,10 +354,10 @@
 /*
  *  compute seconds since Jan 1 1970
  */
-static ulong
+static u32
 rtc2sec(Rtc *rtc)
 {
-	ulong secs;
+	u32 secs;
 	int i;
 	int *d2m;
 
@@ -390,7 +390,7 @@
  *  compute rtc from seconds since Jan 1 1970
  */
 static void
-sec2rtc(ulong secs, Rtc *rtc)
+sec2rtc(u32 secs, Rtc *rtc)
 {
 	int d;
 	long hms, day;
@@ -439,7 +439,7 @@
 }
 
 uchar
-nvramread(int addr)
+nvramread(intptr addr)
 {
 	uchar data;
 
@@ -452,7 +452,7 @@
 }
 
 void
-nvramwrite(int addr, uchar data)
+nvramwrite(intptr addr, uchar data)
 {
 	ilock(&nvrtlock);
 	outb(Paddr, addr);
--- a/os/pc/devvga.c
+++ b/os/pc/devvga.c
@@ -6,6 +6,8 @@
 #include "mem.h"
 #include "dat.h"
 #include "fns.h"
+#include "io.h"
+#include "ureg.h"
 #include "../port/error.h"
 
 #define	Image	IMAGE
@@ -17,8 +19,8 @@
 typedef struct Vgaseg Vgaseg;
 struct Vgaseg {
 	QLock;
-	ulong	pa;
-	ulong	len;
+	uintptr	pa;
+	u32	len;
 	void*	va;
 };
 
@@ -90,12 +92,12 @@
 }
 
 void
-addvgaseg(char *name, ulong pa, ulong size)
+addvgaseg(char *name, u32 pa, u32 size)
 {
 	int i;
 	Dirtab d;
 	Vgaseg *s;
-	ulong va;
+	uintptr va;
 
 	va = mmukmap(pa, 0, size);
 	if(va == 0)
@@ -126,12 +128,12 @@
 	unlock(&vgadirlock);
 }
 
-static long
-vgasegrd(Vgaseg *s, uchar *buf, long n, ulong offset)
+static s32
+vgasegrd(Vgaseg *s, uchar *buf, s32 n, u32 offset)
 {
 	int i;
 	uchar *a, *d;
-	ulong v;
+	uintptr v;
 
 	if(offset >= s->len)
 		return 0;
@@ -145,7 +147,7 @@
 	}
 	a = buf;
 	while(n > 0){
-		i = 4 - ((ulong)d & 3);
+		i = 4 - ((uintptr)d & 3);
 		if(i > n)
 			i = n;
 		if(i == 3)
@@ -172,12 +174,12 @@
 	return a-buf;
 }
 
-static long
-vgasegwr(Vgaseg *s, uchar *buf, long n, ulong offset)
+static s32
+vgasegwr(Vgaseg *s, uchar *buf, s32 n, u32 offset)
 {
 	int i;
 	uchar *a, *r;
-	ulong v;
+	uintptr v;
 
 	if(offset >= s->len)
 		return 0;
@@ -191,7 +193,7 @@
 	}
 	a = buf;
 	while(n > 0){
-		i = 4 - ((ulong)r & 3);
+		i = 4 - ((uintptr)r & 3);
 		if(i > n)
 			i = n;
 		if(i == 3)
@@ -231,19 +233,19 @@
 }
 
 Walkqid*
-vgawalk(Chan* c, Chan *nc, char** name, int nname)
+vgawalk(Chan* c, Chan *nc, char** name, s32 nname)
 {
 	return devwalk(c, nc, name, nname, vgadir, nvgadir, devgen);
 }
 
 static int
-vgastat(Chan* c, uchar* dp, int n)
+vgastat(Chan* c, uchar* dp, s32 n)
 {
 	return devstat(c, dp, n, vgadir, nvgadir, devgen);
 }
 
 static Chan*
-vgaopen(Chan* c, int omode)
+vgaopen(Chan* c, u32 omode)
 {
 	VGAscr *scr;
 	static char *openctl = "openctl\n";
@@ -290,16 +292,16 @@
 	error(Eperm);
 }
 
-static long
-vgaread(Chan* c, void* a, long n, vlong off)
+static s32
+vgaread(Chan* c, void* a, s32 n, s64 off)
 {
 	int len;
 	char *p, *s;
 	VGAscr *scr;
-	ulong offset = off;
+	u32 offset = off;
 	char chbuf[30];
 
-	switch((ulong)c->qid.path){
+	switch((u32)c->qid.path){
 
 	case Qdir:
 		return devdirread(c, a, n, vgadir, nvgadir, devgen);
@@ -332,12 +334,12 @@
 					physgscreenr.max.x, physgscreenr.max.y);
 		}
 
-		len += snprint(p+len, READSTR-len, "blank time %lud idle %d state %s\n",
+		len += snprint(p+len, READSTR-len, "blank time %ud idle %d state %s\n",
 			blanktime, drawidletime(), scr->isblank ? "off" : "on");
 		len += snprint(p+len, READSTR-len, "hwaccel %s\n", hwaccel ? "on" : "off");
 		len += snprint(p+len, READSTR-len, "hwblank %s\n", hwblank ? "on" : "off");
 		len += snprint(p+len, READSTR-len, "panning %s\n", panning ? "on" : "off");
-		snprint(p+len, READSTR-len, "addr 0x%lux\n", scr->aperture);
+		snprint(p+len, READSTR-len, "addr 0x%zux\n", scr->aperture);
 		n = readstr(offset, a, n, p);
 		poperror();
 		free(p);
@@ -366,7 +368,7 @@
 {
 	int align, i, size, x, y, z;
 	char *chanstr, *p;
-	ulong chan;
+	u32 chan;
 	Cmdtab *ct;
 	VGAscr *scr;
 	extern VGAdev *vgadev[];
@@ -415,8 +417,8 @@
 		break;
 
 	case CMsize:
-		if(drawhasclients())
-			error(Ebusy);
+		/*TODO if(drawhasclients())
+			error(Ebusy);*/
 
 		x = strtoul(cb->f[1], &p, 0);
 		if(x == 0 || x > 2048)
@@ -439,12 +441,12 @@
 		if(chantodepth(chan) != z)
 			error("depth, channel do not match");
 
-		cursoroff(1);
+		cursoroff();
 		deletescreenimage();
-		if(screensize(x, y, z, chan))
-			error(Egreg);
+		/* TODO if(screensize(x, y, z, chan))
+			error(Egreg); */
 		vgascreenwin(scr);
-		cursoron(1);
+		cursoron();
 		return;
 
 	case CMactualsize:
@@ -490,7 +492,7 @@
 			align = 0;
 		else
 			align = strtoul(cb->f[2], 0, 0);
-		if(screenaperture(size, align))
+		if(screenaperture(scr, size, align))
 			error("not enough free address space");
 		return;
 
@@ -546,14 +548,14 @@
 
 char Enooverlay[] = "No overlay support";
 
-static long
-vgawrite(Chan* c, void* a, long n, vlong off)
+static s32
+vgawrite(Chan* c, void* a, s32 n, s64 off)
 {
-	ulong offset = off;
+	uintptr offset = off;
 	Cmdbuf *cb;
 	VGAscr *scr;
 
-	switch((ulong)c->qid.path){
+	switch((u32)c->qid.path){
 
 	case Qdir:
 		error(Eperm);
--- a/os/pc/dma.c
+++ b/os/pc/dma.c
@@ -124,11 +124,11 @@
  *  return the updated transfer length (we can't transfer across 64k
  *  boundaries)
  */
-long
-dmasetup(int chan, void *va, long len, int isread)
+s32
+dmasetup(int chan, void *va, s32 len, s32 isread)
 {
 	DMA *dp;
-	ulong pa;
+	uintptr pa;
 	uchar mode;
 	DMAxfer *xp;
 
@@ -141,7 +141,7 @@
 	 *  use the bounce buffer.
 	 */
 	pa = PADDR(va);
-	if((((ulong)va)&0xF0000000) != KZERO
+	if((((uintptr)va)&0xF0000000) != KZERO
 	|| (pa&0xFFFF0000) != ((pa+len)&0xFFFF0000)
 	|| pa >= 16*MB) {
 		if(xp->bva == nil)
--- /dev/null
+++ b/os/pc/ec.c
@@ -1,0 +1,174 @@
+/*
+ * embedded controller (usually at ports 0x66/0x62)
+ */
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"../port/error.h"
+
+enum {
+	/* registers */
+	EC_SC	= 0,
+	EC_DATA,
+
+	/* Embedded Controller Status, EC_SC (R) */
+	OBF	= 1<<0,
+	IBF	= 1<<1,
+	CMD	= 1<<3,
+	BURST	= 1<<4,
+	SCI_EVT	= 1<<5,
+	SMI_EVT	= 1<<6,
+
+	/* Embedded Controller Command Set */
+	RD_EC	= 0x80,
+	WR_EC	= 0x81,
+	BE_EC	= 0x82,
+	BD_EC	= 0x83,
+	QR_EC	= 0x84,
+};
+
+static struct {
+	Lock;
+	int	init;
+	int	port[2];	/* EC_SC and EC_DATA */
+} ec;
+
+static uchar
+ecrr(int reg)
+{
+	return inb(ec.port[reg]);
+}
+static void
+ecwr(int reg, uchar val)
+{
+	outb(ec.port[reg], val);
+}
+
+static int
+ecwait(uchar mask, uchar val)
+{
+	int left, status;
+
+	status = 0;
+	for(left = 1000; left > 0; left--){	/* up to 1000 polls, delay(1) apart */
+		status = ecrr(EC_SC);
+		if((status & mask) == val)	/* masked status matches: done */
+			return 0;
+		delay(1);
+	}
+	print("ec: wait timeout status=%x pc=%#p\n", status, getcallerpc(&mask));
+	return -1;
+}
+
+int
+ecread(uchar addr)
+{
+	int r;
+
+	r = -1;			/* returned on any failure below */
+	lock(&ec);
+	if(!ec.init)		/* ports not configured yet (see ecinit) */
+		goto out;
+	if(ecwait(IBF, 0))	/* wait for IBF to clear before each write */
+		goto out;
+	ecwr(EC_SC, RD_EC);
+	if(ecwait(IBF, 0))
+		goto out;
+	ecwr(EC_DATA, addr);
+	if(ecwait(OBF, OBF))	/* wait for OBF to be set before reading */
+		goto out;
+	r = ecrr(EC_DATA);
+	ecwait(OBF, 0);		/* result ignored: r already holds the byte */
+out:
+	unlock(&ec);
+	return r;
+}
+
+int
+ecwrite(uchar addr, uchar val)
+{
+	int r;
+
+	r = -1;			/* returned on any failure below */
+	lock(&ec);
+	if(!ec.init)		/* ports not configured yet (see ecinit) */
+		goto out;
+	if(ecwait(IBF, 0))	/* wait for IBF to clear before each write */
+		goto out;
+	ecwr(EC_SC, WR_EC);
+	if(ecwait(IBF, 0))
+		goto out;
+	ecwr(EC_DATA, addr);
+	if(ecwait(IBF, 0))
+		goto out;
+	ecwr(EC_DATA, val);
+	if(ecwait(IBF, 0))	/* success only once the last byte has been taken */
+		goto out;
+	r = 0;
+out:
+	unlock(&ec);
+	return r;
+}
+
+static s32
+ecarchread(Chan*, void *a, s32 n, s64 off)
+{
+	int port, v;
+	uchar *p;
+
+	if(off < 0 || off >= 256)	/* offsets 0..255 are the only valid addresses */
+		return 0;
+	if(off+n > 256)			/* trim the read at address 255 */
+		n = 256 - off;
+	p = a;
+	for(port = off; port < off+n; port++){	/* one ecread per byte; the file offset is the address */
+		if((v = ecread(port)) < 0)
+			error(Eio);
+		*p++ = v;
+	}
+	return n;
+}
+
+static s32
+ecarchwrite(Chan*, void *a, s32 n, s64 off)
+{
+	int port;
+	uchar *p;
+
+	if(off < 0 || off+n > 256)	/* unlike reads, out-of-range writes are rejected, not trimmed */
+		error(Ebadarg);
+	p = a;
+	for(port = off; port < off+n; port++)	/* one ecwrite per byte; the file offset is the address */
+		if(ecwrite(port, *p++) < 0)
+			error(Eio);
+	return n;
+}
+
+int
+ecinit(int cmdport, int dataport)
+{
+	print("ec: cmd %X, data %X\n", cmdport, dataport);
+
+	if(ioalloc(cmdport, 1, 0, "ec.sc") < 0){
+		print("ec: cant allocate cmd port %X\n", cmdport);
+		return -1;
+	}
+	if(ioalloc(dataport, 1, 0, "ec.data") < 0){
+		print("ec: cant allocate data port %X\n", dataport);
+		iofree(cmdport);	/* release the cmd port claimed just above */
+		return -1;
+	}
+
+	lock(&ec);
+	ec.port[EC_SC] = cmdport;
+	ec.port[EC_DATA] = dataport;
+	ec.init = 1;		/* ecread/ecwrite fail with -1 until this is set */
+	unlock(&ec);
+
+	addarchfile("ec", 0660, ecarchread, ecarchwrite);	/* register the "ec" file with these read/write handlers */
+
+	return 0;
+}
--- /dev/null
+++ b/os/pc/errstr.h
@@ -1,0 +1,61 @@
+char Enoerror[] = "no error";
+char Emount[] = "inconsistent mount";
+char Eunmount[] = "not mounted";
+char Eunion[] = "not in union";
+char Emountrpc[] = "mount rpc error";
+char Eshutdown[] = "mounted device shut down";
+char Enocreate[] = "mounted directory forbids creation";
+char Enonexist[] = "file does not exist";
+char Eexist[] = "file already exists";
+char Ebadsharp[] = "unknown device in # filename";
+char Enotdir[] = "not a directory";
+char Eisdir[] = "file is a directory";
+char Ebadchar[] = "bad character in file name";
+char Efilename[] = "file name syntax";
+char Eperm[] = "permission denied";
+char Ebadusefd[] = "inappropriate use of fd";
+char Ebadarg[] = "bad arg in system call";
+char Einuse[] = "device or object already in use";
+char Eio[] = "i/o error";
+char Etoobig[] = "read or write too large";
+char Etoosmall[] = "read or write too small";
+char Enetaddr[] = "bad network address";
+char Emsgsize[] = "message is too big for protocol";
+char Enetbusy[] = "network device is busy or allocated";
+char Enoproto[] = "network protocol not supported";
+char Enoport[] = "network port not available";
+char Enoifc[] = "bad interface or no free interface slots";
+char Enolisten[] = "not announced";
+char Ehungup[] = "i/o on hungup channel";
+char Ebadctl[] = "bad process or channel control request";
+char Enodev[] = "no free devices";
+char Enoenv[] = "no free environment resources";
+char Ethread[] = "thread exited";
+char Estopped[] = "thread must be stopped";
+char Enochild[] = "no living children";
+char Eioload[] = "i/o error in demand load";
+char Enovmem[] = "out of memory: virtual memory";
+char Ebadld[] = "illegal line discipline";
+char Ebadfd[] = "fd out of range or not open";
+char Eisstream[] = "seek on a stream";
+char Ebadexec[] = "exec header invalid";
+char Etimedout[] = "connection timed out";
+char Econrefused[] = "connection refused";
+char Econinuse[] = "connection in use";
+char Eintr[] = "interrupted";
+char Eneedservice[] = "service required for tcp/udp/il calls";
+char Enomem[] = "out of memory: kernel";
+char Esfnotcached[] = "subfont not cached";
+char Esoverlap[] = "segments overlap";
+char Emouseset[] = "mouse type already set";
+char Erecover[] = "failed to recover fd";
+char Eshort[] = "i/o count too small";
+char Enobitstore[] = "out of screen memory";
+char Egreg[] = "jim'll fix it";
+char Ebadspec[] = "bad attach specifier";
+char Enoattach[] = "mount/attach disallowed";
+char Eshortstat[] = "stat buffer too small";
+char Enegoff[] = "negative i/o offset";
+char Ecmdargs[] = "wrong #args in control message";
+char Ebadstat[] = "malformed stat buffer";
+char	Enofd[] = "no free file descriptors";
--- a/os/pc/ether2114x.c
+++ b/os/pc/ether2114x.c
@@ -14,11 +14,11 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 #include "../port/netif.h"
+#include "../port/etherif.h"
 
-#include "etherif.h"
-
 #define DEBUG		(0)
 #define debug		if(DEBUG)print
 
@@ -478,7 +478,7 @@
 				else if(bp = iallocb(Rbsz)){
 					len = ((des->status & Fl)>>16)-4;
 					des->bp->wp = des->bp->rp+len;
-					etheriq(ether, des->bp, 1);
+					etheriq(ether, des->bp);
 					des->bp = bp;
 					des->addr = PCIWADDR(bp->rp);
 				}
@@ -1811,7 +1811,7 @@
 	 */
 	ether->attach = attach;
 	ether->transmit = transmit;
-	ether->interrupt = interrupt;
+	/* ether->interrupt = interrupt; removed in 9front */
 	ether->ifstat = ifstat;
 
 	ether->arg = ether;
@@ -1818,6 +1818,8 @@
 	ether->shutdown = shutdown;
 	ether->multicast = multicast;
 	ether->promiscuous = promiscuous;
+
+	intrenable(ether->irq, interrupt, ether, ether->tbdf, ether->name);
 
 	return 0;
 }
--- a/os/pc/ether83815.c
+++ b/os/pc/ether83815.c
@@ -21,6 +21,7 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 #include "../port/netif.h"
 
@@ -337,8 +338,8 @@
 	iunlock(&ctlr->lock);
 }
 
-static long
-ifstat(Ether* ether, void* a, long n, ulong offset)
+static s32
+ifstat(Ether* ether, void* a, s32 n, u32 offset)
 {
 	Ctlr *ctlr;
 	char *buf, *p;
--- a/os/pc/etherdp83820.c
+++ b/os/pc/etherdp83820.c
@@ -277,13 +277,13 @@
 };
 
 typedef struct {
-	u32int	link;			/* Link to the next descriptor */
-	u32int	bufptr;			/* pointer to data Buffer */
+	u32	link;			/* Link to the next descriptor */
+	u32	bufptr;			/* pointer to data Buffer */
 	int	cmdsts;			/* Command/Status */
 	int	extsts;			/* optional Extended Status */
 
 	Block*	bp;			/* Block containing bufptr */
-	u32int	unused;			/* pad to 64-bit */
+	u32	unused;			/* pad to 64-bit */
 } Desc;
 
 enum {					/* Common cmdsts bits */
--- a/os/pc/etherelnk3.c
+++ b/os/pc/etherelnk3.c
@@ -14,6 +14,7 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 #include "../port/netif.h"
 
@@ -446,7 +447,7 @@
 	 *	make sure each entry is 8-byte aligned.
 	 */
 	ctlr->upbase = malloc((ctlr->nup+1)*sizeof(Pd));
-	ctlr->upr = (Pd*)ROUNDUP((ulong)ctlr->upbase, 8);
+	ctlr->upr = (Pd*)ROUNDUP((uintptr)ctlr->upbase, 8);
 
 	prev = ctlr->upr;
 	for(pd = &ctlr->upr[ctlr->nup-1]; pd >= ctlr->upr; pd--){
@@ -465,7 +466,7 @@
 	ctlr->uphead = ctlr->upr;
 
 	ctlr->dnbase = malloc((ctlr->ndn+1)*sizeof(Pd));
-	ctlr->dnr = (Pd*)ROUNDUP((ulong)ctlr->dnbase, 8);
+	ctlr->dnr = (Pd*)ROUNDUP((uintptr)ctlr->dnbase, 8);
 
 	prev = ctlr->dnr;
 	for(pd = &ctlr->dnr[ctlr->ndn-1]; pd >= ctlr->dnr; pd--){
@@ -488,7 +489,7 @@
 	 * boundary for EISA busmastering.
 	 */
 	if(bp = f(ROUNDUP(sizeof(Etherpkt), 4) + 31)){
-		addr = (ulong)bp->base;
+		addr = (uintptr)bp->base;
 		addr = ROUNDUP(addr, 32);
 		bp->rp = (uchar*)addr;
 	}
@@ -1158,8 +1159,8 @@
 	iunlock(&ctlr->wlock);
 }
 
-static long
-ifstat(Ether* ether, void* a, long n, ulong offset)
+static s32
+ifstat(Ether* ether, void* a, s32 n, u32 offset)
 {
 	char *p;
 	int len;
--- /dev/null
+++ b/os/pc/ethervirtio.c
@@ -1,0 +1,689 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "../port/etherif.h"
+
+/*
+ * virtio ethernet driver
+ * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
+ */
+
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vheader Vheader;
+typedef struct Vqueue Vqueue;
+typedef struct Ctlr Ctlr;
+
+enum {
+	/* §2.1 Device Status Field */
+	Sacknowledge = 1,
+	Sdriver = 2,
+	Sdriverok = 4,
+	Sfeatureok = 8,
+	Sfailed = 128,
+
+	/* §4.1.4.8 Legacy Interfaces: A Note on PCI Device Layout */
+	Qdevfeat = 0,
+	Qdrvfeat = 4,
+	Qaddr = 8,
+	Qsize = 12,
+	Qselect = 14,
+	Qnotify = 16,
+	Qstatus = 18,
+	Qisr = 19,
+	Qmac = 20,
+	Qnetstatus = 26,
+
+	/* flags in Qnetstatus */
+	Nlinkup = (1<<0),
+	Nannounce = (1<<1),
+
+	/* feature bits */
+	Fmac = (1<<5),
+	Fstatus = (1<<16),
+	Fctrlvq = (1<<17),
+	Fctrlrx = (1<<18),
+
+	/* vring used flags */
+	Unonotify = 1,
+	/* vring avail flags */
+	Rnointerrupt = 1,
+
+	/* descriptor flags */
+	Dnext = 1,
+	Dwrite = 2,
+	Dindirect = 4,
+
+	/* struct sizes */
+	VringSize = 4,
+	VdescSize = 16,
+	VusedSize = 8,
+	VheaderSize = 10,
+
+	/* §4.1.5.1.4.1 says pages are 4096 bytes
+	 * for the purposes of the driver.
+	 */
+	VBY2PG	= 4096,
+#define VPGROUND(s)	ROUND(s, VBY2PG)
+
+	Vrxq	= 0,
+	Vtxq	= 1,
+	Vctlq	= 2,
+
+	/* class/cmd for Vctlq */
+	CtrlRx	= 0x00,
+		CmdPromisc	= 0x00,
+		CmdAllmulti	= 0x01,
+	CtrlMac	= 0x01,
+		CmdMacTableSet	= 0x00,
+	CtrlVlan= 0x02,
+		CmdVlanAdd	= 0x00,
+		CmdVlanDel	= 0x01,
+};
+
+struct Vring
+{
+	u16int	flags;
+	u16int	idx;
+};
+
+struct Vdesc
+{
+	u64int	addr;
+	u32	len;
+	u16int	flags;
+	u16int	next;
+};
+
+struct Vused
+{
+	u32	id;
+	u32	len;
+};
+
+struct Vheader
+{
+	u8int	flags;
+	u8int	segtype;
+	u16int	hlen;
+	u16int	seglen;
+	u16int	csumstart;
+	u16int	csumend;
+};
+
+/* §2.4 Virtqueues */
+struct Vqueue
+{
+	Rendez;
+
+	uint	qsize;
+	uint	qmask;
+
+	Vdesc	*desc;
+
+	Vring	*avail;
+	u16int	*availent;
+	u16int	*availevent;
+
+	Vring	*used;
+	Vused	*usedent;
+	u16int	*usedevent;
+	u16int	lastused;
+
+	uint	nintr;
+	uint	nnote;
+};
+
+struct Ctlr {
+	Lock;
+
+	QLock	ctllock;
+
+	int	attached;
+
+	int	port;
+	Pcidev	*pcidev;
+	Ctlr	*next;
+	int	active;
+	int	id;
+	int	typ;
+	ulong	feat;
+	int	nqueue;
+
+	/* virtioether has 3 queues: rx, tx and ctl */
+	Vqueue	queue[3];
+};
+
+static Ctlr *ctlrhead;
+
+static int
+vhasroom(void *v)	/* sleep() condition: takes the Vqueue as void* */
+{
+	Vqueue *q = v;
+	return q->lastused != q->used->idx;	/* non-zero when used->idx has moved past lastused */
+}
+
+static void
+vqnotify(Ctlr *ctlr, int x)
+{
+	Vqueue *q;
+
+	coherence();			/* barrier before looking at the used-ring flags */
+	q = &ctlr->queue[x];
+	if(q->used->flags & Unonotify)	/* Unonotify set in the used ring: skip the notify */
+		return;
+	q->nnote++;			/* statistic reported by ifstat */
+	outs(ctlr->port+Qnotify, x);	/* write the queue index to the Qnotify register */
+}
+
+static void
+txproc(void *v)
+{
+	Vheader *header;
+	Block **blocks;
+	Ether *edev;
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vused *u;
+	Block *b;
+	int i, j;
+
+	edev = v;
+	ctlr = edev->ctlr;
+	q = &ctlr->queue[Vtxq];
+
+	header = smalloc(VheaderSize);	/* one header buffer, shared by every descriptor pair */
+	blocks = smalloc(sizeof(Block*) * (q->qsize/2));	/* blocks[i] is the Block in flight for pair i */
+
+	for(i = 0; i < q->qsize/2; i++){	/* descriptors are used in pairs: 2i = header, 2i+1 = data */
+		j = i << 1;
+		q->desc[j].addr = PADDR(header);
+		q->desc[j].len = VheaderSize;
+		q->desc[j].next = j | 1;
+		q->desc[j].flags = Dnext;
+
+		q->availent[i] = q->availent[i + q->qsize/2] = j;	/* both halves of the avail ring name the same pair */
+
+		j |= 1;
+		q->desc[j].next = 0;
+		q->desc[j].flags = 0;
+	}
+
+	q->avail->flags &= ~Rnointerrupt;	/* clear the flag initqueue set */
+
+	while(waserror())	/* NOTE(review): presumably re-arms after any error raised below - confirm */
+		;
+
+	while((b = qbread(edev->oq, 1000000)) != nil){	/* one outgoing Block per iteration */
+		for(;;){
+			/* retire completed packets */
+			while((i = q->lastused) != q->used->idx){
+				u = &q->usedent[i & q->qmask];
+				i = (u->id & q->qmask) >> 1;	/* descriptor id -> pair index */
+				if(blocks[i] == nil)
+					break;
+				freeb(blocks[i]);
+				blocks[i] = nil;
+				q->lastused++;
+			}
+
+			/* have free slot? */
+			i = q->avail->idx & (q->qmask >> 1);
+			if(blocks[i] == nil)
+				break;
+
+			/* ring full, wait and retry */
+			if(!vhasroom(q))
+				sleep(q, vhasroom, q);
+		}
+
+		/* slot is free, fill in descriptor */
+		blocks[i] = b;
+		j = (i << 1) | 1;	/* the data descriptor of pair i */
+		q->desc[j].addr = PADDR(b->rp);
+		q->desc[j].len = BLEN(b);
+		coherence();		/* descriptor writes are ordered before the idx update */
+		q->avail->idx++;
+		vqnotify(ctlr, Vtxq);
+	}
+
+	pexit("ether out queue closed", 1);	/* reached only when qbread returns nil */
+}
+
+static void
+rxproc(void *v)
+{
+	Vheader *header;
+	Block **blocks;
+	Ether *edev;
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vused *u;
+	Block *b;
+	int i, j;
+
+	edev = v;
+	ctlr = edev->ctlr;
+	q = &ctlr->queue[Vrxq];
+
+	header = smalloc(VheaderSize);	/* one header buffer, shared by every descriptor pair */
+	blocks = smalloc(sizeof(Block*) * (q->qsize/2));	/* blocks[i] is the Block posted for pair i */
+
+	for(i = 0; i < q->qsize/2; i++){	/* descriptors are used in pairs: 2i = header, 2i+1 = data */
+		j = i << 1;
+		q->desc[j].addr = PADDR(header);
+		q->desc[j].len = VheaderSize;
+		q->desc[j].next = j | 1;
+		q->desc[j].flags = Dwrite|Dnext;
+
+		q->availent[i] = q->availent[i + q->qsize/2] = j;	/* both halves of the avail ring name the same pair */
+
+		j |= 1;
+		q->desc[j].next = 0;
+		q->desc[j].flags = Dwrite;
+	}
+
+	q->avail->flags &= ~Rnointerrupt;	/* clear the flag initqueue set */
+
+	while(waserror())	/* NOTE(review): presumably re-arms after any error raised below - confirm */
+		;
+
+	for(;;){
+		/* replenish receive ring */
+		do {
+			i = q->avail->idx & (q->qmask >> 1);
+			if(blocks[i] != nil)	/* slot still holds a posted buffer */
+				break;
+			if((b = iallocb(ETHERMAXTU)) == nil)	/* out of buffers: stop refilling for now */
+				break;
+			blocks[i] = b;
+			j = (i << 1) | 1;	/* the data descriptor of pair i */
+			q->desc[j].addr = PADDR(b->rp);
+			q->desc[j].len = BALLOC(b);
+			coherence();		/* descriptor writes are ordered before the idx update */
+			q->avail->idx++;
+		} while(q->avail->idx != q->used->idx);
+		vqnotify(ctlr, Vrxq);
+
+		/* wait for any packets to complete */
+		if(!vhasroom(q))
+			sleep(q, vhasroom, q);
+
+		/* retire completed packets */
+		while((i = q->lastused) != q->used->idx) {
+			u = &q->usedent[i & q->qmask];
+			i = (u->id & q->qmask) >> 1;	/* descriptor id -> pair index */
+			if((b = blocks[i]) == nil)
+				break;
+
+			blocks[i] = nil;
+
+			b->wp = b->rp + u->len - VheaderSize;	/* u->len minus the header gives the packet length */
+			etheriq(edev, b);
+			q->lastused++;
+		}
+	}
+}
+
+static int
+vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
+{
+	uchar hdr[2], ack[1];
+	Ctlr *ctlr;
+	Vqueue *q;
+	Vdesc *d;
+	int i;
+
+	ctlr = edev->ctlr;
+	q = &ctlr->queue[Vctlq];
+	if(q->qsize < 3)	/* the command needs three descriptors */
+		return -1;
+
+	qlock(&ctlr->ctllock);	/* one control command at a time */
+	while(waserror())	/* NOTE(review): presumably re-arms after any error raised below - confirm */
+		;
+
+	ack[0] = 0x55;		/* preset to a non-zero value; returned to the caller at the end */
+	hdr[0] = class;
+	hdr[1] = cmd;
+
+	d = &q->desc[0];	/* chain: desc 0 = hdr, desc 1 = data, desc 2 = ack */
+	d->addr = PADDR(hdr);
+	d->len = sizeof(hdr);
+	d->next = 1;
+	d->flags = Dnext;
+	d++;
+	d->addr = PADDR(data);
+	d->len = ndata;
+	d->next = 2;
+	d->flags = Dnext;
+	d++;
+	d->addr = PADDR(ack);
+	d->len = sizeof(ack);
+	d->next = 0;
+	d->flags = Dwrite;	/* only the ack descriptor carries Dwrite */
+
+	i = q->avail->idx & q->qmask;
+	q->availent[i] = 0;	/* the chain always starts at descriptor 0 */
+	coherence();
+
+	q->avail->flags &= ~Rnointerrupt;	/* flag cleared only while this command is pending */
+	q->avail->idx++;
+	vqnotify(ctlr, Vctlq);
+	while(!vhasroom(q))
+		sleep(q, vhasroom, q);
+	q->lastused = q->used->idx;
+	q->avail->flags |= Rnointerrupt;
+
+	qunlock(&ctlr->ctllock);
+	poperror();
+
+	if(ack[0] != 0)		/* any non-zero ack is logged */
+		print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
+
+	return ack[0];
+}
+
+static void
+interrupt(Ureg*, void* arg)
+{
+	Ether *edev;
+	Ctlr *ctlr;
+	Vqueue *q;
+	int i;
+
+	edev = arg;
+	ctlr = edev->ctlr;
+	if(inb(ctlr->port+Qisr) & 1){	/* act only when bit 0 of the Qisr register is set */
+		for(i = 0; i < ctlr->nqueue; i++){
+			q = &ctlr->queue[i];
+			if(vhasroom(q)){	/* used->idx moved: wake whoever sleeps on this queue */
+				q->nintr++;	/* statistic reported by ifstat */
+				wakeup(q);
+			}
+		}
+	}
+}
+
+static void
+attach(Ether* edev)
+{
+	char name[KNAMELEN];
+	Ctlr* ctlr;
+
+	ctlr = edev->ctlr;
+	lock(ctlr);
+	if(ctlr->attached){	/* only the first attach does any work */
+		unlock(ctlr);
+		return;
+	}
+	ctlr->attached = 1;
+	unlock(ctlr);
+
+	/* ready to go: add Sdriverok to the bits already in Qstatus */
+	outb(ctlr->port+Qstatus, inb(ctlr->port+Qstatus) | Sdriverok);
+
+	/* start kprocs: one runs rxproc, the other txproc */
+	snprint(name, sizeof name, "#l%drx", edev->ctlrno);
+	kproc(name, rxproc, edev, 0);
+	snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
+	kproc(name, txproc, edev, 0);
+}
+
+static s32
+ifstat(Ether *edev, void *a, s32 n, u32 offset)	/* s32/u32, matching the other ifstat callbacks in this patch */
+{
+	int i, l;
+	char *p;
+	Ctlr *ctlr;
+	Vqueue *q;
+
+	ctlr = edev->ctlr;
+
+	p = smalloc(READSTR);	/* the whole report is formatted here, then sliced by readstr */
+
+	l = snprint(p, READSTR, "devfeat %32.32lub\n", ctlr->feat);
+	l += snprint(p+l, READSTR-l, "drvfeat %32.32lub\n", inl(ctlr->port+Qdrvfeat));
+	l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", inb(ctlr->port+Qstatus));
+	if(ctlr->feat & Fstatus)	/* Qnetstatus is read only when the Fstatus feature bit is set */
+		l += snprint(p+l, READSTR-l, "netstatus %8.8ub\n",  inb(ctlr->port+Qnetstatus));
+
+	for(i = 0; i < ctlr->nqueue; i++){	/* one line of ring indices and counters per queue */
+		q = &ctlr->queue[i];
+		l += snprint(p+l, READSTR-l,
+			"vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
+			i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
+	}
+
+	n = readstr(offset, a, n, p);	/* copy out the part selected by offset and n */
+	free(p);
+
+	return n;
+}
+
+static void
+shutdown(Ether* edev)	/* quiesce the device at shutdown */
+{
+	Ctlr *ctlr = edev->ctlr;
+	outb(ctlr->port+Qstatus, 0);	/* same write pciprobe() issues first during legacy device initialization */
+	pciclrbme(ctlr->pcidev);	/* counterpart of the pcisetbme() call in reset() */
+}
+
+static void
+promiscuous(void *arg, int on)	/* arg is the Ether (reset() sets edev->arg = edev) */
+{
+	Ether *edev = arg;
+	uchar b[1];	/* one-byte command payload */
+
+	b[0] = on != 0;	/* normalise any non-zero value to 1 */
+	vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));	/* result of the control command is ignored */
+}
+
+static void
+multicast(void *arg, uchar*, int)	/* the address and the int argument are ignored */
+{
+	Ether *edev = arg;
+	uchar b[1];	/* one-byte command payload */
+
+	b[0] = edev->nmaddr > 0;	/* all-multicast on whenever at least one multicast address is registered */
+	vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));	/* result of the control command is ignored */
+}
+
+/* §2.4.2 Legacy Interfaces: A Note on Virtqueue Layout */
+static ulong
+queuesize(ulong size)	/* bytes to allocate for a virtqueue of size entries */
+{
+	return VPGROUND(VdescSize*size + sizeof(u16int)*(3+size))	/* descriptors plus available ring, rounded up */
+		+ VPGROUND(sizeof(u16int)*3 + VusedSize*size);	/* used ring, rounded up; initqueue() places it on its own VPGROUND boundary */
+}
+
+static int
+initqueue(Vqueue *q, int size)	/* allocate and lay out one virtqueue; 0 on success, -1 if out of memory */
+{
+	uchar *p;	/* cursor walking through the allocation */
+
+	/* §2.4: Queue Size value is always a power of 2 and <= 32768 */
+	assert(!(size & (size - 1)) && size <= 32768);
+
+	p = mallocalign(queuesize(size), VBY2PG, 0, 0);	/* VBY2PG-aligned: pciprobe() gives the device PADDR(desc)/VBY2PG */
+	if(p == nil){
+		print("ethervirtio: no memory for Vqueue\n");
+		free(p);	/* p is nil on this path */
+		return -1;
+	}
+
+	q->desc = (void*)p;	/* descriptor table comes first */
+	p += VdescSize*size;
+	q->avail = (void*)p;	/* then the available-ring header */
+	p += VringSize;
+	q->availent = (void*)p;	/* size u16int available entries */
+	p += sizeof(u16int)*size;
+	q->availevent = (void*)p;	/* one trailing u16int */
+	p += sizeof(u16int);
+
+	p = (uchar*)VPGROUND((uintptr)p);	/* used ring starts on the next VPGROUND boundary, matching queuesize() */
+	q->used = (void*)p;
+	p += VringSize;
+	q->usedent = (void*)p;
+	p += VusedSize*size;
+	q->usedevent = (void*)p;
+
+	q->qsize = size;
+	q->qmask = q->qsize - 1;	/* usable as an index mask because size is a power of two */
+
+	q->lastused = q->avail->idx = q->used->idx = 0;
+
+	q->avail->flags |= Rnointerrupt;	/* NOTE(review): |= assumes flags starts zeroed -- confirm mallocalign clears memory */
+
+	return 0;
+}
+
+static Ctlr*
+pciprobe(int typ)	/* find and initialise legacy virtio devices of subsystem id typ; returns a list linked through ->next */
+{
+	Ctlr *c, *h, *t;	/* c: current controller; h/t: head and tail of the result list */
+	Pcidev *p;
+	int n, i;
+
+	h = t = nil;
+
+	/* §4.1.2 PCI Device Discovery */
+	for(p = nil; p = pcimatch(p, 0, 0);){	/* walk PCI devices, filtering by hand below */
+		if(p->vid != 0x1AF4)
+			continue;
+		/* the two possible DIDs for virtio-net */
+		if(p->did != 0x1000 && p->did != 0x1041)
+			continue;
+		/* non-transitional devices will have a revision > 0 */
+		if(p->rid != 0)
+			continue;
+		/* first membar needs to be I/O */
+		if((p->mem[0].bar & 1) == 0)
+			continue;
+		/* non-transitional device will have typ+0x40 */
+		if(pcicfgr16(p, 0x2E) != typ)
+			continue;
+		if((c = mallocz(sizeof(Ctlr), 1)) == nil){
+			print("ethervirtio: no memory for Ctlr\n");
+			break;	/* stop scanning; controllers found so far are still returned */
+		}
+		c->port = p->mem[0].bar & ~3;	/* strip the low flag bits of the BAR to get the port base */
+		if(ioalloc(c->port, p->mem[0].size, 0, "ethervirtio") < 0){
+			print("ethervirtio: port %ux in use\n", c->port);
+			free(c);
+			continue;
+		}
+
+		c->typ = typ;
+		c->pcidev = p;
+		pcienable(p);
+		c->id = (p->did<<16)|p->vid;
+
+		/* §3.1.2 Legacy Device Initialization */
+		outb(c->port+Qstatus, 0);
+		outb(c->port+Qstatus, Sacknowledge|Sdriver);
+
+		/* negotiate feature bits */
+		c->feat = inl(c->port+Qdevfeat);	/* c->feat keeps what the device offers, not the accepted subset */
+		outl(c->port+Qdrvfeat, c->feat & (Fmac|Fstatus|Fctrlvq|Fctrlrx));
+
+		/* §4.1.5.1.4 Virtqueue Configuration */
+		for(i=0; i<nelem(c->queue); i++){
+			outs(c->port+Qselect, i);
+			n = ins(c->port+Qsize);
+			if(n == 0 || (n & (n-1)) != 0){	/* zero or not a power of two: no more usable queues */
+				if(i < 2)	/* only complain about the first two queues, which are mandatory below */
+					print("ethervirtio: queue %d has invalid size %d\n", i, n);
+				break;
+			}
+			if(initqueue(&c->queue[i], n) < 0)
+				break;
+			coherence();
+			outl(c->port+Qaddr, PADDR(c->queue[i].desc)/VBY2PG);	/* queue address is given as a VBY2PG page number */
+		}
+		if(i < 2){	/* fewer than two queues configured */
+			print("ethervirtio: no queues\n");
+			pcidisable(p);
+			free(c);	/* NOTE(review): queue memory from initqueue() and the ioalloc() range are not released here */
+			continue;
+		}
+		c->nqueue = i;		
+	
+		if(h == nil)
+			h = c;
+		else
+			t->next = c;
+		t = c;
+	}
+
+	return h;	/* nil when nothing usable was found */
+}
+
+
+static int
+reset(Ether* edev)	/* bind edev to an unused probed controller; 0 on success, -1 if none matches */
+{
+	static uchar zeros[Eaddrlen];	/* all-zero address, used to detect an unset edev->ea */
+	Ctlr *ctlr;
+	int i;
+
+	if(ctlrhead == nil)
+		ctlrhead = pciprobe(1);	/* probe lazily, on first use */
+
+	for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
+		if(ctlr->active)
+			continue;	/* already claimed by another Ether */
+		if(edev->port == 0 || edev->port == ctlr->port){	/* port 0 accepts any controller */
+			ctlr->active = 1;
+			break;
+		}
+	}
+
+	if(ctlr == nil)
+		return -1;
+
+	edev->ctlr = ctlr;
+	edev->port = ctlr->port;
+	edev->irq = ctlr->pcidev->intl;
+	edev->tbdf = ctlr->pcidev->tbdf;
+	edev->mbps = 1000;
+	edev->link = 1;
+
+	if((ctlr->feat & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){	/* device offers a MAC and none is configured */
+		for(i = 0; i < Eaddrlen; i++)
+			edev->ea[i] = inb(ctlr->port+Qmac+i);	/* read the address from the device */
+	} else {
+		for(i = 0; i < Eaddrlen; i++)
+			outb(ctlr->port+Qmac+i, edev->ea[i]);	/* otherwise write edev->ea to the device */
+	}
+
+	edev->arg = edev;	/* promiscuous() and multicast() receive this as arg */
+
+	edev->attach = attach;
+	edev->shutdown = shutdown;
+	edev->ifstat = ifstat;
+
+	if((ctlr->feat & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){	/* both control features offered by the device */
+		edev->multicast = multicast;
+		edev->promiscuous = promiscuous;
+	}
+
+	pcisetbme(ctlr->pcidev);
+	intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
+
+	return 0;
+}
+
+void
+ethervirtiolink(void)	/* registers this driver's reset() under the card name "virtio" */
+{
+	addethercard("virtio", reset);
+}
+
--- a/os/pc/fns.h
+++ b/os/pc/fns.h
@@ -25,10 +25,10 @@
 void	fprestore(FPU*);
 void	fpsave(FPU*);
 ulong	fpstatus(void);
-ulong	getcr0(void);
-ulong	getcr2(void);
-ulong	getcr3(void);
-ulong	getcr4(void);
+u32	getcr0(void);
+u32	getcr2(void);
+u32	getcr3(void);
+u32	getcr4(void);
 char*	getconf(char*);
 void	guesscpuhz(int);
 int	i8042auxcmd(int);
@@ -133,7 +133,7 @@
 void	screeninit(void);
 int	screenprint(char*, ...);			/* debugging */
 void	(*screenputs)(char*, int);
-int	segflush(void*, ulong);
+s32	segflush(void*, u32);
 void	syncclock(void);
 uvlong	tscticks(uvlong*);
 void	trapenable(int, void (*)(Ureg*, void*), void*, char*);
@@ -155,9 +155,9 @@
 int	kbdinready(void);
 
 #define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
-#define getcallerpc(x)	(((ulong*)(x))[-1])
-#define KADDR(a)	((void*)((ulong)(a)|KZERO))
-#define PADDR(a)	((ulong)(a)&~KZERO)
+#define getcallerpc(x)	(((uintptr*)(x))[-1])
+#define KADDR(a)	((void*)((uintptr)(a)|KZERO))
+#define PADDR(a)	((uintptr)(a)&~KZERO)
 
 #define	dcflush(a, b)
 #define	clockcheck();
--- /dev/null
+++ b/os/pc/hpet.c
@@ -1,0 +1,126 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+/*
+ * HPET timer
+ *
+ * The HPET timer is memory mapped which allows
+ * faster access compared to the classic i8253.
+ * This timer is not used to generate interrupts
+ * as we use the LAPIC timer for that.
+ * Its purpose is to measure the LAPIC timer
+ * and TSC frequencies.
+ */
+
+enum {	/* register byte offsets divided by 4: hpet.mmio is indexed as a u32 array */
+	Cap	= 0x00/4,
+	Period	= 0x04/4,	/* hpetprobe() divides 10^15 by this value to get the frequency */
+	Config	= 0x10/4,	/* hpetinit() sets bit 0 to start counting */
+	Isr	= 0x20/4,
+	Ctrlo	= 0xF0/4,	/* counter word read by hpetcpufreq() and hpetread() */
+	Ctrhi	= 0xF4/4,
+};
+
+static struct {
+	Lock;	/* taken with ilock() around counter reads */
+	u32	*mmio;	/* register window mapped by hpetprobe() */
+	uvlong	last;	/* 64-bit tick count as of the previous hpetread() */
+	uvlong	freq;	/* counter frequency, computed in hpetprobe() */
+} hpet;
+
+int
+hpetprobe(uvlong pa)	/* map the HPET at physical address pa and compute its frequency; 0 on success, -1 on failure */
+{
+	u32 cap, period;
+	int mhz;
+
+	if((hpet.mmio = vmap(pa, 1024)) == nil)
+		return -1;
+	cap = hpet.mmio[Cap];	/* only printed below */
+	period = hpet.mmio[Period];	/* counter tick period in femtoseconds */
+	if(period == 0 || period > 0x05F5E100)	/* valid range is 1..10^8 fs (0x05F5E100); was mistyped 0x05F4E100 */
+		return -1;	/* NOTE(review): the mapping stays in place and hpet.mmio stays non-nil on this path */
+	hpet.freq = 1000000000000000ULL / period;	/* 10^15 fs per second / period = Hz */
+	mhz = (hpet.freq + 500000) / 1000000;	/* rounded to the nearest MHz, for the message only */
+
+	print("HPET: %llux %.8ux %d MHz \n", pa, cap, mhz);
+
+	return 0;
+}
+
+static uvlong
+hpetcpufreq(void)	/* time aamloop() against the HPET counter; returns the estimated cpu frequency */
+{
+	u32 x, y;	/* x: counter before the loop; y: counter ticks elapsed */
+	uvlong a, b;	/* cycles() readings before and after the loop */
+	int loops;
+
+	ilock(&hpet);
+	for(loops = 1000;;loops += 1000){	/* lengthen the run until it spans enough HPET ticks */
+		cycles(&a);
+		x = hpet.mmio[Ctrlo];
+		aamloop(loops);
+		cycles(&b);
+		y = hpet.mmio[Ctrlo] - x;	/* u32 subtraction, so one counter wrap is harmless */
+		if(y >= hpet.freq/HZ || loops >= 1000000)	/* at least freq/HZ ticks elapsed, or give up growing */
+			break;
+	}
+	iunlock(&hpet);
+
+	if(m->havetsc && b > a){
+		b -= a;
+		m->cyclefreq = b * hpet.freq / y;	/* NOTE(review): y can be 0 if the counter is not advancing -- divide by zero */
+		m->aalcycles = (b + loops-1) / loops;	/* cycles per aamloop iteration, rounded up */
+		return m->cyclefreq;
+	}
+	return (vlong)loops*m->aalcycles * hpet.freq / y;	/* no usable cycle counter: relies on m->aalcycles set elsewhere */
+}
+
+void
+hpetinit(void)	/* fill in this processor's cpuhz, cpumhz and loopconst */
+{
+	uvlong cpufreq;
+
+	if(m->machno != 0){	/* other processors copy what processor 0 measured */
+		m->cpuhz = MACHP(0)->cpuhz;
+		m->cpumhz = MACHP(0)->cpumhz;
+		m->cyclefreq = MACHP(0)->cyclefreq;
+		m->loopconst = MACHP(0)->loopconst;
+		return;
+	}
+
+	/* start counting */
+	hpet.mmio[Config] |= 1;
+
+	/* measure loopconst for delay() and tsc frequencies */
+	cpufreq = hpetcpufreq();
+
+	m->loopconst = (cpufreq/1000)/m->aalcycles;	/* AAM+LOOP's for 1 ms */
+	m->cpuhz = cpufreq;
+
+	/* round to the nearest megahz */
+	m->cpumhz = (cpufreq+500000)/1000000L;
+	if(m->cpumhz == 0)
+		m->cpumhz = 1;	/* never report 0 MHz */
+}
+
+uvlong
+hpetread(uvlong *hz)	/* returns a 64-bit tick count; stores the counter frequency in *hz when hz is not nil */
+{
+	uvlong ticks;
+
+	if(hz != nil)
+		*hz = hpet.freq;
+
+	ilock(&hpet);
+	ticks = hpet.last;
+	ticks += hpet.mmio[Ctrlo] - (u32)ticks;	/* add the u32 delta since the last read, extending the 32-bit counter */
+	hpet.last = ticks;	/* NOTE(review): presumably must be called at least once per 32-bit wrap -- confirm */
+	iunlock(&hpet);
+
+	return ticks;
+}
--- a/os/pc/i8253.c
+++ b/os/pc/i8253.c
@@ -251,7 +251,7 @@
 	uvlong ticks;
 
 	if(hz)
-		*hz = i8253.hz;
+		*hz = Freq<<Tickshift;
 
 	ilock(&i8253);
 	outb(Tmode, Latch2);
@@ -276,39 +276,4 @@
 	iunlock(&i8253);
 
 	return ticks<<Tickshift;
-}
-
-void
-delay(int millisecs)
-{
-	millisecs *= m->loopconst;
-	if(millisecs <= 0)
-		millisecs = 1;
-	aamloop(millisecs);
-}
-
-void
-microdelay(int microsecs)
-{
-	microsecs *= m->loopconst;
-	microsecs /= 1000;
-	if(microsecs <= 0)
-		microsecs = 1;
-	aamloop(microsecs);
-}
-
-/*  
- *  performance measurement ticks.  must be low overhead.
- *  doesn't have to count over a second.
- */
-ulong
-perfticks(void)
-{
-	uvlong x;
-
-	if(m->havetsc)
-		cycles(&x);
-	else
-		x = 0;
-	return x;
 }
--- a/os/pc/i8259.c
+++ b/os/pc/i8259.c
@@ -197,3 +197,101 @@
 	iunlock(&i8259lock);
 	return 0;
 }
+
+static int
+irqenable(Vctl *v, int shared)	/* installed as v->enable by i8259assign(); always returns 0 */
+{
+	if(shared)
+		return 0;	/* shared: the mask is left untouched */
+	ilock(&i8259lock);
+	i8259mask &= ~(1<<v->irq);	/* a clear bit in the mask means the irq is enabled */
+	if(v->irq < 8)
+		outb(Int0aux, i8259mask & 0xFF);	/* irqs 0-7: low byte of the mask */
+	else
+		outb(Int1aux, (i8259mask>>8) & 0xFF);	/* irqs 8 and up: high byte of the mask */
+	iunlock(&i8259lock);
+	return 0;
+}
+
+static int
+irqdisable(Vctl *v, int shared)	/* installed as v->disable by i8259assign(); always returns 0 */
+{
+	if(shared)
+		return 0;	/* shared: the mask is left untouched */
+	ilock(&i8259lock);
+	i8259mask |= 1<<v->irq;	/* a set bit in the mask means the irq is disabled */
+	if(v->irq < 8)
+		outb(Int0aux, i8259mask & 0xFF);	/* irqs 0-7: low byte of the mask */
+	else
+		outb(Int1aux, (i8259mask>>8) & 0xFF);	/* irqs 8 and up: high byte of the mask */
+	iunlock(&i8259lock);
+	return 0;
+}
+
+int
+i8259assign(Vctl *v)	/* returns the vector for v->irq, or -1 if the irq is out of range or cannot be shared */
+{
+	int irq, irqbit;
+
+	/*
+	 * Given an IRQ, check it, install the i8259 enable/disable and
+	 * isr/eoi hooks in v, and return the vector to be used. The i8259
+	 * uses a fixed range of vectors starting at VectorPIC.
+	 */
+	irq = v->irq;
+	if(irq < 0 || irq > MaxIrqPIC){
+		print("i8259assign: irq %d out of range\n", irq);
+		return -1;
+	}
+	irqbit = 1<<irq;
+
+	ilock(&i8259lock);
+	if(!(i8259mask & irqbit) && !(i8259elcr & irqbit)){	/* already unmasked and not flagged in i8259elcr */
+		print("i8259assign: irq %d shared but not level\n", irq);
+		iunlock(&i8259lock);
+		return -1;
+	}
+	iunlock(&i8259lock);
+
+	if(i8259elcr & irqbit)
+		v->eoi = i8259isr;	/* irq flagged in i8259elcr: i8259isr is installed as the eoi hook */
+	else
+		v->isr = i8259isr;	/* otherwise i8259isr is installed as the isr hook */
+
+	v->enable = irqenable;	/* masking is left to these hooks; nothing is unmasked here */
+	v->disable = irqdisable;
+
+	return VectorPIC+irq;
+}
+
+int
+i8259irqno(int irq, int tbdf)	/* returns the irq number to use, or -1 if irq is unusable */
+{
+	if(tbdf != BUSUNKNOWN && (irq == 0xff || irq == 0))	/* known bus address but irq 0 or 0xff: reject */
+		return -1;
+
+	/*
+	 * IRQ2 doesn't really exist, it's used to gang the interrupt
+	 * controllers together. A device set to IRQ2 will appear on
+	 * the second interrupt controller as IRQ9.
+	 */
+	if(irq == 2)
+		irq = 9;
+
+	return irq;
+}
+
+void
+i8259on(void)	/* write the software mask i8259mask back to both mask ports */
+{
+	outb(Int0aux, i8259mask&0xFF);	/* low byte: irqs 0-7 */
+	outb(Int1aux, (i8259mask>>8)&0xFF);	/* high byte: irqs 8 and up */
+}
+
+void
+i8259off(void)	/* set every mask bit on both ports; i8259mask itself is left unchanged */
+{
+	outb(Int0aux, 0xFF);
+	outb(Int1aux, 0xFF);
+}
+
--- a/os/pc/memory.c
+++ b/os/pc/memory.c
@@ -26,8 +26,8 @@
 };
 
 typedef struct {
-	int	size;
-	ulong	addr;
+	u32	size;
+	uintptr	addr;
 } Map;
 
 typedef struct {
@@ -80,13 +80,13 @@
 
 	print("%s\n", rmap->name);	
 	for(mp = rmap->map; mp->size; mp++)
-		print("\t%8.8luX %8.8uX %8.8luX\n", mp->addr, mp->size, mp->addr+mp->size);
+		print("\t%8.8zuX %8.8uX %8.8zuX\n", mp->addr, mp->size, mp->addr+mp->size);
 }
 
 void
 memdebug(void)
 {
-	ulong maxpa, maxpa1, maxpa2;
+	uintptr maxpa, maxpa1, maxpa2;
 
 	if(MEMDEBUG == 0)
 		return;
@@ -94,7 +94,7 @@
 	maxpa = (nvramread(0x18)<<8)|nvramread(0x17);
 	maxpa1 = (nvramread(0x31)<<8)|nvramread(0x30);
 	maxpa2 = (nvramread(0x16)<<8)|nvramread(0x15);
-	print("maxpa = %luX -> %luX, maxpa1 = %luX maxpa2 = %luX\n",
+	print("maxpa = %zuX -> %zuX, maxpa1 = %zuX maxpa2 = %zuX\n",
 		maxpa, MB+maxpa*KB, maxpa1, maxpa2);
 
 	mapprint(&rmapram);
@@ -104,10 +104,10 @@
 }
 
 void
-mapfree(RMap* rmap, ulong addr, ulong size)
+mapfree(RMap* rmap, uintptr addr, ulong size)
 {
 	Map *mp;
-	ulong t;
+	uintptr t;
 
 	if(size <= 0)
 		return;
@@ -134,7 +134,7 @@
 		}
 		else do{
 			if(mp >= rmap->mapend){
-				print("mapfree: %s: losing 0x%luX, %ld\n",
+				print("mapfree: %s: losing 0x%zuX, %ld\n",
 					rmap->name, addr, size);
 				break;
 			}
@@ -149,11 +149,11 @@
 	unlock(rmap);
 }
 
-ulong
-mapalloc(RMap* rmap, ulong addr, int size, int align)
+uintptr
+mapalloc(RMap* rmap, uintptr addr, int size, int align)
 {
 	Map *mp;
-	ulong maddr, oaddr;
+	uintptr maddr, oaddr;
 
 	lock(rmap);
 	for(mp = rmap->map; mp->size; mp++){
@@ -267,9 +267,9 @@
 
 
 static void
-ramscan(ulong maxmem)
+ramscan(uintptr maxmem)
 {
-	ulong *k0, kzero, map, maxpa, pa, *pte, *table, *va, x, n;
+	uintptr *k0, kzero, map, maxpa, pa, *pte, *table, *va, x, n;
 	int nvalid[NMemType];
 	uchar *bda;
 
@@ -291,7 +291,7 @@
 	mapfree(&rmapram, x, n);
 //	memset(KADDR(x), 0, n);			/* keep us honest */
 
-	x = PADDR(PGROUND((ulong)end));
+	x = PADDR(PGROUND((uintptr)end));
 	pa = MemMinMB*MB;
 	mapfree(&rmapram, x, pa-x);
 //	memset(KADDR(x), 0, pa-x);		/* keep us honest */
@@ -323,7 +323,7 @@
 	 * be written and read correctly. The page tables are created here
 	 * on the fly, allocating from low memory as necessary.
 	 */
-	k0 = (ulong*)KADDR(0);
+	k0 = (uintptr*)KADDR(0);
 	kzero = *k0;
 	map = 0;
 	x = 0x12345678;
@@ -368,7 +368,7 @@
 				*pte++ = pa|PTEWRITE|PTEVALID;
 				pa += BY2PG;
 			}while(pa % MB);
-			mmuflushtlb(PADDR(m->pdb));
+			// TODO mmuflushtlb(PADDR(m->pdb));
 			/* memset(va, 0, MB); so damn slow to memset all of memory */
 		}
 		else if(pa < 16*MB){
@@ -409,7 +409,7 @@
 				map = 0;
 		}
 
-		mmuflushtlb(PADDR(m->pdb));
+		// TODO mmuflushtlb(PADDR(m->pdb));
 		x += 0x3141526;
 	}
 
@@ -427,15 +427,15 @@
 	if(maxmem < 0xFFE00000)
 		mapfree(&rmapupa, maxmem, 0x00000000-maxmem);
 	if(MEMDEBUG)
-		print("maxmem %luX %luX\n", maxmem, 0x00000000-maxmem);
+		print("maxmem %zuX %zuX\n", maxmem, 0x00000000-maxmem);
 	*k0 = kzero;
 }
 
 void
-meminit(ulong maxmem)
+meminit(uintptr maxmem)
 {
 	Map *mp, *xmp;
-	ulong pa, *pte;
+	uintptr pa, *pte;
 
 	/*
 	 * Set special attributes for memory between 640KB and 1MB:
@@ -444,11 +444,11 @@
 	 * then scan for useful memory.
 	 */
 	for(pa = 0xA0000; pa < 0xC0000; pa += BY2PG){
-		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
+		pte = mmuwalk(m->pdb, (uintptr)KADDR(pa), 2, 0);
 		*pte |= PTEWT;
 	}
 	for(pa = 0xC0000; pa < 0x100000; pa += BY2PG){
-		pte = mmuwalk(m->pdb, (ulong)KADDR(pa), 2, 0);
+		pte = mmuwalk(m->pdb, (uintptr)KADDR(pa), 2, 0);
 		*pte |= PTEUNCACHED;
 	}
 	mmuflushtlb(PADDR(m->pdb));
@@ -479,31 +479,31 @@
 		memdebug();
 }
 
-ulong
-umbmalloc(ulong addr, int size, int align)
+uintptr
+umbmalloc(uintptr addr, int size, int align)
 {
-	ulong a;
+	uintptr a;
 
 	if(a = mapalloc(&rmapumb, addr, size, align))
-		return (ulong)KADDR(a);
+		return (uintptr)KADDR(a);
 
 	return 0;
 }
 
 void
-umbfree(ulong addr, int size)
+umbfree(uintptr addr, int size)
 {
 	mapfree(&rmapumb, PADDR(addr), size);
 }
 
-ulong
-umbrwmalloc(ulong addr, int size, int align)
+uintptr
+umbrwmalloc(uintptr addr, int size, int align)
 {
-	ulong a;
+	uintptr a;
 	uchar *p;
 
 	if(a = mapalloc(&rmapumbrw, addr, size, align))
-		return(ulong)KADDR(a);
+		return(uintptr)KADDR(a);
 
 	/*
 	 * Perhaps the memory wasn't visible before
@@ -522,15 +522,15 @@
 }
 
 void
-umbrwfree(ulong addr, int size)
+umbrwfree(uintptr addr, int size)
 {
 	mapfree(&rmapumbrw, PADDR(addr), size);
 }
 
-ulong
-upamalloc(ulong pa, int size, int align)
+uintptr
+upamalloc(uintptr pa, int size, int align)
 {
-	ulong a, ae;
+	uintptr a, ae;
 
 	if(a = mapalloc(&xrmapupa, pa, size, align))
 		return a;
@@ -564,15 +564,15 @@
 }
 
 void
-upafree(ulong pa, int size)
+upafree(uintptr pa, int size)
 {
 	mapfree(&xrmapupa, pa, size);
 }
 
 void
-upareserve(ulong pa, int size)
+upareserve(uintptr pa, int size)
 {
-	ulong a;
+	uintptr a;
 	
 	a = mapalloc(&rmapupa, pa, size, 0);
 	if(a != pa){
--- a/os/pc/mmu.c
+++ b/os/pc/mmu.c
@@ -336,8 +336,8 @@
 		upafree(PADDR(va), size);
 }
 
-int
-segflush(void*, ulong)
+s32
+segflush(void*, u32)
 {
 	return 0;
 }
--- a/os/pc/mp.c
+++ b/os/pc/mp.c
@@ -4,185 +4,23 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "ureg.h"
 
 #include "mp.h"
-#include "apbootstrap.h"
+#include "apbootstrap.i"
 
-static Bus* mpbus;
-static Bus* mpbuslast;
-static int mpisabus = -1;
-static int mpeisabus = -1;
-extern int i8259elcr;			/* mask of level-triggered interrupts */
-static Apic mpapic[MaxAPICNO+1];
-static int machno2apicno[MaxAPICNO+1];	/* inverse map: machno -> APIC ID */
-static Lock mprdthilock;
-static int mprdthi;
-static Ref mpvnoref;			/* unique vector assignment */
-static int mpmachno = 1;
+extern void i8259init(void);
 
-static char* buses[] = {
-	"CBUSI ",
-	"CBUSII",
-	"EISA  ",
-	"FUTURE",
-	"INTERN",
-	"ISA   ",
-	"MBI   ",
-	"MBII  ",
-	"MCA   ",
-	"MPI   ",
-	"MPSA  ",
-	"NUBUS ",
-	"PCI   ",
-	"PCMCIA",
-	"TC    ",
-	"VL    ",
-	"VME   ",
-	"XPRESS",
-	0,
-};
+/* filled in by pcmpinit or acpiinit */
+Bus* mpbus;
+Bus* mpbuslast;
+int mpisabus = -1;
+int mpeisabus = -1;
+Apic *mpioapic[MaxAPICNO+1];
+Apic *mpapic[MaxAPICNO+1];
 
-static Apic*
-mkprocessor(PCMPprocessor* p)
-{
-	Apic *apic;
-
-	if(!(p->flags & PcmpEN) || p->apicno > MaxAPICNO)
-		return 0;
-
-	apic = &mpapic[p->apicno];
-	apic->type = PcmpPROCESSOR;
-	apic->apicno = p->apicno;
-	apic->flags = p->flags;
-	apic->lintr[0] = ApicIMASK;
-	apic->lintr[1] = ApicIMASK;
-
-	if(p->flags & PcmpBP){
-		machno2apicno[0] = p->apicno;
-		apic->machno = 0;
-	}
-	else{
-		machno2apicno[mpmachno] = p->apicno;
-		apic->machno = mpmachno;
-		mpmachno++;
-	}
-
-	return apic;
-}
-
-static Bus*
-mkbus(PCMPbus* p)
-{
-	Bus *bus;
-	int i;
-
-	for(i = 0; buses[i]; i++){
-		if(strncmp(buses[i], p->string, sizeof(p->string)) == 0)
-			break;
-	}
-	if(buses[i] == 0)
-		return 0;
-
-	bus = xalloc(sizeof(Bus));
-	if(mpbus)
-		mpbuslast->next = bus;
-	else
-		mpbus = bus;
-	mpbuslast = bus;
-
-	bus->type = i;
-	bus->busno = p->busno;
-	if(bus->type == BusEISA){
-		bus->po = PcmpLOW;
-		bus->el = PcmpLEVEL;
-		if(mpeisabus != -1)
-			print("mkbus: more than one EISA bus\n");
-		mpeisabus = bus->busno;
-	}
-	else if(bus->type == BusPCI){
-		bus->po = PcmpLOW;
-		bus->el = PcmpLEVEL;
-	}
-	else if(bus->type == BusISA){
-		bus->po = PcmpHIGH;
-		bus->el = PcmpEDGE;
-		if(mpisabus != -1)
-			print("mkbus: more than one ISA bus\n");
-		mpisabus = bus->busno;
-	}
-	else{
-		bus->po = PcmpHIGH;
-		bus->el = PcmpEDGE;
-	}
-
-	return bus;
-}
-
-static Bus*
-mpgetbus(int busno)
-{
-	Bus *bus;
-
-	for(bus = mpbus; bus; bus = bus->next){
-		if(bus->busno == busno)
-			return bus;
-	}
-	print("mpgetbus: can't find bus %d\n", busno);
-
-	return 0;
-}
-
-static Apic*
-mkioapic(PCMPioapic* p)
-{
-	Apic *apic;
-
-	if(!(p->flags & PcmpEN) || p->apicno > MaxAPICNO)
-		return 0;
-
-	/*
-	 * Map the I/O APIC.
-	 */
-	if(mmukmap(p->addr, 0, 1024) == 0)
-		return 0;
-
-	apic = &mpapic[p->apicno];
-	apic->type = PcmpIOAPIC;
-	apic->apicno = p->apicno;
-	apic->addr = KADDR(p->addr);
-	apic->flags = p->flags;
-
-	return apic;
-}
-
-static Aintr*
-mkiointr(PCMPintr* p)
-{
-	Bus *bus;
-	Aintr *aintr;
-
-	/*
-	 * According to the MultiProcessor Specification, a destination
-	 * I/O APIC of 0xFF means the signal is routed to all I/O APICs.
-	 * It's unclear how that can possibly be correct so treat it as
-	 * an error for now.
-	 */
-	if(p->apicno == 0xFF)
-		return 0;
-	if((bus = mpgetbus(p->busno)) == 0)
-		return 0;
-
-	aintr = xalloc(sizeof(Aintr));
-	aintr->intr = p;
-	aintr->apic = &mpapic[p->apicno];
-	aintr->next = bus->aintr;
-	bus->aintr = aintr;
-
-	return aintr;
-}
-
-static int
+int
 mpintrinit(Bus* bus, PCMPintr* intr, int vno, int /*irq*/)
 {
 	int el, po, v;
@@ -197,9 +35,8 @@
 	el = intr->flags & PcmpELMASK;
 
 	switch(intr->intr){
-
 	default:				/* PcmpINT */
-		v |= ApicLOWEST;
+		v |= ApicFIXED;			/* no-op */
 		break;
 
 	case PcmpNMI:
@@ -256,297 +93,87 @@
 	return v;
 }
 
-static int
-mklintr(PCMPintr* p)
+uvlong
+tscticks(uvlong *hz)
 {
-	Apic *apic;
-	Bus *bus;
-	int intin, v;
+	if(hz != nil)
+		*hz = m->cpuhz;
 
-	/*
-	 * The offsets of vectors for LINT[01] are known to be
-	 * 0 and 1 from the local APIC vector space at VectorLAPIC.
-	 */
-	if((bus = mpgetbus(p->busno)) == 0)
-		return 0;
-	intin = p->intin;
-
-	/*
-	 * Pentium Pros have problems if LINT[01] are set to ExtINT
-	 * so just bag it, SMP mode shouldn't need ExtINT anyway.
-	 */
-	if(p->intr == PcmpExtINT || p->intr == PcmpNMI)
-		v = ApicIMASK;
-	else
-		v = mpintrinit(bus, p, VectorLAPIC+intin, p->irq);
-
-	if(p->apicno == 0xFF){
-		for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
-			if((apic->flags & PcmpEN)
-			&& apic->type == PcmpPROCESSOR)
-				apic->lintr[intin] = v;
-		}
-	}
-	else{
-		apic = &mpapic[p->apicno];
-		if((apic->flags & PcmpEN) && apic->type == PcmpPROCESSOR)
-			apic->lintr[intin] = v;
-	}
-
-	return v;
+	cycles(&m->tscticks);	/* Uses the rdtsc instruction */
+	return m->tscticks;
 }
 
-static void
-checkmtrr(void)
+void
+syncclock(void)
 {
-	int i, vcnt;
-	Mach *mach0;
+	uvlong x;
 
-	/*
-	 * If there are MTRR registers, snarf them for validation.
-	 */
-	if(!(m->cpuiddx & 0x1000))
+	if(arch->fastclock != tscticks)
 		return;
 
-	rdmsr(0x0FE, &m->mtrrcap);
-	rdmsr(0x2FF, &m->mtrrdef);
-	if(m->mtrrcap & 0x0100){
-		rdmsr(0x250, &m->mtrrfix[0]);
-		rdmsr(0x258, &m->mtrrfix[1]);
-		rdmsr(0x259, &m->mtrrfix[2]);
-		for(i = 0; i < 8; i++)
-			rdmsr(0x268+i, &m->mtrrfix[(i+3)]);
+	if(m->machno == 0){
+		wrmsr(0x10, 0);
+		m->tscticks = 0;
+	} else {
+		x = MACHP(0)->tscticks;
+		while(x == MACHP(0)->tscticks)
+			;
+		wrmsr(0x10, MACHP(0)->tscticks);
+		cycles(&m->tscticks);
 	}
-	vcnt = m->mtrrcap & 0x00FF;
-	if(vcnt > nelem(m->mtrrvar))
-		vcnt = nelem(m->mtrrvar);
-	for(i = 0; i < vcnt; i++)
-		rdmsr(0x200+i, &m->mtrrvar[i]);
-
-	/*
-	 * If not the bootstrap processor, compare.
-	 */
-	if(m->machno == 0)
-		return;
-
-	mach0 = MACHP(0);
-	if(mach0->mtrrcap != m->mtrrcap)
-		print("mtrrcap%d: %lluX %lluX\n",
-			m->machno, mach0->mtrrcap, m->mtrrcap);
-	if(mach0->mtrrdef != m->mtrrdef)
-		print("mtrrdef%d: %lluX %lluX\n",
-			m->machno, mach0->mtrrdef, m->mtrrdef);
-	for(i = 0; i < 11; i++){
-		if(mach0->mtrrfix[i] != m->mtrrfix[i])
-			print("mtrrfix%d: i%d: %lluX %lluX\n",
-				m->machno, i, mach0->mtrrfix[i], m->mtrrfix[i]);
-	}
-	for(i = 0; i < vcnt; i++){
-		if(mach0->mtrrvar[i] != m->mtrrvar[i])
-			print("mtrrvar%d: i%d: %lluX %lluX\n",
-				m->machno, i, mach0->mtrrvar[i], m->mtrrvar[i]);
-	}
 }
 
-static void
-squidboy(Apic* apic)
-{
-//	iprint("Hello Squidboy\n");
-
-	machinit();
-	mmuinit();
-
-	cpuidentify();
-	cpuidprint();
-	checkmtrr();
-
-	lock(&mprdthilock);
-	mprdthi |= (1<<apic->apicno)<<24;
-	unlock(&mprdthilock);
-
-	lapicinit(apic);
-	lapiconline();
-	syncclock();
-	timersinit();
-
-	fpoff();
-
-	lock(&active);
-	active.machs |= 1<<m->machno;
-	unlock(&active);
-
-	while(!active.thunderbirdsarego)
-		microdelay(100);
-
-	schedinit();
-}
-
-static void
-mpstartap(Apic* apic)
-{
-	ulong *apbootp, *pdb, *pte;
-	Mach *mach, *mach0;
-	int i, machno;
-	uchar *p;
-
-	mach0 = MACHP(0);
-
-	/*
-	 * Initialise the AP page-tables and Mach structure. The page-tables
-	 * are the same as for the bootstrap processor with the exception of
-	 * the PTE for the Mach structure.
-	 * Xspanalloc will panic if an allocation can't be made.
-	 */
-	p = xspanalloc(4*BY2PG, BY2PG, 0);
-	pdb = (ulong*)p;
-	memmove(pdb, mach0->pdb, BY2PG);
-	p += BY2PG;
-
-	if((pte = mmuwalk(pdb, MACHADDR, 1, 0)) == nil)
-		return;
-	memmove(p, KADDR(PPN(*pte)), BY2PG);
-	*pte = PADDR(p)|PTEWRITE|PTEVALID;
-	if(mach0->havepge)
-		*pte |= PTEGLOBAL;
-	p += BY2PG;
-
-	mach = (Mach*)p;
-	if((pte = mmuwalk(pdb, MACHADDR, 2, 0)) == nil)
-		return;
-	*pte = PADDR(mach)|PTEWRITE|PTEVALID;
-	if(mach0->havepge)
-		*pte |= PTEGLOBAL;
-	p += BY2PG;
-
-	machno = apic->machno;
-	MACHP(machno) = mach;
-	mach->machno = machno;
-	mach->pdb = pdb;
-	mach->gdt = (Segdesc*)p;	/* filled by mmuinit */
-
-	/*
-	 * Tell the AP where its kernel vector and pdb are.
-	 * The offsets are known in the AP bootstrap code.
-	 */
-	apbootp = (ulong*)(APBOOTSTRAP+0x08);
-	*apbootp++ = (ulong)squidboy;
-	*apbootp++ = PADDR(pdb);
-	*apbootp = (ulong)apic;
-
-	/*
-	 * Universal Startup Algorithm.
-	 */
-	p = KADDR(0x467);
-	*p++ = PADDR(APBOOTSTRAP);
-	*p++ = PADDR(APBOOTSTRAP)>>8;
-	i = (PADDR(APBOOTSTRAP) & ~0xFFFF)/16;
-	*p++ = i;
-	*p = i>>8;
-
-	nvramwrite(0x0F, 0x0A);
-	lapicstartap(apic, PADDR(APBOOTSTRAP));
-	for(i = 0; i < 1000; i++){
-		lock(&mprdthilock);
-		if(mprdthi & ((1<<apic->apicno)<<24)){
-			unlock(&mprdthilock);
-			break;
-		}
-		unlock(&mprdthilock);
-		delay(10);
-	}
-	nvramwrite(0x0F, 0x00);
-}
-
 void
 mpinit(void)
 {
-	int ncpu;
+	int ncpu, i;
+	Apic *apic;
 	char *cp;
-	PCMP *pcmp;
-	uchar *e, *p;
-	Apic *apic, *bpapic;
 
 	i8259init();
 	syncclock();
 
-	if(_mp_ == 0)
-		return;
-	pcmp = KADDR(_mp_->physaddr);
+	if(getconf("*apicdebug")){
+		Bus *b;
+		Aintr *ai;
+		PCMPintr *pi;
 
-	/*
-	 * Map the local APIC.
-	 */
-	if(mmukmap(pcmp->lapicbase, 0, 1024) == 0)
-		return;
-
-	bpapic = nil;
-
-	/*
-	 * Run through the table saving information needed for starting
-	 * application processors and initialising any I/O APICs. The table
-	 * is guaranteed to be in order such that only one pass is necessary.
-	 */
-	p = ((uchar*)pcmp)+sizeof(PCMP);
-	e = ((uchar*)pcmp)+pcmp->length;
-	while(p < e) switch(*p){
-
-	default:
-		print("mpinit: unknown PCMP type 0x%uX (e-p 0x%luX)\n",
-			*p, e-p);
-		while(p < e){
-			print("%uX ", *p);
-			p++;
+		for(i=0; i<=MaxAPICNO; i++){
+			if(apic = mpapic[i])
+				print("LAPIC%d: pa=%lux va=%#p flags=%x\n",
+					i, apic->paddr, apic->addr, apic->flags);
+			if(apic = mpioapic[i])
+				print("IOAPIC%d: pa=%lux va=%#p flags=%x gsibase=%d mre=%d\n",
+					i, apic->paddr, apic->addr, apic->flags, apic->gsibase, apic->mre);
 		}
-		break;
-
-	case PcmpPROCESSOR:
-		if(apic = mkprocessor((PCMPprocessor*)p)){
-			/*
-			 * Must take a note of bootstrap processor APIC
-			 * now as it will be needed in order to start the
-			 * application processors later and there's no
-			 * guarantee that the bootstrap processor appears
-			 * first in the table before the others.
-			 */
-			apic->addr = KADDR(pcmp->lapicbase);
-			if(apic->flags & PcmpBP)
-				bpapic = apic;
+		for(b = mpbus; b; b = b->next){
+			print("BUS%d type=%d flags=%x\n", b->busno, b->type, b->po|b->el);
+			for(ai = b->aintr; ai; ai = ai->next){
+				if(pi = ai->intr)
+					print("\ttype=%d irq=%d (%d [%c]) apic=%d intin=%d flags=%x\n",
+						pi->type, pi->irq, pi->irq>>2, "ABCD"[pi->irq&3],
+						pi->apicno, pi->intin, pi->flags);
+			}
 		}
-		p += sizeof(PCMPprocessor);
-		continue;
+	}
 
-	case PcmpBUS:
-		mkbus((PCMPbus*)p);
-		p += sizeof(PCMPbus);
-		continue;
-
-	case PcmpIOAPIC:
-		if(apic = mkioapic((PCMPioapic*)p))
-			ioapicinit(apic, ((PCMPioapic*)p)->apicno);
-		p += sizeof(PCMPioapic);
-		continue;
-
-	case PcmpIOINTR:
-		mkiointr((PCMPintr*)p);
-		p += sizeof(PCMPintr);
-		continue;
-
-	case PcmpLINTR:
-		mklintr((PCMPintr*)p);
-		p += sizeof(PCMPintr);
-		continue;
+	apic = nil;
+	for(i=0; i<=MaxAPICNO; i++){
+		if(mpapic[i] == nil)
+			continue;
+		if(mpapic[i]->flags & PcmpBP){
+			apic = mpapic[i];
+			break;
+		}
 	}
 
-	/*
-	 * No bootstrap processor, no need to go further.
-	 */
-	if(bpapic == 0)
+	if(apic == nil){
+		panic("mpinit: no bootstrap processor");
 		return;
+	}
+	apic->online = 1;
 
-	lapicinit(bpapic);
-	lock(&mprdthilock);
-	mprdthi |= (1<<bpapic->apicno)<<24;
-	unlock(&mprdthilock);
+	lapicinit(apic);
 
 	/*
 	 * These interrupts are local to the processor
@@ -558,8 +185,6 @@
 	intrenable(IrqSPURIOUS, lapicspurious, 0, BUSUNKNOWN, "lapicspurious");
 	lapiconline();
 
-	checkmtrr();
-
 	/*
 	 * Initialise the application processors.
 	 */
@@ -567,15 +192,20 @@
 		ncpu = strtol(cp, 0, 0);
 		if(ncpu < 1)
 			ncpu = 1;
+		else if(ncpu > MAXMACH)
+			ncpu = MAXMACH;
 	}
 	else
-		ncpu = MaxAPICNO;
+		ncpu = MAXMACH;
 	memmove((void*)APBOOTSTRAP, apbootstrap, sizeof(apbootstrap));
-	for(apic = mpapic; apic <= &mpapic[MaxAPICNO]; apic++){
+	for(i=0; i<nelem(mpapic); i++){
+		if((apic = mpapic[i]) == nil)
+			continue;
+		if(apic->machno >= MAXMACH)
+			continue;
 		if(ncpu <= 1)
 			break;
-		if((apic->flags & (PcmpBP|PcmpEN)) == PcmpEN
-		&& apic->type == PcmpPROCESSOR){
+		if((apic->flags & (PcmpBP|PcmpEN)) == PcmpEN){
 			mpstartap(apic);
 			conf.nmach++;
 			ncpu--;
@@ -589,35 +219,156 @@
 	 *  set conf.copymode here if nmach > 1.
 	 *  Should look for an ExtINT line and enable it.
 	 */
-	if(X86FAMILY(m->cpuidax) == 3 || conf.nmach > 1)
+	if(m->cpuidfamily == 3 || conf.nmach > 1)
 		conf.copymode = 1;
 }
 
 static int
-mpintrenablex(Vctl* v, int tbdf)
+mpintrcpu(void)
 {
-	Bus *bus;
-	Aintr *aintr;
-	Apic *apic;
-	Pcidev *pcidev;
-	int bno, dno, irq, lo, n, type, vno;
+	static Lock physidlock;
+	static int physid;
+	int i;
 
 	/*
-	 * Find the bus.
+	 * The bulk of this code was written ~1995, when there was
+	 * one architecture and one generation of hardware, the number
+	 * of CPUs was up to 4(8) and the choices for interrupt routing
+	 * were physical, or flat logical (optionally with lowest
+	 * priority interrupt). Logical mode hasn't scaled well with
+	 * the increasing number of packages/cores/threads, so the
+	 * fall-back is to physical mode, which works across all processor
+	 * generations, both AMD and Intel, using the APIC and xAPIC.
+	 *
+	 * Interrupt routing policy can be set here.
+	 * Currently, just assign each interrupt to a different CPU on
+	 * a round-robin basis. Some idea of the packages/cores/thread
+	 * topology would be useful here, e.g. to not assign interrupts
+	 * to more than one thread in a core, or to use a "noise" core.
+	 * But, as usual, Intel make that an onerous task. 
 	 */
+	lock(&physidlock);
+	for(;;){
+		i = physid++;
+		if(physid >= nelem(mpapic))
+			physid = 0;
+		if(mpapic[i] == nil)
+			continue;
+		if(mpapic[i]->online)
+			break;
+	}
+	unlock(&physidlock);
+
+	return mpapic[i]->apicno;
+}
+
+/*
+ * With the APIC a unique vector can be assigned to each
+ * request to enable an interrupt. There are two reasons this
+ * is a good idea:
+ * 1) to prevent lost interrupts, no more than 2 interrupts
+ *    should be assigned per block of 16 vectors (there is an
+ *    in-service entry and a holding entry for each priority
+ *    level and there is one priority level per block of 16
+ *    interrupts).
+ * 2) each input pin on the IOAPIC will receive a different
+ *    vector regardless of whether the devices on that pin use
+ *    the same IRQ as devices on another pin.
+ */
+static int
+allocvector(void)	/* hand out the next interrupt vector, spacing successive vectors 8 apart */
+{
+	static int round = 0, num = 0;	/* round: completed passes; num: offset of the next vector from VectorAPIC */
+	static Lock l;
+	int vno;
+	
+	lock(&l);
+	vno = VectorAPIC + num;
+	if(vno < MaxVectorAPIC-7)
+		num += 8;	/* stride of 8 gives two vectors per block of 16 */
+	else
+		num = ++round % 8;	/* range exhausted: restart with the start offset shifted by one */
+	unlock(&l);
+	return vno;
+}
+
+static int
+ioapicirqenable(Vctl *v, int shared)	/* write the redirection entry for v's pin; always returns 0 */
+{
+	Aintr *aintr = v->aux;	/* v->aux holds the Aintr for this interrupt */
+	int lo, hi;
+
+	if(shared)
+		return 0;	/* shared: the entry is left untouched */
+	hi = v->cpu<<24;	/* target cpu goes in the top byte of the high word */
+	lo = mpintrinit(aintr->bus, aintr->intr, v->vno, v->irq);
+	lo |= ApicPHYSICAL;			/* no-op */
+ 	ioapicrdtw(aintr->apic, aintr->intr->intin, hi, lo);
+	return 0;
+}
+
+static int
+ioapicirqdisable(Vctl *v, int shared)	/* mask the redirection entry for v's pin; always returns 0 */
+{
+	Aintr *aintr = v->aux;	/* v->aux holds the Aintr for this interrupt */
+	int lo, hi;
+
+	if(shared)
+		return 0;	/* shared: the entry is left untouched */
+	hi = 0;
+	lo = ApicIMASK;	/* entry is rewritten as just the mask bit */
+ 	ioapicrdtw(aintr->apic, aintr->intr->intin, hi, lo);
+	return 0;
+}
+
+static int
+mpintrassignx(Vctl* v, int tbdf)
+{
+	Bus *bus;
+	Pcidev *pci;
+	Aintr *aintr;
+	int bno, dno, pin, irq, type, lo, hi, n;
+
 	type = BUSTYPE(tbdf);
 	bno = BUSBNO(tbdf);
 	dno = BUSDNO(tbdf);
-	n = 0;
+
+	pin = 0;
+	pci = nil;
+	if(type == BusPCI){
+		if((pci = pcimatchtbdf(tbdf)) != nil)
+			pin = pcicfgr8(pci, PciINTP);
+	} else if(type == BusISA)
+		bno = mpisabus;
+
+Findbus:
 	for(bus = mpbus; bus != nil; bus = bus->next){
 		if(bus->type != type)
 			continue;
-		if(n == bno)
+		if(bus->busno == bno)
 			break;
-		n++;
 	}
+
 	if(bus == nil){
-		print("ioapicirq: can't find bus type %d\n", type);
+		/*
+		 * if the PCI device is behind a bridge thats not described
+		 * by the MP or ACPI tables then walk up the bus translating
+		 * interrupt pin to parent bus.
+		 */
+		if(pci != nil && pci->parent != nil && pin > 0){
+			pci = pci->parent;
+			if(pci->ccrb == 6 && pci->ccru == 7){
+				/* Cardbus bridge, use controllers interrupt pin */
+				pin = pcicfgr8(pci, PciINTP);
+			} else {
+				/* PCI-PCI bridge */
+				pin = ((dno+(pin-1))%4)+1;
+			}
+			bno = BUSBNO(pci->tbdf);
+			dno = BUSDNO(pci->tbdf);
+			goto Findbus;
+		}
+		print("mpintrassign: can't find bus type %d, number %d\n", type, bno);
 		return -1;
 	}
 
@@ -624,18 +375,14 @@
 	/*
 	 * For PCI devices the interrupt pin (INT[ABCD]) and device
 	 * number are encoded into the entry irq field, so create something
-	 * to match on. The interrupt pin used by the device has to be
-	 * obtained from the PCI config space.
+	 * to match on.
 	 */
 	if(bus->type == BusPCI){
-		pcidev = pcimatchtbdf(tbdf);
-		if(pcidev != nil && (n = pcicfgr8(pcidev, PciINTP)) != 0)
-			irq = (dno<<2)|(n-1);
+		if(pin > 0)
+			irq = (dno<<2)|(pin-1);
 		else
 			irq = -1;
-		//print("pcidev %uX: irq %uX v->irq %uX\n", tbdf, irq, v->irq);
-	}
-	else
+	} else
 		irq = v->irq;
 
 	/*
@@ -642,7 +389,7 @@
 	 * Find a matching interrupt entry from the list of interrupts
 	 * attached to this bus.
 	 */
-	for(aintr = bus->aintr; aintr; aintr = aintr->next){
+	for(aintr = bus->aintr; aintr != nil; aintr = aintr->next){
 		if(aintr->intr->irq != irq)
 			continue;
 
@@ -650,80 +397,162 @@
 		 * Check if already enabled. Multifunction devices may share
 		 * INT[A-D]# so, if already enabled, check the polarity matches
 		 * and the trigger is level.
-		 *
-		 * Should check the devices differ only in the function number,
-		 * but that can wait for the planned enable/disable rewrite.
-		 * The RDT read here is safe for now as currently interrupts
-		 * are never disabled once enabled.
 		 */
-		apic = aintr->apic;
-		ioapicrdtr(apic, aintr->intr->intin, 0, &lo);
-		if(!(lo & ApicIMASK)){
-			vno = lo & 0xFF;
-			n = mpintrinit(bus, aintr->intr, vno, v->irq);
-			n |= ApicLOGICAL;
-			if(n != lo || !(n & ApicLEVEL)){
-				print("mpintrenable: multiple botch irq%d, tbdf %uX, lo %8.8uX, n %8.8uX\n",
-					v->irq, tbdf, lo, n);
-				return -1;
+		ioapicrdtr(aintr->apic, aintr->intr->intin, &hi, &lo);
+		if(lo & ApicIMASK){
+			v->vno = allocvector();
+			v->cpu = mpintrcpu();
+			lo = mpintrinit(aintr->bus, aintr->intr, v->vno, v->irq);
+			lo |= ApicPHYSICAL;			/* no-op */
+			if(lo & ApicIMASK){
+				print("mpintrassign: disabled irq %d, tbdf %uX, lo %8.8uX, hi %8.8uX\n",
+					v->irq, v->tbdf, lo, hi);
+				break;
 			}
+		} else {
+			v->vno = lo & 0xFF;
+			v->cpu = hi >> 24;
+			lo &= ~(ApicRemoteIRR|ApicDELIVS);
+			n = mpintrinit(aintr->bus, aintr->intr, v->vno, v->irq);
+			n |= ApicPHYSICAL;			/* no-op */
+			if(lo != n){
+				print("mpintrassign: multiple botch irq %d, tbdf %uX, lo %8.8uX, n %8.8uX\n",
+					v->irq, v->tbdf, lo, n);
+				break;
+			}
+		}
 
-			v->isr = lapicisr;
-			v->eoi = lapiceoi;
+		v->isr = lapicisr;
+		v->eoi = lapiceoi;
 
-			return vno;
+		if((aintr->apic->flags & PcmpEN) && aintr->apic->type == PcmpIOAPIC){
+			v->aux = aintr;
+			v->enable = ioapicirqenable;
+			v->disable = ioapicirqdisable;
 		}
 
-		/*
-		 * With the APIC a unique vector can be assigned to each
-		 * request to enable an interrupt. There are two reasons this
-		 * is a good idea:
-		 * 1) to prevent lost interrupts, no more than 2 interrupts
-		 *    should be assigned per block of 16 vectors (there is an
-		 *    in-service entry and a holding entry for each priority
-		 *    level and there is one priority level per block of 16
-		 *    interrupts).
-		 * 2) each input pin on the IOAPIC will receive a different
-		 *    vector regardless of whether the devices on that pin use
-		 *    the same IRQ as devices on another pin.
-		 */
-		vno = VectorAPIC + (incref(&mpvnoref)-1)*8;
-		if(vno > MaxVectorAPIC){
-			print("mpintrenable: vno %d, irq %d, tbdf %uX\n",
-				vno, v->irq, tbdf);
-			return -1;
-		}
-		lo = mpintrinit(bus, aintr->intr, vno, v->irq);
-		//print("lo 0x%uX: busno %d intr %d vno %d irq %d elcr 0x%uX\n",
-		//	lo, bus->busno, aintr->intr->irq, vno,
-		//	v->irq, i8259elcr);
-		if(lo & ApicIMASK)
-			return -1;
-		lo |= ApicLOGICAL;
+		return v->vno;
+	}
 
-		if((apic->flags & PcmpEN) && apic->type == PcmpIOAPIC){
-			lock(&mprdthilock);
- 			ioapicrdtw(apic, aintr->intr->intin, mprdthi, lo);
-			unlock(&mprdthilock);
-		}
-		//else
-		//	print("lo not enabled 0x%uX %d\n",
-		//		apic->flags, apic->type);
+	return -1;
+}
 
-		v->isr = lapicisr;
-		v->eoi = lapiceoi;
+enum {	/* constants used by htmsicapenable() */
+	HTMSIMapping	= 0xA8,	/* capability selector passed to pcihtcap() */
+	HTMSIFlags	= 0x02,	/* config-space offset of the flags byte, relative to the capability */
+	HTMSIFlagsEn	= 0x01,	/* enable bit within the flags byte */
+};
 
-		return vno;
+static int	/* 0 if p has the HTMSIMapping capability (enable bit set on return), else -1 */
+htmsicapenable(Pcidev *p)
+{
+	int cap, flags;
+
+	if((cap = pcihtcap(p, HTMSIMapping)) <= 0)	/* zero or negative offset: capability not found */
+		return -1;
+	flags = pcicfgr8(p, cap + HTMSIFlags);
+	if((flags & HTMSIFlagsEn) == 0)	/* write config space only when the bit is still clear */
+		pcicfgw8(p, cap + HTMSIFlags, flags | HTMSIFlagsEn);
+	return 0;
+}
+
+static int
+htmsienable(Pcidev *pdev)
+{
+	Pcidev *p;
+
+	p = nil;
+	while((p = pcimatch(p, 0x1022, 0)) != nil)
+		if(p->did == 0x1103 || p->did == 0x1203)
+			break;
+
+	if(p == nil)
+		return 0;	/* not hypertransport platform */
+
+	p = nil;
+	while((p = pcimatch(p, 0x10de, 0)) != nil){
+		switch(p->did){
+		case 0x02f0:	/* NVIDIA NFORCE C51 MEMC0 */
+		case 0x02f1:	/* NVIDIA NFORCE C51 MEMC1 */
+		case 0x02f2:	/* NVIDIA NFORCE C51 MEMC2 */
+		case 0x02f3:	/* NVIDIA NFORCE C51 MEMC3 */
+		case 0x02f4:	/* NVIDIA NFORCE C51 MEMC4 */
+		case 0x02f5:	/* NVIDIA NFORCE C51 MEMC5 */
+		case 0x02f6:	/* NVIDIA NFORCE C51 MEMC6 */
+		case 0x02f7:	/* NVIDIA NFORCE C51 MEMC7 */
+		case 0x0369:	/* NVIDIA NFORCE MCP55 MEMC */
+			htmsicapenable(p);
+			break;
+		}
 	}
 
+	if(htmsicapenable(pdev) == 0)
+		return 0;
+
+	for(p = pdev->parent; p != nil; p = p->parent)
+		if(htmsicapenable(p) == 0)
+			return 0;
+
 	return -1;
 }
 
+static int	/* Vctl enable hook for MSI; the second (unnamed) argument is ignored */
+msiirqenable(Vctl *v, int)
+{
+	Pcidev *pci = v->aux;	/* msiintrenable() stores the Pcidev in v->aux */
+	return pcimsienable(pci, 0xFEE00000ULL | (v->cpu << 12), v->vno | (1<<14));	/* address carries v->cpu from bit 12, data carries the vector plus bit 14 */
+}
+
+static int	/* Vctl disable hook for MSI; the second (unnamed) argument is ignored */
+msiirqdisable(Vctl *v, int)
+{
+	Pcidev *pci = v->aux;	/* msiintrenable() stores the Pcidev in v->aux */
+	return pcimsidisable(pci);	/* result of pcimsidisable() is passed straight back */
+}
+
+static int	/* prepares v for MSI delivery; returns the vector, or -1 if MSI is not used */
+msiintrenable(Vctl *v)
+{
+	Pcidev *pci;
+	int tbdf;
+
+	if(getconf("*nomsi") != nil)	/* configuration variable turns MSI off entirely */
+		return -1;
+
+	tbdf = v->tbdf;
+	if(tbdf == BUSUNKNOWN || BUSTYPE(tbdf) != BusPCI)	/* only PCI devices are considered */
+		return -1;
+	pci = pcimatchtbdf(tbdf);
+	if(pci == nil) {
+		print("msiintrenable: could not find Pcidev for tbdf %uX\n", tbdf);
+		return -1;
+	}
+	if(htmsienable(pci) < 0)
+		return -1;
+	if(pcimsidisable(pci) < 0)	/* leave MSI off for now; v->enable turns it on later (failure presumably means no MSI capability) */
+		return -1;
+
+	v->vno = allocvector();
+	v->cpu = mpintrcpu();
+	v->isr = lapicisr;
+	v->eoi = lapiceoi;
+
+	v->aux = pci;	/* read back by msiirqenable() and msiirqdisable() */
+	v->enable = msiirqenable;
+	v->disable = msiirqdisable;
+
+	return v->vno;
+}
+
 int
-mpintrenable(Vctl* v)
+mpintrassign(Vctl* v)
 {
 	int irq, tbdf, vno;
 
+	vno = msiintrenable(v);
+	if(vno != -1)
+		return vno;
+
 	/*
 	 * If the bus is known, try it.
 	 * BUSUNKNOWN is given both by [E]ISA devices and by
@@ -731,17 +560,18 @@
 	 * breakpoint and page-fault).
 	 */
 	tbdf = v->tbdf;
-	if(tbdf != BUSUNKNOWN && (vno = mpintrenablex(v, tbdf)) != -1)
+	if(tbdf != BUSUNKNOWN && (vno = mpintrassignx(v, tbdf)) != -1)
 		return vno;
 
 	irq = v->irq;
 	if(irq >= IrqLINT0 && irq <= MaxIrqLAPIC){
+		v->local = 1;
 		if(irq != IrqSPURIOUS)
 			v->isr = lapiceoi;
 		return VectorPIC+irq;
 	}
 	if(irq < 0 || irq > MaxIrqPIC){
-		print("mpintrenable: irq %d out of range\n", irq);
+		print("mpintrassign: irq %d out of range\n", irq);
 		return -1;
 	}
 
@@ -757,40 +587,31 @@
 	 * be compatible with ISA.
 	 */
 	if(mpeisabus != -1){
-		vno = mpintrenablex(v, MKBUS(BusEISA, 0, 0, 0));
+		vno = mpintrassignx(v, MKBUS(BusEISA, 0, 0, 0));
 		if(vno != -1)
 			return vno;
 	}
 	if(mpisabus != -1){
-		vno = mpintrenablex(v, MKBUS(BusISA, 0, 0, 0));
+		vno = mpintrassignx(v, MKBUS(BusISA, 0, 0, 0));
 		if(vno != -1)
 			return vno;
 	}
-
+	print("mpintrassign: out of choices eisa %d isa %d tbdf %uX irq %d\n",
+		mpeisabus, mpisabus, v->tbdf, v->irq);
 	return -1;
 }
 
-static Lock mpshutdownlock;
-
 void
 mpshutdown(void)
 {
 	/*
-	 * To be done...
+	 * Park application processors.
 	 */
-	if(!canlock(&mpshutdownlock)){
-		/*
-		 * If this processor received the CTRL-ALT-DEL from
-		 * the keyboard, acknowledge it. Send an INIT to self.
-		 */
-#ifdef FIXTHIS
-		if(lapicisr(VectorKBD))
-			lapiceoi(VectorKBD);
-#endif /* FIX THIS */
-		idle();
+	if(m->machno != 0){
+		splhi();
+		arch->intrdisable(0);
+		for(;;) idle();
 	}
-
-	print("apshutdown: active = 0x%2.2uX\n", active.machs);
 	delay(1000);
 	splhi();
 
@@ -799,17 +620,5 @@
 	 */
 	lapicicrw(0, 0x000C0000|ApicINIT);
 
-#ifdef notdef
-	/*
-	 * Often the BIOS hangs during restart if a conventional 8042
-	 * warm-boot sequence is tried. The following is Intel specific and
-	 * seems to perform a cold-boot, but at least it comes back.
-	 */
-	*(ushort*)KADDR(0x472) = 0x1234;	/* BIOS warm-boot flag */
-	outb(0xCF9, 0x02);
-	outb(0xCF9, 0x06);
-#else
 	pcireset();
-	i8042reset();
-#endif /* notdef */
 }
--- /dev/null
+++ b/os/pc/mtrr.c
@@ -1,0 +1,789 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+enum {	/* MSR numbers (left margin) and bit fields within them (indented) */
+	/*
+	 * MTRR Physical base/mask are indexed by
+	 *	MTRRPhys{Base|Mask}N = MTRRPhys{Base|Mask}0 + 2*N
+	 */
+	MTRRPhysBase0 = 0x200,
+	MTRRPhysMask0 = 0x201,
+
+	MTRRDefaultType = 0x2FF,	/* getstate() reads this MSR into State.def */
+		Deftype = 0xFF,		/* default MTRR type */
+		Deffixena = 1<<10,	/* fixed-range MTRR enable */
+		Defena	= 1<<11,	/* MTRR enable */
+
+	MTRRCap = 0xFE,	/* getstate() reads this MSR into State.cap */
+		Capvcnt = 0xFF,		/* mask: # of variable-range MTRRs we have */
+		Capwc = 1<<8,		/* flag: have write combining? */
+		Capfix = 1<<10,		/* flag: have fixed MTRRs? */
+
+	AMDK8SysCfg = 0xC0010010,	/* read by getstate() only on AuthenticAMD, family >= 15 */
+		Tom2Enabled = 1<<21,
+		Tom2ForceMemTypeWB = 1<<22,	/* getstate() keeps tom2 only when both bits are set */
+
+	AMDK8TopMem2 = 0xC001001D,	/* getstate() reads this MSR into State.tom2 */
+};
+
+enum {
+	Nvarreg = 8,	/* capacity of State.varreg; getstate() clamps nvarreg to it */
+	Nfixreg = 11*8,	/* 11 fixed-range MSRs with 8 one-byte type fields each */
+	Nranges = Nfixreg+Nvarreg*2+1,	/* length of the ranges[] scratch array */
+};
+
+typedef struct Varreg Varreg;	/* raw contents of one variable-range base/mask MSR pair */
+struct Varreg {
+	s64	base;	/* low byte is the type, bits 12 and up the base address (see getvarreg) */
+	s64	mask;	/* bit 11 marks the register as in use (see getvarreg/setvarreg) */
+};
+
+typedef struct Fixreg Fixreg;	/* description of one fixed-range MSR; see the fixreg[] table */
+struct Fixreg {
+	int	msr;	/* MSR number */
+	u32	base;	/* physical address covered by the first of its 8 fields */
+	u32	size;	/* bytes covered by each field */
+};
+
+typedef struct State State;	/* snapshot of MTRR state: filled by getstate(), loaded by putstate() */
+struct State {
+	u64	mask;	/* physical address mask from physmask(); 0 in cpu0state means no state captured */
+	s64	cap;	/* MTRRCap MSR */
+	s64	def;	/* MTRRDefaultType MSR */
+	s64	tom2;	/* AMD top-of-memory-2, masked and rounded by getstate(); 0 when not applicable */
+	int	nvarreg;	/* number of valid entries in varreg[] */
+	Varreg	varreg[Nvarreg];
+	s64	fixreg[Nfixreg/8];	/* raw fixed-range MSR values, same order as the fixreg[] table */
+};
+
+typedef struct Range Range;	/* a run of physical addresses sharing one cache type */
+struct Range {
+	u64	base;	/* first address */
+	u64	size;	/* length in bytes */
+	int	type;	/* Uncacheable..Writeback; may be -1 when it comes from preftype() */
+};
+
+enum {	/* cache type codes as stored in the MTRR type fields; also the index into types[] */
+	Uncacheable	= 0,
+	Writecomb	= 1,
+	Unknown1	= 2,
+	Unknown2	= 3,
+	Writethru	= 4,
+	Writeprot	= 5,
+	Writeback	= 6,
+};
+
+static char *types[] = {	/* names matched by str2type() and returned by type2str() */
+	[Uncacheable]	"uc",
+	[Writecomb]	"wc",
+	[Unknown1]	"uk1",
+	[Unknown2]	"uk2",
+	[Writethru]	"wt",
+	[Writeprot]	"wp",
+	[Writeback]	"wb",
+};
+
+static char *	/* name of a cache type code, or nil when the code is outside types[] */
+type2str(int type)
+{
+	if(type < 0 || type >= nelem(types))	/* also catches the -1 that preftype() can produce */
+		return nil;
+	return types[type];
+}
+
+static int	/* cache type code for an exact name from types[], or -1 if none matches */
+str2type(char *str)
+{
+	int type;
+
+	for(type = 0; type < nelem(types); type++){
+		if(strcmp(str, types[type]) == 0)	/* every slot of types[] is initialised, so none is nil */
+			return type;
+	}
+	return -1;
+}
+
+static int	/* decodes variable register `index' into *rp; returns 0 (rp untouched) if it is unused */
+getvarreg(State *s, Range *rp, int index)
+{
+	Varreg *reg = &s->varreg[index];
+
+	if((reg->mask & (1<<11)) == 0)	/* bit 11 of the mask marks the register as in use */
+		return 0;
+	rp->base = reg->base & ~0xFFFULL;
+	rp->type = reg->base & 0xFF;
+	rp->size = (s->mask ^ (reg->mask & ~0xFFFULL)) + 1;	/* address bits cleared in the mask, plus one; inverse of setvarreg() */
+	return 1;
+}
+
+static void	/* encodes *rp into variable register `index'; nil or empty rp clears the register */
+setvarreg(State *s, Range *rp, int index)
+{
+	Varreg *reg = &s->varreg[index];
+
+	if(rp == nil || rp->size == 0){
+		reg->base = 0;
+		reg->mask = 0;
+		return;
+	}
+	reg->base = rp->base | (rp->type & 0xFF);	/* type goes in the low byte of the base */
+	reg->mask = (s->mask & ~(rp->size-1)) | 1<<11;	/* assumes rp->size is a power of two, as range2varreg() supplies; bit 11 marks it in use */
+}
+
+static Fixreg fixreg[Nfixreg/8] = {	/* per row: msr, base, size of each of the 8 fields */
+	0x250, 0x00000, 0x10000,
+
+	0x258, 0x80000, 0x04000,
+	0x259, 0xA0000, 0x04000,
+
+	0x268, 0xC0000, 0x01000,
+	0x269, 0xC8000, 0x01000,
+	0x26A, 0xD0000, 0x01000,
+	0x26B, 0xD8000, 0x01000,
+	0x26C, 0xE0000, 0x01000,
+	0x26D, 0xE8000, 0x01000,
+	0x26E, 0xF0000, 0x01000,
+	0x26F, 0xF8000, 0x01000,
+};
+
+static int	/* decodes fixed-range field `index' (0..Nfixreg-1) into *rp; always returns 1 */
+getfixreg(State *s, Range *rp, int index)
+{
+	Fixreg *reg = &fixreg[index >> 3];	/* upper bits of index select the MSR */
+
+	index &= 7;	/* low three bits select the field within that MSR */
+	rp->base = reg->base + reg->size * index;
+	rp->size = reg->size;
+	rp->type = ((u64)s->fixreg[reg - fixreg] >> 8*index) & 0xFF;	/* one type byte per field */
+	return 1;
+}
+
+static void	/* stores rp->type into fixed-range field `index'; nil or empty rp stores Uncacheable */
+setfixreg(State *s, Range *rp, int index)
+{
+	Fixreg *reg = &fixreg[index >> 3];	/* upper bits of index select the MSR */
+	int type;
+
+	index &= 7;	/* low three bits select the field within that MSR */
+	if(rp == nil || rp->size == 0)
+		type = Uncacheable;
+	else
+		type = rp->type & 0xFF;
+	s->fixreg[reg - fixreg] &= ~(0xFFULL << 8*index);	/* clear the old type byte */
+	s->fixreg[reg - fixreg] |= (u64)type << 8*index;	/* only rp->type is used; base and size are implied by index */
+}
+
+static int	/* combined type for two overlapping variable ranges of types a and b; -1 if no rule applies */
+preftype(int a, int b)
+{
+	if(a == b)
+		return a;
+	if(a == Uncacheable || b == Uncacheable)	/* Uncacheable wins over everything */
+		return Uncacheable;
+	if(a == Writethru && b == Writeback
+	|| a == Writeback && b == Writethru)	/* Writethru wins over Writeback */
+		return Writethru;
+	return -1;
+}
+
+static int	/* cache type in effect at physical address pa under state s */
+gettype(State *s, u64 pa, Range *new)
+{
+	int i, type;
+	Range r;
+
+	if(new != nil && pa >= new->base && pa < new->base + new->size)	/* optional override range takes precedence over s */
+		return new->type;
+
+	if((s->def & Defena) == 0)	/* MTRRs disabled: everything is Uncacheable */
+		return Uncacheable;
+
+	if(pa < 0x100000 && (s->def & Deffixena) != 0){	/* below 1MB the fixed registers are consulted first */
+		for(i = 0; i < Nfixreg; i++){
+			if(getfixreg(s, &r, i) && pa < r.base + r.size && pa >= r.base)
+				return r.type;
+		}
+	}
+
+	if(pa >= 0x100000000ULL && pa < s->tom2)	/* 4GB up to tom2 is Writeback when getstate() set tom2 */
+		return Writeback;
+
+	type = -1;
+	for(i = 0; i < s->nvarreg; i++){
+		if(!getvarreg(s, &r, i))
+			continue;
+		if((pa & -r.size) == r.base)	/* pa lies inside this size-aligned range */
+			type = (type == -1) ? r.type : preftype(r.type, type);	/* note: a -1 from preftype() looks like "no match yet" to later iterations */
+	}
+	if(type == -1)	/* no variable register matched: fall back to the default type */
+		type = s->def & Deftype;
+	return type;
+}
+
+static u64	/* first address above pa where the answer of gettype() may change */
+getnext(State *s, u64 pa, Range *new)
+{
+	u64 end;
+	Range r;
+	int i;
+
+	if(new != nil){
+		end = getnext(s, pa, nil);	/* boundary according to s alone */
+		if(pa < new->base && end > new->base)	/* override range starts before that boundary */
+			return new->base;
+		if(pa < new->base + new->size && end > new->base + new->size)	/* override range ends before that boundary */
+			return new->base + new->size;
+		return end;
+	}
+
+	end = s->mask+1;	/* one past the highest physical address */
+	if((s->def & Defena) == 0)
+		return end;
+
+	if(pa < 0x100000 && (s->def & Deffixena) != 0){
+		for(i = 0; i < Nfixreg; i++){
+			if(getfixreg(s, &r, i) && pa < r.base + r.size && pa >= r.base)
+				return r.base + r.size;	/* end of the fixed field containing pa */
+		}
+	}
+
+	if(pa >= 0x100000000ULL && pa < s->tom2)
+		return s->tom2;
+
+	for(i = 0; i < s->nvarreg; i++){
+		if(!getvarreg(s, &r, i))
+			continue;
+		if((pa & -r.size) == r.base)
+			r.base += r.size;	/* pa is inside: the candidate boundary is the range's end */
+		else if(r.base <= pa)
+			continue;	/* range lies entirely below pa */
+		if(r.base < end)	/* keep the nearest boundary above pa */
+			end = r.base;
+	}
+
+	if(pa < 0x100000000ULL && end > 0x100000000ULL)	/* always stop at 4GB, where the tom2 rule can start */
+		end = 0x100000000ULL;
+
+	return end;
+}
+
+enum {	/* consecutive values starting at 0x80000000; physmask() passes two of them to cpuid() */
+	Exthighfunc = 1ul << 31,
+	Extprocsigamd,
+	Extprocname0,
+	Extprocname1,
+	Extprocname2,
+	Exttlbl1,
+	Extl2,
+	Extapm,
+	Extaddrsz,	/* works out to 0x80000008 */
+};
+
+static u64	/* mask with one bit set for every physical address bit of this processor */
+physmask(void)
+{
+	u32 regs[4];
+	u64 mask;
+
+	cpuid(Exthighfunc, 0, regs);
+	if(regs[0] >= Extaddrsz) {			/* ax */
+		cpuid(Extaddrsz, 0, regs);
+		mask = (1ULL << (regs[0] & 0xFF)) - 1;	/* ax */
+	} else {
+		mask = (1ULL << 36) - 1;	/* Extaddrsz not available: assume 36 bits */
+	}
+	return mask;
+}
+
+static int	/* fills *s from this processor's MSRs; -1 if an rdmsr fails or no MTRRs are reported */
+getstate(State *s)
+{
+	s64 v;
+	int i;
+
+	if(rdmsr(MTRRCap, &s->cap) < 0)
+		return -1;
+
+	if((s->cap & (Capfix|Capvcnt)) == 0)	/* neither fixed nor variable registers present */
+		return -1;
+
+	if(rdmsr(MTRRDefaultType, &s->def) < 0)
+		return -1;
+
+	if(s->cap & Capfix){
+		for(i = 0; i < nelem(fixreg); i++){
+			if(rdmsr(fixreg[i].msr, &s->fixreg[i]) < 0)
+				return -1;
+		}
+	} else {
+		s->def &= ~(s64)Deffixena;	/* no fixed registers: never treat them as enabled */
+	}
+
+	s->nvarreg = s->cap & Capvcnt;
+	if(s->nvarreg > Nvarreg)	/* clamp to the capacity of s->varreg[] */
+		s->nvarreg = Nvarreg;
+
+	for(i = 0; i < s->nvarreg; i++){
+		if(rdmsr(MTRRPhysBase0 + 2*i, &s->varreg[i].base) < 0)
+			return -1;
+		if(rdmsr(MTRRPhysMask0 + 2*i, &s->varreg[i].mask) < 0)
+			return -1;
+	}
+
+	s->mask = physmask();
+
+	if(strcmp(m->cpuidid, "AuthenticAMD") != 0
+	|| m->cpuidfamily < 15
+	|| rdmsr(AMDK8SysCfg, &v) < 0
+	|| (v & (Tom2Enabled|Tom2ForceMemTypeWB)) != (Tom2Enabled|Tom2ForceMemTypeWB)
+	|| rdmsr(AMDK8TopMem2, &s->tom2) < 0)
+		s->tom2 = 0;	/* any failed condition above disables the tom2 rule */
+	else {
+		s->tom2 &= s->mask;
+		s->tom2 &= -0x800000LL;	/* round down to a multiple of 8MB */
+	}
+
+	return 0;
+}
+
+enum {	/* control register bits toggled by putstate() */
+	CR4PageGlobalEnable = 1 << 7,	/* cleared in cr4 while the MTRRs are rewritten */
+	CR0CacheDisable = 1 << 30,	/* set in cr0 while the MTRRs are rewritten */
+};
+
+static void	/* loads *s into this processor's MTRR MSRs; statement order is deliberate, do not reorder */
+putstate(State *s)
+{
+	uintptr cr0, cr4;
+	int i, x;
+
+	x = splhi();	/* previous level is restored by splx(x) at the end */
+
+	/* disable cache */
+	cr0 = getcr0();
+	putcr0(cr0 | CR0CacheDisable);
+	wbinvd();
+
+	/* disable PGE */
+	cr4 = getcr4();
+	putcr4(cr4 & ~CR4PageGlobalEnable);
+
+	/* flush tlb */
+	putcr3(getcr3());
+
+	/* disable MTRRs */
+	wrmsr(MTRRDefaultType, s->def & ~(s64)(Defena|Deffixena|Deftype));
+	wbinvd();
+
+	/* write all registers */
+	if(s->cap & Capfix){
+		for(i = 0; i < nelem(fixreg); i++)
+			wrmsr(fixreg[i].msr, s->fixreg[i]);
+	}
+	for(i = 0; i < s->nvarreg; i++){
+		wrmsr(MTRRPhysBase0 + 2*i, s->varreg[i].base);
+		wrmsr(MTRRPhysMask0 + 2*i, s->varreg[i].mask);
+	}
+
+	/* flush tlb */
+	putcr3(getcr3());
+
+	/* enable MTRRs */
+	wrmsr(MTRRDefaultType, s->def);	/* s->def carries the enable bits and default type chosen by setranges() */
+
+	/* reenable cache */
+	putcr0(cr0);	/* original cr0 value, saved above */
+
+	/* reenable PGE */
+	putcr4(cr4);	/* original cr4 value, saved above */
+
+	splx(x);
+}
+
+static int	/* index of the LOWEST set bit of x (despite the name); 64 when x == 0 */
+fls64(u64 x)
+{
+	int i;
+
+	for(i = 0; i < 64; i++)	/* scan upward from bit 0 */
+		if(x & (1ULL<<i))
+			break;
+	return i;
+}
+
+static int	/* index of the highest set bit of x; 0 when x == 0 (same answer as for x == 1) */
+fms64(u64 x)
+{
+	int i;
+
+	if(x == 0)
+		return 0;
+	for(i = 63; i >= 0; i--)	/* scan downward from bit 63 */
+		if(x & (1ULL<<i))
+			break;
+	return i;
+}
+
+static int	/* covers r with variable registers from `index' on; returns the next free index, or -1 */
+range2varreg(State *s, Range r, int index, int doit)
+{
+	u64 len;
+
+	if(index < 0)	/* propagate an earlier failure */
+		return -1;
+
+	if(r.base <= 0x100000 && (s->def & Deffixena) != 0){	/* fixed registers answer first below 1MB (see gettype), so extend down to 0 */
+		r.size += r.base;
+		r.base = 0;
+	}
+
+	if(r.base >= 0x100000000ULL && r.base <= s->tom2){
+		if(r.base + r.size <= s->tom2){	/* entirely inside 4GB..tom2, which gettype() reports as Writeback */
+			if(r.type != Writeback)
+				return -1;
+			return index;	/* nothing to program */
+		}
+	}
+
+	len = r.size;
+	while(len){	/* one register per chunk; each chunk is a power of two in size and aligned to it */
+		if(index >= s->nvarreg)	/* out of registers */
+			return -1;
+		if(fls64(r.base) > fms64(len))	/* alignment of base allows more than what is left */
+			r.size = 1ULL << fms64(len);	/* largest power of two not exceeding len */
+		else
+			r.size = 1ULL << fls64(r.base);	/* largest chunk the alignment of base allows */
+		if(doit)	/* doit == 0 only counts registers */
+			setvarreg(s, &r, index);
+		index++;
+		len -= r.size;
+		r.base += r.size;
+	}
+	return index;
+}
+
+static int ranges2varregs(State*, Range*, int, int, int);
+
+/*
+ * try to combine same type ranges that are split by
+ * higher precedence ranges.
+ */
+static int	/* returns the next free register index, or -1 if combining is not possible */
+ranges2varregscomb(State *s, Range *rp, int nr, int index, int doit)
+{
+	Range rr;
+	int i, j;
+
+	if(nr < 2 || rp[0].type == rp[1].type)
+		return -1;
+	rr = rp[0];
+	if(preftype(rr.type, rp[1].type) == rr.type)	/* rp[0] would win an overlap: use rp[1]'s type as the background */
+		rr.type = rp[1].type;
+	for(j = 1; j < nr; j++){
+		if(rp[j].type != rr.type
+		&& preftype(rp[j].type, rr.type) != rp[j].type)	/* every other type must win over the background */
+			return -1;
+		rr.size += rp[j].size;	/* rr grows to span all nr ranges */
+	}
+	i = ranges2varregs(s, &rr, 1, index, doit);	/* one big background range first */
+	for(j = 0; j < nr && i >= index; j++){
+		if(rp[j].type != rr.type)	/* then overlay each range that differs from the background */
+			i = range2varreg(s, rp[j], i, doit);
+	}
+	return i;
+}
+
+static int	/* maps rp[0..nr-1] onto variable registers from `index' on; returns next free index or -1 */
+ranges2varregs(State *s, Range *rp, int nr, int index, int doit)
+{
+	int i, j, k;
+
+	if(nr == 1){
+		if(rp->type == (s->def & Deftype))	/* the default type needs no register */
+			return index;
+		return range2varreg(s, *rp, index, doit);
+	}
+
+	/* try combining */
+	i = ranges2varregscomb(s, rp, nr, index, doit);
+
+	/*
+	 * now see if we can find a better solution using
+	 * different splittings.
+	 */
+	for(k = 1; k < nr; k++){
+		j = ranges2varregs(s, rp+k, nr-k,
+			ranges2varregs(s, rp, k, index, 0), 0);	/* dry run: cost of splitting at k */
+		if(j < 0)
+			continue;
+		if(i < 0 || j < i)	/* strictly fewer registers than the best so far */
+			i = doit ? ranges2varregs(s, rp+k, nr-k,
+				ranges2varregs(s, rp, k, index, 1), 1) : j;	/* redo for real, overwriting registers set earlier */
+	}
+	return i;
+}
+
+static int	/* sets the type of every fixed field inside r; -1 if r covers a field only partly */
+range2fixreg(State *s, Range r)
+{
+	Range rr;
+	int i;
+
+	for(i = 0; i < Nfixreg; i++){
+		if(!getfixreg(s, &rr, i) || rr.base + rr.size <= r.base)	/* field ends at or before r starts */
+			continue;
+		if(rr.base >= r.base + r.size)	/* field starts at or after the end of r */
+			break;
+		if(r.base > rr.base || r.base + r.size < rr.base + rr.size)	/* r begins or ends inside this field */
+			return -1;
+		rr.type = r.type;
+		setfixreg(s, &rr, i);
+	}
+	return 0;
+}
+
+static int	/* rewrites the registers in *s to realise rp[0..nr-1]; 0 on success, -1 if not possible */
+setranges(State *s, Range *rp, int nr)
+{
+	int i, j;
+
+	if(nr < 1 || nr > Nranges)	/* also rejects the -1 that getranges() can return */
+		return -1;
+
+	s->def &= ~(s64)(Defena|Deffixena|Deftype);	/* start from "MTRRs off", re-enable below as needed */
+
+	i = 0;
+	if(rp[0].size != s->mask+1 || rp[0].type != Uncacheable){	/* skipped only when one Uncacheable range covers everything */
+		s->def |= Defena;
+
+		/* first handle ranges below 1MB using fixed registers */
+		if(rp[0].size < 0x100000 && (s->cap & Capfix) != 0){
+			s->def |= Deffixena;
+
+			for(i = 0; i < Nfixreg; i++)
+				setfixreg(s, nil, i);	/* reset every field to Uncacheable */
+
+			while(nr > 0 && rp->base < 0x100000){
+				if(range2fixreg(s, *rp) < 0)
+					return -1;
+				if(rp->base + rp->size > 0x100000)	/* range crosses 1MB: leave it for the variable registers too */
+					break;
+				rp++;
+				nr--;
+			}
+		}
+
+		/* remaining ranges go to variable registers */
+		if(nr > 0){
+			/* make sure the algorithm doesn't explode */
+			if(nr > Nvarreg+1)
+				return -1;
+
+			/* try with UC default type */
+			s->def = (s->def & ~(s64)Deftype) | Uncacheable;
+			i = ranges2varregs(s, rp, nr, 0, 1);
+
+			/* try with WB default type, don't do it yet */
+			s->def = (s->def & ~(s64)Deftype) | Writeback;
+			j = ranges2varregs(s, rp, nr, 0, 0);
+			if(j < 0 || (i >= 0 && i <= j)){
+				/* WB not better or worse, use UC solution */
+				s->def = (s->def & ~(s64)Deftype) | Uncacheable;
+			} else {
+				/* WB default is better, doit! */
+				i = ranges2varregs(s, rp, nr, 0, 1);
+			}
+			if(i < 0)
+				return -1;
+		}
+	}
+
+	/* clear unused variable registers */
+	for(; i < s->nvarreg; i++)	/* i is the count of registers consumed above */
+		setvarreg(s, nil, i);
+
+	return 0;
+}
+
+static int	/* verifies that state s yields rp[i].type throughout each rp[i]; 0 if so, else -1 */
+checkranges(State *s, Range *rp, int nr)
+{
+	u64 base, next;
+	int i;
+
+	for(i = 0; i < nr; i++){
+		next = rp[i].base + rp[i].size;
+		for(base = rp[i].base; base < next; base = getnext(s, base, nil)){	/* sample once per region of s */
+			if(gettype(s, base, nil) != rp[i].type)
+				return -1;
+		}
+	}
+	return 0;
+}
+
+static int	/* fills rp[] with the ranges of s (overridden by `new' if non-nil); returns the count, or -1 if nr is too small */
+getranges(State *s, Range *rp, int nr, Range *new)
+{
+	u64 base, next;
+	Range *rs, *re;
+	int type;
+
+	rs = rp;
+	re = rp + nr;
+	for(base = 0; base <= s->mask; base = next) {	/* walk the whole physical address space */
+		if(rp >= re)
+			return -1;
+		type = gettype(s, base, new);
+		next = getnext(s, base, new);
+		while(next <= s->mask && (gettype(s, next, new) == type))	/* merge following regions of the same type */
+			next = getnext(s, next, new);
+		rp->base = base;
+		rp->size = next - base;
+		rp->type = type;
+		rp++;
+	}
+	return rp - rs;
+}
+
+static int dosync;	/* set by mtrr(); mtrrclock() does nothing while it is 0 and clears it when done */
+static QLock mtrrlk;	/* held by mtrr() and mtrrprint() while they use ranges[] */
+static State cpu0state;	/* reference state; mask == 0 means mtrrsync() has not captured one */
+static Range ranges[Nranges];	/* scratch array for getranges() results */
+
+char*
+mtrr(u64 base, u64 size, char *tstr)
+{
+	static State newstate;
+	Range new;
+	int nr;
+
+	if(cpu0state.mask == 0)
+		return "mtrr not supported";
+
+	if(size < 0x1000)
+		return "size too small";
+	if((base | size) & 0xFFF)
+		return "base or size not page aligned";
+	if(base & ~cpu0state.mask)
+		return "base out of range";
+	if(base + size > cpu0state.mask+1)
+		return "size out of range";
+
+	new.base = base;
+	new.size = size;
+	if((new.type = str2type(tstr)) < 0)
+		return "bad cache type";
+
+	if(new.type == Writecomb
+	&& (cpu0state.cap & Capwc) == 0)
+		return "write combining not supported";
+
+	qlock(&mtrrlk);
+	newstate = cpu0state;
+	nr = getranges(&newstate, ranges, Nranges, &new);
+	if(setranges(&newstate, ranges, nr) < 0
+	|| checkranges(&newstate, ranges, nr) < 0){
+		qunlock(&mtrrlk);
+		return "cache range not satisfiable";
+	}
+	cpu0state = newstate;
+	coherence();
+	dosync = 1;
+	mtrrclock();
+	qunlock(&mtrrlk);
+
+	return nil;
+}
+
+char*	/* name of the cache type at pa, or nil if there is no state or the type has no name */
+mtrrattr(u64 pa, u64 *pnext)
+{
+	if(cpu0state.mask == 0)
+		return nil;
+	if(pnext != nil)	/* optional output: where the type may next change */
+		*pnext = getnext(&cpu0state, pa, nil);
+	return type2str(gettype(&cpu0state, pa, nil));	/* reads cpu0state without taking mtrrlk */
+}
+
+int	/* formats one "cache base size type" line per range into buf; returns cp - buf */
+mtrrprint(char *buf, s32 bufsize)
+{
+	char *cp, *ep;
+	int i, nr;
+
+	if(cpu0state.mask == 0)
+		return 0;
+
+	cp = buf;
+	ep = buf + bufsize;
+
+	qlock(&mtrrlk);	/* ranges[] is shared with mtrr() */
+	nr = getranges(&cpu0state, ranges, Nranges, nil);	/* a -1 result simply skips the loop */
+	for(i = 0; i < nr; i++){
+		cp = seprint(cp, ep, "cache %#.16llux %15llud %s\n",
+			ranges[i].base,
+			ranges[i].size,
+			type2str(ranges[i].type));
+	}
+	qunlock(&mtrrlk);
+
+	return cp - buf;
+}
+
+/* called from clock interrupt */
+void	/* also called directly by mtrr() after it sets dosync */
+mtrrclock(void)
+{
+	static Ref bar1, bar2;	/* arrival counters for the two rendezvous points */
+	int x;
+
+	if(dosync == 0 || cpu0state.mask == 0)	/* nothing pending, or no reference state */
+		return;
+
+	x = splhi();
+
+	/*
+	 * wait for all CPUs to sync here, so that the MTRR setup gets
+	 * done at roughly the same time on all processors.
+	 */
+	incref(&bar1);
+	while(bar1.ref < conf.nmach)
+		microdelay(10);
+
+	putstate(&cpu0state);
+
+	/*
+	 * wait for all CPUs to sync up again, so that we don't continue
+	 * executing while the MTRRs are still being set up.
+	 */
+	incref(&bar2);
+	while(bar2.ref < conf.nmach)
+		microdelay(10);
+	decref(&bar1);
+	while(bar1.ref > 0)	/* wait until every CPU has left the second rendezvous */
+		microdelay(10);
+	decref(&bar2);	/* both counters end at 0, ready for the next request */
+
+	dosync = 0;
+	splx(x);
+}
+
+/* called from cpuidentify() */
+void	/* captures the reference state on first use; afterwards loads it into the calling processor */
+mtrrsync(void)
+{
+	State s;
+
+	if(getstate(&s) < 0)	/* MTRRs missing or unreadable: leave everything as it is */
+		return;
+	if(cpu0state.mask == 0){	/* first successful call: its state becomes the reference */
+		cpu0state = s;
+		coherence();
+		return;
+	}
+	putstate(&cpu0state);	/* later calls adopt the reference state */
+}
--- a/os/pc/pc
+++ b/os/pc/pc
@@ -141,3 +141,5 @@
 	/dis/lib/ssl.dis
 	/dis/lib/arg.dis
 	/dis/lib/filepat.dis
+	/dis/lib/arg.dis
+	/dis/lib/filepat.dis
--- /dev/null
+++ b/os/pc/pc.root.s
@@ -1,0 +1,1 @@
+/* Generated by /os/port/mkroot */
--- a/os/pc/pci.c
+++ b/os/pc/pci.c
@@ -1,7 +1,3 @@
-/*
- * PCI support code.
- * Needs a massive rewrite.
- */
 #include "u.h"
 #include "../port/lib.h"
 #include "mem.h"
@@ -8,69 +4,22 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
-#include "../port/error.h"
+#include "../port/pci.h"
 
-#define DBG	if(0) pcilog
-
-struct
+typedef struct Pcisiz Pcisiz;
+struct Pcisiz
 {
-	char	output[16384];
-	int	ptr;
-}PCICONS;
-
-int
-pcilog(char *fmt, ...)
-{
-	int n;
-	va_list arg;
-	char buf[PRINTSIZE];
-
-	va_start(arg, fmt);
-	n = vseprint(buf, buf+sizeof(buf), fmt, arg) - buf;
-	va_end(arg);
-
-	memmove(PCICONS.output+PCICONS.ptr, buf, n);
-	PCICONS.ptr += n;
-	return n;
-}
-
-enum
-{					/* configuration mechanism #1 */
-	PciADDR		= 0xCF8,	/* CONFIG_ADDRESS */
-	PciDATA		= 0xCFC,	/* CONFIG_DATA */
-
-					/* configuration mechanism #2 */
-	PciCSE		= 0xCF8,	/* configuration space enable */
-	PciFORWARD	= 0xCFA,	/* which bus */
-
-	MaxFNO		= 7,
-	MaxUBN		= 255,
+	Pcidev*	dev;
+	int	siz;
+	int	bar;
+	int	typ;
 };
 
-enum
-{					/* command register */
-	IOen		= (1<<0),
-	MEMen		= (1<<1),
-	MASen		= (1<<2),
-	MemWrInv	= (1<<4),
-	PErrEn		= (1<<6),
-	SErrEn		= (1<<8),
-};
+int pcimaxdno;
 
 static Lock pcicfglock;
-static Lock pcicfginitlock;
-static int pcicfgmode = -1;
-static int pcimaxbno = 7;
-static int pcimaxdno;
-static Pcidev* pciroot;
-static Pcidev* pcilist;
-static Pcidev* pcitail;
-static int nobios, nopcirouting;
+static Pcidev *pcilist, **pcitail;
 
-static int pcicfgrw32(int, int, int, int);
-static int pcicfgrw16(int, int, int, int);
-static int pcicfgrw8(int, int, int, int);
-
 static char* bustypes[] = {
 	"CBUSI",
 	"CBUSII",
@@ -92,54 +41,178 @@
 	"XPRESS",
 };
 
-#pragma	varargck	type	"T"	int
-
-static int
+int
 tbdffmt(Fmt* fmt)
 {
-	char *p;
-	int l, r, type, tbdf;
+	int type, tbdf;
 
-	if((p = malloc(READSTR)) == nil)
-		return fmtstrcpy(fmt, "(tbdfconv)");
-		
 	switch(fmt->r){
+	default:
+		return fmtstrcpy(fmt, "(tbdffmt)");
+
 	case 'T':
 		tbdf = va_arg(fmt->args, int);
-		type = BUSTYPE(tbdf);
-		if(type < nelem(bustypes))
-			l = snprint(p, READSTR, bustypes[type]);
-		else
-			l = snprint(p, READSTR, "%d", type);
-		snprint(p+l, READSTR-l, ".%d.%d.%d",
-			BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
-		break;
+		if(tbdf == BUSUNKNOWN) {
+			return fmtstrcpy(fmt, "unknown");
+		} else {
+			type = BUSTYPE(tbdf);
+			if(type < nelem(bustypes)) {
+				return fmtprint(fmt, "%s.%d.%d.%d",
+					bustypes[type], BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
+			} else {
+				return fmtprint(fmt, "%d.%d.%d.%d",
+					type, BUSBNO(tbdf), BUSDNO(tbdf), BUSFNO(tbdf));
+			}
+		}
+	}
+}
 
-	default:
-		snprint(p, READSTR, "(tbdfconv)");
-		break;
+static Pcidev*	/* allocates a Pcidev with xalloc(); never returns nil */
+pcidevalloc(void)
+{
+	Pcidev *p;
+
+	p = xalloc(sizeof(*p));
+	if(p == nil)	/* allocation failure is fatal */
+		panic("pci: no memory for Pcidev");
+	return p;
+}
+
+void
+pcidevfree(Pcidev *p)
+{
+	Pcidev **l;
+
+	if(p == nil)
+		return;
+
+	while(p->bridge != nil)
+		pcidevfree(p->bridge);
+
+	if(p->parent != nil){
+		for(l = &p->parent->bridge; *l != nil; l = &(*l)->link) {
+			if(*l == p) {
+				*l = p->link;
+				break;
+			}
+		}
 	}
-	r = fmtstrcpy(fmt, p);
-	free(p);
+	for(l = &pcilist; *l != nil; l = &(*l)->list) {
+		if(*l == p) {
+			if((*l = p->list) == nil)
+				pcitail = l;
+			break;
+		}
+	}
+	/* leaked */
+}
 
-	return r;
+int
+pcicfgr8(Pcidev* p, int rno)
+{
+	int data;
+
+	ilock(&pcicfglock);
+	data = pcicfgrw8(p->tbdf, rno, 0, 1);
+	iunlock(&pcicfglock);
+
+	return data;
 }
+void	/* writes config register rno of p via pcicfgrw8() */
+pcicfgw8(Pcidev* p, int rno, int data)
+{
+	ilock(&pcicfglock);	/* pcicfglock is held around the access */
+	pcicfgrw8(p->tbdf, rno, data, 0);	/* last argument 0 selects write */
+	iunlock(&pcicfglock);
+}
+int
+pcicfgr16(Pcidev* p, int rno)
+{
+	int data;
 
-ulong
+	ilock(&pcicfglock);
+	data = pcicfgrw16(p->tbdf, rno, 0, 1);
+	iunlock(&pcicfglock);
+
+	return data;
+}
+void	/* writes config register rno of p via pcicfgrw16() */
+pcicfgw16(Pcidev* p, int rno, int data)
+{
+	ilock(&pcicfglock);	/* pcicfglock is held around the access */
+	pcicfgrw16(p->tbdf, rno, data, 0);	/* last argument 0 selects write */
+	iunlock(&pcicfglock);
+}
+int	/* reads config register rno of p via pcicfgrw32() */
+pcicfgr32(Pcidev* p, int rno)
+{
+	int data;
+
+	ilock(&pcicfglock);	/* pcicfglock is held around the access */
+	data = pcicfgrw32(p->tbdf, rno, 0, 1);	/* last argument 1 selects read */
+	iunlock(&pcicfglock);
+
+	return data;
+}
+void	/* writes config register rno of p via pcicfgrw32() */
+pcicfgw32(Pcidev* p, int rno, int data)
+{
+	ilock(&pcicfglock);	/* pcicfglock is held around the access */
+	pcicfgrw32(p->tbdf, rno, data, 0);	/* last argument 0 selects write */
+	iunlock(&pcicfglock);
+}
+
+u32
 pcibarsize(Pcidev *p, int rno)
 {
-	ulong v, size;
+	int v, size;
 
+	ilock(&pcicfglock);
 	v = pcicfgrw32(p->tbdf, rno, 0, 1);
-	pcicfgrw32(p->tbdf, rno, 0xFFFFFFF0, 0);
+	pcicfgrw32(p->tbdf, rno, -1, 0);
 	size = pcicfgrw32(p->tbdf, rno, 0, 1);
-	if(v & 1)
-		size |= 0xFFFF0000;
 	pcicfgrw32(p->tbdf, rno, v, 0);
+	iunlock(&pcicfglock);
 
-	return -(size & ~0x0F);
+	if(rno == PciEBAR0 || rno == PciEBAR1){
+		size &= ~0x7FF;
+	} else if(v & 1){
+		size = (short)size;
+		size &= ~3;
+	} else {
+		size &= ~0xF;
+	}
+
+	return -size;
 }
 
+void	/* writes bar to config register rno, plus the upper half when the value is marked 64-bit */
+pcisetbar(Pcidev *p, int rno, uvlong bar)
+{
+	ilock(&pcicfglock);
+	pcicfgrw32(p->tbdf, rno, bar, 0);	/* low 32 bits */
+	if((bar&7) == 4 && rno >= PciBAR0 && rno < PciBAR0+4*(nelem(p->mem)-1))	/* low bits == 4 (presumably a 64-bit memory BAR) and a following BAR slot exists */
+		pcicfgrw32(p->tbdf, rno+4, bar>>32, 0);	/* high 32 bits go in the next register */
+	iunlock(&pcicfglock);
+}
+
+void	/* programs one base/limit window of bridge p; low bits of base select which window */
+pcisetwin(Pcidev *p, uvlong base, uvlong limit)
+{
+	ilock(&pcicfglock);
+	if(base & 1){	/* bit 0 set: PciIBR/PciIUBR pair (presumably the I/O window) */
+		pcicfgrw16(p->tbdf, PciIBR, (limit & 0xF000)|((base & 0xF000)>>8), 0);
+		pcicfgrw32(p->tbdf, PciIUBR, (limit & 0xFFFF0000)|(base>>16), 0);
+	} else if(base & 8){	/* bit 3 set: PciPMBR plus upper halves (presumably the prefetchable memory window) */
+		pcicfgrw32(p->tbdf, PciPMBR, (limit & 0xFFF00000)|((base & 0xFFF00000)>>16), 0);
+		pcicfgrw32(p->tbdf, PciPUBR, base >> 32, 0);
+		pcicfgrw32(p->tbdf, PciPULR, limit >> 32, 0);
+	} else {	/* otherwise: PciMBR only, 32-bit base and limit */
+		pcicfgrw32(p->tbdf, PciMBR, (limit & 0xFFF00000)|((base & 0xFFF00000)>>16), 0);
+	}
+	iunlock(&pcicfglock);
+}
+
 static int
 pcisizcmp(void *a, void *b)
 {
@@ -169,29 +242,26 @@
 	return v+1;
 }
 
-static void
-pcibusmap(Pcidev *root, ulong *pmema, ulong *pioa, int wrreg)
+void
+pcibusmap(Pcidev *root, uvlong *pmema, ulong *pioa, int wrreg)
 {
 	Pcidev *p;
 	int ntb, i, size, rno, hole;
-	ulong v, mema, ioa, sioa, smema, base, limit;
+	uvlong mema, smema;
+	ulong ioa, sioa, v;
 	Pcisiz *table, *tptr, *mtb, *itb;
 
-	if(!nobios)
-		return;
-
 	ioa = *pioa;
 	mema = *pmema;
 
-	DBG("pcibusmap wr=%d %T mem=%luX io=%luX\n", 
-		wrreg, root->tbdf, mema, ioa);
-
 	ntb = 0;
 	for(p = root; p != nil; p = p->link)
 		ntb++;
 
 	ntb *= (PciCIS-PciBAR0)/4;
-	table = malloc(2*ntb*sizeof(Pcisiz));
+	table = malloc((2*ntb+1)*sizeof(Pcisiz));
+	if(table == nil)
+		panic("pcibusmap: can't allocate memory");
 	itb = table;
 	mtb = table+ntb;
 
@@ -200,59 +270,88 @@
 	 */
 	for(p = root; p != nil; p = p->link) {
 		if(p->ccrb == 0x06) {
-			if(p->ccru != 0x04 || p->bridge == nil) {
-//				DBG("pci: ignored bridge %T\n", p->tbdf);
+			/* cardbus bridge? */
+			if(p->ccru == 0x07){
+				if(pcicfgr32(p, PciBAR0) & 1)
+					continue;
+				size = pcibarsize(p, PciBAR0);
+				if(size == 0)
+					continue;
+				mtb->dev = p;
+				mtb->bar = 0;
+				mtb->siz = size;
+				mtb->typ = 0;
+				mtb++;
 				continue;
 			}
 
+			/* pci bridge? */
+			if(p->ccru != 0x04 || p->bridge == nil)
+				continue;
+
 			sioa = ioa;
 			smema = mema;
 			pcibusmap(p->bridge, &smema, &sioa, 0);
 
-			hole = pcimask(smema-mema);
-			if(hole < (1<<20))
-				hole = 1<<20;
-			p->mema.size = hole;
-
 			hole = pcimask(sioa-ioa);
 			if(hole < (1<<12))
 				hole = 1<<12;
-
-			p->ioa.size = hole;
-
 			itb->dev = p;
 			itb->bar = -1;
-			itb->siz = p->ioa.size;
+			itb->siz = hole;
+			itb->typ = 0;
 			itb++;
 
+			hole = pcimask(smema-mema);
+			if(hole < (1<<20))
+				hole = 1<<20;
 			mtb->dev = p;
 			mtb->bar = -1;
-			mtb->siz = p->mema.size;
+			mtb->siz = hole;
+			mtb->typ = 0;
 			mtb++;
+
+			size = pcibarsize(p, PciEBAR1);
+			if(size != 0){
+				mtb->dev = p;
+				mtb->bar = -3;
+				mtb->siz = size;
+				mtb->typ = 0;
+				mtb++;
+			}
 			continue;
 		}
 
-		for(i = 0; i <= 5; i++) {
+		size = pcibarsize(p, PciEBAR0);
+		if(size != 0){
+			mtb->dev = p;
+			mtb->bar = -2;
+			mtb->siz = size;
+			mtb->typ = 0;
+			mtb++;
+		}
+
+		for(i = 0; i < nelem(p->mem); i++) {
 			rno = PciBAR0 + i*4;
-			v = pcicfgrw32(p->tbdf, rno, 0, 1);
+			v = pcicfgr32(p, rno);
 			size = pcibarsize(p, rno);
 			if(size == 0)
 				continue;
-
 			if(v & 1) {
 				itb->dev = p;
 				itb->bar = i;
 				itb->siz = size;
+				itb->typ = 1;
 				itb++;
-			}
-			else {
+			} else {
 				mtb->dev = p;
 				mtb->bar = i;
 				mtb->siz = size;
+				mtb->typ = v & 7;
+				if(mtb->typ & 4)
+					i++;
 				mtb++;
 			}
-
-			p->mem[i].size = size;
 		}
 	}
 
@@ -271,17 +370,17 @@
 		if(tptr->bar == -1)
 			hole = 1<<12;
 		ioa = (ioa+hole-1) & ~(hole-1);
-
-		p = tptr->dev;
-		if(tptr->bar == -1)
-			p->ioa.bar = ioa;
-		else {
-			p->pcr |= IOen;
-			p->mem[tptr->bar].bar = ioa|1;
-			if(wrreg)
-				pcicfgrw32(p->tbdf, PciBAR0+(tptr->bar*4), ioa|1, 0);
+		if(wrreg){
+			p = tptr->dev;
+			if(tptr->bar == -1) {
+				p->ioa.bar = ioa;
+				p->ioa.size = tptr->siz;
+			} else {
+				p->mem[tptr->bar].size = tptr->siz;
+				p->mem[tptr->bar].bar = ioa|1;
+				pcisetbar(p, PciBAR0+tptr->bar*4, p->mem[tptr->bar].bar);
+			}
 		}
-
 		ioa += tptr->siz;
 	}
 
@@ -292,16 +391,25 @@
 		hole = tptr->siz;
 		if(tptr->bar == -1)
 			hole = 1<<20;
-		mema = (mema+hole-1) & ~(hole-1);
-
-		p = tptr->dev;
-		if(tptr->bar == -1)
-			p->mema.bar = mema;
-		else {
-			p->pcr |= MEMen;
-			p->mem[tptr->bar].bar = mema;
-			if(wrreg)
-				pcicfgrw32(p->tbdf, PciBAR0+(tptr->bar*4), mema, 0);
+		mema = (mema+hole-1) & ~((uvlong)hole-1);
+		if(wrreg){
+			p = tptr->dev;
+			if(tptr->bar == -1) {
+				p->mema.bar = mema;
+				p->mema.size = tptr->siz;
+			} else if(tptr->bar == -2) {
+				p->rom.bar = mema|1;
+				p->rom.size = tptr->siz;
+				pcisetbar(p, PciEBAR0, p->rom.bar);
+			} else if(tptr->bar == -3) {
+				p->rom.bar = mema|1;
+				p->rom.size = tptr->siz;
+				pcisetbar(p, PciEBAR1, p->rom.bar);
+			} else {
+				p->mem[tptr->bar].size = tptr->siz;
+				p->mem[tptr->bar].bar = mema|tptr->typ;
+				pcisetbar(p, PciBAR0+tptr->bar*4, p->mem[tptr->bar].bar);
+			}
 		}
 		mema += tptr->siz;
 	}
@@ -318,38 +426,20 @@
 	 */
 	for(p = root; p != nil; p = p->link) {
 		if(p->bridge == nil) {
-			pcicfgrw8(p->tbdf, PciLTR, 64, 0);
-
-			p->pcr |= MASen;
-			pcicfgrw16(p->tbdf, PciPCR, p->pcr, 0);
+			pcienable(p);
 			continue;
 		}
 
-		base = p->ioa.bar;
-		limit = base+p->ioa.size-1;
-		v = pcicfgrw32(p->tbdf, PciIBR, 0, 1);
-		v = (v&0xFFFF0000)|(limit & 0xF000)|((base & 0xF000)>>8);
-		pcicfgrw32(p->tbdf, PciIBR, v, 0);
-		v = (limit & 0xFFFF0000)|(base>>16);
-		pcicfgrw32(p->tbdf, PciIUBR, v, 0);
+		/* Set I/O and Mem windows */
+		pcisetwin(p, p->ioa.bar|1, p->ioa.bar+p->ioa.size-1);
+		pcisetwin(p, p->mema.bar|0, p->mema.bar+p->mema.size-1);
 
-		base = p->mema.bar;
-		limit = base+p->mema.size-1;
-		v = (limit & 0xFFF00000)|((base & 0xFFF00000)>>16);
-		pcicfgrw32(p->tbdf, PciMBR, v, 0);
+		/* Disable prefetch */
+		pcisetwin(p, 0xFFF00000|8, 0);
 
-		/*
-		 * Disable memory prefetch
-		 */
-		pcicfgrw32(p->tbdf, PciPMBR, 0x0000FFFF, 0);
-		pcicfgrw8(p->tbdf, PciLTR, 64, 0);
+		/* Enable the bridge */
+		pcienable(p);
 
-		/*
-		 * Enable the bridge
-		 */
-		p->pcr |= IOen|MEMen|MASen;
-		pcicfgrw32(p->tbdf, PciPCR, 0xFFFF0000|p->pcr , 0);
-
 		sioa = p->ioa.bar;
 		smema = p->mema.bar;
 		pcibusmap(p->bridge, &smema, &sioa, 1);
@@ -357,9 +447,56 @@
 }
 
 static int
-pcilscan(int bno, Pcidev** list)
+pcivalidwin(Pcidev *p, uvlong base, uvlong limit)
 {
-	Pcidev *p, *head, *tail;
+	Pcidev *bridge = p->parent;
+	char *typ;
+
+	if(base & 1){
+		typ = "io";
+		base &= ~3;
+		if(base > limit)
+			return 0;
+		if(bridge == nil)
+			return 1;
+		if(base >= bridge->ioa.bar && limit < (bridge->ioa.bar + bridge->ioa.size))
+			return 1;
+	} else {
+		typ = "mem";
+		base &= ~0xFULL;
+		if(base > limit)
+			return 0;
+		if(bridge == nil)
+			return 1;
+		if(base >= bridge->mema.bar && limit < (bridge->mema.bar + bridge->mema.size))
+			return 1;
+		if(base >= bridge->prefa.bar && limit < (bridge->prefa.bar + bridge->prefa.size))
+			return 1;
+	}
+	print("%T: %.2uX invalid %s-window: %.8llux-%.8llux\n", p->tbdf, p->ccrb, typ, base, limit);
+	return 0;
+}
+
+static int
+pcivalidbar(Pcidev *p, uvlong bar, int size)	/* 1 if bar is nonzero, aligned to size and accepted by pcivalidwin; otherwise 0 */
+{
+	if(bar & 1){	/* bit 0 set: treated as an I/O BAR */
+		bar &= ~3;	/* strip the two low flag bits, leaving the address */
+		if(bar == 0 || size < 4 || (bar & (size-1)) != 0)	/* unassigned, too small, or low bits under size-1 set (alignment test; presumes size is a power of two) */
+			return 0;
+		return pcivalidwin(p, bar|1, bar+size-1);	/* |1 re-tags the base as I/O; the limit is the last byte of the range */
+	} else {	/* bit 0 clear: treated as a memory BAR */
+		bar &= ~0xFULL;	/* strip the four low flag bits, leaving the address */
+		if(bar == 0 || size < 16 || (bar & (size-1)) != 0)	/* same rejection rules with a 16-byte minimum */
+			return 0;
+		return pcivalidwin(p, bar|0, bar+size-1);	/* |0 is a no-op kept to mirror the I/O branch */
+	}
+}
+
+int
+pciscan(int bno, Pcidev** list, Pcidev *parent)
+{
+	Pcidev *p, *head, **tail;
 	int dno, fno, i, hdt, l, maxfno, maxubn, rno, sbn, tbdf, ubn;
 
 	maxubn = bno;
@@ -377,20 +514,18 @@
 			 * from the device's configuration space.
 			 */
 			tbdf = MKBUS(BusPCI, bno, dno, fno);
+
+			lock(&pcicfglock);
 			l = pcicfgrw32(tbdf, PciVID, 0, 1);
+			unlock(&pcicfglock);
+
 			if(l == 0xFFFFFFFF || l == 0)
 				continue;
-			p = malloc(sizeof(*p));
+			p = pcidevalloc();
 			p->tbdf = tbdf;
 			p->vid = l;
 			p->did = l>>16;
 
-			if(pcilist != nil)
-				pcitail->list = p;
-			else
-				pcilist = p;
-			pcitail = p;
-
 			p->pcr = pcicfgr16(p, PciPCR);
 			p->rid = pcicfgr8(p, PciRID);
 			p->ccrp = pcicfgr8(p, PciCCRp);
@@ -398,7 +533,6 @@
 			p->ccrb = pcicfgr8(p, PciCCRb);
 			p->cls = pcicfgr8(p, PciCLS);
 			p->ltr = pcicfgr8(p, PciLTR);
-
 			p->intl = pcicfgr8(p, PciINTL);
 
 			/*
@@ -414,6 +548,7 @@
 			 * and work out the sizes.
 			 */
 			switch(p->ccrb) {
+			case 0x00:		/* prehistoric */
 			case 0x01:		/* mass storage controller */
 			case 0x02:		/* network controller */
 			case 0x03:		/* display controller */
@@ -424,28 +559,62 @@
 			case 0x0A:		/* docking stations */
 			case 0x0B:		/* processors */
 			case 0x0C:		/* serial bus controllers */
+			case 0x0D:		/* wireless controllers */
+			case 0x0E:		/* intelligent I/O controllers */
+			case 0x0F:		/* satellite communication controllers */
+			case 0x10:		/* encryption/decryption controllers */
+			case 0x11:		/* signal processing controllers */
 				if((hdt & 0x7F) != 0)
 					break;
-				rno = PciBAR0 - 4;
+				rno = PciBAR0;
 				for(i = 0; i < nelem(p->mem); i++) {
-					rno += 4;
-					p->mem[i].bar = pcicfgr32(p, rno);
+					p->mem[i].bar = (ulong)pcicfgr32(p, rno);
 					p->mem[i].size = pcibarsize(p, rno);
+					if((p->mem[i].bar & 7) == 4 && i < nelem(p->mem)-1){
+						rno += 4;
+						p->mem[i++].bar |= (uvlong)pcicfgr32(p, rno) << 32;
+						p->mem[i].bar = 0;
+						p->mem[i].size = 0;
+					}
+					rno += 4;
 				}
+
+				p->rom.bar = (ulong)pcicfgr32(p, PciEBAR0);
+				p->rom.size = pcibarsize(p, PciEBAR0);
 				break;
 
-			case 0x00:
-			case 0x05:		/* memory controller */
 			case 0x06:		/* bridge device */
+				/* cardbus bridge? */
+				if(p->ccru == 0x07){
+					p->mem[0].bar = (ulong)pcicfgr32(p, PciBAR0);
+					p->mem[0].size = pcibarsize(p, PciBAR0);
+					break;
+				}
+
+				/* pci bridge? */
+				if(p->ccru != 0x04)
+					break;
+
+				p->rom.bar = (ulong)pcicfgr32(p, PciEBAR1);
+				p->rom.size = pcibarsize(p, PciEBAR1);
+				break;
+			case 0x05:		/* memory controller */
 			default:
 				break;
 			}
 
+			p->parent = parent;
 			if(head != nil)
-				tail->link = p;
+				*tail = p;
 			else
 				head = p;
-			tail = p;
+			tail = &p->link;
+
+			if(pcilist != nil)
+				*pcitail = p;
+			else
+				pcilist = p;
+			pcitail = &p->list;
 		}
 	}
 
@@ -454,9 +623,41 @@
 		/*
 		 * Find PCI-PCI bridges and recursively descend the tree.
 		 */
-		if(p->ccrb != 0x06 || p->ccru != 0x04)
+		switch(p->ccrb) {
+		case 0x06:
+			if(p->ccru == 0x04)
+				break;
+		default:
+			/* check and clear invalid membars for non bridges */
+			for(i = 0; i < nelem(p->mem); i++) {
+				if(p->mem[i].size == 0)
+					continue;
+				if(!pcivalidbar(p, p->mem[i].bar, p->mem[i].size)){
+					if(p->mem[i].bar & 1)
+						p->mem[i].bar &= 3;
+					else
+						p->mem[i].bar &= 0xF;
+					pcisetbar(p, PciBAR0 + i*4, p->mem[i].bar);
+				}
+			}
+			if(p->rom.size) {
+				if((p->rom.bar & 1) == 0
+				|| !pcivalidbar(p, p->rom.bar & ~0x7FFULL, p->rom.size)){
+					p->rom.bar = 0;
+					pcisetbar(p, PciEBAR0, p->rom.bar);
+				}
+			}
 			continue;
+		}
 
+		if(p->rom.size) {
+			if((p->rom.bar & 1) == 0
+			|| !pcivalidbar(p, p->rom.bar & ~0x7FFULL, p->rom.size)){
+				p->rom.bar = 0;
+				pcisetbar(p, PciEBAR1, p->rom.bar);
+			}
+		}
+
 		/*
 		 * If the secondary or subordinate bus number is not
 		 * initialised try to do what the PCI BIOS should have
@@ -468,7 +669,7 @@
 		sbn = pcicfgr8(p, PciSBN);
 		ubn = pcicfgr8(p, PciUBN);
 
-		if(sbn == 0 || ubn == 0 || nobios) {
+		if(sbn == 0 || ubn == 0) {
 			sbn = maxubn+1;
 			/*
 			 * Make sure memory, I/O and master enables are
@@ -478,607 +679,95 @@
 			 *
 			 * Initialisation of the bridge should be done here.
 			 */
+			p->pcr = 0;
 			pcicfgw32(p, PciPCR, 0xFFFF0000);
 			l = (MaxUBN<<16)|(sbn<<8)|bno;
 			pcicfgw32(p, PciPBN, l);
 			pcicfgw16(p, PciSPSR, 0xFFFF);
-			maxubn = pcilscan(sbn, &p->bridge);
+
+			p->ioa.bar = 0;
+			p->ioa.size = 0;
+			p->mema.bar = 0;
+			p->mema.size = 0;
+			p->prefa.bar = 0;
+			p->prefa.size = 0;
+
+			pcisetwin(p, 0xFFFFF000|1, 0);
+			pcisetwin(p, 0xFFF00000|0, 0);
+			pcisetwin(p, 0xFFF00000|8, 0);
+
+			maxubn = pciscan(sbn, &p->bridge, p);
 			l = (maxubn<<16)|(sbn<<8)|bno;
 
 			pcicfgw32(p, PciPBN, l);
 		}
 		else {
-			if(ubn > maxubn)
-				maxubn = ubn;
-			pcilscan(sbn, &p->bridge);
-		}
-	}
+			uvlong base, limit;
+			ulong v;
 
-	return maxubn;
-}
-
-int
-pciscan(int bno, Pcidev **list)
-{
-	int ubn;
-
-	lock(&pcicfginitlock);
-	ubn = pcilscan(bno, list);
-	unlock(&pcicfginitlock);
-	return ubn;
-}
-
-static uchar 
-pIIxget(Pcidev *router, uchar link)
-{
-	uchar pirq;
-
-	/* link should be 0x60, 0x61, 0x62, 0x63 */
-	pirq = pcicfgr8(router, link);
-	return (pirq < 16)? pirq: 0;
-}
-
-static void 
-pIIxset(Pcidev *router, uchar link, uchar irq)
-{
-	pcicfgw8(router, link, irq);
-}
-
-static uchar 
-viaget(Pcidev *router, uchar link)
-{
-	uchar pirq;
-
-	/* link should be 1, 2, 3, 5 */
-	pirq = (link < 6)? pcicfgr8(router, 0x55 + (link>>1)): 0;
-
-	return (link & 1)? (pirq >> 4): (pirq & 15);
-}
-
-static void 
-viaset(Pcidev *router, uchar link, uchar irq)
-{
-	uchar pirq;
-
-	pirq = pcicfgr8(router, 0x55 + (link >> 1));
-	pirq &= (link & 1)? 0x0f: 0xf0;
-	pirq |= (link & 1)? (irq << 4): (irq & 15);
-	pcicfgw8(router, 0x55 + (link>>1), pirq);
-}
-
-static uchar 
-optiget(Pcidev *router, uchar link)
-{
-	uchar pirq = 0;
-
-	/* link should be 0x02, 0x12, 0x22, 0x32 */
-	if ((link & 0xcf) == 0x02)
-		pirq = pcicfgr8(router, 0xb8 + (link >> 5));
-	return (link & 0x10)? (pirq >> 4): (pirq & 15);
-}
-
-static void 
-optiset(Pcidev *router, uchar link, uchar irq)
-{
-	uchar pirq;
-
-	pirq = pcicfgr8(router, 0xb8 + (link >> 5));
-    	pirq &= (link & 0x10)? 0x0f : 0xf0;
-    	pirq |= (link & 0x10)? (irq << 4): (irq & 15);
-	pcicfgw8(router, 0xb8 + (link >> 5), pirq);
-}
-
-static uchar 
-aliget(Pcidev *router, uchar link)
-{
-	/* No, you're not dreaming */
-	static const uchar map[] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
-	uchar pirq;
-
-	/* link should be 0x01..0x08 */
-	pirq = pcicfgr8(router, 0x48 + ((link-1)>>1));
-	return (link & 1)? map[pirq&15]: map[pirq>>4];
-}
-
-static void 
-aliset(Pcidev *router, uchar link, uchar irq)
-{
-	/* Inverse of map in aliget */
-	static const uchar map[] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
-	uchar pirq;
-
-	pirq = pcicfgr8(router, 0x48 + ((link-1)>>1));
-	pirq &= (link & 1)? 0x0f: 0xf0;
-	pirq |= (link & 1)? (map[irq] << 4): (map[irq] & 15);
-	pcicfgw8(router, 0x48 + ((link-1)>>1), pirq);
-}
-
-static uchar 
-cyrixget(Pcidev *router, uchar link)
-{
-	uchar pirq;
-
-	/* link should be 1, 2, 3, 4 */
-	pirq = pcicfgr8(router, 0x5c + ((link-1)>>1));
-	return ((link & 1)? pirq >> 4: pirq & 15);
-}
-
-static void 
-cyrixset(Pcidev *router, uchar link, uchar irq)
-{
-	uchar pirq;
-
-	pirq = pcicfgr8(router, 0x5c + (link>>1));
-	pirq &= (link & 1)? 0x0f: 0xf0;
-	pirq |= (link & 1)? (irq << 4): (irq & 15);
-	pcicfgw8(router, 0x5c + (link>>1), pirq);
-}
-
-typedef struct Bridge Bridge;
-struct Bridge
-{
-	ushort	vid;
-	ushort	did;
-	uchar	(*get)(Pcidev *, uchar);
-	void	(*set)(Pcidev *, uchar, uchar);	
-};
-
-static Bridge southbridges[] = {
-	{ 0x8086, 0x122e, pIIxget, pIIxset },	// Intel 82371FB
-	{ 0x8086, 0x1234, pIIxget, pIIxset },	// Intel 82371MX
-	{ 0x8086, 0x7000, pIIxget, pIIxset },	// Intel 82371SB
-	{ 0x8086, 0x7110, pIIxget, pIIxset },	// Intel 82371AB
-	{ 0x8086, 0x7198, pIIxget, pIIxset },	// Intel 82443MX (fn 1)
-	{ 0x8086, 0x2410, pIIxget, pIIxset },	// Intel 82801AA
-	{ 0x8086, 0x2420, pIIxget, pIIxset },	// Intel 82801AB
-	{ 0x8086, 0x2440, pIIxget, pIIxset },	// Intel 82801BA
-	{ 0x8086, 0x244c, pIIxget, pIIxset },	// Intel 82801BAM
-	{ 0x8086, 0x248c, pIIxget, pIIxset },	// Intel 82801CAM
-	{ 0x8086, 0x24cc, pIIxget, pIIxset },	// Intel 82801DBM
-	{ 0x8086, 0x24d0, pIIxget, pIIxset },	// Intel 82801EB
-	{ 0x8086, 0x2640, pIIxget, pIIxset },	// Intel 82801FB
-	{ 0x1106, 0x0586, viaget, viaset },	// Viatech 82C586
-	{ 0x1106, 0x0596, viaget, viaset },	// Viatech 82C596
-	{ 0x1106, 0x0686, viaget, viaset },	// Viatech 82C686
-	{ 0x1106, 0x3227, viaget, viaset },	// Viatech VT8237
-	{ 0x1045, 0xc700, optiget, optiset },	// Opti 82C700
-	{ 0x10b9, 0x1533, aliget, aliset },	// Al M1533
-	{ 0x1039, 0x0008, pIIxget, pIIxset },	// SI 503
-	{ 0x1039, 0x0496, pIIxget, pIIxset },	// SI 496
-	{ 0x1078, 0x0100, cyrixget, cyrixset },	// Cyrix 5530 Legacy
-
-	{ 0x1022, 0x746B, nil, nil },		// AMD 8111
-	{ 0x10DE, 0x00D1, nil, nil },		// NVIDIA nForce 3
-	{ 0x1166, 0x0200, nil, nil },		// ServerWorks ServerSet III LE
-};
-
-typedef struct Slot Slot;
-struct Slot {
-	uchar	bus;			// Pci bus number
-	uchar	dev;			// Pci device number
-	uchar	maps[12];		// Avoid structs!  Link and mask.
-	uchar	slot;			// Add-in/built-in slot
-	uchar	reserved;
-};
-
-typedef struct Router Router;
-struct Router {
-	uchar	signature[4];		// Routing table signature
-	uchar	version[2];		// Version number
-	uchar	size[2];		// Total table size
-	uchar	bus;			// Interrupt router bus number
-	uchar	devfn;			// Router's devfunc
-	uchar	pciirqs[2];		// Exclusive PCI irqs
-	uchar	compat[4];		// Compatible PCI interrupt router
-	uchar	miniport[4];		// Miniport data
-	uchar	reserved[11];
-	uchar	checksum;
-};
-
-static ushort pciirqs;			// Exclusive PCI irqs
-static Bridge *southbridge;		// Which southbridge to use.
-
-static void
-pcirouting(void)
-{
-	Slot *e;
-	Router *r;
-	int size, i, fn, tbdf;
-	Pcidev *sbpci, *pci;
-	uchar *p, pin, irq, link, *map;
-
-	// Search for PCI interrupt routing table in BIOS
-	for(p = (uchar *)KADDR(0xf0000); p < (uchar *)KADDR(0xfffff); p += 16)
-		if(p[0] == '$' && p[1] == 'P' && p[2] == 'I' && p[3] == 'R')
-			break;
-
-	if(p >= (uchar *)KADDR(0xfffff))
-		return;
-
-	r = (Router *)p;
-
-	// print("PCI interrupt routing table version %d.%d at %.6uX\n",
-	// 	r->version[0], r->version[1], (ulong)r & 0xfffff);
-
-	tbdf = (BusPCI << 24)|(r->bus << 16)|(r->devfn << 8);
-	sbpci = pcimatchtbdf(tbdf);
-	if(sbpci == nil) {
-		print("pcirouting: Cannot find south bridge %T\n", tbdf);
-		return;
-	}
-
-	for(i = 0; i != nelem(southbridges); i++)
-		if(sbpci->vid == southbridges[i].vid && sbpci->did == southbridges[i].did)
-			break;
-
-	if(i == nelem(southbridges)) {
-		print("pcirouting: ignoring south bridge %T %.4uX/%.4uX\n", tbdf, sbpci->vid, sbpci->did);
-		return;
-	}
-	southbridge = &southbridges[i];
-	if(southbridge->get == nil || southbridge->set == nil)
-		return;
-
-	pciirqs = (r->pciirqs[1] << 8)|r->pciirqs[0];
-
-	size = (r->size[1] << 8)|r->size[0];
-	for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) {
-		// print("%.2uX/%.2uX %.2uX: ", e->bus, e->dev, e->slot);
-		// for (i = 0; i != 4; i++) {
-		// 	uchar *m = &e->maps[i * 3];
-		// 	print("[%d] %.2uX %.4uX ",
-		// 		i, m[0], (m[2] << 8)|m[1]);
-		// }
-		// print("\n");
-
-		for(fn = 0; fn != 8; fn++) {
-			tbdf = (BusPCI << 24)|(e->bus << 16)|((e->dev | fn) << 8);
-			pci = pcimatchtbdf(tbdf);
-			if(pci == nil)
-				continue;
-			pin = pcicfgr8(pci, PciINTP);
-			if(pin == 0 || pin == 0xff) 
-				continue;
-
-			map = &e->maps[(pin - 1) * 3];
-			link = map[0];
-			irq = southbridge->get(sbpci, link);
-			if(irq == 0 || irq == pci->intl)
-				continue;
-			if(pci->intl != 0 && pci->intl != 0xFF) {
-				print("pcirouting: BIOS workaround: %T at pin %d link %d irq %d -> %d\n",
-					  tbdf, pin, link, irq, pci->intl);
-				southbridge->set(sbpci, link, pci->intl);
-				continue;
+			v = pcicfgr16(p, PciIBR);
+			limit = (v & 0xF000) | 0x0FFF;
+			base  = (v & 0x00F0) << 8;
+			if((v & 0x0F) == 0x01){
+				v = pcicfgr32(p, PciIUBR);
+				limit |= (v & 0xFFFF0000);
+				base  |= (v & 0x0000FFFF) << 16;
 			}
-			print("pcirouting: %T at pin %d link %d irq %d\n", tbdf, pin, link, irq);
-			pcicfgw8(pci, PciINTL, irq);
-			pci->intl = irq;
-		}
-	}
-}
-
-static void
-pcicfginit(void)
-{
-	char *p;
-	Pcidev **list;
-	ulong mema, ioa;
-	int bno, n, pcibios;
-
-	lock(&pcicfginitlock);
-	if(pcicfgmode != -1)
-		goto out;
-
-	pcibios = 0;
-	if(getconf("*nobios"))
-		nobios = 1;
-	else if(getconf("*pcibios"))
-		pcibios = 1;
-	if(getconf("*nopcirouting"))
-		nopcirouting = 1;
-
-	/*
-	 * Try to determine which PCI configuration mode is implemented.
-	 * Mode2 uses a byte at 0xCF8 and another at 0xCFA; Mode1 uses
-	 * a DWORD at 0xCF8 and another at 0xCFC and will pass through
-	 * any non-DWORD accesses as normal I/O cycles. There shouldn't be
-	 * a device behind these addresses so if Mode1 accesses fail try
-	 * for Mode2 (Mode2 is deprecated).
-	 */
-	if(!pcibios){
-		/*
-		 * Bits [30:24] of PciADDR must be 0,
-		 * according to the spec.
-		 */
-		n = inl(PciADDR);
-		if(!(n & 0x7FF00000)){
-			outl(PciADDR, 0x80000000);
-			outb(PciADDR+3, 0);
-			if(inl(PciADDR) & 0x80000000){
-				pcicfgmode = 1;
-				pcimaxdno = 31;
+			if(pcivalidwin(p, base|1, limit)){
+				p->ioa.bar = base;
+				p->ioa.size = (limit - base)+1;
+			} else {
+				pcisetwin(p, 0xFFFFF000|1, 0);
+				p->ioa.bar = 0;
+				p->ioa.size = 0;
 			}
-		}
-		outl(PciADDR, n);
 
-		if(pcicfgmode < 0){
-			/*
-			 * The 'key' part of PciCSE should be 0.
-			 */
-			n = inb(PciCSE);
-			if(!(n & 0xF0)){
-				outb(PciCSE, 0x0E);
-				if(inb(PciCSE) == 0x0E){
-					pcicfgmode = 2;
-					pcimaxdno = 15;
-				}
+			v = pcicfgr32(p, PciMBR);
+			limit = (v & 0xFFF00000) | 0x000FFFFF;
+			base  = (v & 0x0000FFF0) << 16;
+			if(pcivalidwin(p, base|0, limit)){
+				p->mema.bar = base;
+				p->mema.size = (limit - base)+1;
+			} else {
+				pcisetwin(p, 0xFFF00000|0, 0);
+				p->mema.bar = 0;
+				p->mema.size = 0;
 			}
-			outb(PciCSE, n);
-		}
-	}
-	
-	if(pcicfgmode < 0)
-		goto out;
 
-	fmtinstall('T', tbdffmt);
-
-	if(p = getconf("*pcimaxbno")){
-		n = strtoul(p, 0, 0);
-		if(n < pcimaxbno)
-			pcimaxbno = n;
-	}
-	if(p = getconf("*pcimaxdno")){
-		n = strtoul(p, 0, 0);
-		if(n < pcimaxdno)
-			pcimaxdno = n;
-	}
-
-	list = &pciroot;
-	for(bno = 0; bno <= pcimaxbno; bno++) {
-		int sbno = bno;
-		bno = pcilscan(bno, list);
-
-		while(*list)
-			list = &(*list)->link;
-
-		if (sbno == 0) {
-			Pcidev *pci;
-
-			/*
-			  * If we have found a PCI-to-Cardbus bridge, make sure
-			  * it has no valid mappings anymore.  
-			  */
-			pci = pciroot;
-			while (pci) {
-				if (pci->ccrb == 6 && pci->ccru == 7) {
-					ushort bcr;
-
-					/* reset the cardbus */
-					bcr = pcicfgr16(pci, PciBCR);
-					pcicfgw16(pci, PciBCR, 0x40 | bcr);
-					delay(50);
-				}
-				pci = pci->link;
+			v = pcicfgr32(p, PciPMBR);
+			limit = (v & 0xFFF00000) | 0x000FFFFF;
+			limit |= (uvlong)pcicfgr32(p, PciPULR) << 32;
+			base  = (v & 0x0000FFF0) << 16;
+			base  |= (uvlong)pcicfgr32(p, PciPUBR) << 32;
+			if(pcivalidwin(p, base|8, limit)){
+				p->prefa.bar = base;
+				p->prefa.size = (limit - base)+1;
+			} else {
+				pcisetwin(p, 0xFFF00000|8, 0);
+				p->prefa.bar = 0;
+				p->prefa.size = 0;
 			}
+
+			if(ubn > maxubn)
+				maxubn = ubn;
+			pciscan(sbn, &p->bridge, p);
 		}
 	}
 
-	if(pciroot == nil)
-		goto out;
-
-	if(nobios) {
-		/*
-		 * Work out how big the top bus is
-		 */
-		mema = 0;
-		ioa = 0;
-		pcibusmap(pciroot, &mema, &ioa, 0);
-
-		DBG("Sizes: mem=%8.8lux size=%8.8lux io=%8.8lux\n",
-			mema, pcimask(mema), ioa);
-	
-		/*
-		 * Align the windows and map it
-		 */
-		ioa = 0x1000;
-		mema = 0x90000000;
-
-		pcilog("Mask sizes: mem=%lux io=%lux\n", mema, ioa);
-
-		pcibusmap(pciroot, &mema, &ioa, 1);
-		DBG("Sizes2: mem=%lux io=%lux\n", mema, ioa);
-	
-		unlock(&pcicfginitlock);
-		return;
-	}
-
-	if (!nopcirouting)
-		pcirouting();
-
-out:
-	unlock(&pcicfginitlock);
-
-	if(getconf("*pcihinv"))
-		pcihinv(nil);
+	return maxubn;
 }
 
-static int
-pcicfgrw8(int tbdf, int rno, int data, int read)
-{
-	int o, type, x;
-
-	if(pcicfgmode == -1)
-		pcicfginit();
-
-	if(BUSBNO(tbdf))
-		type = 0x01;
-	else
-		type = 0x00;
-	x = -1;
-	if(BUSDNO(tbdf) > pcimaxdno)
-		return x;
-
-	lock(&pcicfglock);
-	switch(pcicfgmode){
-
-	case 1:
-		o = rno & 0x03;
-		rno &= ~0x03;
-		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type);
-		if(read)
-			x = inb(PciDATA+o);
-		else
-			outb(PciDATA+o, data);
-		outl(PciADDR, 0);
-		break;
-
-	case 2:
-		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
-		outb(PciFORWARD, BUSBNO(tbdf));
-		if(read)
-			x = inb((0xC000|(BUSDNO(tbdf)<<8)) + rno);
-		else
-			outb((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
-		outb(PciCSE, 0);
-		break;
-	}
-	unlock(&pcicfglock);
-
-	return x;
-}
-
-int
-pcicfgr8(Pcidev* pcidev, int rno)
-{
-	return pcicfgrw8(pcidev->tbdf, rno, 0, 1);
-}
-
 void
-pcicfgw8(Pcidev* pcidev, int rno, int data)
+pcibussize(Pcidev *root, uvlong *msize, ulong *iosize)
 {
-	pcicfgrw8(pcidev->tbdf, rno, data, 0);
+	*msize = 0;
+	*iosize = 0;
+	pcibusmap(root, msize, iosize, 0);
 }
 
-static int
-pcicfgrw16(int tbdf, int rno, int data, int read)
-{
-	int o, type, x;
-
-	if(pcicfgmode == -1)
-		pcicfginit();
-
-	if(BUSBNO(tbdf))
-		type = 0x01;
-	else
-		type = 0x00;
-	x = -1;
-	if(BUSDNO(tbdf) > pcimaxdno)
-		return x;
-
-	lock(&pcicfglock);
-	switch(pcicfgmode){
-
-	case 1:
-		o = rno & 0x02;
-		rno &= ~0x03;
-		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type);
-		if(read)
-			x = ins(PciDATA+o);
-		else
-			outs(PciDATA+o, data);
-		outl(PciADDR, 0);
-		break;
-
-	case 2:
-		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
-		outb(PciFORWARD, BUSBNO(tbdf));
-		if(read)
-			x = ins((0xC000|(BUSDNO(tbdf)<<8)) + rno);
-		else
-			outs((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
-		outb(PciCSE, 0);
-		break;
-	}
-	unlock(&pcicfglock);
-
-	return x;
-}
-
-int
-pcicfgr16(Pcidev* pcidev, int rno)
-{
-	return pcicfgrw16(pcidev->tbdf, rno, 0, 1);
-}
-
-void
-pcicfgw16(Pcidev* pcidev, int rno, int data)
-{
-	pcicfgrw16(pcidev->tbdf, rno, data, 0);
-}
-
-static int
-pcicfgrw32(int tbdf, int rno, int data, int read)
-{
-	int type, x;
-
-	if(pcicfgmode == -1)
-		pcicfginit();
-
-	if(BUSBNO(tbdf))
-		type = 0x01;
-	else
-		type = 0x00;
-	x = -1;
-	if(BUSDNO(tbdf) > pcimaxdno)
-		return x;
-
-	lock(&pcicfglock);
-	switch(pcicfgmode){
-
-	case 1:
-		rno &= ~0x03;
-		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno|type);
-		if(read)
-			x = inl(PciDATA);
-		else
-			outl(PciDATA, data);
-		outl(PciADDR, 0);
-		break;
-
-	case 2:
-		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
-		outb(PciFORWARD, BUSBNO(tbdf));
-		if(read)
-			x = inl((0xC000|(BUSDNO(tbdf)<<8)) + rno);
-		else
-			outl((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
-		outb(PciCSE, 0);
-		break;
-	}
-	unlock(&pcicfglock);
-
-	return x;
-}
-
-int
-pcicfgr32(Pcidev* pcidev, int rno)
-{
-	return pcicfgrw32(pcidev->tbdf, rno, 0, 1);
-}
-
-void
-pcicfgw32(Pcidev* pcidev, int rno, int data)
-{
-	pcicfgrw32(pcidev->tbdf, rno, data, 0);
-}
-
 Pcidev*
 pcimatch(Pcidev* prev, int vid, int did)
 {
-	if(pcicfgmode == -1)
-		pcicfginit();
-
 	if(prev == nil)
 		prev = pcilist;
 	else
@@ -1098,9 +787,6 @@
 {
 	Pcidev *pcidev;
 
-	if(pcicfgmode == -1)
-		pcicfginit();
-
 	for(pcidev = pcilist; pcidev != nil; pcidev = pcidev->list) {
 		if(pcidev->tbdf == tbdf)
 			break;
@@ -1114,7 +800,7 @@
 	if (pci == nil)
 		pci = pcilist;
 
-	while (pci) {
+	while (pci != nil) {
 		uchar intl;
 
 		if (pcicfgr8(pci, PciINTP) == pin && pci->intl != 0 && pci->intl != 0xff)
@@ -1134,26 +820,23 @@
 	int i;
 	Pcidev *t;
 
-	if(p == nil) {
-		putstrn(PCICONS.output, PCICONS.ptr);
-		p = pciroot;
-		print("bus dev type vid  did intl memory\n");
-	}
 	for(t = p; t != nil; t = t->link) {
 		print("%d  %2d/%d %.2ux %.2ux %.2ux %.4ux %.4ux %3d  ",
 			BUSBNO(t->tbdf), BUSDNO(t->tbdf), BUSFNO(t->tbdf),
 			t->ccrb, t->ccru, t->ccrp, t->vid, t->did, t->intl);
-
 		for(i = 0; i < nelem(p->mem); i++) {
 			if(t->mem[i].size == 0)
 				continue;
-			print("%d:%.8lux %d ", i,
-				t->mem[i].bar, t->mem[i].size);
+			print("%d:%.8llux %d ", i, t->mem[i].bar, t->mem[i].size);
 		}
+		if(t->rom.bar || t->rom.size)
+			print("rom:%.8llux %d ", t->rom.bar, t->rom.size);
 		if(t->ioa.bar || t->ioa.size)
-			print("ioa:%.8lux %d ", t->ioa.bar, t->ioa.size);
+			print("ioa:%.8llux-%.8llux %d ", t->ioa.bar, t->ioa.bar+t->ioa.size, t->ioa.size);
 		if(t->mema.bar || t->mema.size)
-			print("mema:%.8lux %d ", t->mema.bar, t->mema.size);
+			print("mema:%.8llux-%.8llux %d ", t->mema.bar, t->mema.bar+t->mema.size, t->mema.size);
+		if(t->prefa.bar || t->prefa.size)
+			print("prefa:%.8llux-%.8llux %llud ", t->prefa.bar, t->prefa.bar+t->prefa.size, t->prefa.size);
 		if(t->bridge)
 			print("->%d", BUSBNO(t->bridge->tbdf));
 		print("\n");
@@ -1162,17 +845,14 @@
 		if(p->bridge != nil)
 			pcilhinv(p->bridge);
 		p = p->link;
-	}	
+	}
 }
 
 void
 pcihinv(Pcidev* p)
 {
-	if(pcicfgmode == -1)
-		pcicfginit();
-	lock(&pcicfginitlock);
+	print("bus dev type     vid  did  intl memory\n");
 	pcilhinv(p);
-	unlock(&pcicfginitlock);
 }
 
 void
@@ -1180,14 +860,11 @@
 {
 	Pcidev *p;
 
-	if(pcicfgmode == -1)
-		pcicfginit();
-
 	for(p = pcilist; p != nil; p = p->list) {
 		/* don't mess with the bridges */
 		if(p->ccrb == 0x06)
 			continue;
-		pciclrbme(p);
+		pcidisable(p);
 	}
 }
 
@@ -1234,55 +911,140 @@
 }
 
 static int
-pcigetpmrb(Pcidev* p)
+enumcaps(Pcidev *p, int (*fmatch)(Pcidev*, int, int, int), int arg)
 {
-	int ptr;
+	int i, r, cap, off;
 
-	if(p->pmrb != 0)
-		return p->pmrb;
-	p->pmrb = -1;
-
-	/*
-	 * If there are no extended capabilities implemented,
-	 * (bit 4 in the status register) assume there's no standard
-	 * power management method.
-	 * Find the capabilities pointer based on PCI header type.
-	 */
-	if(!(p->pcr & 0x0010))
-		return -1;
-	switch(pcicfgr8(p, PciHDT)){
+	/* status register bit 4 has capabilities */
+	if((pcicfgr16(p, PciPSR) & 1<<4) == 0)
+		return -1;      
+	switch(pcicfgr8(p, PciHDT) & 0x7F){
 	default:
 		return -1;
-	case 0:					/* all other */
-	case 1:					/* PCI to PCI bridge */
-		ptr = 0x34;
+	case 0:                         /* etc */
+	case 1:                         /* pci to pci bridge */
+		off = 0x34;
 		break;
-	case 2:					/* CardBus bridge */
-		ptr = 0x14;
+	case 2:                         /* cardbus bridge */
+		off = 0x14;
 		break;
 	}
-	ptr = pcicfgr32(p, ptr);
+	for(i = 48; i--;){
+		off = pcicfgr8(p, off);
+		if(off < 0x40 || (off & 3))
+			break;
+		off &= ~3;
+		cap = pcicfgr8(p, off);
+		if(cap == 0xff)
+			break;
+		r = (*fmatch)(p, cap, off, arg);
+		if(r < 0)
+			break;
+		if(r == 0)
+			return off;
+		off++;
+	}
+	return -1;
+}
 
-	while(ptr != 0){
-		/*
-		 * Check for validity.
-		 * Can't be in standard header and must be double
-		 * word aligned.
-		 */
-		if(ptr < 0x40 || (ptr & ~0xFC))
-			return -1;
-		if(pcicfgr8(p, ptr) == 0x01){
-			p->pmrb = ptr;
-			return ptr;
-		}
+static int
+matchcap(Pcidev *, int cap, int, int arg)	/* enumcaps callback: only the capability id and the wanted id are used */
+{
+	return cap != arg;	/* 0 when the ids are equal, which enumcaps treats as a match; 1 keeps it walking */
+}
 
-		ptr = pcicfgr8(p, ptr+1);
-	}
+static int
+matchhtcap(Pcidev *p, int cap, int off, int arg)
+{
+	int mask;
 
-	return -1;
+	if(cap != PciCapHTC)
+		return 1;
+	if(arg == 0x00 || arg == 0x20)
+		mask = 0xE0;
+	else
+		mask = 0xF8;
+	cap = pcicfgr8(p, off+3);
+	return (cap & mask) != arg;
 }
 
 int
+pcicap(Pcidev *p, int cap)
+{
+	return enumcaps(p, matchcap, cap);
+}
+
+int
+pcihtcap(Pcidev *p, int cap)
+{
+	return enumcaps(p, matchhtcap, cap);
+}
+
+static int
+pcigetmsi(Pcidev *p)	/* config-space offset of the MSI capability, or -1; the answer is cached in p->msi */
+{
+	if(p->msi != 0)	/* nonzero means an earlier call already stored a result (an offset or -1) */
+		return p->msi;
+	return p->msi = pcicap(p, PciCapMSI);	/* look it up once and remember the result, including failure */
+}
+
+enum {
+	MSICtrl = 0x02, /* message control register (16 bit) */
+	MSIAddr = 0x04, /* message address register (64 bit) */
+	MSIData32 = 0x08, /* message data register for 32 bit MSI (16 bit) */
+	MSIData64 = 0x0C, /* message data register for 64 bit MSI (16 bit) */
+};
+
+int
+pcimsienable(Pcidev *p, uvlong addr, ulong data)	/* program MSI address and data, then write 1 to message control; 0 on success, -1 without an MSI capability */
+{
+	int off, ok64;
+
+	if((off = pcigetmsi(p)) < 0)	/* off is the capability's offset in config space */
+		return -1;
+	ok64 = (pcicfgr16(p, off + MSICtrl) & (1<<7)) != 0;	/* message control bit 7 selects the 64-bit layout used below */
+	pcicfgw32(p, off + MSIAddr, addr);	/* low 32 bits of the message address */
+	if(ok64) pcicfgw32(p, off + MSIAddr+4, addr >> 32);	/* high 32 bits, written only in the 64-bit layout */
+	pcicfgw16(p, off + (ok64 ? MSIData64 : MSIData32), data);	/* the data register sits at a different offset in each layout */
+	pcicfgw16(p, off + MSICtrl, 1);	/* message control = 1; pcimsidisable writes 0 to the same register */
+	return 0;
+}
+
+int
+pcimsidisable(Pcidev *p)	/* write 0 to the MSI message control register; 0 on success, -1 without an MSI capability */
+{
+	int off;
+
+	if((off = pcigetmsi(p)) < 0)	/* negative: no MSI capability was found */
+		return -1;
+	pcicfgw16(p, off + MSICtrl, 0);	/* zeroes the whole 16-bit control word, not just bit 0 */
+	return 0;
+}
+
+enum {
+	MSIXCtrl = 0x02,
+};
+
+static int
+pcimsixdisable(Pcidev *p)	/* write 0 to the MSI-X control register; 0 on success, -1 without an MSI-X capability */
+{
+	int off;
+
+	if((off = pcicap(p, PciCapMSIX)) < 0)	/* searched on every call; unlike pcigetmsi nothing is cached */
+		return -1;
+	pcicfgw16(p, off + MSIXCtrl, 0);	/* zeroes the whole 16-bit control word */
+	return 0;
+}
+
+static int
+pcigetpmrb(Pcidev *p)
+{
+        if(p->pmrb != 0)
+                return p->pmrb;
+        return p->pmrb = pcicap(p, PciCapPMG);
+}
+
+int
 pcigetpms(Pcidev* p)
 {
 	int pmcsr, ptr;
@@ -1337,4 +1099,84 @@
 	pcicfgw16(p, ptr+4, pmcsr);
 
 	return ostate;
+}
+
+void
+pcienable(Pcidev *p)	/* power up p after its upstream bridges, default unset timers, and turn on the command bits it needs */
+{
+	uint pcr;
+	int i;
+
+	if(p == nil)	/* also terminates the recursion once the parent chain runs out */
+		return;
+
+	pcienable(p->parent);	/* enable every bridge above p before touching p itself */
+
+	switch(pcisetpms(p, 0)){	/* request power state 0; the cases key on the returned value (presumably the previous state — confirm) */
+	case 1:
+		print("pcienable %T: wakeup from D1\n", p->tbdf);
+		break;
+	case 2:
+		print("pcienable %T: wakeup from D2\n", p->tbdf);
+		if(p->bridge != nil)
+			delay(100);	/* B2: minimum delay 50ms */
+		else
+			delay(1);	/* D2: minimum delay 200µs */
+		break;
+	case 3:
+		print("pcienable %T: wakeup from D3\n", p->tbdf);
+		delay(100);		/* D3: minimum delay 50ms */
+
+		/* restore registers */
+		for(i = 0; i < nelem(p->mem); i++){
+			if(p->mem[i].size == 0)	/* slot has no sized BAR: nothing to write back */
+				continue;
+			pcisetbar(p, PciBAR0+i*4, p->mem[i].bar);
+		}
+
+		pcicfgw8(p, PciINTL, p->intl);	/* values written back come from the cached Pcidev fields */
+		pcicfgw8(p, PciLTR, p->ltr);
+		pcicfgw8(p, PciCLS, p->cls);
+		pcicfgw16(p, PciPCR, p->pcr);
+		break;
+	}
+
+	if(p->ltr == 0 || p->ltr == 0xFF){	/* latency timer looks unset: default it to 64 */
+		p->ltr = 64;
+		pcicfgw8(p,PciLTR, p->ltr);
+	}
+	if(p->cls == 0 || p->cls == 0xFF){	/* cache line size looks unset: give it a default */
+		p->cls = 64/4;	/* presumably 64 bytes counted in 4-byte units — confirm */
+		pcicfgw8(p, PciCLS, p->cls);
+	}
+
+	if(p->bridge != nil)	/* devices with a child bus get all three enables */
+		pcr = IOen|MEMen|MASen;
+	else {	/* others: only the address spaces their sized BARs use; MASen is not requested here */
+		pcr = 0;
+		for(i = 0; i < nelem(p->mem); i++){
+			if(p->mem[i].size == 0)
+				continue;
+			if(p->mem[i].bar & 1)	/* bit 0 set marks an I/O BAR */
+				pcr |= IOen;
+			else
+				pcr |= MEMen;
+		}
+	}
+
+	if((p->pcr & pcr) != pcr){	/* write the register only when a needed bit is missing */
+		print("pcienable %T: pcr %ux->%ux\n", p->tbdf, p->pcr, p->pcr|pcr);
+		p->pcr |= pcr;
+		pcicfgw32(p, PciPCR, 0xFFFF0000|p->pcr);	/* 32-bit write: the 16 bits above PciPCR go out as all ones (presumably clearing status bits — confirm) */
+	}
+}
+
+void
+pcidisable(Pcidev *p)	/* turn off MSI-X and MSI for p, then call pciclrbme; a nil p is ignored */
+{
+	if(p == nil)
+		return;
+	pcimsixdisable(p);	/* return value ignored: -1 only means the capability is absent */
+	pcimsidisable(p);	/* likewise best-effort */
+	pciclrbme(p);	/* defined outside this chunk; presumably clears bus-master enable — confirm */
 }
--- /dev/null
+++ b/os/pc/pcipc.c
@@ -1,0 +1,768 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "../port/error.h"
+
+#define DBG	if(1) print	/* if(1): every DBG() call prints; expands to a bare if, so do not place it before an else */
+
+enum
+{	/* configuration mechanism #1 */
+	PciADDR		= 0xCF8,	/* CONFIG_ADDRESS */
+	PciDATA		= 0xCFC,	/* CONFIG_DATA */
+
+	/* configuration mechanism #2 */
+	PciCSE		= 0xCF8,	/* configuration space enable */
+	PciFORWARD	= 0xCFA,	/* which bus */
+};
+
+static int pcimaxbno = 255;
+static int pcicfgmode = -1;
+static Pcidev* pciroot;
+static int nobios, nopcirouting;
+static BIOS32si* pcibiossi;
+
+static int pcicfgrw8raw(int, int, int, int);
+static int pcicfgrw16raw(int, int, int, int);
+static int pcicfgrw32raw(int, int, int, int);
+
+int (*pcicfgrw8)(int, int, int, int) = pcicfgrw8raw;
+int (*pcicfgrw16)(int, int, int, int) = pcicfgrw16raw;
+int (*pcicfgrw32)(int, int, int, int) = pcicfgrw32raw;
+
+static int
+pcicfgrw8raw(int tbdf, int rno, int data, int read)
+{	/* Port-I/O access to one config byte: read != 0 reads, else writes data. Returns data, or -1 when pcicfgmode is neither 1 nor 2. */
+	int o;
+
+	switch(pcicfgmode){
+	case 1:
+		o = rno & 0x03;	/* byte offset inside the 32-bit data port */
+		rno &= ~0x03;	/* the address port gets the dword-aligned register number */
+		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno);
+		if(read)
+			data = inb(PciDATA+o);
+		else
+			outb(PciDATA+o, data);
+		outl(PciADDR, 0);	/* clear the address port again */
+		break;
+
+	case 2:
+		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
+		outb(PciFORWARD, BUSBNO(tbdf));
+		if(read)
+			data = inb((0xC000|(BUSDNO(tbdf)<<8)) + rno);	/* each device number selects a 256-byte slice from 0xC000 */
+		else
+			outb((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
+		outb(PciCSE, 0);
+		break;
+	default:
+		data = -1;
+	}
+	return data;
+}
+
+static int
+pcicfgrw16raw(int tbdf, int rno, int data, int read)
+{	/* Port-I/O access to one 16-bit config word: read != 0 reads, else writes data. Returns data, or -1 when pcicfgmode is neither 1 nor 2. */
+	int o;
+
+	switch(pcicfgmode){
+	case 1:
+		o = rno & 0x02;	/* selects the low or high half of the 32-bit data port */
+		rno &= ~0x03;	/* the address port gets the dword-aligned register number */
+		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno);
+		if(read)
+			data = ins(PciDATA+o);
+		else
+			outs(PciDATA+o, data);
+		outl(PciADDR, 0);	/* clear the address port again */
+		break;
+
+	case 2:
+		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
+		outb(PciFORWARD, BUSBNO(tbdf));
+		if(read)
+			data = ins((0xC000|(BUSDNO(tbdf)<<8)) + rno);	/* each device number selects a 256-byte slice from 0xC000 */
+		else
+			outs((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
+		outb(PciCSE, 0);
+		break;
+	default:
+		data = -1;
+	}
+	return data;
+}
+
+static int
+pcicfgrw32raw(int tbdf, int rno, int data, int read)
+{	/* Port-I/O access to one 32-bit config register: read != 0 reads, else writes data. Returns data, or -1 when pcicfgmode is neither 1 nor 2. */
+	switch(pcicfgmode){
+	case 1:
+		rno &= ~0x03;	/* the address port gets the dword-aligned register number */
+		outl(PciADDR, 0x80000000|BUSBDF(tbdf)|rno);
+		if(read)
+			data = inl(PciDATA);
+		else
+			outl(PciDATA, data);
+		outl(PciADDR, 0);	/* clear the address port again */
+		break;
+
+	case 2:
+		outb(PciCSE, 0x80|(BUSFNO(tbdf)<<1));
+		outb(PciFORWARD, BUSBNO(tbdf));
+		if(read)
+			data = inl((0xC000|(BUSDNO(tbdf)<<8)) + rno);	/* each device number selects a 256-byte slice from 0xC000 */
+		else
+			outl((0xC000|(BUSDNO(tbdf)<<8)) + rno, data);
+		outb(PciCSE, 0);
+		break;
+	default:
+		data = -1;
+	}
+	return data;
+}
+
+static int
+pcicfgrw8bios(int tbdf, int rno, int data, int read)
+{	/* Config byte access through the BIOS32 service in pcibiossi. Read: returns the byte; write: returns 0; -1 on any failure. */
+	BIOS32ci ci;
+
+	if(pcibiossi == nil)
+		return -1;
+
+	memset(&ci, 0, sizeof(BIOS32ci));
+	ci.ebx = (BUSBNO(tbdf)<<8)|(BUSDNO(tbdf)<<3)|BUSFNO(tbdf);	/* bus shifted to bit 8, device to bit 3, function in the low bits */
+	ci.edi = rno;
+	if(read){
+		ci.eax = 0xB108;	/* call code used for the byte read */
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return ci.ecx & 0xFF;	/* value is taken from the low byte of ecx */
+	}
+	else{
+		ci.eax = 0xB10B;	/* call code used for the byte write */
+		ci.ecx = data & 0xFF;
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return 0;
+	}
+
+	return -1;
+}
+
+static int
+pcicfgrw16bios(int tbdf, int rno, int data, int read)
+{	/* Config word access through the BIOS32 service in pcibiossi. Read: returns the 16-bit value; write: returns 0; -1 on any failure. */
+	BIOS32ci ci;
+
+	if(pcibiossi == nil)
+		return -1;
+
+	memset(&ci, 0, sizeof(BIOS32ci));
+	ci.ebx = (BUSBNO(tbdf)<<8)|(BUSDNO(tbdf)<<3)|BUSFNO(tbdf);	/* bus shifted to bit 8, device to bit 3, function in the low bits */
+	ci.edi = rno;
+	if(read){
+		ci.eax = 0xB109;	/* call code used for the word read */
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return ci.ecx & 0xFFFF;	/* value is taken from the low 16 bits of ecx */
+	}
+	else{
+		ci.eax = 0xB10C;	/* call code used for the word write */
+		ci.ecx = data & 0xFFFF;
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return 0;
+	}
+
+	return -1;
+}
+
+static int
+pcicfgrw32bios(int tbdf, int rno, int data, int read)
+{	/* Config dword access through the BIOS32 service in pcibiossi. Read: returns ecx; write: returns 0; -1 on failure (a read of 0xFFFFFFFF also comes back as -1). */
+	BIOS32ci ci;
+
+	if(pcibiossi == nil)
+		return -1;
+
+	memset(&ci, 0, sizeof(BIOS32ci));
+	ci.ebx = (BUSBNO(tbdf)<<8)|(BUSDNO(tbdf)<<3)|BUSFNO(tbdf);	/* bus shifted to bit 8, device to bit 3, function in the low bits */
+	ci.edi = rno;
+	if(read){
+		ci.eax = 0xB10A;	/* call code used for the dword read */
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return ci.ecx;
+	}
+	else{
+		ci.eax = 0xB10D;	/* call code used for the dword write */
+		ci.ecx = data;
+		if(!bios32ci(pcibiossi, &ci)/* && !(ci.eax & 0xFF)*/)
+			return 0;
+	}
+
+	return -1;
+}
+
+static BIOS32si*
+pcibiosinit(void)
+{	/* Open the "$PCI" BIOS32 service and query it; sets pcimaxdno and pcimaxbno. Returns the service handle, or nil on failure. */
+	BIOS32ci ci;
+	BIOS32si *si;
+
+	if((si = bios32open("$PCI")) == nil)
+		return nil;
+
+	memset(&ci, 0, sizeof(BIOS32ci));
+	ci.eax = 0xB101;
+	if(bios32ci(si, &ci) || ci.edx != ((' '<<24)|('I'<<16)|('C'<<8)|'P')){	/* call must succeed and edx must hold the characters "PCI " */
+		free(si);
+		return nil;
+	}
+	if(ci.eax & 0x01)	/* bit 0 of eax picks the device-number limit: 31 when set, otherwise 15 */
+		pcimaxdno = 31;
+	else
+		pcimaxdno = 15;
+	pcimaxbno = ci.ecx & 0xff;	/* highest bus number comes from the low byte of ecx */
+
+	return si;
+}
+
+static uchar
+pIIxget(Pcidev *router, uchar link)
+{
+	uchar v;
+
+	/* link is the config register itself: expected 0x60, 0x61, 0x62 or 0x63 */
+	v = pcicfgr8(router, link);
+	return (v >= 16)? 0: v;
+}
+
+static void
+pIIxset(Pcidev *router, uchar link, uchar irq)
+{
+	pcicfgw8(router, link, irq);	/* link is used directly as the config register number, as in pIIxget */
+}
+
+static uchar
+viaget(Pcidev *router, uchar link)
+{
+	uchar reg = 0;
+
+	/* link should be 1, 2, 3, 5; links of 6 and above are not read and yield 0 */
+	if(link < 6)
+		reg = pcicfgr8(router, 0x55 + (link>>1));
+	return (link & 1)? (reg >> 4): (reg & 15);
+}
+
+static void
+viaset(Pcidev *router, uchar link, uchar irq)
+{
+	uchar pirq;
+
+	pirq = pcicfgr8(router, 0x55 + (link >> 1));	/* two links share a register; unlike viaget there is no link < 6 check */
+	pirq &= (link & 1)? 0x0f: 0xf0;	/* keep the other link's nibble */
+	pirq |= (link & 1)? (irq << 4): (irq & 15);	/* odd links use the high nibble, matching viaget */
+	pcicfgw8(router, 0x55 + (link>>1), pirq);
+}
+
+static uchar
+optiget(Pcidev *router, uchar link)
+{
+	uchar v;
+
+	/* link should be 0x02, 0x12, 0x22, 0x32; any other pattern is not read and yields 0 */
+	v = ((link & 0xcf) == 0x02)? pcicfgr8(router, 0xb8 + (link >> 5)): 0;
+
+	return (link & 0x10)? (v >> 4): (v & 15);
+}
+
+static void
+optiset(Pcidev *router, uchar link, uchar irq)
+{
+	uchar pirq;
+
+	pirq = pcicfgr8(router, 0xb8 + (link >> 5));	/* same register choice as optiget, but without its link-pattern check */
+    	pirq &= (link & 0x10)? 0x0f : 0xf0;	/* keep the nibble that belongs to the other link */
+    	pirq |= (link & 0x10)? (irq << 4): (irq & 15);	/* bit 0x10 of link selects the high nibble, matching optiget */
+	pcicfgw8(router, 0xb8 + (link >> 5), pirq);
+}
+
+static uchar
+aliget(Pcidev *router, uchar link)
+{
+	/* No, you're not dreaming: the register nibble is a code that map[] turns into an irq number */
+	static const uchar map[] = { 0, 9, 3, 10, 4, 5, 7, 6, 1, 11, 0, 12, 0, 14, 0, 15 };
+	uchar pirq;
+
+	/* link should be 0x01..0x08 */
+	pirq = pcicfgr8(router, 0x48 + ((link-1)>>1));	/* two links per register, starting at 0x48 */
+	return (link & 1)? map[pirq&15]: map[pirq>>4];	/* odd links are decoded from the low nibble, even links from the high one */
+}
+
+static void
+aliset(Pcidev *router, uchar link, uchar irq)
+{
+	/* Inverse of map in aliget: irq number -> register nibble code */
+	static const uchar map[] = { 0, 8, 0, 2, 4, 5, 7, 6, 0, 1, 3, 9, 11, 0, 13, 15 };
+	uchar pirq;
+	/* NOTE(review): irq indexes the 16-entry map unchecked; callers must pass irq < 16 */
+	pirq = pcicfgr8(router, 0x48 + ((link-1)>>1));
+	pirq &= (link & 1)? 0xf0: 0x0f;	/* odd links own the low nibble, exactly as aliget decodes them */
+	pirq |= (link & 1)? (map[irq] & 15): (map[irq] << 4);
+	pcicfgw8(router, 0x48 + ((link-1)>>1), pirq);
+}
+
+static uchar
+cyrixget(Pcidev *router, uchar link)
+{
+	uchar pirq;
+
+	/* link should be 1, 2, 3, 4 */
+	pirq = pcicfgr8(router, 0x5c + ((link-1)>>1));	/* links 1,2 read 0x5c; links 3,4 read 0x5d */
+	return ((link & 1)? pirq >> 4: pirq & 15);	/* odd links sit in the high nibble */
+}
+
+static void
+cyrixset(Pcidev *router, uchar link, uchar irq)
+{
+	uchar pirq;
+	/* link should be 1, 2, 3, 4; the register must be the one cyrixget reads for the same link */
+	pirq = pcicfgr8(router, 0x5c + ((link-1)>>1));
+	pirq &= (link & 1)? 0x0f: 0xf0;	/* keep the other link's nibble */
+	pirq |= (link & 1)? (irq << 4): (irq & 15);	/* odd links sit in the high nibble */
+	pcicfgw8(router, 0x5c + ((link-1)>>1), pirq);
+}
+
+typedef struct Bridge Bridge;
+struct Bridge
+{
+	ushort	vid;	/* compared with the router's Pcidev vid in pcirouting */
+	ushort	did;	/* compared with the router's Pcidev did in pcirouting */
+	uchar	(*get)(Pcidev *, uchar);	/* (router, link) -> irq; pcirouting gives up when this is nil */
+	void	(*set)(Pcidev *, uchar, uchar);	/* (router, link, irq) */
+};
+
+static Bridge southbridges[] = {
+	{ 0x8086, 0x122e, pIIxget, pIIxset },	/* Intel 82371FB */
+	{ 0x8086, 0x1234, pIIxget, pIIxset },	/* Intel 82371MX */
+	{ 0x8086, 0x7000, pIIxget, pIIxset },	/* Intel 82371SB */
+	{ 0x8086, 0x7110, pIIxget, pIIxset },	/* Intel 82371AB */
+	{ 0x8086, 0x7198, pIIxget, pIIxset },	/* Intel 82443MX (fn 1) */
+	{ 0x8086, 0x2410, pIIxget, pIIxset },	/* Intel 82801AA */
+	{ 0x8086, 0x2420, pIIxget, pIIxset },	/* Intel 82801AB */
+	{ 0x8086, 0x2440, pIIxget, pIIxset },	/* Intel 82801BA */
+	{ 0x8086, 0x2448, pIIxget, pIIxset },	/* Intel 82801BAM/CAM/DBM */
+	{ 0x8086, 0x244c, pIIxget, pIIxset },	/* Intel 82801BAM */
+	{ 0x8086, 0x244e, pIIxget, pIIxset },	/* Intel 82801 */
+	{ 0x8086, 0x2480, pIIxget, pIIxset },	/* Intel 82801CA */
+	{ 0x8086, 0x248c, pIIxget, pIIxset },	/* Intel 82801CAM */
+	{ 0x8086, 0x24c0, pIIxget, pIIxset },	/* Intel 82801DBL */
+	{ 0x8086, 0x24cc, pIIxget, pIIxset },	/* Intel 82801DBM */
+	{ 0x8086, 0x24d0, pIIxget, pIIxset },	/* Intel 82801EB */
+	{ 0x8086, 0x25a1, pIIxget, pIIxset },	/* Intel 6300ESB */
+	{ 0x8086, 0x2640, pIIxget, pIIxset },	/* Intel 82801FB */
+	{ 0x8086, 0x2641, pIIxget, pIIxset },	/* Intel 82801FBM */
+	{ 0x8086, 0x2670, pIIxget, pIIxset },	/* Intel 632xesb */
+	{ 0x8086, 0x27b8, pIIxget, pIIxset },	/* Intel 82801GB */
+	{ 0x8086, 0x27b9, pIIxget, pIIxset },	/* Intel 82801GBM */
+	{ 0x8086, 0x27bd, pIIxget, pIIxset },	/* Intel 82801GB/GR */
+	{ 0x8086, 0x3a16, pIIxget, pIIxset },	/* Intel 82801JIR */
+	{ 0x8086, 0x3a40, pIIxget, pIIxset },	/* Intel 82801JI */
+	{ 0x8086, 0x3a42, pIIxget, pIIxset },	/* Intel 82801JI */
+	{ 0x8086, 0x3a48, pIIxget, pIIxset },	/* Intel 82801JI */
+	{ 0x8086, 0x2916, pIIxget, pIIxset },	/* Intel 82801? */
+	{ 0x8086, 0x1c02, pIIxget, pIIxset },	/* Intel 6 Series/C200 */
+	{ 0x8086, 0x1e53, pIIxget, pIIxset },	/* Intel 7 Series/C216 */
+	{ 0x8086, 0x8c56, pIIxget, pIIxset },	/* Intel 8 Series/C226 */
+	{ 0x8086, 0x2810, pIIxget, pIIxset },	/* Intel 82801HB/HR (ich8/r) */
+	{ 0x8086, 0x2812, pIIxget, pIIxset },	/* Intel 82801HH (ich8dh) */
+	{ 0x8086, 0x2912, pIIxget, pIIxset },	/* Intel 82801ih ich9dh */
+	{ 0x8086, 0x2914, pIIxget, pIIxset },	/* Intel 82801io ich9do */
+	{ 0x8086, 0x2916, pIIxget, pIIxset },	/* Intel 82801ibr ich9r (repeats the 0x2916 entry above) */
+	{ 0x8086, 0x2917, pIIxget, pIIxset },	/* Intel 82801iem ich9m-e  */
+	{ 0x8086, 0x2918, pIIxget, pIIxset },	/* Intel 82801ib ich9 */
+	{ 0x8086, 0x2919, pIIxget, pIIxset },	/* Intel 82801? ich9m  */
+	{ 0x8086, 0x3a16, pIIxget, pIIxset },	/* Intel 82801jir ich10r (repeats the 0x3a16 entry above) */
+	{ 0x8086, 0x3a18, pIIxget, pIIxset },	/* Intel 82801jib ich10 */
+	{ 0x8086, 0x3a40, pIIxget, pIIxset },	/* Intel 82801ji (repeats the 0x3a40 entry above) */
+	{ 0x8086, 0x3a42, pIIxget, pIIxset },	/* Intel 82801ji (repeats the 0x3a42 entry above) */
+	{ 0x8086, 0x3a48, pIIxget, pIIxset },	/* Intel 82801ji (repeats the 0x3a48 entry above) */
+	{ 0x8086, 0x3b06, pIIxget, pIIxset },	/* Intel 82801? ibex peak */
+	{ 0x8086, 0x3b14, pIIxget, pIIxset },	/* Intel 82801? 3420 */
+	{ 0x8086, 0x1c49, pIIxget, pIIxset },	/* Intel 82hm65 cougar point pch */
+	{ 0x8086, 0x1c4b, pIIxget, pIIxset },	/* Intel 82hm67 */
+	{ 0x8086, 0x1c4f, pIIxget, pIIxset },	/* Intel 82qm67 cougar point pch */
+	{ 0x8086, 0x1c52, pIIxget, pIIxset },	/* Intel 82q65 cougar point pch */
+	{ 0x8086, 0x1c54, pIIxget, pIIxset },	/* Intel 82q67 cougar point pch */
+	{ 0x8086, 0x1e55, pIIxget, pIIxset },	/* Intel QM77 panter point lpc */
+
+	{ 0x1106, 0x0586, viaget, viaset },	/* Viatech 82C586 */
+	{ 0x1106, 0x0596, viaget, viaset },	/* Viatech 82C596 */
+	{ 0x1106, 0x0686, viaget, viaset },	/* Viatech 82C686 */
+	{ 0x1106, 0x3177, viaget, viaset },	/* Viatech VT8235 */
+	{ 0x1106, 0x3227, viaget, viaset },	/* Viatech VT8237 */
+	{ 0x1106, 0x3287, viaget, viaset },	/* Viatech VT8251 */
+	{ 0x1106, 0x8410, viaget, viaset },	/* Viatech PV530 bridge */
+	{ 0x1045, 0xc700, optiget, optiset },	/* Opti 82C700 */
+	{ 0x10b9, 0x1533, aliget, aliset },	/* Al M1533 */
+	{ 0x1039, 0x0008, pIIxget, pIIxset },	/* SI 503 */
+	{ 0x1039, 0x0496, pIIxget, pIIxset },	/* SI 496 */
+	{ 0x1078, 0x0100, cyrixget, cyrixset },	/* Cyrix 5530 Legacy */
+
+	{ 0x1022, 0x790e, nil, nil },		/* AMD FCH LPC bridge */
+	{ 0x1022, 0x746b, nil, nil },		/* AMD 8111 */
+	{ 0x10de, 0x00d1, nil, nil },		/* NVIDIA nForce 3 */
+	{ 0x10de, 0x00e0, nil, nil },		/* NVIDIA nForce 3 250 Series */
+	{ 0x10de, 0x00e1, nil, nil },		/* NVIDIA nForce 3 250 Series */
+	{ 0x1166, 0x0200, nil, nil },		/* ServerWorks ServerSet III LE */
+	{ 0x1002, 0x4377, nil, nil },		/* ATI Radeon Xpress 200M */
+	{ 0x1002, 0x4372, nil, nil },		/* ATI SB400 */
+	{ 0x1002, 0x9601, nil, nil },		/* AMD SB710 */
+	{ 0x1002, 0x438d, nil, nil },		/* AMD SB600 */
+	{ 0x1002, 0x439d, nil, nil },		/* AMD SB810 */
+};
+
+typedef struct Slot Slot;
+struct Slot {
+	uchar	bus;		/* Pci bus number */
+	uchar	dev;		/* Pci device number */
+	uchar	maps[12];	/* Avoid structs!  Link and mask. */
+	uchar	slot;		/* Add-in/built-in slot */
+	uchar	reserved;
+};
+
+typedef struct Router Router;
+struct Router {
+	uchar	signature[4];	/* Routing table signature */
+	uchar	version[2];	/* Version number */
+	uchar	size[2];	/* Total table size */
+	uchar	bus;		/* Interrupt router bus number */
+	uchar	devfn;		/* Router's devfunc */
+	uchar	pciirqs[2];	/* Exclusive PCI irqs */
+	uchar	compat[4];	/* Compatible PCI interrupt router */
+	uchar	miniport[4];	/* Miniport data */
+	uchar	reserved[11];
+	uchar	checksum;
+};
+
+static ushort pciirqs;		/* Exclusive PCI irqs */
+static Bridge *southbridge;	/* Which southbridge to use. */
+
+static void
+pcirouting(void)
+{	/* Walk the "$PIR" routing table and reconcile each device's PciINTL with what the south bridge router reports. */
+	Slot *e;
+	Router *r;
+	int i, size, tbdf;
+	Pcidev *sbpci, *pci;
+	uchar *p, pin, irq, link, *map;
+
+	if((p = sigsearch("$PIR", 0)) == nil)
+		return;
+
+	r = (Router*)p;
+	size = (r->size[1] << 8)|r->size[0];	/* table size is stored as two bytes, low byte first */
+	if(size < sizeof(Router) || checksum(r, size))
+		return;
+
+	if(0) print("PCI interrupt routing table version %d.%d at %p\n",
+		r->version[0], r->version[1], r);
+
+	tbdf = MKBUS(BusPCI, r->bus, (r->devfn>>3)&0x1f, r->devfn&7);	/* devfn: device in bits 7:3, function in bits 2:0 */
+	sbpci = pcimatchtbdf(tbdf);
+	if(sbpci == nil) {
+		print("pcirouting: Cannot find south bridge %T\n", tbdf);
+		return;
+	}
+
+	for(i = 0; i < nelem(southbridges); i++)	/* first matching vid/did entry wins */
+		if(sbpci->vid == southbridges[i].vid && sbpci->did == southbridges[i].did)
+			break;
+
+	if(i == nelem(southbridges)) {
+		print("pcirouting: ignoring south bridge %T %.4uX/%.4uX\n", tbdf, sbpci->vid, sbpci->did);
+		return;
+	}
+	southbridge = &southbridges[i];
+	if(southbridge->get == nil)	/* known bridge without accessors: nothing to do */
+		return;
+
+	pciirqs = (r->pciirqs[1] << 8)|r->pciirqs[0];
+	for(e = (Slot *)&r[1]; (uchar *)e < p + size; e++) {	/* Slot entries follow the Router header up to size bytes */
+		if(0) {
+			print("%.2uX/%.2uX %.2uX: ", e->bus, e->dev, e->slot);
+			for (i = 0; i < 4; i++) {
+				map = &e->maps[i * 3];
+				print("[%d] %.2uX %.4uX ", i, map[0], (map[2] << 8)|map[1]);
+			}
+			print("\n");
+		}
+		for(i = 0; i < 8; i++) {	/* every function number 0..7 of the slot's device */
+			tbdf = MKBUS(BusPCI, e->bus, (e->dev>>3)&0x1f, i);
+			pci = pcimatchtbdf(tbdf);
+			if(pci == nil)
+				continue;
+			pin = pcicfgr8(pci, PciINTP);
+			if(pin == 0 || pin == 0xff)
+				continue;
+
+			map = &e->maps[((pin - 1) % 4) * 3];	/* three bytes per pin; the first is the link value */
+			link = map[0];
+			irq = southbridge->get(sbpci, link);
+			if(irq == pci->intl)	/* router and device already agree */
+				continue;
+			if(irq == 0 || (irq & 0x80) != 0){	/* router has no usable irq: program it from the device's intl */
+				irq = pci->intl;
+				if(irq == 0 || irq == 0xff)
+					continue;
+				if(southbridge->set == nil)
+					continue;
+				southbridge->set(sbpci, link, irq);
+			}
+			print("pcirouting: %T at pin %d link %.2uX irq %d -> %d\n", tbdf, pin, link, pci->intl, irq);
+			pcicfgw8(pci, PciINTL, irq);	/* record the agreed irq in the device and in the cached copy */
+			pci->intl = irq;
+		}
+	}
+}
+
+static void
+pcireserve(void)
+{	/* Two passes over pciroot: reserve the ranges BARs already claim, then assign ranges to BARs that have a size but no address. */
+	char tag[64];
+	Pcidev *p;
+	uvlong pa;
+	ulong io;
+	int i;
+
+	/*
+	 * mark all valid io/mem address space claimed by pci devices
+	 * so that ioreserve/upaalloc doesn't give it out.
+	 */
+	for(p=pciroot; p != nil; p=p->list){
+		snprint(tag, sizeof(tag), "%T", p->tbdf);
+		for(i=0; i<nelem(p->mem); i++){
+			if(p->mem[i].size == 0)
+				continue;
+			if(p->mem[i].bar & 1){	/* bit 0 set: I/O BAR, address is bar with the low 2 bits masked off */
+				io = p->mem[i].bar & ~3ULL;
+				if(io == 0)
+					continue;
+				ioreserve(io, p->mem[i].size, 0, tag);
+			} else {	/* memory BAR: address is bar with the low 4 bits masked off */
+				pa = p->mem[i].bar & ~0xFULL;
+				if(pa == 0)
+					continue;
+				upaalloc(pa, p->mem[i].size, 0);
+			}
+		}
+		if(p->rom.size && (p->rom.bar & 1) != 0){	/* NOTE(review): bit 0 of the rom bar is presumably its enable bit; confirm */
+			pa = p->rom.bar & ~0x7FFULL;
+			upaalloc(pa, p->rom.size, 0);
+		}
+	}
+
+	/*
+	 * allocate io/mem address space for unassigned membars.
+	 */
+	for(p=pciroot; p != nil; p=p->list){
+		snprint(tag, sizeof(tag), "%T", p->tbdf);
+		for(i=0; i<nelem(p->mem); i++){
+			if(p->mem[i].size == 0)
+				continue;
+			if(p->mem[i].bar & 1){
+				if(p->mem[i].bar & ~0x3ULL)	/* already has an address */
+					continue;
+				if(p->parent == nil){
+					io = ioreserve(-1, p->mem[i].size, p->mem[i].size, tag);
+				} else {	/* behind a bridge: allocate inside the parent's io window */
+					io = ioreservewin(p->parent->ioa.bar, p->parent->ioa.size,
+						p->mem[i].size, p->mem[i].size, tag);
+				}
+				if(io == -1)
+					continue;
+				p->mem[i].bar |= io;
+			} else {
+				if(p->mem[i].bar & ~0xFULL)	/* already has an address */
+					continue;
+				if(p->parent == nil){
+					pa = upaalloc(-1ULL, p->mem[i].size, p->mem[i].size);
+				} else if(p->mem[i].bar & 8){	/* bit 3 set: try the parent's prefa window first */
+					pa = upaallocwin(p->parent->prefa.bar, p->parent->prefa.size,
+						p->mem[i].size, p->mem[i].size);
+					if(pa == -1ULL)
+						goto Mem;	/* fall back to the parent's mema window */
+				} else {
+				Mem:
+					pa = upaallocwin(p->parent->mema.bar, p->parent->mema.size,
+						p->mem[i].size, p->mem[i].size);
+				}
+				if(pa == -1ULL)
+					continue;
+				p->mem[i].bar |= pa;
+			}
+			pcisetbar(p, PciBAR0 + i*4, p->mem[i].bar);	/* reached only when a new address was assigned */
+			DBG("%s: bar%d: fixed %.8lluX %d\n", tag, i, p->mem[i].bar, p->mem[i].size);
+		}
+	}
+}
+
+void
+pcicfginit(void)
+{	/* Pick the config access method, scan the buses into pciroot, then set up address space and interrupt routing. */
+	char *p;
+	Pcidev **list;
+	int bno, n, pcibios;
+
+	fmtinstall('T', tbdffmt);
+
+	pcibios = 0;
+	if(getconf("*nobios"))
+		nobios = 1;
+	else if(getconf("*pcibios"))	/* *pcibios is only honoured when *nobios is absent */
+		pcibios = 1;
+	if(getconf("*nopcirouting"))
+		nopcirouting = 1;
+
+	/*
+	 * Try to determine which PCI configuration mode is implemented.
+	 * Mode2 uses a byte at 0xCF8 and another at 0xCFA; Mode1 uses
+	 * a DWORD at 0xCF8 and another at 0xCFC and will pass through
+	 * any non-DWORD accesses as normal I/O cycles. There shouldn't be
+	 * a device behind these addresses so if Mode1 accesses fail try
+	 * for Mode2 (Mode2 is deprecated).
+	 */
+	if(!pcibios){
+		/*
+		 * Bits [30:24] of PciADDR must be 0,
+		 * according to the spec.
+		 */
+		n = inl(PciADDR);
+		if(!(n & 0x7F000000)){
+			outl(PciADDR, 0x80000000);
+			outb(PciADDR+3, 0);
+			if(inl(PciADDR) & 0x80000000){	/* the byte write did not clear bit 31: mode 1 */
+				ioalloc(PciADDR, 4, 0, "pcicfg.addr");
+				ioalloc(PciDATA, 4, 0, "pcicfg.data");
+
+				pcicfgmode = 1;
+				pcimaxdno = 31;
+			}
+		}
+		outl(PciADDR, n);	/* put back the value read before probing */
+
+		if(pcicfgmode < 0){
+			/*
+			 * The 'key' part of PciCSE should be 0.
+			 */
+			n = inb(PciCSE);
+			if(!(n & 0xF0)){
+				outb(PciCSE, 0x0E);
+				if(inb(PciCSE) == 0x0E){	/* the written value reads back: mode 2 */
+					ioalloc(PciCSE, 1, 0, "pcicfg.cse");
+					ioalloc(PciFORWARD, 1, 0, "pcicfg.forward");
+					ioalloc(0xC000, 0x1000, 0, "pcicfg.io");
+
+					pcicfgmode = 2;
+					pcimaxdno = 15;
+				}
+			}
+			outb(PciCSE, n);	/* put back the value read before probing */
+		}
+	}
+
+	if(pcicfgmode < 0 || pcibios) {	/* neither port mode found, or BIOS access requested */
+		if((pcibiossi = pcibiosinit()) == nil)
+			goto out;
+		pcicfgrw8 = pcicfgrw8bios;
+		pcicfgrw16 = pcicfgrw16bios;
+		pcicfgrw32 = pcicfgrw32bios;
+		pcicfgmode = 3;
+	}
+
+	if(p = getconf("*pcimaxbno"))
+		pcimaxbno = strtoul(p, 0, 0);
+	if(p = getconf("*pcimaxdno")){	/* can only lower pcimaxdno, never raise it */
+		n = strtoul(p, 0, 0);
+		if(n < pcimaxdno)
+			pcimaxdno = n;
+	}
+
+	list = &pciroot;
+	for(bno = 0; bno <= pcimaxbno; bno++) {
+		int sbno = bno;
+		bno = pciscan(bno, list, nil);	/* the loop continues from whatever bus number pciscan returns */
+
+		while(*list)	/* advance to the end of the link chain for the next scan */
+			list = &(*list)->link;
+
+		if (sbno == 0) {
+			Pcidev *pci;
+
+			/*
+			  * If we have found a PCI-to-Cardbus bridge, make sure
+			  * it has no valid mappings anymore.
+			  */
+			for(pci = pciroot; pci != nil; pci = pci->link){
+				if (pci->ccrb == 6 && pci->ccru == 7) {
+					ushort bcr;
+
+					/* reset the cardbus */
+					bcr = pcicfgr16(pci, PciBCR);
+					pcicfgw16(pci, PciBCR, 0x40 | bcr);
+					delay(50);
+				}
+			}
+		}
+	}
+
+	if(pciroot == nil)
+		goto out;
+
+	/*
+	 * Disabling devices here (by clearing bus master enable)
+	 * causes problems with some OHCI USB controllers.
+	 * I suspect that this is due to legacy device emulation
+	 * and revoking bus master flag before executing the handoff
+	 * makes BIOS/SMM lock up the system.
+	 *
+	 * pcireset();
+	 */
+
+	if(nobios) {	/* *nobios: size and map the whole bus here, skipping pcireserve and pcirouting */
+		uvlong mema;
+		ulong ioa;
+
+		/*
+		 * Work out how big the top bus is
+		 */
+		pcibussize(pciroot, &mema, &ioa);
+		DBG("Size:  mem=%.8llux io=%lux\n", mema, ioa);
+
+		/*
+		 * Align the windows and map it
+		 */
+		mema = upaalloc(-1ULL, mema, mema);
+		if(mema == -1ULL)
+			panic("pcicfginit: can't allocate pci mem window");
+
+		ioa = ioreserve(-1, ioa, ioa, "pci");
+		if(ioa == -1UL)
+			panic("pcicfginit: can't allocate pci io window");
+
+		DBG("Base:  mem=%.8llux io=%lux\n", mema, ioa);
+		pcibusmap(pciroot, &mema, &ioa, 1);
+		DBG("Limit: mem=%.8llux io=%lux\n", mema, ioa);
+		goto out;
+	}
+
+	pcireserve();
+
+	if(!nopcirouting)
+		pcirouting();
+
+out:
+	if(getconf("*pcihinv"))	/* optional inventory dump; also reached on the early-exit paths */
+		pcihinv(pciroot);
+}
--- a/os/pc/screen.c
+++ b/os/pc/screen.c
@@ -5,6 +5,7 @@
 #include "fns.h"
 #include "io.h"
 #include "ureg.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 
 #include <draw.h>
@@ -14,6 +15,8 @@
 
 #define RGB2K(r,g,b)	((156763*(r)+307758*(g)+59769*(b))>>19)
 
+extern VGAcur vgasoftcur;
+
 Point ZP = {0, 0};
 
 Rectangle physgscreenr;
@@ -24,6 +27,13 @@
 
 VGAscr vgascreen[1];
 
+char *tiltstr[4] = {
+	"none",
+	"left",
+	"inverted",
+	"right",
+};
+
 Cursor	arrow = {
 	{ -1, -1 },
 	{ 0xFF, 0xFF, 0x80, 0x01, 0x80, 0x02, 0x80, 0x0C, 
@@ -38,6 +48,152 @@
 	},
 };
 
+static Point
+tiltpt(int tilt, Point dim, Point p)
+{	/* Map point p according to the low two bits of tilt, using the extents in dim; tilt 0 returns p unchanged. */
+	switch(tilt&3){
+	case 1:	return Pt(dim.y-p.y-1, p.x);
+	case 2:	return Pt(dim.x-p.x-1, dim.y-p.y-1);
+	case 3:	return Pt(p.y, dim.x-p.x-1);
+	}
+	return p;
+}
+
+static Rectangle
+tiltrect(int tilt, Point dim, Rectangle r)
+{	/* Map rectangle r according to the low two bits of tilt, using the extents in dim; tilt 0 returns r unchanged. */
+	switch(tilt&3){
+	case 1:	return Rect(dim.y-r.max.y, r.min.x, dim.y-r.min.y, r.max.x);
+	case 2:	return Rect(dim.x-r.max.x, dim.y-r.max.y, dim.x-r.min.x, dim.y-r.min.y);
+	case 3:	return Rect(r.min.y, dim.x-r.max.x, r.max.y, dim.x-r.min.x);
+	}
+	return r;
+}
+
+static Point
+tiltsize(int tilt, Point dim)
+{
+	return (tilt & 1) != 0 ? Pt(dim.y, dim.x) : dim;	/* odd tilt values swap x and y; even ones return dim as is */
+}
+
+Rectangle
+actualscreensize(VGAscr *scr)
+{
+	return Rpt(ZP, tiltsize(-scr->tilt, scr->gscreen->clipr.max));	/* only bit 0 of the tilt argument matters to tiltsize, so negating it changes nothing */
+}
+
+void
+setactualsize(VGAscr *scr, Rectangle r)
+{
+	/*
+	 * Set the clip rectangle of scr's gscreen to r, with the origin forced
+	 * to ZP and the extents passed through tiltsize, clipped to the image
+	 * rectangle.  Nothing is changed when rectclip returns 0.
+	 */
+	qlock(&drawlock);
+	r.min = ZP;
+	r.max = tiltsize(scr->tilt, r.max);
+	if(rectclip(&r, scr->gscreen->r) != 0)
+		scr->gscreen->clipr = r;
+	qunlock(&drawlock);
+}
+
+static char*
+setscreensize0(VGAscr *scr, int width, int height, int depth, ulong chan, int tilt)
+{	/* Replace gscreen with a new width x height image for chan and record the geometry in scr. Returns nil or an error string; may also call error(). */
+	int bpp, pitch;
+
+	scr->gscreendata = nil;
+	scr->gscreen = nil;
+	if(gscreen != nil){
+		freememimage(gscreen);
+		gscreen = nil;
+	}
+	if(scr->paddr == 0){	/* no linear framebuffer address: always use a soft screen */
+		if(scr->dev && scr->dev->page){
+			scr->vaddr = KADDR(VGAMEM());
+			scr->apsize = 1<<16;
+		}
+		scr->softscreen = 1;
+	}
+
+	depth = chantodepth(chan);	/* the depth argument is overwritten; only chan decides */
+	bpp = (depth+7) / 8;
+	pitch = ((width * depth+31) & ~31) / 8;	/* row length in bits rounded up to a multiple of 32, then converted to bytes */
+
+	if(tilt)
+		scr->softscreen = 1;
+	if(scr->softscreen){
+		gscreen = allocmemimage(Rpt(ZP, tiltsize(tilt, Pt(width, height))), chan);
+		scr->useflush = 1;
+	}else{
+		static Memdata md;
+
+		md.ref = 1;
+		if((md.bdata = scr->vaddr) == 0)
+			error("framebuffer not maped");
+		gscreen = allocmemimaged(Rpt(ZP, Pt(width, height)), chan, &md);	/* image data is the mapped framebuffer itself */
+		scr->useflush = scr->dev && scr->dev->flush;
+	}
+	if(gscreen == nil)
+		return "no memory for vga memimage";
+
+	scr->bpp = bpp;
+	scr->pitch = pitch;
+	scr->width = width;
+	scr->height = height;
+	scr->tilt = tilt & 3;
+
+	scr->palettedepth = 6;	/* default */
+	scr->memdefont = getmemdefont();
+	scr->gscreen = gscreen;
+	scr->gscreendata = gscreen->data;
+
+	return nil;
+}
+
+void
+setscreensize(VGAscr *scr, int x, int y, int z, ulong chan, int tilt)
+{	/* Locked wrapper around setscreensize0: holds drawlock throughout and vgascreenlock around the resize; failures are raised with error(). */
+	char *err;
+
+	qlock(&drawlock);
+	if(waserror()){	/* outer handler: release drawlock and re-raise */
+		qunlock(&drawlock);
+		nexterror();
+	}
+
+	if(memimageinit() < 0)
+		error("memimageinit failed");
+
+	lock(&vgascreenlock);
+	if(waserror()){	/* inner handler: release vgascreenlock, then continue to the outer handler */
+		unlock(&vgascreenlock);
+		nexterror();
+	}
+
+	err = setscreensize0(scr, x, y, z, chan, tilt);
+	if(err != nil)
+		error(err);
+
+	vgaimageinit(chan);
+	bootscreenconf(scr);
+
+	unlock(&vgascreenlock);
+	poperror();
+
+	drawcmap();
+
+	if(scr->cur && scr->cur != &vgasoftcur){	/* a cursor other than vgasoftcur is reloaded from the global cursor */
+		cursoroff();
+		setcursor(&cursor);
+		cursoron(0);
+	}
+
+	qunlock(&drawlock);
+	poperror();
+}
+
 int
 screensize(int x, int y, int z, ulong chan)
 {
@@ -56,7 +212,7 @@
 		gscreendata.bdata = xalloc(width*BY2WD*y);
 		if(gscreendata.bdata == 0)
 			error("screensize: vga soft memory");
-/*		memset(gscreendata.bdata, 0x72, width*BY2WD*y);	/* not really black */
+/*		memset(gscreendata.bdata, 0x72, width*BY2WD*y);	 not really black */
 		scr->useflush = 1;
 		scr->aperture = VGAMEM();
 		scr->apsize = 1<<16;
@@ -215,7 +371,7 @@
 }
 
 void
-getcolor(ulong p, ulong* pr, ulong* pg, ulong* pb)
+getcolor(u32 p, u32* pr, u32* pg, u32* pb)
 {
 	VGAscr *scr;
 	ulong x;
@@ -242,7 +398,7 @@
 }
 
 int
-setpalette(ulong p, ulong r, ulong g, ulong b)
+setpalette(u32 p, u32 r, u32 g, u32 b)
 {
 	VGAscr *scr;
 	int d;
@@ -269,7 +425,7 @@
  * is trying to set a colormap and the card is in one of these modes.
  */
 int
-setcolor(ulong p, ulong r, ulong g, ulong b)
+setcolor(u32 p, u32 r, u32 g, u32 b)
 {
 	VGAscr *scr;
 	int x;
@@ -315,7 +471,7 @@
 }
 
 void
-cursoroff(int)
+cursoroff(void)
 {
 }
 
@@ -407,4 +563,274 @@
 void
 cursordisable(void)
 {
+}
+
+static char*
+vgalinearaddr0(VGAscr *scr, uvlong paddr, int size)
+{	/* Map size bytes at physical address paddr as scr's framebuffer. Returns nil on success or an error string. */
+	int x, nsize;
+	uvlong npaddr;
+
+	/*
+	 * new approach.  instead of trying to resize this
+	 * later, let's assume that we can just allocate the
+	 * entire window to start with.
+	 */
+	if(scr->paddr == paddr && size <= scr->apsize)	/* already mapped at this address and large enough */
+		return nil;
+
+	if(scr->paddr){
+		/*
+		 * could call vunmap and vmap,
+		 * but worried about dangling pointers in devdraw
+		 */
+		return "cannot grow vga frame buffer";
+	}
+
+	/* round to page boundary, just in case */
+	x = paddr&(BY2PG-1);
+	npaddr = paddr-x;
+	nsize = PGROUND(size+x);
+
+	/*
+	 * Don't bother trying to map more than 4000x4000x32 = 64MB.
+	 * We only have a 256MB window.
+	 */
+	if(nsize > 64*MB)
+		nsize = 64*MB;
+	scr->vaddr = vmap(npaddr, nsize);
+	if(scr->vaddr == nil)
+		return "cannot map vga frame buffer";
+
+	patwc(scr->vaddr, nsize);
+
+	scr->vaddr = (char*)scr->vaddr+x;	/* re-apply the offset that page rounding removed */
+	scr->paddr = paddr;
+	scr->apsize = nsize;
+
+	mtrr(npaddr, nsize, "wc");
+
+	return nil;
+}
+
+static char*
+vgalinearpci0(VGAscr *scr)
+{	/* Choose a memory BAR of scr->pci as the framebuffer and map it with vgalinearaddr0. Returns nil or an error string. */
+	int i, size, best;
+	uvlong paddr;
+	Pcidev *p;
+	
+	p = scr->pci;
+	if(p == nil)
+		return "no pci card";
+
+	/*
+	 * Scan for largest memory region on card.
+	 * Some S3 cards (e.g. Savage) have enormous
+	 * mmio regions (but even larger frame buffers).
+	 * Some 3dfx cards (e.g., Voodoo3) have mmio
+	 * buffers the same size as the frame buffer,
+	 * but only the frame buffer is marked as
+	 * prefetchable (bar&8).  If a card doesn't fit
+	 * into these heuristics, its driver will have to
+	 * call vgalinearaddr directly.
+	 */
+	best = -1;
+	for(i=0; i<nelem(p->mem); i++){
+		if(p->mem[i].bar&1)	/* not memory */
+			continue;
+		if(p->mem[i].size < 640*480)	/* not big enough */
+			continue;
+		if(best==-1 
+		|| p->mem[i].size > p->mem[best].size 
+		|| (p->mem[i].size == p->mem[best].size 
+		  && (p->mem[i].bar&8)
+		  && !(p->mem[best].bar&8)))	/* on a size tie, a bar&8 region replaces one without it */
+			best = i;
+	}
+	if(best >= 0){
+		paddr = p->mem[best].bar & ~0x0F;	/* drop the low four bits of the BAR value */
+		size = p->mem[best].size;
+		return vgalinearaddr0(scr, paddr, size);
+	}
+	return "no video memory found on pci card";
+}
+
+void
+vgalinearpci(VGAscr *scr)
+{	/* Map the framebuffer of scr's PCI card; does nothing without a card and raises error() when vgalinearpci0 fails. */
+	char *msg;
+
+	if(scr->pci != nil){
+		if((msg = vgalinearpci0(scr)) != nil)
+			error(msg);
+	}
+}
+
+void
+vgalinearaddr(VGAscr *scr, uvlong paddr, int size)
+{	/* Map size bytes at paddr as scr's framebuffer; raises error() with the message vgalinearaddr0 returns on failure. */
+	char *msg;
+	msg = vgalinearaddr0(scr, paddr, size);
+	if(msg != nil)
+		error(msg);
+}
+
+static char*
+bootmapfb(VGAscr *scr, uvlong pa, ulong sz)
+{	/* Map the boot framebuffer [pa, pa+sz): through the PCI device whose memory BAR starts at pa, else directly. Returns nil or an error string. */
+	uvlong start, end;
+	Pcidev *p;
+	int i;
+
+	for(p = pcimatch(nil, 0, 0); p != nil; p = pcimatch(p, 0, 0)){	/* visit every PCI device */
+		for(i=0; i<nelem(p->mem); i++){
+			if(p->mem[i].size == 0 || (p->mem[i].bar & 1) != 0)	/* skip empty and I/O BARs */
+				continue;
+			start = p->mem[i].bar & ~0xF;
+			end = start + p->mem[i].size;
+			if(pa == start && (pa + sz) <= end){	/* pa must be exactly the BAR base and the range must fit inside it */
+				scr->pci = p;
+				return vgalinearpci0(scr);
+			}
+		}
+	}
+	upaalloc(pa, sz, 0);	/* no BAR matched: claim the range and map it as given */
+	return vgalinearaddr0(scr, pa, sz);
+}
+
+char*
+rgbmask2chan(char *buf, int depth, u32int rm, u32int gm, u32int bm)
+{	/* Build a channel string such as "x8r8g8b8" in buf from the r/g/b bit masks of a depth-bit pixel; returns buf, which the caller must size. */
+	u32int m[4], dm;	/* r,g,b,x */
+	char tmp[32];
+	int c, n;
+
+	dm = 1<<depth-1;	/* parses as 1<<(depth-1): the top bit of a depth-bit pixel */
+	dm |= dm-1;	/* now all depth low bits are set */
+
+	m[0] = rm & dm;
+	m[1] = gm & dm;
+	m[2] = bm & dm;
+	m[3] = (~(m[0] | m[1] | m[2])) & dm;	/* x: every pixel bit not covered by r, g or b */
+
+	buf[0] = 0;
+Next:
+	for(c=0; c<4; c++){
+		for(n = 0; m[c] & (1<<n); n++)	/* count the run of set bits at the bottom of this mask */
+			;
+		if(n){
+			m[0] >>= n, m[1] >>= n, m[2] >>= n, m[3] >>= n;
+			snprint(tmp, sizeof tmp, "%c%d%s", "rgbx"[c], n, buf);	/* prepend, so the lowest bits end up last in the string */
+			strcpy(buf, tmp);
+			goto Next;
+		}
+	}
+	return buf;
+}
+
+/*
+ * called early on boot to attach to framebuffer
+ * setup by bootloader/firmware or plan9.
+ */
+void
+bootscreeninit(void)
+{	/* Parse *bootscreen, map the framebuffer it names and bring up vgascreen[0] with the soft cursor; returns silently on a malformed value. */
+	VGAscr *scr;
+	int x, y, z, tilt;
+	uvlong pa;
+	ulong chan, sz;
+	char *s, *p, *err;
+
+	print("pc bootscreeninit\n");
+	/* *bootscreen=WIDTHxHEIGHTxDEPTH CHAN PA [SZ] */
+	s = getconf("*bootscreen");
+	if(s == nil){
+		print("bootscreen == nil\n");
+		return;
+	}
+
+	x = strtoul(s, &s, 0);
+	if(x == 0 || *s++ != 'x')
+		return;
+
+	y = strtoul(s, &s, 0);
+	if(y == 0 || *s++ != 'x')
+		return;
+
+	z = strtoul(s, &s, 0);
+	if(*s != ' ')
+		return;
+	if((p = strchr(++s, ' ')) == nil)
+		return;
+	*p = 0;	/* terminate the CHAN field for strtochan; the space is restored right after */
+	chan = strtochan(s);
+	*p = ' ';
+	if(chan == 0 || chantodepth(chan) != z)	/* CHAN must parse and agree with DEPTH */
+		return;
+
+	sz = 0;
+	pa = strtoull(p+1, &s, 0);
+	if(pa == 0)
+		return;
+	if(*s == ' ')	/* optional SZ field */
+		sz = strtoul(s+1, nil, 0);
+	if(sz < x * y * (z+7)/8)	/* NOTE(review): parses as (x*y*(z+7))/8, not x*y*((z+7)/8); confirm which is intended */
+		sz = x * y * (z+7)/8;
+
+	tilt = 0;
+	if((p = getconf("tiltscreen")) != nil){
+		for(; tilt < nelem(tiltstr); tilt++)
+			if(strcmp(p, tiltstr[tilt]) == 0)
+				break;
+		tilt &= 3;	/* an unknown name leaves tilt at 4, which this mask turns into 0 */
+	}
+
+	scr = &vgascreen[0];
+	scr->dev = nil;
+	scr->softscreen = 1;
+
+	if((err = bootmapfb(scr, pa, sz)) != nil){
+		print("bootmapfb: %s\n", err);
+		return;
+	}
+
+	if(memimageinit() < 0){
+		print("memimageinit failed\n");
+		return;
+	}
+	if((err = setscreensize0(scr, x, y, z, chan, tilt)) != nil){
+		print("setscreensize0: %s\n", err);
+		return;
+	}
+
+	vgaimageinit(chan);
+	vgascreenwin(scr);
+
+	drawcmap();
+
+	scr->cur = &vgasoftcur;
+	scr->cur->enable(scr);
+
+	conf.monitor = 1;
+}
+
+/*
+ * called from devvga when the framebuffer is setup
+ * to set *bootscreen= that can be passed on to a
+ * new kernel on reboot.
+ */
+void
+bootscreenconf(VGAscr *scr)
+{	/* Publish scr's mode as *bootscreen (and its tilt as tiltscreen); *bootscreen is set to the empty string when scr has no mapped screen. */
+	char conf[100], chan[30];	/* this local conf hides the global conf inside the function */
+
+	conf[0] = '\0';
+	if(scr != nil && scr->paddr != 0 && scr->gscreen != nil){
+		snprint(conf, sizeof(conf), "%dx%dx%d %s 0x%.8llux %d\n",
+			scr->width, scr->height, scr->gscreen->depth, chantostr(chan, scr->gscreen->chan),
+			scr->paddr, scr->apsize);
+		ksetenv("tiltscreen", tiltstr[scr->tilt], 1);
+	}
+	ksetenv("*bootscreen", conf, 1);
 }
--- a/os/pc/screen.h
+++ b/os/pc/screen.h
@@ -103,6 +103,7 @@
 struct VGAscr {
 	Lock	devlock;
 	VGAdev*	dev;
+	Pcidev*	pci;
 
 	VGAcur*	cur;
 	ulong	storage;
@@ -110,10 +111,20 @@
 
 	int	useflush;
 
-	ulong	aperture;			/* physical address */
+	union {			/* physical address */
+		uintptr	aperture;
+		uintptr	paddr;
+	};
+	void*	vaddr;
 	int	isupamem;
 	int	apsize;
 
+	int	bpp;
+	int	pitch;
+
+	int	width;
+	int	height;
+
 	ulong	io;				/* device specific registers */
 
 	ulong	colormap[Pcolours][3];
@@ -128,11 +139,13 @@
 	int	(*scroll)(VGAscr*, Rectangle, Rectangle);
 	void	(*blank)(VGAscr*, int);
 	ulong	id;	/* internal identifier for driver use */
+	u32	softscreen;
+	u32	tilt;
 	int isblank;
 	int overlayinit;
 };
 
-extern VGAscr vgascreen[];
+extern VGAscr vgascreen[1];
 
 enum {
 	Backgnd		= 0,	/* black */
@@ -145,29 +158,32 @@
 extern int		hwaccel;	/* use hw acceleration; default on */
 extern int		hwblank;	/* use hw blanking; default on */
 extern int		panning;	/* use virtual screen panning; default off */
-extern void addvgaseg(char*, ulong, ulong);
+extern void addvgaseg(char*, u32, u32);
 extern uchar* attachscreen(Rectangle*, ulong*, int*, int*, int*);
 extern void	flushmemscreen(Rectangle);
 extern int	cursoron(int);
-extern void	cursoroff(int);
+extern void	cursoroff(void);
 extern void	setcursor(Cursor*);
 extern int	screensize(int, int, int, ulong);
 extern int	screenaperture(int, int);
 extern Rectangle physgscreenr;	/* actual monitor size */
 extern void	blankscreen(int);
+extern void	bootscreeninit(void);
+extern void	bootscreenconf(VGAscr*);
 
 /* devdraw.c */
 extern void	deletescreenimage(void);
 extern int		drawhasclients(void);
-extern ulong	blanktime;
+extern u32	blanktime;
 extern void	setscreenimageclipr(Rectangle);
 extern void	drawflush(void);
 extern int drawidletime(void);
+extern QLock	drawlock;
 
 /* vga.c */
 extern void	vgascreenwin(VGAscr*);
 extern void	vgaimageinit(ulong);
 extern ulong	vgapcilinear(VGAscr*, int*, int*, int, int);
-
 extern void	drawblankscreen(int);
 extern void	vgablank(VGAscr*, int);
+extern Lock	vgascreenlock;
--- a/os/pc/sdscsi.c
+++ b/os/pc/sdscsi.c
@@ -168,7 +168,6 @@
 		default:
 			return -1;
 		}
-		return -1;
 	case SDok:
 		return 0;
 	}
--- /dev/null
+++ b/os/pc/sdvirtio.c
@@ -1,0 +1,700 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "../port/sd.h"
+
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vqueue Vqueue;
+typedef struct Vdev Vdev;
+
+typedef struct ScsiCfg ScsiCfg;
+
+/* device types */
+enum {
+	TypBlk	= 2,
+	TypSCSI	= 8,
+};
+
+/* status flags */
+enum {
+	Acknowledge = 1,
+	Driver = 2,
+	DriverOk = 4,
+	Failed = 0x80,
+};
+
+/* virtio ports */
+enum {
+	Devfeat = 0,
+	Drvfeat = 4,
+	Qaddr = 8,
+	Qsize = 12,
+	Qselect = 14,
+	Qnotify = 16,
+	Status = 18,
+	Isr = 19,
+
+	Devspec = 20,
+};
+
+/* descriptor flags */
+enum {
+	Next = 1,
+	Write = 2,
+	Indirect = 4,
+};
+
+/* struct sizes */
+enum {
+	VringSize = 4,
+};	
+
+struct Vring
+{
+	u16int	flags;
+	u16int	idx;
+};
+
+struct Vdesc
+{
+	u64int	addr;
+	u32	len;
+	u16int	flags;
+	u16int	next;
+};
+
+struct Vused
+{
+	u32	id;
+	u32	len;
+};
+
+struct Vqueue
+{
+	Lock;
+
+	Vdev	*dev;
+	int	idx;
+
+	int	size;
+
+	int	free;
+	int	nfree;
+
+	Vdesc	*desc;
+
+	Vring	*avail;
+	u16int	*availent;
+	u16int	*availevent;
+
+	Vring	*used;
+	Vused	*usedent;
+	u16int	*usedevent;
+	u16int	lastused;
+
+	void	*rock[];
+};
+
+struct Vdev
+{
+	int	typ;
+
+	Pcidev	*pci;
+
+	ulong	port;
+	ulong	feat;
+
+	int	nqueue;
+	Vqueue	*queue[16];
+
+	void	*cfg;	/* device specific config (for scsi) */
+
+	Vdev	*next;
+};
+
+enum {
+	CDBSIZE		= 32,
+	SENSESIZE	= 96,
+};
+
+struct ScsiCfg
+{
+	u32	num_queues;
+	u32	seg_max;
+	u32	max_sectors;
+	u32	cmd_per_lun;
+	u32	event_info_size;
+	u32	sense_size;
+	u32	cdb_size;
+	u16int	max_channel;
+	u16int	max_target;
+	u32	max_lun;
+};
+
+static Vqueue*
+mkvqueue(int size)	/* allocate a queue of size descriptors; returns nil when out of memory */
+{
+	Vqueue *q;
+	uchar *p;
+	int i;
+
+	q = malloc(sizeof(*q) + sizeof(void*)*size);	/* header plus one rock[] slot per descriptor */
+	p = mallocalign(
+		PGROUND(sizeof(Vdesc)*size + 	/* first part: descriptor table, */
+			VringSize + 	/* avail ring header, */
+			sizeof(u16int)*size + 	/* avail ring entries, */
+			sizeof(u16int)) +	/* and the avail event word; rounded up to a page */
+		PGROUND(VringSize + 	/* second part: used ring header, */
+			sizeof(Vused)*size + 	/* used ring entries, */
+			sizeof(u16int)), 	/* and the used event word */
+		BY2PG, 0, 0);
+	if(p == nil || q == nil){
+		print("virtio: no memory for Vqueue\n");
+		free(p);	/* whichever of the two allocations succeeded is released */
+		free(q);
+		return nil;
+	}
+
+	q->desc = (void*)p;	/* carve the first part in the order it was sized above */
+	p += sizeof(Vdesc)*size;
+	q->avail = (void*)p;
+	p += VringSize;
+	q->availent = (void*)p;
+	p += sizeof(u16int)*size;
+	q->availevent = (void*)p;
+	p += sizeof(u16int);
+
+	p = (uchar*)PGROUND((uintptr)p);	/* the used ring starts on the next page boundary */
+	q->used = (void*)p;
+	p += VringSize;
+	q->usedent = (void*)p;
+	p += sizeof(Vused)*size;
+	q->usedevent = (void*)p;
+
+	q->free = -1;	/* empty free list */
+	q->nfree = q->size = size;
+	for(i=0; i<size; i++){	/* push every descriptor; the head ends up at size-1 */
+		q->desc[i].next = q->free;
+		q->free = i;
+	}
+
+	return q;
+}
+
+static Vdev*
+viopnpdevs(int typ)	/* probe PCI for virtio devices of type typ; returns a list linked through ->next */
+{
+	Vdev *vd, *h, *t;
+	Vqueue *q;
+	Pcidev *p;
+	int n, i;
+
+	h = t = nil;	/* head and tail of the list being built */
+	for(p = nil; p = pcimatch(p, 0x1AF4, 0);){
+		if((p->did < 0x1000) || (p->did > 0x103F))
+			continue;
+		if(p->rid != 0)
+			continue;
+		if((p->mem[0].bar & 1) == 0)	/* BAR0 must have bit 0 set; it is used as an i/o port base below */
+			continue;
+		if(pcicfgr16(p, 0x2E) != typ)	/* config word 0x2E must equal the requested device type */
+			continue;
+		if((vd = malloc(sizeof(*vd))) == nil){
+			print("virtio: no memory for Vdev\n");
+			break;
+		}
+		vd->port = p->mem[0].bar & ~3;	/* strip the low flag bits to get the port number */
+		if(ioalloc(vd->port, p->mem[0].size, 0, "virtio") < 0){
+			print("virtio: port %lux in use\n", vd->port);
+			free(vd);
+			continue;
+		}
+		vd->typ = typ;
+		vd->pci = p;
+		pcienable(p);
+
+		/* reset */
+		outb(vd->port+Status, 0);
+
+		vd->feat = inl(vd->port+Devfeat);
+		outb(vd->port+Status, Acknowledge|Driver);
+		for(i=0; i<nelem(vd->queue); i++){	/* set up queues until the device reports an unusable size */
+			outs(vd->port+Qselect, i);
+			n = ins(vd->port+Qsize);
+			if(n == 0 || (n & (n-1)) != 0)	/* stop at a zero or non power-of-two size */
+				break;
+			if((q = mkvqueue(n)) == nil)
+				break;
+			q->dev = vd;
+			q->idx = i;
+			vd->queue[i] = q;
+			coherence();
+			outl(vd->port+Qaddr, PADDR(vd->queue[i]->desc)/BY2PG);	/* the device is given the page number of the ring */
+		}
+		vd->nqueue = i;	/* number of queues actually created */
+	
+		if(h == nil)
+			h = vd;
+		else
+			t->next = vd;
+		t = vd;
+	}
+
+	return h;
+}
+
+struct Rock {
+	int done;
+	Rendez *sleep;
+};
+
+static void
+vqinterrupt(Vqueue *q)	/* reap completed requests from the used ring: wake waiters, recycle descriptors */
+{
+	int id, free, m;
+	struct Rock *r;
+	Rendez *z;
+
+	m = q->size-1;	/* index mask; viopnpdevs only accepts power-of-two queue sizes */
+
+	ilock(q);
+	while((q->lastused ^ q->used->idx) & m){	/* entries the device has added since the last pass */
+		id = q->usedent[q->lastused++ & m].id;	/* head descriptor of the finished chain */
+		if(r = q->rock[id]){
+			q->rock[id] = nil;
+			z = r->sleep;	/* fetched before done is set: r lives on the waiter's stack (see vqio) */
+			r->done = 1;	/* hands off */
+			if(z != nil)
+				wakeup(z);
+		}
+		do {	/* return each descriptor of the chain to the free list */
+			free = id;
+			id = q->desc[free].next;	/* remember the chain successor before next is overwritten */
+			q->desc[free].next = q->free;
+			q->free = free;
+			q->nfree++;
+		} while(q->desc[free].flags & Next);
+	}
+	iunlock(q);
+}
+
+static void
+viointerrupt(Ureg *, void *arg)
+{
+	Vdev *vd = arg;
+
+	if(inb(vd->port+Isr) & 1)
+		vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
+}
+
+static int
+viodone(void *arg)
+{
+	return ((struct Rock*)arg)->done;
+}
+
+static void
+vqio(Vqueue *q, int head)	/* publish chain head to the device and wait for it; entered with q locked, returns unlocked */
+{
+	struct Rock rock;	/* completion record on this stack; vqinterrupt sets done and wakes sleep */
+
+	rock.done = 0;
+	rock.sleep = &up->sleep;
+	q->rock[head] = &rock;
+	q->availent[q->avail->idx & (q->size-1)] = head;
+	coherence();	/* the ring entry is written before the index that exposes it */
+	q->avail->idx++;
+	iunlock(q);	/* releases the lock taken by the caller */
+	if((q->used->flags & 1) == 0)	/* NOTE(review): bit 0 presumably is the device's no-notify hint - confirm */
+		outs(q->dev->port+Qnotify, q->idx);
+	while(!rock.done){
+		while(waserror())	/* an error raised out of tsleep just re-arms the label and retries */
+			;
+		tsleep(rock.sleep, viodone, &rock, 1000);
+		poperror();
+
+		if(!rock.done)	/* still not done after the sleep: poll the used ring directly */
+			vqinterrupt(q);
+	}
+}
+
+static int
+vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)	/* one block request on queue 0; returns the device status byte */
+{
+	int need, free, head;
+	Vqueue *q;
+	Vdesc *d;
+
+	u8int status;	/* written by the device through the last descriptor */
+	struct Vioblkreqhdr {
+		u32	typ;
+		u32	prio;
+		u64int	lba;
+	} req;
+
+	need = 2;	/* header and status descriptors */
+	if(a != nil)
+		need = 3;	/* plus one for the data buffer */
+
+	status = -1;	/* 0xFF in a u8int; returned unchanged if the device never writes it */
+	req.typ = typ;
+	req.prio = 0;
+	req.lba = lba;
+
+	q = vd->queue[0];
+	ilock(q);
+	while(q->nfree < need){	/* wait, with the lock dropped, until enough descriptors are free */
+		iunlock(q);
+
+		if(!waserror())
+			tsleep(&up->sleep, return0, 0, 500);
+		poperror();
+
+		ilock(q);
+	}
+
+	head = free = q->free;	/* the chain is taken from the front of the free list */
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(&req);
+	d->len = sizeof(req);
+	d->flags = Next;
+
+	if(a != nil){
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(a);
+		d->len = secsize*count;
+		d->flags = typ ? Next : (Write|Next);	/* typ 0 (viobio's read case) marks the buffer with Write */
+	}
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(&status);
+	d->len = sizeof(status);
+	d->flags = Write;	/* no Next: end of chain */
+
+	q->free = free;
+	q->nfree -= need;
+
+	/* queue io, unlock and wait for completion */
+	vqio(q, head);
+
+	return status;
+}
+
+static int
+vioscsireq(SDreq *r)
+{
+	u8int resp[4+4+2+2+SENSESIZE];
+	u8int req[8+8+3+CDBSIZE];
+	int free, head;
+	u32 len;
+	Vqueue *q;
+	Vdesc *d;
+	Vdev *vd;
+	SDunit *u;
+	ScsiCfg *cfg;
+
+	u = r->unit;
+	vd = u->dev->ctlr;
+	cfg = vd->cfg;
+
+	memset(resp, 0, sizeof(resp));
+	memset(req, 0, sizeof(req));
+	req[0] = 1;
+	req[1] = u->subno;
+	req[2] = r->lun>>8;
+	req[3] = r->lun&0xFF;
+	*(u64int*)(&req[8]) = (uintptr)r;
+
+	memmove(&req[8+8+3], r->cmd, r->clen);
+
+	q = vd->queue[2];
+	ilock(q);
+	while(q->nfree < 3){
+		iunlock(q);
+
+		if(!waserror())
+			tsleep(&up->sleep, return0, 0, 500);
+		poperror();
+
+		ilock(q);
+	}
+
+	head = free = q->free;
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(req);
+	d->len = 8+8+3+cfg->cdb_size;
+	d->flags = Next;
+
+	if(r->write && r->dlen > 0){
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(r->data);
+		d->len = r->dlen;
+		d->flags = Next;
+	}
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(resp);
+	d->len = 4+4+2+2+cfg->sense_size;
+	d->flags = Write;
+
+	if(!r->write && r->dlen > 0){
+		d->flags |= Next;
+
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(r->data);
+		d->len = r->dlen;
+		d->flags = Write;
+	}
+	
+	q->free = free;
+	q->nfree -= 2 + (r->dlen > 0);
+
+	/* queue io, unlock and wait for completion */
+	vqio(q, head);
+
+	/* response+status */
+	r->status = resp[10];
+	if(resp[11] != 0)
+		r->status = SDcheck;
+
+	/* sense_len */
+	len = *((u32*)&resp[0]);
+	if(len > 0){
+		if(len > sizeof(r->sense))
+			len = sizeof(r->sense);
+		memmove(r->sense, &resp[4+4+2+2], len);
+		r->flags |= SDvalidsense;
+	}
+
+	/* data residue */
+	len = *((u32*)&resp[4]);
+	if(len > r->dlen)
+		r->rlen = 0;
+	else
+		r->rlen = r->dlen - len;
+
+	return r->status;
+
+}
+
+static long
+viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
+{
+	long ss, cc, max, ret;
+	Vdev *vd;
+
+	vd = u->dev->ctlr;
+	if(vd->typ == TypSCSI)
+		return scsibio(u, lun, write, a, count, lba);
+
+	max = 32;
+	ss = u->secsize;
+	ret = 0;
+	while(count > 0){
+		if((cc = count) > max)
+			cc = max;
+		if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0)
+			error(Eio);
+		ret += cc*ss;
+		count -= cc;
+		lba += cc;
+	}
+	return ret;
+}
+
+static int
+viorio(SDreq *r)
+{
+	int i, count, rw;
+	uvlong lba;
+	SDunit *u;
+	Vdev *vd;
+
+	u = r->unit;
+	vd = u->dev->ctlr;
+	if(vd->typ == TypSCSI)
+		return vioscsireq(r);
+	if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
+		if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
+			return sdsetsense(r, SDcheck, 3, 0xc, 2);
+		return sdsetsense(r, SDok, 0, 0, 0);
+	}
+	if((i = sdfakescsi(r)) != SDnostatus)
+		return r->status = i;
+	if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
+		return i;
+	r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
+	return r->status = SDok;
+}
+
+static int
+vioonline(SDunit *u)	/* returns 2 when the sector count changed, 1 otherwise; SCSI devices defer to scsionline */
+{
+	uvlong cap;
+	Vdev *vd;
+
+	vd = u->dev->ctlr;
+	if(vd->typ == TypSCSI)
+		return scsionline(u);
+
+	cap = inl(vd->port+Devspec+4);	/* high 32 bits of the value at Devspec */
+	cap <<= 32;
+	cap |= inl(vd->port+Devspec);	/* low 32 bits */
+	if(u->sectors != cap){
+		u->sectors = cap;
+		u->secsize = 512;	/* sector size is fixed at 512 here */
+		return 2;
+	}
+	return 1;
+}
+
+static int
+vioverify(SDunit *u)
+{
+	Vdev *vd;
+
+	vd = u->dev->ctlr;
+	if(vd->typ == TypSCSI)
+		return scsiverify(u);
+
+	return 1;
+}
+
+SDifc sdvirtioifc;
+
+static int
+vioenable(SDev *sd)
+{
+	char name[32];
+	Vdev *vd;
+
+	vd = sd->ctlr;
+	pcisetbme(vd->pci);
+	snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
+	intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
+	outb(vd->port+Status, inb(vd->port+Status) | DriverOk);
+	return 1;
+}
+
+static int
+viodisable(SDev *sd)
+{
+	char name[32];
+	Vdev *vd;
+
+	vd = sd->ctlr;
+	snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
+	intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
+	pciclrbme(vd->pci);
+	return 1;
+}
+
+static SDev*
+viopnp(void)
+{
+	SDev *s, *h, *t;
+	Vdev *vd;
+	int id;
+
+	h = t = nil;
+
+	id = 'F';
+	for(vd =  viopnpdevs(TypBlk); vd; vd = vd->next){
+		if(vd->nqueue != 1)
+			continue;
+
+		if((s = malloc(sizeof(*s))) == nil)
+			break;
+		s->ctlr = vd;
+		s->idno = id++;
+		s->ifc = &sdvirtioifc;
+		s->nunit = 1;
+		if(h)
+			t->next = s;
+		else
+			h = s;
+		t = s;
+	}
+
+	id = '0';
+	for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
+		ScsiCfg *cfg;
+
+		if(vd->nqueue < 3)
+			continue;
+
+		if((cfg = malloc(sizeof(*cfg))) == nil)
+			break;
+		cfg->num_queues = inl(vd->port+Devspec+4*0);
+		cfg->seg_max = inl(vd->port+Devspec+4*1);
+		cfg->max_sectors = inl(vd->port+Devspec+4*2);
+		cfg->cmd_per_lun = inl(vd->port+Devspec+4*3);
+		cfg->event_info_size = inl(vd->port+Devspec+4*4);
+		cfg->sense_size = inl(vd->port+Devspec+4*5);
+		cfg->cdb_size = inl(vd->port+Devspec+4*6);
+		cfg->max_channel = ins(vd->port+Devspec+4*7);
+		cfg->max_target = ins(vd->port+Devspec+4*7+2);
+		cfg->max_lun = inl(vd->port+Devspec+4*8);
+
+		if(cfg->max_target == 0){
+			free(cfg);
+			continue;
+		}
+		if((cfg->cdb_size > CDBSIZE) || (cfg->sense_size > SENSESIZE)){
+			print("sdvirtio: cdb %ud or sense size %ud too big\n",
+				cfg->cdb_size, cfg->sense_size);
+			free(cfg);
+			continue;
+		}
+		vd->cfg = cfg;
+
+		if((s = malloc(sizeof(*s))) == nil)
+			break;
+		s->ctlr = vd;
+		s->idno = id++;
+		s->ifc = &sdvirtioifc;
+		s->nunit = cfg->max_target;
+		if(h)
+			t->next = s;
+		else
+			h = s;
+		t = s;
+	}
+
+	return h;
+}
+
+SDifc sdvirtioifc = {
+	"virtio",			/* name */
+
+	viopnp,				/* pnp */
+	nil,				/* legacy */
+	nil,				/* id */
+	vioenable,			/* enable */
+	viodisable,			/* disable */
+
+	vioverify,			/* verify */
+	vioonline,			/* online */
+	viorio,				/* rio */
+	nil,				/* rctl */
+	nil,				/* wctl */
+
+	viobio,				/* bio */
+	nil,				/* probe */
+	nil,				/* clear */
+	nil,				/* stat */
+	nil,				/* rtopctl */
+	nil,				/* wtopctl */
+	nil,				/* ataio */
+};
--- a/os/pc/uarti8250.c
+++ b/os/pc/uarti8250.c
@@ -5,7 +5,6 @@
 #include "fns.h"
 #include "io.h"
 #include "../port/error.h"
-#include "../port/uart.h"
 
 /*
  * 8250 UART and compatibles.
@@ -713,7 +712,7 @@
 	consuart = uart;
 	uart->console = 1;
 }
-
+/* TODO these are not in 9front
 void
 i8250mouse(char* which, int (*putc)(Queue*, int), int setb1200)
 {
@@ -738,3 +737,4 @@
 	uartsetmouseputc(&i8250uart[port], putc);
 
 }
+*/
--- a/os/pc/vga.c
+++ b/os/pc/vga.c
@@ -3,6 +3,8 @@
 #include "mem.h"
 #include "dat.h"
 #include "fns.h"
+#include "io.h"
+#include "ureg.h"
 #include "../port/error.h"
 
 #define	Image	IMAGE
@@ -18,7 +20,7 @@
 static Rectangle window;
 static int *xp;
 static int xbuf[256];
-static Lock vgascreenlock;
+Lock vgascreenlock;
 int drawdebug;
 
 void
--- a/os/pc/vgamach64xx.c
+++ b/os/pc/vgamach64xx.c
@@ -4,6 +4,7 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 
 #define	Image	IMAGE
--- a/os/pc/vgas3.c
+++ b/os/pc/vgas3.c
@@ -4,6 +4,7 @@
 #include "dat.h"
 #include "fns.h"
 #include "io.h"
+#include "../port/pci.h"
 #include "../port/error.h"
 
 #define	Image	IMAGE
--- /dev/null
+++ b/os/pc/vgavesa.c
@@ -1,0 +1,272 @@
+/*
+ * vga driver using just vesa bios to set up.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "../port/error.h"
+
+#define Ureg Ureg386
+#include "/386/include/ureg.h"
+typedef struct Ureg386 Ureg386;
+
+#define	Image	IMAGE
+#include <draw.h>
+#include <memdraw.h>
+#include <cursor.h>
+#include "screen.h"
+
+enum {
+	RealModeBuf = 0x9000,
+};
+
+static uchar modebuf[0x1000];
+static Chan *creg, *cmem;
+
+#define WORD(p) ((p)[0] | ((p)[1]<<8))
+#define LONG(p) ((p)[0] | ((p)[1]<<8) | ((p)[2]<<16) | ((p)[3]<<24))
+#define PWORD(p, v) (p)[0] = (v); (p)[1] = (v)>>8
+#define PLONG(p, v) (p)[0] = (v); (p)[1] = (v)>>8; (p)[2] = (v)>>16; (p)[3] = (v)>>24
+
+typedef struct Vmode Vmode;
+struct Vmode
+{
+	char	chan[32];
+	int	attr;	/* flags */
+	int	bpl;
+	int	dx, dy;
+	int	depth;
+	ulong	paddr;
+};
+
+static uchar*
+vbesetup(Ureg386 *u, int ax)
+{
+	memset(modebuf, 0, sizeof modebuf);
+	memset(u, 0, sizeof *u);
+	u->ax = ax;
+	u->es = (RealModeBuf>>4)&0xF000;
+	u->di = RealModeBuf&0xFFFF;
+	return modebuf;
+}
+
+static void
+vbecall(Ureg386 *u)
+{
+	if(devtab[cmem->type]->write(cmem, modebuf, sizeof(modebuf), RealModeBuf) != sizeof(modebuf))
+		error("write modebuf");
+	u->trap = 0x10;
+	if(devtab[creg->type]->write(creg, u, sizeof(*u), 0) != sizeof(*u))
+		error("write ureg");
+	if(devtab[creg->type]->read(creg, u, sizeof(*u), 0) != sizeof(*u))
+		error("read ureg");
+	if((u->ax&0xFFFF) != 0x004F)
+		error("vesa bios error");
+	if(devtab[cmem->type]->read(cmem, modebuf, sizeof(modebuf), RealModeBuf) != sizeof(modebuf))
+		error("read modebuf");
+}
+
+static void
+vbecheck(void)
+{
+	Ureg386 u;
+	uchar *p;
+
+	p = vbesetup(&u, 0x4F00);
+	strcpy((char*)p, "VBE2");
+	vbecall(&u);
+	if(memcmp((char*)p, "VESA", 4) != 0)
+		error("bad vesa signature");
+	if(p[5] < 2)
+		error("bad vesa version");
+}
+
+static int
+vbegetmode(void)
+{
+	Ureg386 u;
+
+	vbesetup(&u, 0x4F03);
+	vbecall(&u);
+	return u.bx;
+}
+
+static uchar*
+vbemodeinfo(int mode)
+{
+	uchar *p;
+	Ureg386 u;
+
+	p = vbesetup(&u, 0x4F01);
+	u.cx = mode;
+	vbecall(&u);
+	return p;
+}
+
+static char*
+vmode(Vmode *m, uchar *p)
+{
+	m->attr = WORD(p);
+	if(!(m->attr & (1<<4)))
+		return "not in VESA graphics mode";
+	if(!(m->attr & (1<<7)))
+		return "not in linear graphics mode";
+	m->bpl = WORD(p+16);
+	m->dx = WORD(p+18);
+	m->dy = WORD(p+20);
+	m->depth = p[25];
+	m->paddr = LONG(p+40);
+	if(m->depth <= 8)
+		snprint(m->chan, sizeof m->chan, "%c%d", 
+			(m->attr & (1<<3)) ? 'm' : 'k', m->depth);
+	else
+		rgbmask2chan(m->chan, m->depth,
+			(1UL<<p[31])-1 << p[32],
+			(1UL<<p[33])-1 << p[34],
+			(1UL<<p[35])-1 << p[36]);
+	return nil;
+}
+
+static void
+vesalinear(VGAscr *scr, int, int)
+{
+	int i, mode, size, havesize;
+	Pcidev *pci;
+	char *err;
+	Vmode m;
+
+	vbecheck();
+	mode = vbegetmode();
+	/*
+	 * bochs loses the top bits - cannot use this
+	if((mode&(1<<14)) == 0)
+		error("not in linear graphics mode");
+	 */
+	mode &= 0x3FFF;
+	if((err = vmode(&m, vbemodeinfo(mode))) != nil)
+		error(err);
+
+	size = m.dy * m.bpl;
+
+	/*
+	 * figure out max size of memory so that we have
+	 * enough if the screen is resized.
+	 */
+	pci = nil;
+	havesize = 0;
+	while(!havesize && (pci = pcimatch(pci, 0, 0)) != nil){
+		if(pci->ccrb != Pcibcdisp)
+			continue;
+		for(i=0; i<nelem(pci->mem); i++){
+			uvlong a, e;
+
+			if(pci->mem[i].size == 0 || (pci->mem[i].bar & 1) != 0)
+				continue;
+			a = pci->mem[i].bar & ~0xF;
+			e = a + pci->mem[i].size;
+			if(m.paddr >= a && (m.paddr+size) <= e){
+				size = e - m.paddr;
+				havesize = 1;
+				break;
+			}
+		}
+	}
+
+	/* no pci - heuristic guess */
+	if(!havesize)
+		if(size < 4*1024*1024)
+			size = 4*1024*1024;
+		else
+			size = ROUND(size, 1024*1024);
+
+	vgalinearaddr(scr, m.paddr, size);
+	if(scr->apsize)
+		addvgaseg("vesascreen", scr->paddr, scr->apsize);
+
+	scr->softscreen = 1;
+}
+
+static void
+vesaenable(VGAscr *)
+{
+	cmem = namec("/dev/realmodemem", Aopen, ORDWR, 0);
+	if(waserror()){
+		cclose(cmem);
+		cmem = nil;
+		nexterror();
+	}
+	creg = namec("/dev/realmode", Aopen, ORDWR, 0);
+	poperror();
+}
+
+static void
+vesadisable(VGAscr *)
+{
+	if(cmem != nil)
+		cclose(cmem);
+	if(creg != nil)
+		cclose(creg);
+	cmem = creg = nil;
+}
+
+static void
+vesablank(VGAscr *, int blank)
+{
+	Ureg386 u;
+
+	vbesetup(&u, 0x4f10);
+	u.bx = blank ? 0x0101 : 0x0001;
+
+	/*
+	 * dont wait forever when called from mouse kproc.
+	 * some BIOS get stuck in i/o poll loop after
+	 * blank/unblank for some reason. (Thinkpad A22p)
+	 */
+	/* TODO if(up->kp)
+		procalarm(10000); */
+
+	if(!waserror()){
+		vbecall(&u);
+		poperror();
+	}
+
+	if(up->kp){
+		/* procalarm(0); */
+		/* TODO up->notepending = 0; */
+	}
+}
+
+static void
+vesadrawinit(VGAscr *scr)
+{
+	scr->blank = vesablank;
+}
+
+VGAdev vgavesadev = {
+	"vesa",
+	vesaenable,
+	vesadisable,
+	0,
+	vesalinear,
+	vesadrawinit,
+};
+
+/*
+ * called from multibootargs() to convert
+ * vbe mode info (passed from bootloader)
+ * to *bootscreen= parameter
+ */
+char*
+vesabootscreenconf(char *s, char *e, uchar *p)
+{
+	Vmode m;
+
+	if(vmode(&m, p) != nil || m.depth == 0)	/* unusable mode, or depth 0 which would divide by zero below */
+		return s;	/* nothing appended */
+	return seprint(s, e, "*bootscreen=%dx%dx%d %s %#lux\n",
+		m.bpl * 8 / m.depth, m.dy, m.depth, m.chan, m.paddr);	/* width is derived from bytes per line */
+}
--- /dev/null
+++ b/os/pc64/NOTICE
@@ -1,0 +1,10 @@
+Most of these files are adapted from Plan 9
+	Copyright © 2002 Lucent Technologies Inc
+	Copyright © 2021 Plan 9 Foundation
+This software was originally authored by employees of Bell Laboratories,
+a unit of Nokia Corporation.
+
+devbench.c, fpi387.c, and flashzpc.c are
+	Copyright © 1999-2005 Vita Nuova Holdings Ltd
+
+All of them are covered by the MIT licence (see /NOTICE).
--- /dev/null
+++ b/os/pc64/apbootstrap.i
@@ -1,0 +1,21 @@
+uchar apbootstrap[]={
+0xea,0x30,0x70,0x00,0x00,0x90,0x90,0x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00,0x00,0x00,0x00,
+0x8c,0xc8,0x8e,0xd8,0x0f,0x01,0x16,0xff,0x70,0x0f,0x20,0xc0,0x83,0xc8,0x01,0x0f,
+0x22,0xc0,0xeb,0x00,0xea,0x49,0x70,0x18,0x00,0x66,0xb8,0x10,0x00,0x8e,0xd8,0x8e,
+0xc0,0x8e,0xe0,0x8e,0xe8,0x8e,0xd0,0x8b,0x05,0x10,0x70,0x00,0x00,0x0f,0x22,0xd8,
+0xeb,0x00,0x0f,0x20,0xe0,0x83,0xe0,0xef,0x0d,0xa0,0x00,0x00,0x00,0x0f,0x22,0xe0,
+0xb9,0x80,0x00,0x00,0xc0,0x0f,0x32,0x0b,0x05,0x28,0x70,0x00,0x00,0x0f,0x30,0x0f,
+0x20,0xc2,0x81,0xe2,0xf5,0xff,0xff,0x9f,0x81,0xca,0x00,0x00,0x01,0x80,0x0f,0x22,
+0xc2,0xea,0x98,0x70,0x00,0x00,0x08,0x00,0x48,0xc7,0xc0,0x0f,0x71,0x00,0x80,0x0f,
+0x01,0x10,0x48,0x31,0xc0,0x8e,0xd8,0x8e,0xc0,0x8e,0xe0,0x8e,0xe8,0x8e,0xd0,0x0f,
+0x00,0xd0,0x48,0x8b,0x24,0x25,0x20,0x70,0x00,0x80,0x49,0x89,0xc6,0x49,0x89,0xe7,
+0x48,0x81,0xc4,0x00,0x80,0x00,0x00,0x50,0x9d,0x48,0x8b,0x04,0x25,0x08,0x70,0x00,
+0x80,0x48,0x8b,0x2c,0x25,0x18,0x70,0x00,0x80,0x55,0xff,0xd0,0xf4,0xeb,0xfd,0x00,
+0x00,0x00,0x00,0x00,0x00,0x00,0x00,0xff,0xff,0x00,0x00,0x00,0x9a,0xaf,0x00,0xff,
+0xff,0x00,0x00,0x00,0x92,0xcf,0x00,0xff,0xff,0x00,0x00,0x00,0x9a,0xcf,0x00,0x1f,
+0x00,0xdf,0x70,0x00,0x00,0x1f,0x00,0xdf,0x70,0x00,0x00,0x00,0x00,0x00,0x00,0x1f,
+0x00,0xdf,0x70,0x00,0x80,0xff,0xff,0xff,0xff,
+
+};
--- /dev/null
+++ b/os/pc64/apbootstrap.s
@@ -1,0 +1,110 @@
+#include "mem.h"
+
+#define NOP		BYTE $0x90		/* NOP */
+#define LGDT(gdtptr)	BYTE $0x0F;		/* LGDT */			\
+			BYTE $0x01; BYTE $0x16;					\
+			WORD $gdtptr
+#define FARJUMP16(s, o)	BYTE $0xEA;		/* far jump to ptr16:16 */	\
+			WORD $o; WORD $s;					\
+			NOP; NOP; NOP
+#define FARJUMP32(s, o)	BYTE $0x66;		/* far jump to ptr32:16 */	\
+			BYTE $0xEA; LONG $o; WORD $s
+
+#define	DELAY		BYTE $0xEB;		/* JMP .+2 */			\
+			BYTE $0x00
+#define INVD		BYTE $0x0F; BYTE $0x08
+#define WBINVD		BYTE $0x0F; BYTE $0x09
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables. Note that these are assembler-specific hence
+ * the '<<2'.
+ */
+#define PDO(a)		(((((a))>>22) & 0x03FF)<<2)
+#define PTO(a)		(((((a))>>12) & 0x03FF)<<2)
+
+/*
+ * Start an Application Processor. This must be placed on a 4KB boundary
+ * somewhere in the 1st MB of conventional memory (APBOOTSTRAP). However,
+ * due to some shortcuts below it's restricted further to within the 1st
+ * 64KB. The AP starts in real-mode, with
+ *   CS selector set to the startup memory address/16;
+ *   CS base set to startup memory address;
+ *   CS limit set to 64KB;
+ *   CPL and IP set to 0.
+ */
+TEXT apbootstrap(SB), $0
+	FARJUMP16(0, _apbootstrap(SB))
+TEXT _apvector(SB), $0				/* address APBOOTSTRAP+0x08 */
+	LONG $0
+TEXT _appdb(SB), $0				/* address APBOOTSTRAP+0x0C */
+	LONG $0
+TEXT _apapic(SB), $0				/* address APBOOTSTRAP+0x10 */
+	LONG $0
+TEXT _apbootstrap(SB), $0			/* address APBOOTSTRAP+0x14 */
+	MOVW	CS, AX
+	MOVW	AX, DS				/* initialise DS */
+
+	LGDT(gdtptr(SB))			/* load a basic gdt */
+
+	MOVL	CR0, AX
+	ORL	$1, AX
+	MOVL	AX, CR0				/* turn on protected mode */
+	DELAY					/* JMP .+2 */
+
+	BYTE $0xB8; WORD $SELECTOR(1, SELGDT, 0)/* MOVW $SELECTOR(1, SELGDT, 0), AX */
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS
+
+	FARJUMP32(SELECTOR(2, SELGDT, 0), _ap32-KZERO(SB))
+
+/*
+ * For Pentiums and higher, the code that enables paging must come from
+ * pages that are identity mapped. 
+ * To this end double map KZERO at virtual 0 and undo the mapping once virtual
+ * nirvana has been obtained.
+ */
+TEXT _ap32(SB), $0
+	MOVL	_appdb-KZERO(SB), CX		/* physical address of PDB */
+	MOVL	(PDO(KZERO))(CX), DX		/* double-map KZERO at 0 */
+	MOVL	DX, (PDO(0))(CX)
+	MOVL	CX, CR3				/* load and flush the mmu */
+
+	MOVL	CR0, DX
+	ORL	$0x80010000, DX			/* PG|WP */
+	ANDL	$~0x6000000A, DX		/* ~(CD|NW|TS|MP) */
+
+	MOVL	$_appg(SB), AX
+	MOVL	DX, CR0				/* turn on paging */
+	JMP*	AX
+
+TEXT _appg(SB), $0
+	MOVL	CX, AX				/* physical address of PDB */
+	ORL	$KZERO, AX
+	MOVL	$0, (PDO(0))(AX)		/* undo double-map of KZERO at 0 */
+	MOVL	CX, CR3				/* load and flush the mmu */
+
+	MOVL	$(MACHADDR+MACHSIZE-4), SP
+
+	MOVL	$0, AX
+	PUSHL	AX
+	POPFL
+
+	MOVL	_apapic(SB), AX
+	MOVL	AX, (SP)
+	MOVL	_apvector(SB), AX
+	CALL*	AX
+_aphalt:
+	HLT
+	JMP	_aphalt
+
+TEXT gdt(SB), $0
+	LONG $0x0000; LONG $0
+	LONG $0xFFFF; LONG $(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
+	LONG $0xFFFF; LONG $(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
+TEXT gdtptr(SB), $0
+	WORD	$(3*8-1)
+	LONG	$gdt-KZERO(SB)
--- /dev/null
+++ b/os/pc64/apic.c
@@ -1,0 +1,448 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+#include "mp.h"
+
+enum {					/* Local APIC registers */
+	LapicID		= 0x0020,	/* ID */
+	LapicVER	= 0x0030,	/* Version */
+	LapicTPR	= 0x0080,	/* Task Priority */
+	LapicAPR	= 0x0090,	/* Arbitration Priority */
+	LapicPPR	= 0x00A0,	/* Processor Priority */
+	LapicEOI	= 0x00B0,	/* EOI */
+	LapicLDR	= 0x00D0,	/* Logical Destination */
+	LapicDFR	= 0x00E0,	/* Destination Format */
+	LapicSVR	= 0x00F0,	/* Spurious Interrupt Vector */
+	LapicISR	= 0x0100,	/* Interrupt Status (8 registers) */
+	LapicTMR	= 0x0180,	/* Trigger Mode (8 registers) */
+	LapicIRR	= 0x0200,	/* Interrupt Request (8 registers) */
+	LapicESR	= 0x0280,	/* Error Status */
+	LapicICRLO	= 0x0300,	/* Interrupt Command */
+	LapicICRHI	= 0x0310,	/* Interrupt Command [63:32] */
+	LapicTIMER	= 0x0320,	/* Local Vector Table 0 (TIMER) */
+	LapicPCINT	= 0x0340,	/* Performance Counter LVT */
+	LapicLINT0	= 0x0350,	/* Local Vector Table 1 (LINT0) */
+	LapicLINT1	= 0x0360,	/* Local Vector Table 2 (LINT1) */
+	LapicERROR	= 0x0370,	/* Local Vector Table 3 (ERROR) */
+	LapicTICR	= 0x0380,	/* Timer Initial Count */
+	LapicTCCR	= 0x0390,	/* Timer Current Count */
+	LapicTDCR	= 0x03E0,	/* Timer Divide Configuration */
+};
+
+enum {					/* LapicSVR */
+	LapicENABLE	= 0x00000100,	/* Unit Enable */
+	LapicFOCUS	= 0x00000200,	/* Focus Processor Checking Disable */
+};
+
+enum {					/* LapicICRLO */
+					/* [14] IPI Trigger Mode Level (RW) */
+	LapicDEASSERT	= 0x00000000,	/* Deassert level-sensitive interrupt */
+	LapicASSERT	= 0x00004000,	/* Assert level-sensitive interrupt */
+
+					/* [17:16] Remote Read Status */
+	LapicINVALID	= 0x00000000,	/* Invalid */
+	LapicWAIT	= 0x00010000,	/* In-Progress */
+	LapicVALID	= 0x00020000,	/* Valid */
+
+					/* [19:18] Destination Shorthand */
+	LapicFIELD	= 0x00000000,	/* No shorthand */
+	LapicSELF	= 0x00040000,	/* Self is single destination */
+	LapicALLINC	= 0x00080000,	/* All including self */
+	LapicALLEXC	= 0x000C0000,	/* All Excluding self */
+};
+
+enum {					/* LapicESR */
+	LapicSENDCS	= 0x00000001,	/* Send CS Error */
+	LapicRCVCS	= 0x00000002,	/* Receive CS Error */
+	LapicSENDACCEPT	= 0x00000004,	/* Send Accept Error */
+	LapicRCVACCEPT	= 0x00000008,	/* Receive Accept Error */
+	LapicSENDVECTOR	= 0x00000020,	/* Send Illegal Vector */
+	LapicRCVVECTOR	= 0x00000040,	/* Receive Illegal Vector */
+	LapicREGISTER	= 0x00000080,	/* Illegal Register Address */
+};
+
+enum {					/* LapicTIMER */
+					/* [17] Timer Mode (RW) */
+	LapicONESHOT	= 0x00000000,	/* One-shot */
+	LapicPERIODIC	= 0x00020000,	/* Periodic */
+
+					/* [19:18] Timer Base (RW) */
+	LapicCLKIN	= 0x00000000,	/* use CLKIN as input */
+	LapicTMBASE	= 0x00040000,	/* use TMBASE */
+	LapicDIVIDER	= 0x00080000,	/* use output of the divider */
+};
+
+static uchar lapictdxtab[] = {		/* LapicTDCR */
+	0x0B,	/* divide by 1 */
+	0x00,	/* divide by 2 */
+	0x01,	/* divide by 4 */
+	0x02,	/* divide by 8 */
+	0x03,	/* divide by 16 */
+	0x08,	/* divide by 32 */
+	0x09,	/* divide by 64 */
+	0x0A,	/* divide by 128 */
+};
+
+static ulong* lapicbase;
+
+typedef struct Apictimer Apictimer;
+struct Apictimer
+{
+	uvlong	hz;
+	ulong	max;
+	ulong	min;
+	ulong	div;
+	int	tdx;
+};
+
+static Apictimer lapictimer[MAXMACH];
+
+static ulong
+lapicr(int r)
+{
+	return *(lapicbase+(r/sizeof(*lapicbase)));
+}
+
+static void
+lapicw(int r, ulong data)	/* store data in the local APIC register at byte offset r */
+{
+	*(lapicbase+(r/sizeof(*lapicbase))) = data;	/* lapicbase is a ulong*, so the byte offset is scaled down */
+	data = *(lapicbase+(LapicID/sizeof(*lapicbase)));	/* read back the ID register; presumably forces the write out - confirm */
+	USED(data);	/* the value read is deliberately discarded */
+}
+
+void
+lapiconline(void)
+{
+	Apictimer *a;
+
+	a = &lapictimer[m->machno];	/* this processor's calibrated timer parameters */
+
+	/*
+	 * Reload the timer to de-synchronise the processors,
+	 * then lower the task priority to allow interrupts to be
+	 * accepted by the APIC.
+	 */
+	microdelay((TK2MS(1)*1000/conf.nmach) * m->machno);	/* stagger by a machno-dependent fraction of a tick */
+	lapicw(LapicTICR, a->max);
+	lapicw(LapicTIMER, LapicCLKIN|LapicPERIODIC|(VectorPIC+IrqTIMER));
+
+	/*
+	 * not strictly necessary, but reported (osdev.org) to be
+	 * required for some machines.
+	 */
+	lapicw(LapicTDCR, lapictdxtab[a->tdx]);
+
+	lapicw(LapicTPR, 0);
+}
+
+/*
+ *  use the i8253/tsc clock to figure out our lapic timer rate.
+ */
+static void
+lapictimerinit(void)
+{
+	uvlong x, v, hz;
+	Apictimer *a;
+	int s;
+
+	if(m->machno != 0){
+		lapictimer[m->machno] = lapictimer[0];
+		return;
+	}
+
+	s = splhi();
+	a = &lapictimer[m->machno];
+	a->tdx = 0;
+Retry:
+	lapicw(LapicTIMER, ApicIMASK|LapicCLKIN|LapicONESHOT|(VectorPIC+IrqTIMER));
+	lapicw(LapicTDCR, lapictdxtab[a->tdx]);
+
+	x = fastticks(&hz);
+	x += hz/10;
+	lapicw(LapicTICR, 0xffffffff);
+	do{
+		v = fastticks(nil);
+	}while(v < x);
+
+	v = (0xffffffffUL-lapicr(LapicTCCR))*10;
+	if(v > hz-(hz/10)){
+		if(v > hz+(hz/10) && a->tdx < nelem(lapictdxtab)-1){
+			a->tdx++;
+			goto Retry;
+		}
+		v = hz;
+	}
+
+	assert(v >= (100*HZ));
+
+	a->hz = v;
+	a->div = hz/a->hz;
+	a->max = a->hz/HZ;
+	a->min = a->hz/(100*HZ);
+
+	splx(s);
+
+	v = (v+500000LL)/1000000LL;
+	print("cpu%d: lapic clock at %lludMHz\n", m->machno, v);
+}
+
+void
+lapicinit(Apic* apic)
+{
+	ulong dfr, ldr, lvt;
+
+	if(lapicbase == 0)	/* the register base is latched from the first Apic passed in */
+		lapicbase = apic->addr;
+
+	/*
+	 * These don't really matter in Physical mode;
+	 * set the defaults anyway.
+	 */
+	if(strncmp(m->cpuidid, "AuthenticAMD", 12) == 0)
+		dfr = 0xf0000000;
+	else
+		dfr = 0xffffffff;
+	ldr = 0x00000000;
+
+	lapicw(LapicDFR, dfr);
+	lapicw(LapicLDR, ldr);
+	lapicw(LapicTPR, 0xff);	/* task priority stays at 0xff for the rest of this function */
+	lapicw(LapicSVR, LapicENABLE|(VectorPIC+IrqSPURIOUS));
+
+	lapictimerinit();
+
+	/*
+	 * Some Pentium revisions have a bug whereby spurious
+	 * interrupts are generated in the through-local mode.
+	 */
+	switch(m->cpuidax & 0xFFF){
+	case 0x526:				/* stepping cB1 */
+	case 0x52B:				/* stepping E0 */
+	case 0x52C:				/* stepping cC0 */
+		wrmsr(0x0E, 1<<14);		/* TR12 */
+		break;
+	}
+
+	/*
+	 * Set the local interrupts. It's likely these should just be
+	 * masked off for SMP mode as some Pentium Pros have problems if
+	 * LINT[01] are set to ExtINT.
+	 * Acknowledge any outstanding interrupts.
+	lapicw(LapicLINT0, apic->lintr[0]);
+	lapicw(LapicLINT1, apic->lintr[1]);
+	 */
+	lapiceoi(0);
+
+	lvt = (lapicr(LapicVER)>>16) & 0xFF;	/* bits 23:16 of the version register */
+	if(lvt >= 4)
+		lapicw(LapicPCINT, ApicIMASK);
+	lapicw(LapicERROR, VectorPIC+IrqERROR);
+	lapicw(LapicESR, 0);
+	lapicr(LapicESR);	/* result discarded */
+
+	/*
+	 * Issue an INIT Level De-Assert to synchronise arbitration ID's.
+	 */
+	lapicw(LapicICRHI, 0);
+	lapicw(LapicICRLO, LapicALLINC|ApicLEVEL|LapicDEASSERT|ApicINIT);
+	while(lapicr(LapicICRLO) & ApicDELIVS)	/* spin while the delivery-status bit is set */
+		;
+
+	/*
+	 * Do not allow acceptance of interrupts until all initialisation
+	 * for this processor is done. For the bootstrap processor this can be
+	 * early during initialisation. For the application processors this should
+	 * be after the bootstrap processor has lowered priority and is accepting
+	 * interrupts.
+	lapicw(LapicTPR, 0);
+	 */
+}
+
+void
+lapicstartap(Apic* apic, int v)
+{
+	int i;
+	ulong crhi;
+
+	/* make apic's processor do a warm reset */
+	crhi = apic->apicno<<24;
+	lapicw(LapicICRHI, crhi);
+	lapicw(LapicICRLO, LapicFIELD|ApicLEVEL|LapicASSERT|ApicINIT);
+	microdelay(200);
+	lapicw(LapicICRLO, LapicFIELD|ApicLEVEL|LapicDEASSERT|ApicINIT);
+	delay(10);
+
+	/* assumes apic is not an 82489dx */
+	for(i = 0; i < 2; i++){
+		lapicw(LapicICRHI, crhi);
+		/* make apic's processor start at v in real mode */
+		lapicw(LapicICRLO, LapicFIELD|ApicEDGE|ApicSTARTUP|(v/BY2PG));
+		microdelay(200);
+	}
+}
+
+void
+lapicerror(Ureg*, void*)
+{
+	ulong esr;
+
+	lapicw(LapicESR, 0);
+	esr = lapicr(LapicESR);
+	switch(m->cpuidax & 0xFFF){
+	case 0x526:				/* stepping cB1 */
+	case 0x52B:				/* stepping E0 */
+	case 0x52C:				/* stepping cC0 */
+		return;
+	}
+	print("cpu%d: lapicerror: 0x%8.8luX\n", m->machno, esr);
+}
+
+void
+lapicspurious(Ureg*, void*)
+{
+	print("cpu%d: lapicspurious\n", m->machno);
+}
+
+int
+lapicisr(int v)	/* nonzero when vector v is set in the local APIC in-service registers */
+{
+	ulong isr;	/* the 32-bit ISR word that covers vector v */
+
+	isr = lapicr(LapicISR + (v/32)*16);	/* ISR words are 16 bytes apart; lapicr takes a byte offset */
+
+	return isr & (1UL<<(v%32));	/* unsigned shift so bit 31 is well defined */
+}
+
+int
+lapiceoi(int v)
+{
+	lapicw(LapicEOI, 0);
+
+	return v;
+}
+
+void
+lapicicrw(ulong hi, ulong lo)
+{
+	lapicw(LapicICRHI, hi);
+	lapicw(LapicICRLO, lo);
+}
+
+void
+ioapicrdtr(Apic* apic, int sel, int* hi, int* lo)
+{
+	ulong *iowin;
+
+	iowin = apic->addr+(0x10/sizeof(ulong));
+	sel = IoapicRDT + 2*sel;
+
+	lock(apic);
+	*apic->addr = sel+1;
+	if(hi)
+		*hi = *iowin;
+	*apic->addr = sel;
+	if(lo)
+		*lo = *iowin;
+	unlock(apic);
+}
+
+void
+ioapicrdtw(Apic* apic, int sel, int hi, int lo)
+{
+	ulong *iowin;
+
+	iowin = apic->addr+(0x10/sizeof(ulong));
+	sel = IoapicRDT + 2*sel;
+
+	lock(apic);
+	*apic->addr = sel+1;
+	*iowin = hi;
+	*apic->addr = sel;
+	*iowin = lo;
+	unlock(apic);
+}
+
+void
+ioapicinit(Apic* apic, int apicno)
+{
+	int hi, lo, v;
+	ulong *iowin;
+
+	/*
+	 * Initialise the I/O APIC.
+	 * The MultiProcessor Specification says it is the responsibility
+	 * of the O/S to set the APIC id.
+	 * Make sure interrupts are all masked off for now.
+	 */
+	iowin = apic->addr+(0x10/sizeof(ulong));
+	lock(apic);
+	*apic->addr = IoapicVER;
+	apic->mre = (*iowin>>16) & 0xFF;
+
+	*apic->addr = IoapicID;
+	*iowin = apicno<<24;
+	unlock(apic);
+
+	hi = 0;
+	lo = ApicIMASK;
+	for(v = 0; v <= apic->mre; v++)
+		ioapicrdtw(apic, v, hi, lo);
+}
+
+void
+lapictimerset(uvlong next)	/* program the timer count for the fastticks time next */
+{
+	vlong period;
+	Apictimer *a;
+
+	a = &lapictimer[m->machno];
+	period = next - fastticks(nil);	/* fastticks remaining; negative if next has already passed */
+	period /= a->div;	/* div is the fastticks-to-timer ratio computed in lapictimerinit */
+	if(period < a->min)	/* also catches a negative period */
+		period = a->min;
+	else if(period > a->max - a->min)
+		period = a->max;
+	lapicw(LapicTICR, period);
+}
+
+void
+lapicclock(Ureg *u, void*)
+{
+	/*
+	 * since the MTRR updates need to be synchronized across processors,
+	 * we want to do this within the clock tick.
+	 */
+	mtrrclock();
+	timerintr(u, 0);
+}
+
+int
+lapicintron(Vctl*)
+{
+	lapicw(LapicTPR, 0);
+	return 0;
+}
+
+int
+lapicintroff(int)
+{
+	lapicw(LapicTPR, 0xFF);
+	return 0;
+}
+
+void
+lapicnmienable(void)
+{
+	lapicw(LapicPCINT, ApicNMI);
+}
+
+void
+lapicnmidisable(void)
+{
+	lapicw(LapicPCINT, ApicIMASK);
+}
--- /dev/null
+++ b/os/pc64/archmp.c
@@ -1,0 +1,435 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+
+#include "mp.h"
+
+static PCMP *pcmp;
+
+/*
+ * Bus type strings as matched against PCMPbus.string by mkbus().
+ * The index of the matching string is stored as bus->type and is
+ * compared there against BusEISA, BusPCI and BusISA, so the order
+ * of this table must agree with those constants.
+ * The list is terminated by a 0 entry.
+ */
+static char* buses[] = {
+	"CBUSI ",
+	"CBUSII",
+	"EISA  ",
+	"FUTURE",
+	"INTERN",
+	"ISA   ",
+	"MBI   ",
+	"MBII  ",
+	"MCA   ",
+	"MPI   ",
+	"MPSA  ",
+	"NUBUS ",
+	"PCI   ",
+	"PCMCIA",
+	"TC    ",
+	"VL    ",
+	"VME   ",
+	"XPRESS",
+	0,
+};
+
+/*
+ * Look up a bus record by number on the mpbus list.
+ * Returns the record, or nil after printing a diagnostic
+ * when no bus with that number has been recorded.
+ */
+static Bus*
+mpgetbus(int busno)
+{
+	Bus *b;
+
+	b = mpbus;
+	while(b != nil){
+		if(b->busno == busno)
+			return b;
+		b = b->next;
+	}
+
+	print("mpgetbus: can't find bus %d\n", busno);
+	return nil;
+}
+
+/*
+ * Create the Apic record for one processor entry of the MP table.
+ * Entries that are not enabled, whose APIC number is above
+ * MaxAPICNO, or whose number is already taken in mpapic[] are
+ * ignored (nil is returned).  The bootstrap processor gets
+ * machno 0; the others are numbered from 1 in table order.
+ * Both local interrupt inputs start out masked.
+ */
+static Apic*
+mkprocessor(PCMPprocessor* p)
+{
+	static int machno = 1;
+	int id;
+	Apic *a;
+
+	id = p->apicno;
+	if((p->flags & PcmpEN) == 0)
+		return nil;
+	if(id > MaxAPICNO)
+		return nil;
+	if(mpapic[id] != nil)
+		return nil;
+
+	a = xalloc(sizeof(Apic));
+	if(a == nil)
+		panic("mkprocessor: no memory for Apic");
+	a->type = PcmpPROCESSOR;
+	a->apicno = id;
+	a->flags = p->flags;
+	a->lintr[0] = ApicIMASK;
+	a->lintr[1] = ApicIMASK;
+	a->machno = (p->flags & PcmpBP) ? 0 : machno++;
+	mpapic[id] = a;
+
+	return a;
+}
+
+/*
+ * Create the Bus record for one bus entry of the MP table and
+ * append it to the mpbus list.  The bus type is the index of the
+ * entry's type string in buses[]; unknown types are ignored
+ * (nil is returned).
+ * EISA and PCI buses default to active-low, level-triggered
+ * interrupts; all others to active-high, edge-triggered.
+ * The EISA and ISA bus numbers are remembered in mpeisabus and
+ * mpisabus, with a diagnostic if one was already recorded.
+ */
+static Bus*
+mkbus(PCMPbus* p)
+{
+	Bus *b;
+	int t;
+
+	t = 0;
+	while(buses[t] != nil && strncmp(buses[t], p->string, sizeof(p->string)) != 0)
+		t++;
+	if(buses[t] == nil)
+		return nil;
+
+	b = xalloc(sizeof(Bus));
+	if(b == nil)
+		panic("mkbus: no memory for Bus");
+	if(mpbus != nil)
+		mpbuslast->next = b;
+	else
+		mpbus = b;
+	mpbuslast = b;
+
+	b->type = t;
+	b->busno = p->busno;
+
+	/* default polarity and trigger mode for the bus type */
+	if(b->type == BusEISA || b->type == BusPCI){
+		b->po = PcmpLOW;
+		b->el = PcmpLEVEL;
+	}else{
+		b->po = PcmpHIGH;
+		b->el = PcmpEDGE;
+	}
+
+	/* remember which bus numbers are the EISA and ISA buses */
+	if(b->type == BusEISA){
+		if(mpeisabus != -1)
+			print("mkbus: more than one EISA bus\n");
+		mpeisabus = b->busno;
+	}else if(b->type == BusISA){
+		if(mpisabus != -1)
+			print("mkbus: more than one ISA bus\n");
+		mpisabus = b->busno;
+	}
+
+	return b;
+}
+
+/*
+ * Create the Apic record for one I/O APIC entry of the MP table.
+ * Entries that are not enabled, whose APIC number is above
+ * MaxAPICNO, or whose number is already taken in mpioapic[] are
+ * ignored, as are entries whose registers cannot be mapped
+ * (nil is returned in all these cases).
+ */
+static Apic*
+mkioapic(PCMPioapic* p)
+{
+	void *regs;
+	int id;
+	Apic *a;
+
+	id = p->apicno;
+	if((p->flags & PcmpEN) == 0)
+		return nil;
+	if(id > MaxAPICNO)
+		return nil;
+	if(mpioapic[id] != nil)
+		return nil;
+
+	/* map 1024 bytes at the I/O APIC's physical address */
+	regs = vmap(p->addr, 1024);
+	if(regs == nil)
+		return nil;
+
+	a = xalloc(sizeof(Apic));
+	if(a == nil)
+		panic("mkioapic: no memory for Apic");
+	a->type = PcmpIOAPIC;
+	a->apicno = id;
+	a->addr = regs;
+	a->paddr = p->addr;
+	a->flags = p->flags;
+	mpioapic[id] = a;
+
+	return a;
+}
+
+/*
+ * Create the Aintr record for one I/O interrupt entry of the MP
+ * table and push it on the front of its source bus's aintr list.
+ * Returns nil if the destination I/O APIC or the source bus is
+ * unknown.  The record normally points at the table entry itself;
+ * for the board quirk below it points at a corrected private copy.
+ */
+static Aintr*
+mkiointr(PCMPintr* p)
+{
+	Bus *bus;
+	Aintr *aintr;
+	PCMPintr* pcmpintr;
+
+	/*
+	 * According to the MultiProcessor Specification, a destination
+	 * I/O APIC of 0xFF means the signal is routed to all I/O APICs.
+	 * It's unclear how that can possibly be correct so treat it as
+	 * an error for now.
+	 */
+	if(p->apicno > MaxAPICNO || mpioapic[p->apicno] == nil)
+		return 0;
+	
+	if((bus = mpgetbus(p->busno)) == 0)
+		return 0;
+
+	if((aintr = xalloc(sizeof(Aintr))) == nil)
+		panic("mkiointr: no memory for Aintr");
+	aintr->intr = p;
+
+	/* debugging aid, compiled in but disabled */
+	if(0)
+		print("mkiointr: type %d intr type %d flags %#o "
+			"bus %d irq %d apicno %d intin %d\n",
+			p->type, p->intr, p->flags,
+			p->busno, p->irq, p->apicno, p->intin);
+	/*
+	 * Hack for the Intel SR1520ML motherboard, whose BIOS describes
+	 * the i82575 dual ethernet controllers incorrectly: the entry
+	 * for bus 1, irq 1, intin 16 is copied and the copy's intin
+	 * changed to 17, leaving the table itself untouched.
+	 */
+	if(memcmp(pcmp->product, "INTEL   X38MLST     ", 20) == 0){
+		if(p->busno == 1 && p->intin == 16 && p->irq == 1){
+			if((pcmpintr = xalloc(sizeof(PCMPintr))) == nil)
+				panic("mkiointr: no memory for PCMPintr");
+			memmove(pcmpintr, p, sizeof(PCMPintr));
+			print("mkiointr: %20.20s bus %d intin %d irq %d\n",
+				(char*)pcmp->product,
+				pcmpintr->busno, pcmpintr->intin,
+				pcmpintr->irq);
+			pcmpintr->intin = 17;
+			aintr->intr = pcmpintr;
+		}
+	}
+	aintr->apic = mpioapic[p->apicno];
+	aintr->next = bus->aintr;
+	aintr->bus = bus;
+	bus->aintr = aintr;
+
+	return aintr;
+}
+
+/*
+ * Record one local interrupt entry of the MP table.
+ * The value computed for the entry is stored in lintr[intin] of
+ * the destination processor's Apic, or of every enabled processor
+ * when the destination APIC number is 0xFF.
+ * Returns the value stored, or 0 if the source bus is unknown or
+ * the entry names a local input other than LINT0/LINT1.
+ */
+static int
+mklintr(PCMPintr* p)
+{
+	Apic *apic;
+	Bus *bus;
+	int i, intin, v;
+
+	/*
+	 * The offsets of vectors for LINT[01] are known to be
+	 * 0 and 1 from the local APIC vector space at VectorLAPIC.
+	 */
+	if((bus = mpgetbus(p->busno)) == 0)
+		return 0;
+	intin = p->intin;
+
+	/*
+	 * intin comes straight from the table and indexes lintr[],
+	 * which only has slots for LINT0 and LINT1; reject anything
+	 * else rather than write outside the array.
+	 */
+	if(intin < 0 || intin > 1){
+		print("mklintr: bad local interrupt input %d\n", intin);
+		return 0;
+	}
+
+	/*
+	 * Pentium Pros have problems if LINT[01] are set to ExtINT
+	 * so just bag it, SMP mode shouldn't need ExtINT anyway.
+	 */
+	if(p->intr == PcmpExtINT || p->intr == PcmpNMI)
+		v = ApicIMASK;
+	else
+		v = mpintrinit(bus, p, VectorLAPIC+intin, p->irq);
+
+	if(p->apicno == 0xFF){
+		/* 0xFF addresses every processor */
+		for(i=0; i<=MaxAPICNO; i++){
+			if((apic = mpapic[i]) == nil)
+				continue;
+			if(apic->flags & PcmpEN)
+				apic->lintr[intin] = v;
+		}
+	}
+	else{
+		if(apic = mpapic[p->apicno])
+			if(apic->flags & PcmpEN)
+				apic->lintr[intin] = v;
+	}
+
+	return v;
+}
+
+/*
+ * Print the bytes from p up to (not including) e as hex, sixteen
+ * per line, each line prefixed "*mpN=" where N counts lines from 0.
+ * This is the line format that mpoverride() reads back.
+ */
+static void
+dumpmp(uchar *p, uchar *e)
+{
+	int n;
+
+	n = 0;
+	while(p < e){
+		if(n%16 == 0)
+			print("*mp%d=", n/16);
+		print("%.2x ", *p++);
+		n++;
+		if(n%16 == 0)
+			print("\n");
+	}
+	/* finish a partial last line */
+	if(n%16 != 0)
+		print("\n");
+}
+
+
+/*
+ * Build a replacement MP table body from the configuration.
+ * "*mp" gives the size in bytes; "*mp0", "*mp1", ... each hold a
+ * run of space-separated hex bytes (the format dumpmp() prints).
+ * On return *newp points at the new buffer and *e just past it.
+ * Any inconsistency is fatal: a bad size, an entry that is not a
+ * hex byte, or a byte count that differs from the stated size.
+ * The caller must have checked that "*mp" is set.
+ */
+static void
+mpoverride(uchar** newp, uchar** e)
+{
+	int size, i, j;
+	char buf[20];
+	uchar* p;
+	char *s, *t;
+	
+	size = strtol(getconf("*mp"), 0, 0);
+	if(size <= 0) panic("mpoverride: invalid size in *mp");
+	*newp = p = xalloc(size);
+	if(p == nil) panic("mpoverride: can't allocate memory");
+	*e = p + size;
+	for(i = 0; ; i++){
+		snprint(buf, sizeof buf, "*mp%d", i);
+		s = getconf(buf);
+		if(s == nil) break;
+		while(*s){
+			t = s;
+			j = strtol(s, &s, 16);
+			if(s == t){
+				/*
+				 * Nothing was converted, so s has not moved.
+				 * Accept trailing blanks (dumpmp leaves one
+				 * after the last byte); anything else is junk.
+				 * Without this the loop would keep storing
+				 * zero bytes until the overflow check fired.
+				 */
+				while(*s == ' ')
+					s++;
+				if(*s) panic("mpoverride: invalid entry in %s", buf);
+				break;
+			}
+			if(*s && *s != ' ' || j < 0 || j > 0xff) panic("mpoverride: invalid entry in %s", buf);
+			if(p >= *e) panic("mpoverride: overflow in %s", buf);
+			*p++ = j;
+		}
+	}
+	if(p != *e) panic("mpoverride: size doesn't match");
+}
+
+/*
+ * Interrupt initialisation for MP-table machines (archmp.intrinit).
+ * Maps the local APIC, walks the entries of the MP configuration
+ * table (or of the "*mp" override from the configuration) creating
+ * processor, bus, I/O APIC and interrupt records, then calls mpinit().
+ */
+static void
+pcmpinit(void)
+{
+	uchar *p, *e;
+	Apic *apic;
+	void *va;
+
+	/*
+	 * Map the local APIC.
+	 */
+	va = vmap(pcmp->lapicbase, 1024);
+
+	print("LAPIC: %.8lux %#p\n", pcmp->lapicbase, va);
+	if(va == nil)
+		panic("pcmpinit: cannot map lapic %.8lux", pcmp->lapicbase);
+
+	/* entries follow the PCMPsz-byte header and run to pcmp->length */
+	p = ((uchar*)pcmp)+PCMPsz;
+	e = ((uchar*)pcmp)+pcmp->length;
+	if(getconf("*dumpmp") != nil)
+		dumpmp(p, e);
+	if(getconf("*mp") != nil)
+		mpoverride(&p, &e);
+
+	/*
+	 * Run through the table saving information needed for starting
+	 * application processors and initialising any I/O APICs. The table
+	 * is guaranteed to be in order such that only one pass is necessary.
+	 *
+	 * The first byte of each entry is its type.  Known types advance
+	 * p by the entry size and continue the loop; an unknown type
+	 * dumps the remaining bytes, which leaves p == e and so ends it.
+	 */
+	while(p < e) switch(*p){
+	default:
+		print("pcmpinit: unknown PCMP type 0x%uX (e-p 0x%zuX)\n",
+			*p, e-p);
+		while(p < e){
+			print("%uX ", *p);
+			p++;
+		}
+		break;
+
+	case PcmpPROCESSOR:
+		/* every processor's local APIC is reached through the same mapping */
+		if(apic = mkprocessor((PCMPprocessor*)p)){
+			apic->addr = va;
+			apic->paddr = pcmp->lapicbase;
+		}
+		p += PCMPprocessorsz;
+		continue;
+
+	case PcmpBUS:
+		mkbus((PCMPbus*)p);
+		p += PCMPbussz;
+		continue;
+
+	case PcmpIOAPIC:
+		if(apic = mkioapic((PCMPioapic*)p))
+			ioapicinit(apic, apic->apicno);
+		p += PCMPioapicsz;
+		continue;
+
+	case PcmpIOINTR:
+		mkiointr((PCMPintr*)p);
+		p += PCMPintrsz;
+		continue;
+
+	case PcmpLINTR:
+		mklintr((PCMPintr*)p);
+		p += PCMPintrsz;
+		continue;
+	}
+
+	/*
+	 * Initialize local APIC and start application processors.
+	 */
+	mpinit();
+}
+
+/*
+ * Reset for MP machines (archmp.reset): shut down the application
+ * processors first, then fall through to the generic reset.
+ */
+static void
+mpreset(void)
+{
+	/* stop application processors */
+	mpshutdown();
+
+	/* do generic reset */
+	archreset();
+}
+
+static int identify(void);
+extern int i8259irqno(int, int);
+
+/*
+ * PCArch entry for machines described by an MP configuration table.
+ * Members not named here are left zero.  fastclock starts out as
+ * i8253read; identify() switches it to tscticks when the processor
+ * has a TSC and "*notsc" is not set.
+ */
+PCArch archmp = {
+.id=		"_MP_",	
+.ident=		identify,
+.reset=		mpreset,
+.intrinit=	pcmpinit,
+.intrassign=	mpintrassign,
+.intrirqno=	i8259irqno,
+.intrenable=	lapicintron,
+.intrdisable=	lapicintroff,
+.fastclock=	i8253read,
+.timerset=	lapictimerset,
+};
+
+/*
+ * Decide whether this machine can be run from its MP configuration
+ * table (archmp.ident).  Returns 0 if a usable table was found and
+ * mapped (leaving pcmp pointing at it), 1 otherwise.
+ * Setting "*nomp" to anything but "0" disables the search.
+ */
+static int
+identify(void)
+{
+	char *cp;
+	_MP_ *_mp_;
+	ulong pa, len;
+
+	if((cp = getconf("*nomp")) != nil && strcmp(cp, "0") != 0)
+		return 1;
+
+	/*
+	 * Search for an MP configuration table. For now,
+	 * don't accept the default configurations (physaddr == 0).
+	 * Check for correct signature, calculate the checksum and,
+	 * if correct, check the version.
+	 * To do: check extended table checksum.
+	 */
+	if((_mp_ = sigsearch("_MP_", _MP_sz)) == nil || _mp_->physaddr == 0)
+		return 1;
+
+	/* first map just the header, rejecting a range that wraps */
+	len = PCMPsz;
+	pa = _mp_->physaddr;
+	if(pa + len-1 < pa)
+		return 1;
+
+	memreserve(pa, len);
+	if((pcmp = vmap(pa, len)) == nil)
+		return 1;
+	if(pcmp->length < PCMPsz
+	|| pa + pcmp->length-1 < pa
+	|| memcmp(pcmp, "PCMP", 4) != 0
+	|| (pcmp->version != 1 && pcmp->version != 4)){
+		/*
+		 * Also reached by goto from the checksum test below, by
+		 * which time len is the full table length and pcmp the
+		 * full mapping.
+		 */
+Bad:
+		vunmap(pcmp, len);
+		pcmp = nil;
+		return 1;
+	}
+	/* header looks sane: remap with the table's real length */
+	len = pcmp->length;
+	memreserve(pa, len);
+	vunmap(pcmp, PCMPsz);
+	if((pcmp = vmap(pa, len)) == nil)
+		return 1;
+
+	if(checksum(pcmp, len) != 0)
+		goto Bad;
+
+	if(m->havetsc && getconf("*notsc") == nil)
+		archmp.fastclock = tscticks;
+
+	return 0;
+}
--- /dev/null
+++ b/os/pc64/audio.h
@@ -1,0 +1,15 @@
+/* audio driver configuration for this platform */
+enum
+{
+	Bufsize	= 1024,	/* 5.8 ms each, must be power of two */
+	Nbuf		= 128,	/* .74 seconds total */
+	Dma		= 6,	/* NOTE(review): presumably the DMA channel -- confirm */
+	IrqAUDIO	= 7,
+	SBswab	= 0,
+};
+
+/*
+ * seteisadma ignores its second argument and calls dmainit with
+ * Bufsize.  NOTE(review): the expansion ends in ';', so the macro
+ * cannot be used as the body of an if with an else -- confirm the
+ * callers before relying on it.
+ */
+#define seteisadma(a, b)	dmainit(a, Bufsize);
+#define CACHELINESZ		8
+/* UNCACHED is only a cast here: the address is returned unchanged */
+#define UNCACHED(type, v)	(type*)((uintptr)(v))
+
+/* Int0vec expands to nothing; setvec enables the vector under the name "audio" */
+#define Int0vec
+#define setvec(v, f, a)		intrenable(v, f, a, BUSUNKNOWN, "audio")
--- /dev/null
+++ b/os/pc64/dat.h
@@ -1,0 +1,376 @@
+typedef struct BIOS32si	BIOS32si;
+typedef struct BIOS32ci	BIOS32ci;
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPssestate	FPssestate;
+typedef struct FPavxstate	FPavxstate;
+typedef struct FPsave	FPsave;
+typedef struct PFPU	PFPU;
+typedef ulong Instr;
+typedef struct ISAConf	ISAConf;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct MMU	MMU;
+typedef struct Mach	Mach;
+typedef struct Notsave	Notsave;
+typedef struct PCArch	PCArch;
+typedef struct Pcidev	Pcidev;
+typedef struct PCMmap	PCMmap;
+typedef struct PCMslot	PCMslot;
+typedef struct Page	Page;
+typedef struct PMMU	PMMU;
+typedef struct Segdesc	Segdesc;
+typedef struct Tss	Tss;
+typedef s64		Tval;
+typedef struct Ureg	Ureg;
+typedef struct Vctl	Vctl;
+
+#pragma incomplete BIOS32si
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+#pragma incomplete Vctl
+
+
+/*
+ * Spin lock.  Only the layout is given here.
+ * NOTE(review): presumably key is the lock word, sr the saved
+ * interrupt state, pc the pc of the acquirer and pri a saved
+ * priority -- confirm against the lock routines.
+ */
+struct Lock
+{
+	u32	key;
+	u32	sr;
+	uintptr	pc;
+	u32	pri;
+};
+
+/*
+ * A saved stack pointer and program counter.
+ * Mach.sched is one of these; presumably setlabel() (see the
+ * waserror macro in fns.h) fills one in -- confirm.
+ */
+struct Label
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+/*
+ * Floating point save area as handed to _fxsave/_fxrstor in fpu.c.
+ * The members add up to 512 bytes.
+ */
+struct FPssestate
+{
+	u16int	fcw;			/* x87 control word */
+	u16int	fsw;			/* x87 status word */
+	u8int	ftw;			/* x87 tag word */
+	u8int	zero;			/* 0 */
+	u16int	fop;			/* last x87 opcode */
+	u64int	rip;			/* last x87 instruction pointer */
+	u64int	rdp;			/* last x87 data pointer */
+	u32	mxcsr;			/* SSE control and status (MXCSR) */
+	u32	mxcsrmask;		/* supported MXCSR bits */
+	uchar	st[128];		/* shared 64-bit media and x87 regs */
+	uchar	xmm[256];		/* 128-bit media regs */
+	uchar	ign[96];		/* reserved, ignored */
+};
+
+/*
+ * Save area for the xsave family: the 512-byte FPssestate followed
+ * by the XSAVE header and the upper halves of the AVX registers
+ * (832 bytes in all).
+ */
+struct FPavxstate
+{
+	FPssestate;
+	uchar	header[64];		/* XSAVE header */
+	uchar	ymm[256];		/* upper 128-bit regs (AVX) */
+};
+
+/*
+ * The save area type used throughout the kernel; sized for the
+ * largest layout (FPavxstate) whichever save routine is in use.
+ */
+struct FPsave
+{
+	FPavxstate;
+};
+
+/*
+ * Encoding of PFPU.fpstate: a state in the low bits, flag bits
+ * from bit 8 up, and an fpslot index in the two bits at FPindexs.
+ */
+enum
+{
+	/* this is a state */
+	FPinit=		0,
+	FPactive=	1,
+	FPinactive=	2,
+
+	/*
+	 * the following are bits that can be or'd into the state.
+	 *
+	 * this is biased so that FPinit, FPactive and FPinactive
+	 * without any flags refer to user fp state in fpslot[0].
+	 */
+	FPillegal=	1<<8,	/* fp forbidden in note handler */
+	FPpush=		2<<8,	/* trap on use and initialize new fpslot */
+	FPnouser=	4<<8,	/* fpslot[0] is kernel regs */
+	FPkernel=	8<<8,	/* fp use in kernel (user in fpslot[0] when !FPnouser) */
+
+	FPindexs=	16,		/* shift of the fpslot index */
+	FPindex1=	1<<FPindexs,	/* index increment */
+	FPindexm=	3<<FPindexs,	/* index mask */
+};
+
+/*
+ * Per-process floating point state.
+ * NOTE(review): (FPindexm+1)>>FPindexs evaluates to 3, so fpslot[]
+ * has three entries, while the two index bits of fpstate can hold
+ * 0..3 -- confirm whether (FPindexm>>FPindexs)+1 was intended.
+ */
+struct PFPU
+{
+	int	fpstate;
+	FPsave	*fpsave;	/* fpslot[fpstate>>FPindexs] */
+	FPsave	*fpslot[(FPindexm+1)>>FPindexs];
+};
+
+/* one bank of physical memory: a base address and a page count */
+struct Confmem
+{
+	uintptr	base;
+	u32	npage;
+};
+
+/* system configuration: processor, process and memory limits */
+struct Conf
+{
+	u32	nmach;		/* processors */
+	u32	nproc;		/* processes */
+	u32	monitor;	/* has monitor? */
+	Confmem mem[16];	/* physical memory */
+	u32	npage;		/* total physical pages of memory */
+	u32	nswap;		/* number of swap pages */
+	u32	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	u32	ialloc;		/* max interrupt time allocation in bytes */
+	u32	pipeqsize;	/* size in bytes of pipe queues */
+	u32	nuart;		/* number of uart devices */
+};
+
+/* a segment descriptor as two 32-bit words; Mach.gdt points at these */
+struct Segdesc
+{
+	u32	d0;
+	u32	d1;
+};
+
+/*
+ *  MMU structure for PDP, PD, PT pages.
+ */
+struct MMU
+{
+	MMU	*next;		/* list link */
+	uintptr	*page;		/* NOTE(review): presumably the table page itself -- confirm */
+	int	index;
+	int	level;		/* NOTE(review): presumably which of PDP/PD/PT -- confirm */
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR 1
+/*
+ * Per-process MMU state: two head/tail lists of MMU structures
+ * with their counts, eight debug register values and a vmx pointer.
+ */
+struct PMMU
+{
+	MMU*	mmuhead;
+	MMU*	mmutail;
+	MMU*	kmaphead;
+	MMU*	kmaptail;
+	ulong	kmapcount;
+	ulong	kmapindex;
+	ulong	mmucount;
+	
+	u64int	dr[8];
+	void	*vmx;
+};
+
+#include "../port/portdat.h"
+
+/*
+ * Task state segment; Mach.tss points at one per processor.
+ * The _N_ members are fillers named after their byte offset
+ * (0, 28 and 92).  Each rsp and ist slot is a pair of u32, so
+ * ist[14] holds seven two-word entries; the whole structure is
+ * 104 bytes.
+ */
+struct Tss {
+	u32	_0_;
+	u32	rsp0[2];
+	u32	rsp1[2];
+	u32	rsp2[2];
+	u32	_28_[2];
+	u32	ist[14];
+	u16	_92_[5];
+	u16	iomap;
+};
+
+/*
+ * Per-processor state.  The comment on machno says its position is
+ * known to assembly code, so members must not be reordered or
+ * inserted ahead of it without checking the assembly.
+ */
+struct Mach
+{
+	u32	machno;		/* physical id of processor (KNOWN TO ASSEMBLY) */
+	uintptr	splpc;		/* pc of last caller to splhi */
+
+	u64*	pml4;		/* pml4 base for this processor (va) */
+	Tss*	tss;		/* tss for this processor */
+	Segdesc	*gdt;		/* gdt for this processor */
+
+	Proc*	externup;	/* extern register Proc *up */
+
+	u64	ticks;		/* of the clock since boot time */
+	Proc*	proc;		/* current process on this processor */
+	Label	sched;		/* scheduler wakeup */
+	Lock	alarmlock;	/* access to alarm list */
+	void*	alarm;		/* alarms bound to this clock */
+	u32	inclockintr;
+
+	u32	nrdy;
+	u32	ilockdepth;
+	Perf	perf;		/* performance counters */
+
+	u32	loopconst;
+	int	aalcycles;
+
+	Lock	apictimerlock;
+	u32	cpumhz;
+	u64	cyclefreq;	/* Frequency of user readable cycle counter */
+	u64	cpuhz;
+	/* cpuid results; cpuidcx is tested against Xsave|Avx in fpuinit() */
+	u32	cpuidax;
+	u32	cpuidcx;
+	u32	cpuiddx;
+	char	cpuidid[16];
+	char*	cpuidtype;
+	uchar	cpuidfamily;
+	uchar	cpuidmodel;
+	uchar	cpuidstepping;
+	u32	havetsc;
+	u32	havepge;
+	int	havewatchpt8;
+	int	havenx;
+	u64	tscticks;
+	u64	tscoff;
+	u32	intr;
+	u32	spuriousintr;
+	u32	lastintr;
+
+	u64	dr7;			/* shadow copy of dr7 */
+	u64	xcr0;			/* value last given to putxcr0() in fpuinit() */
+
+	s64	mtrrcap;
+	s64	mtrrdef;
+	s64	mtrrfix[11];
+	s64	mtrrvar[32];	/* 256 max. */
+
+	uintptr	stack[1];	/* last member; NOTE(review): presumably the stack extends past the structure -- confirm */
+};
+
+extern u64 MemMin;
+
+/* which processors are running and whether the system is shutting down */
+struct
+{
+	Lock;
+	char	machs[MAXMACH];		/* one byte per CPU (a char array, not a packed bitmap) */
+	s32	exiting;	/* shutdown */
+	s32	ispanic;	/* shutdown in response to a panic */
+	s32	thunderbirdsarego;/* lets the added processors continue to schedinit */
+}active;
+
+
+/*
+ *  routines for things outside the PC model, like power management
+ */
+/*
+ * Table of platform hooks.  An implementation fills in the members
+ * it supports and leaves the rest zero (see archmp in archmp.c).
+ * ident returns 0 when the platform is recognised, as archmp's
+ * identify() does.
+ */
+struct PCArch
+{
+	char*	id;
+	s32	(*ident)(void);		/* this should be in the model */
+	void	(*reset)(void);		/* this should be in the model */
+	s32	(*serialpower)(s32);	/* 1 == on, 0 == off */
+	s32	(*modempower)(s32);	/* 1 == on, 0 == off */
+
+	void	(*intrinit)(void);
+	s32	(*intrassign)(Vctl*);
+	s32	(*intrirqno)(s32, s32);
+	s32	(*intrspurious)(s32);
+	s32	(*intrenable)(Vctl*);
+	s32	(*intrvecno)(s32);
+	s32	(*intrdisable)(s32);
+
+	void	(*clockinit)(void);
+	void	(*clockenable)(void);
+	u64	(*fastclock)(u64*);
+	void	(*timerset)(u64);
+};
+
+/* cpuid instruction result register bits */
+enum {
+	/* ax (tested in fpuinit() against the result of cpuid(0xd, 1)) */
+	Xsaveopt = 1<<0,
+	Xsaves = 1<<3,
+
+	/* cx */
+	Monitor	= 1<<3,
+	Xsave = 1<<26,
+	Avx	= 1<<28,
+
+	/* dx */
+	Fpuonchip = 1<<0,
+	Vmex	= 1<<1,		/* virtual-mode extensions */
+	Pse	= 1<<3,		/* page size extensions */
+	Tsc	= 1<<4,		/* time-stamp counter */
+	Cpumsr	= 1<<5,		/* model-specific registers, rdmsr/wrmsr */
+	Pae	= 1<<6,		/* physical-addr extensions */
+	Mce	= 1<<7,		/* machine-check exception */
+	Cmpxchg8b = 1<<8,
+	Cpuapic	= 1<<9,
+	Mtrr	= 1<<12,	/* memory-type range regs.  */
+	Pge	= 1<<13,	/* page global extension */
+	Mca	= 1<<14,	/* machine-check architecture */
+	Pat	= 1<<16,	/* page attribute table */
+	Pse2	= 1<<17,	/* more page size extensions */
+	Clflush = 1<<19,
+	Acpif	= 1<<22,	/* therm control msr */
+	Mmx	= 1<<23,
+	Fxsr	= 1<<24,	/* have SSE FXSAVE/FXRSTOR */
+	Sse	= 1<<25,	/* thus sfence instr. */
+	Sse2	= 1<<26,	/* thus mfence & lfence instr.s */
+	/*
+	 * NOTE(review): RDRAND is reported in cx (bit 30 of leaf 1),
+	 * not dx, although it is listed in this group -- check the
+	 * users test it against the right register.
+	 */
+	Rdrnd	= 1<<30,	/* RDRAND support bit */
+};
+
+/* model-specific register numbers */
+enum {						/* MSRs */
+	PerfEvtbase	= 0xc0010000,		/* Performance Event Select */
+	PerfCtrbase	= 0xc0010004,		/* Performance Counters */
+
+	Efer		= 0xc0000080,		/* Extended Feature Enable */
+	Star		= 0xc0000081,		/* Legacy Target IP and [CS]S */
+	Lstar		= 0xc0000082,		/* Long Mode Target IP */
+	Cstar		= 0xc0000083,		/* Compatibility Target IP */
+	Sfmask		= 0xc0000084,		/* SYSCALL Flags Mask */
+	FSbase		= 0xc0000100,		/* 64-bit FS Base Address */
+	GSbase		= 0xc0000101,		/* 64-bit GS Base Address */
+	KernelGSbase	= 0xc0000102,		/* SWAPGS instruction */
+};
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+/* device settings from one configuration line, plus up to NISAOPT other options */
+struct ISAConf {
+	char	*type;
+	u32	port;
+	s32	irq;
+	u32	dma;
+	u32	mem;
+	u32	size;
+	u32	freq;
+
+	s32	nopt;		/* NOTE(review): presumably the number of opt[] entries in use -- confirm */
+	char	*opt[NISAOPT];
+};
+
+extern PCArch	*arch;			/* PC architecture */
+
+/*
+ * Each processor sees its own Mach structure at address MACHADDR.
+ * However, the Mach structures must also be available via the per-processor
+ * MMU information array machp, mainly for disambiguation and access to
+ * the clock which is only maintained by the bootstrap processor (0).
+ */
+Mach* machp[MAXMACH];
+
+#define	MACHP(n)	(machp[n])
+
+extern register Mach* m;			/* R15 */
+extern register Proc* up;			/* R14 */
+
+extern s32 swcursor;
+
+/*
+ *  hardware info about a device
+ */
+/* one range of i/o ports: first port and size */
+typedef struct {
+	u32	port;	
+	s32	size;
+} Devport;
+
+/* hardware description of one device: its interrupt, type name and port ranges */
+struct DevConf
+{
+	u32	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	s32	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
+
+/* register values passed to bios32ci() and bios32call() (see fns.h) */
+typedef struct BIOS32ci {		/* BIOS32 Calling Interface */
+	u32	eax;
+	u32	ebx;
+	u32	ecx;
+	u32	edx;
+	u32	esi;
+	u32	edi;
+} BIOS32ci;
--- /dev/null
+++ b/os/pc64/errstr.h
@@ -1,0 +1,65 @@
+char Enoerror[] = "no error";
+char Emount[] = "inconsistent mount";
+char Eunmount[] = "not mounted";
+char Eunion[] = "not in union";
+char Emountrpc[] = "mount rpc error";
+char Eshutdown[] = "mounted device shut down";
+char Enocreate[] = "mounted directory forbids creation";
+char Enonexist[] = "file does not exist";
+char Eexist[] = "file already exists";
+char Ebadsharp[] = "unknown device in # filename";
+char Enotdir[] = "not a directory";
+char Eisdir[] = "file is a directory";
+char Ebadchar[] = "bad character in file name";
+char Efilename[] = "file name syntax";
+char Eperm[] = "permission denied";
+char Ebadusefd[] = "inappropriate use of fd";
+char Ebadarg[] = "bad arg in system call";
+char Einuse[] = "device or object already in use";
+char Eio[] = "i/o error";
+char Etoobig[] = "read or write too large";
+char Etoosmall[] = "read or write too small";
+char Enetaddr[] = "bad network address";
+char Emsgsize[] = "message is too big for protocol";
+char Enetbusy[] = "network device is busy or allocated";
+char Enoproto[] = "network protocol not supported";
+char Enoport[] = "network port not available";
+char Enoifc[] = "bad interface or no free interface slots";
+char Enolisten[] = "not announced";
+char Ehungup[] = "i/o on hungup channel";
+char Ebadctl[] = "bad process or channel control request";
+char Enodev[] = "no free devices";
+char Enoenv[] = "no free environment resources";
+char Ethread[] = "thread exited";
+char Estopped[] = "thread must be stopped";
+char Enochild[] = "no living children";
+char Eioload[] = "i/o error in demand load";
+char Enovmem[] = "out of memory: virtual memory";
+char Ebadld[] = "illegal line discipline";
+char Ebadfd[] = "fd out of range or not open";
+char Enofd[] = "no free file descriptors";
+char Eisstream[] = "seek on a stream";
+char Ebadexec[] = "exec header invalid";
+char Etimedout[] = "connection timed out";
+char Econrefused[] = "connection refused";
+char Econinuse[] = "connection in use";
+char Eintr[] = "interrupted";
+char Eneedservice[] = "service required for tcp/udp/il calls";
+char Enomem[] = "out of memory: kernel";
+char Esfnotcached[] = "subfont not cached";
+char Esoverlap[] = "segments overlap";
+char Emouseset[] = "mouse type already set";
+char Erecover[] = "failed to recover fd";
+char Eshort[] = "i/o count too small";
+char Enobitstore[] = "out of screen memory";
+char Egreg[] = "jim'll fix it";
+char Ebadspec[] = "bad attach specifier";
+char Enoattach[] = "mount/attach disallowed";
+char Eshortstat[] = "stat buffer too small";
+char Ebadstat[] = "malformed stat buffer";
+char Enegoff[] = "negative i/o offset";
+char Ecmdargs[] = "wrong #args in control message";
+char Ebadip[] = "bad ip address syntax";
+char Edirseek[] = "seek in directory";
+char Etoolong[] = "name too long";
+char Echange[] = "media or partition has changed";
--- /dev/null
+++ b/os/pc64/etherif.h
@@ -1,0 +1,39 @@
+/*
+ * Ethernet limits.
+ * NOTE(review): presumably the maximum number of interfaces and of
+ * packet types per interface -- confirm against the ether driver.
+ */
+enum {
+	MaxEther	= 64,
+	Ntypes		= 8,
+};
+
+typedef struct Ether Ether;
+/*
+ * One ethernet interface: the ISAConf hardware settings, the
+ * controller's identity and limits, the driver entry points, and
+ * the embedded Netif state.
+ */
+struct Ether {
+	ISAConf;			/* hardware info */
+
+	s32	ctlrno;
+	s32	tbdf;			/* type+busno+devno+funcno */
+	s32	minmtu;
+	s32 	maxmtu;
+	uchar	ea[Eaddrlen];
+
+	void	(*attach)(Ether*);	/* filled in by reset routine */
+	void	(*detach)(Ether*);
+	void	(*transmit)(Ether*);
+	/* TODO removed in 9front void	(*interrupt)(Ureg*, void*); */
+	s32	(*ifstat)(Ether*, void*, s32, u32);
+	s32 	(*ctl)(Ether*, void*, s32); /* custom ctl messages */
+	void	(*power)(Ether*, s32);	/* power on/off */
+	void	(*shutdown)(Ether*);	/* shutdown hardware before reboot */
+	void	*ctlr;			/* NOTE(review): presumably driver-private state -- confirm */
+
+	Queue*	oq;
+
+	Netif;
+};
+
+extern Block* etheriq(Ether*, Block*, s32);
+extern void addethercard(char*, int(*)(Ether*));
+extern u32 ethercrc(uchar*, s32);
+extern s32 parseether(uchar*, char*);
+
+/*
+ * Index arithmetic modulo l (NEXT, PREV) and rounding helpers:
+ * HOWMANY(x, y) is x/y rounded up, ROUNDUP(x, y) is x rounded up to
+ * a multiple of y.  Arguments are evaluated more than once, so they
+ * must not have side effects.
+ */
+#define NEXT(x, l)	(((x)+1)%(l))
+#define PREV(x, l)	(((x) == 0) ? (l)-1: (x)-1)
+#define	HOWMANY(x, y)	(((x)+((y)-1))/(y))
+#define ROUNDUP(x, y)	(HOWMANY((x), (y))*(y))
--- /dev/null
+++ b/os/pc64/fns.h
@@ -1,0 +1,222 @@
+#include "../port/portfns.h"
+void	aamloop(int);
+Dirtab*	addarchfile(char*, u32, s32(*)(Chan*,void*,s32,s64), s32(*)(Chan*,void*,s32,s64));
+void	archinit(void);
+void	archreset(void);
+int	bios32call(BIOS32ci*, u16int[3]);
+int	bios32ci(BIOS32si*, BIOS32ci*);
+void	bios32close(BIOS32si*);
+BIOS32si* bios32open(char*);
+void	bootargs(ulong);
+void	bootargsinit(void);
+int	checksum(void *, int);
+int	cistrcmp(char*, char*);
+int	cistrncmp(char*, char*, int);
+#define	clearmmucache()				/* x86 doesn't have one */
+void	clockintr(Ureg*, void*);
+s32	(*cmpswap)(s32*, s32, s32);
+s32	cmpswap486(s32*, s32, s32);
+void	(*coherence)(void);
+void	cpuid(u32, u32, u32 regs[]);
+void	fpuinit(void);
+int	cpuidentify(void);
+void	cpuidprint(void);
+void	(*cycles)(uvlong*);
+void	delay(int);
+int	dmacount(int);
+int	dmadone(int);
+void	dmaend(int);
+int	dmainit(int, int);
+s32	dmasetup(int, void*, s32, s32);
+void	dumpregs(Ureg*);
+int	ecinit(int cmdport, int dataport);
+int	ecread(uchar addr);
+int	ecwrite(uchar addr, uchar val);
+#define	evenaddr(x)				/* x86 doesn't care */
+u64	fastticks(u64*);
+u64	fastticks2ns(u64);
+u64	fastticks2us(u64);
+void	fpinit(void);
+void	(*fprestore)(FPsave*);
+void	(*fpsave)(FPsave*);
+void	fpuprocsetup(Proc*);
+void	fpuprocfork(Proc*);
+void	fpuprocsave(Proc*);
+void	fpuprocrestore(Proc*);
+int	fpusave(void);
+void	fpurestore(int);
+u64	getcr0(void);
+u64	getcr2(void);
+u64	getcr3(void);
+u64	getcr4(void);
+char*	getconf(char*);
+void	guesscpuhz(int);
+void	mwait(void*);
+int	i8042auxcmd(int);
+int	i8042auxcmdval(int);
+void	i8042auxenable(void (*)(int, int));
+int i8042auxdetect(void);
+void	i8042reset(void);
+void	i8250console(void);
+void	i8253enable(void);
+void	i8253init(void);
+void	i8253link(void);
+uvlong	i8253read(uvlong*);
+void	i8253timerset(uvlong);
+void	i8259init(void);
+int	i8259isr(int);
+int	i8259enable(Vctl*);
+int	i8259vecno(int);
+int	i8259disable(int);
+void	idle(void);
+void	idlehands(void);
+int	inb(int);
+void	insb(int, void*, int);
+ushort	ins(int);
+void	inss(int, void*, int);
+ulong	inl(int);
+void	insl(int, void*, int);
+int	intrdisable(int, void (*)(Ureg *, void *), void*, int, char*);
+void	intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
+void	invlpg(uintptr);
+void	iofree(u32);
+void	ioinit(void);
+s32	iounused(u32, u32);
+u32	ioalloc(u32, u32, u32, char*);
+u32	ioreserve(u32, u32, u32, char*);
+int	iprint(char*, ...);
+int	isaconfig(char*, int, ISAConf*);
+int	isvalid_va(void*);
+void	kbdenable(void);
+void	kbdinit(void);
+void	kdbenable(void);
+#define	kmapinval()
+void	lapicclock(Ureg*, void*);
+void	lapictimerset(uvlong);
+void	lgdt(void*);
+void	lidt(void*);
+void	links(void);
+void	ltr(ulong);
+void	mach0init(void);
+void	machinit(void);
+void	mathinit(void);
+void	mb386(void);
+void	mb586(void);
+void	meminit(void);
+void	meminit0(void);
+void	memreserve(uintptr, uintptr);
+void	mfence(void);
+#define mmuflushtlb() putcr3(getcr3())
+void	mmuinit(void);
+uintptr	mmukmap(uintptr, uintptr, int);
+int	mmukmapsync(uintptr);
+uintptr*	mmuwalk(uintptr*, uintptr, int, int);
+char*	mtrr(u64, u64, char *);
+char*	mtrrattr(u64, u64 *);
+void	mtrrclock(void);
+int	mtrrprint(char *, s32);
+void	mtrrsync(void);
+uchar	nvramread(intptr);
+void	nvramwrite(intptr, uchar);
+void	outb(int, int);
+void	outsb(int, void*, int);
+void	outs(int, ushort);
+void	outss(int, void*, int);
+void	outl(int, ulong);
+void	outsl(int, void*, int);
+void	patwc(void*, int);
+void	pcicfginit(void);
+int	(*pcicfgrw8)(int, int, int, int);
+int	(*pcicfgrw16)(int, int, int, int);
+int	(*pcicfgrw32)(int, int, int, int);
+int	pciscan(int bno, Pcidev **list, Pcidev *parent);
+u32	pcibarsize(Pcidev*, int);
+int	pcicfgr8(Pcidev*, int);
+int	pcicfgr16(Pcidev*, int);
+int	pcicfgr32(Pcidev*, int);
+void	pcicfgw8(Pcidev*, int, int);
+void	pcicfgw16(Pcidev*, int, int);
+void	pcicfgw32(Pcidev*, int, int);
+void	pciclrbme(Pcidev*);
+void	pciclrioe(Pcidev*);
+void	pciclrmwi(Pcidev*);
+int	pcigetpms(Pcidev*);
+void	pcihinv(Pcidev*);
+uchar	pciipin(Pcidev*, uchar);
+Pcidev* pcimatch(Pcidev*, int, int);
+Pcidev* pcimatchtbdf(int);
+void	pcireset(void);
+void	pcisetbme(Pcidev*);
+void	pcisetioe(Pcidev*);
+int	pcisetpms(Pcidev*, int);
+void	pcmcisread(PCMslot*);
+int	pcmcistuple(int, int, int, void*, int);
+PCMmap*	pcmmap(int, ulong, int, int);
+int	pcmspecial(char*, ISAConf*);
+int	(*_pcmspecial)(char *, ISAConf *);
+void	pcmspecialclose(int);
+void	(*_pcmspecialclose)(int);
+void	pcmunmap(int, PCMmap*);
+void	pmap(uintptr, u64, s64);
+void	poolinit(void);
+void	poolsizeinit(void);
+void	procsave(Proc*);
+void	procsetup(Proc*);
+void	punmap(uintptr, vlong);
+void	putcr0(u64);
+void	putcr2(u64);
+void	putcr3(u64);
+void	putcr4(u64);
+void	putxcr0(u64);
+void	putdr(u64*);
+void	putdr01236(u64*);
+void	putdr6(u64);
+void	putdr7(u64);
+void*	rampage(void);
+s32	rdmsr(s32, s64*);
+ulong rdtsc32(void);
+void	rdrandbuf(void*, u32);
+void*	rsdsearch(void);
+void	screeninit(void);
+int	screenprint(char*, ...);			/* debugging */
+void	(*screenputs)(char*, int);
+void	setconfenv(void);
+void*	sigsearch(char*, int);
+s32	segflush(void*, u32);
+void	setpanic(void);
+void	showframe(void*, void*);
+void	syncclock(void);
+uvlong	tscticks(uvlong*);
+void	trapenable(int, void (*)(Ureg*, void*), void*, char*);
+void	trapinit(void);
+void	trapinit0(void);
+int	tas(void*);
+uintptr	umballoc(uintptr, u32, u32);
+void	umbfree(uintptr, u32);
+uintptr	umbrwmalloc(uintptr, int, int);
+void	umbrwfree(uintptr, int);
+u64	upaalloc(u64, u32, u32);
+u64	upamalloc(u64, u32, u32);
+u64	upaallocwin(u64, u32, u32, u32);
+void	upafree(uintptr, u32);
+void	upareserve(uintptr, u32);
+u64	us2fastticks(u64);
+void	vectortable(void);
+void*	vmap(uintptr, int);
+void	vunmap(void*, int);
+void	wbinvd(void);
+s32	wrmsr(ulong, ulong);
+int	xchgw(ushort*, int);
+ulong	kzeromap(ulong, ulong, int);
+void	nmiscreen(void);
+int	kbdinready(void);
+
+/* true when the low two bits of the saved cs are both set (privilege level 3) */
+#define	userureg(ur)	(((ur)->cs & 3) == 3)
+/* claim the next slot of up->errlab and setlabel() on it */
+#define	waserror()	(up->nerrlab++, setlabel(&up->errlab[up->nerrlab-1]))
+/* the word just below the address of the first argument */
+#define getcallerpc(x)	(((uintptr*)(x))[-1])
+/* KADDR sets the KZERO bits of an address, PADDR clears them */
+#define KADDR(a)	((void*)((uintptr)(a)|KZERO))
+#define PADDR(a)	((uintptr)(a)&~(uintptr)KZERO)
+
+/* nothing to do on this platform; clockcheck() expands to a bare ';' */
+#define	dcflush(a, b)
+#define	clockcheck();
+#define 	dumplongs(x, y, z)
--- /dev/null
+++ b/os/pc64/fpsave.s
@@ -1,0 +1,9 @@
+/* FSTENV to the address passed in RARG; AX is used as scratch */
+TEXT	FPsave(SB), 1, $0	/* save FPU environment without waiting */
+	MOVQ	RARG, AX
+	FSTENV	0(AX)
+	RET
+ 
+/* FLDENV from the address passed in RARG; AX is used as scratch */
+TEXT	FPrestore(SB), 1, $0	/* restore FPU environment without waiting */
+	MOVQ	RARG, AX
+	FLDENV	0(AX)
+	RET
--- /dev/null
+++ b/os/pc64/fpu.c
@@ -1,0 +1,378 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "ureg.h"
+#include "io.h"
+
+/* CR4 bits that fpuinit() turns on */
+enum {
+	CR4Osfxsr  = 1 << 9,
+	CR4Oxmmex  = 1 << 10,
+	CR4Oxsave  = 1 << 18,	/* set only when both Xsave and Avx are reported */
+};
+
+/*
+ * SIMD Floating Point.
+ * Assembler support to get at the individual instructions
+ * is in l.s.
+ */
+extern void _clts(void);
+extern void _fldcw(u16int);
+extern void _fnclex(void);
+extern void _fninit(void);
+extern void _fxrstor(void*);
+extern void _fxsave(void*);
+extern void _xrstor(void*);
+extern void _xsave(void*);
+extern void _xsaveopt(void*);
+extern void _fwait(void);
+extern void _ldmxcsr(u32);
+extern void _stts(void);
+
+/*
+ * Save with _fxsave, then _stts().
+ * NOTE(review): presumably _stts makes the next FP instruction
+ * trap to mathemu -- confirm against l.s.
+ */
+static void
+fpssesave(FPsave *s)
+{
+	_fxsave(s);
+	_stts();
+}
+/* _clts() first, then restore with _fxrstor; counterpart of fpssesave */
+static void
+fpsserestore(FPsave *s)
+{
+	_clts();
+	_fxrstor(s);
+}
+
+/* save with _xsave, then _stts(); used when neither Xsaves nor Xsaveopt is reported */
+static void
+fpxsave(FPsave *s)
+{
+	_xsave(s);
+	_stts();
+}
+/* _clts() first, then restore with _xrstor */
+static void
+fpxrestore(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+/*
+ * Save routine chosen by fpuinit() when Xsaves is reported.
+ * NOTE(review): the body is the same as fpxsaveopt (it calls
+ * _xsaveopt, and no xsaves wrapper is declared in this file) --
+ * confirm this is intended.
+ */
+static void
+fpxsaves(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+/*
+ * Restore routine chosen by fpuinit() when Xsaves is reported.
+ * NOTE(review): the body is the same as fpxrestore (it calls
+ * _xrstor) -- confirm this is intended.
+ */
+static void
+fpxrestores(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+/* save with _xsaveopt, then _stts(); chosen by fpuinit() when Xsaveopt is reported */
+static void
+fpxsaveopt(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+
+/*
+ * Exception names indexed by status bit number, as used by
+ * mathnote().  Bit 0 has no entry because mathnote() decides its
+ * message separately.
+ */
+static char* mathmsg[] =
+{
+	nil,	/* handled below */
+	"denormalized operand",
+	"division by zero",
+	"numeric overflow",
+	"numeric underflow",
+	"precision loss",
+};
+
+/* last argument to postnote() in this file */
+enum
+{
+	NDebug,				/* print debug message */
+};
+
+/*
+ * Post a "sys: fp:" note to the current process describing the
+ * floating point exception recorded in status, with pc as the
+ * faulting fp instruction address.
+ * Bit 0 takes precedence over the others: with bit 0x40 also set
+ * it is reported as a stack overflow or underflow according to
+ * bit 0x200, otherwise as an invalid operation.  Failing that,
+ * the lowest set bit among 1-5 selects the message from mathmsg[].
+ */
+static void
+mathnote(ulong status, uintptr pc)
+{
+	char *msg, note[ERRMAX];
+	int bit;
+
+	/*
+	 * Some attention should probably be paid here to the
+	 * exception masks and error summary.
+	 */
+	if(status & 0x01){
+		if((status & 0x40) == 0)
+			msg = "invalid operation";
+		else if(status & 0x200)
+			msg = "stack overflow";
+		else
+			msg = "stack underflow";
+	}else{
+		msg = "unknown exception";
+		for(bit = 1; bit <= 5; bit++)
+			if(status & (1<<bit)){
+				msg = mathmsg[bit];
+				break;
+			}
+	}
+	snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=0x%lux",
+		msg, pc, status);
+	postnote(up, 1, note, NDebug);
+}
+
+/*
+ *  math coprocessor error
+ */
+/*
+ * Handler for VectorCERR (see mathinit).  Saves the state, marks
+ * it inactive while keeping the FPnouser, FPkernel and index bits,
+ * and posts a note built from the saved status word and
+ * instruction pointer.
+ */
+static void
+matherror(Ureg *, void*)
+{
+	/*
+	 * Save FPU state to check out the error.
+	 */
+	fpsave(up->fpsave);
+	up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+	mathnote(up->fpsave->fsw, up->fpsave->rip);
+}
+
+/*
+ *  SIMD error
+ */
+/*
+ * Handler for VectorSIMD (see mathinit).  As matherror, but the
+ * status comes from the low six bits of the saved mxcsr and the
+ * pc from the trap frame.
+ */
+static void
+simderror(Ureg *ureg, void*)
+{
+	fpsave(up->fpsave);
+	up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+	mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
+}
+
+/*
+ * Put the floating point unit into its initial state for a process.
+ */
+void
+fpinit(void)
+{
+	/*
+	 * A process tries to use the FPU for the
+	 * first time and generates a 'device not available'
+	 * exception.
+	 * Turn the FPU on and initialise it for use.
+	 * Set the precision and mask the exceptions
+	 * we don't care about from the generic Mach value.
+	 */
+	_clts();
+	_fninit();
+	_fwait();
+	/*
+	 * x87 control word 0x0232: double precision; denormal,
+	 * underflow and precision exceptions masked.
+	 * MXCSR 0x1900 masks the same three for SSE.
+	 */
+	_fldcw(0x0232);
+	_ldmxcsr(0x1900);
+}
+
+/*
+ *  math coprocessor emulation fault
+ */
+/*
+ * Handler for VectorCNA (see mathinit): the current process
+ * touched the floating point unit while it was not available.
+ * Depending on up->fpstate this either sets up a fresh save slot
+ * and initialises the unit, or restores previously saved state.
+ * A process with FPillegal set (floating point in a note handler)
+ * is sent a note instead.
+ */
+static void
+mathemu(Ureg *ureg, void*)
+{
+	ulong status, control;
+	int index;
+
+	if(up->fpstate & FPillegal){
+		/* someone did floating point in a note handler */
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+	case FPactive	| FPpush:
+		_clts();
+		fpsave(up->fpsave);
+		/* fall through */
+	case FPinactive	| FPpush:
+		up->fpstate += FPindex1;
+		/* fall through */
+	case FPinit	| FPpush:
+	case FPinit:
+		fpinit();
+		index = up->fpstate >> FPindexs;
+		/*
+		 * Check against the real size of fpslot[]: it is declared
+		 * with (FPindexm+1)>>FPindexs == 3 entries, so testing
+		 * against FPindexm>>FPindexs (3) would let index 3
+		 * through and run off the end of the array.
+		 */
+		if(index < 0 || index >= nelem(up->fpslot))
+			panic("fpslot index overflow: %d", index);
+		if(userureg(ureg)){
+			if(index != 0)
+				panic("fpslot index %d != 0 for user", index);
+		} else {
+			if(index == 0)
+				up->fpstate |= FPnouser;
+			up->fpstate |= FPkernel;
+		}
+		/* keep trying until the slot's save area has been allocated */
+		while(up->fpslot[index] == nil)
+			up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+		up->fpsave = up->fpslot[index];
+		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+		break;
+	case FPinactive:
+		/*
+		 * Before restoring the state, check for any pending
+		 * exceptions, there's no way to restore the state without
+		 * generating an unmasked exception.
+		 * More attention should probably be paid here to the
+		 * exception masks and error summary.
+		 */
+		status = up->fpsave->fsw;
+		control = up->fpsave->fcw;
+		if((status & ~control) & 0x07F){
+			mathnote(status, up->fpsave->rip);
+			break;
+		}
+		fprestore(up->fpsave);
+		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
+		break;
+	case FPactive:
+		/* the unit is supposed to be available already: should not trap */
+		panic("math emu pid %ld %s pc %#p", 
+			up->pid, up->text, ureg->pc);
+		break;
+	}
+}
+
+/*
+ *  math coprocessor segment overrun
+ *
+ *  Handler installed by mathinit() for VectorCSO; it simply calls
+ *  pexit("math overrun", 0).  Neither argument is used.
+ */
+static void
+mathover(Ureg*, void*)
+{
+	pexit("math overrun", 0);
+}
+
+/*
+ * Register the FPU-related handlers: coprocessor error, device
+ * not available (mathemu), segment overrun and SIMD error.
+ */
+void
+mathinit(void)
+{
+	trapenable(VectorCERR, matherror, 0, "matherror");
+	/* family 3 cpus also get matherror attached to IRQ 13 */
+	if(m->cpuidfamily == 3)
+		intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+	trapenable(VectorCNA, mathemu, 0, "mathemu");
+	trapenable(VectorCSO, mathover, 0, "mathover");
+	trapenable(VectorSIMD, simderror, 0, "simderror");
+}
+
+/*
+ * fpuinit(), called from cpuidentify() for each cpu.
+ *
+ * Sets the CR4 bits needed for FXSAVE/SSE and selects the
+ * fpsave/fprestore implementations:
+ *	- cpu reports both Xsave and Avx and "*noavx" is not set in
+ *	  the boot configuration: enable XSAVE, program XCR0 and pick
+ *	  the xsaves, xsaveopt or plain xsave variant according to
+ *	  cpuid(0xd, 1);
+ *	- otherwise: use the SSE (fxsave-based) routines.
+ */
+void
+fpuinit(void)
+{
+	u64 cr4;
+	u32 regs[4];
+
+	cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+	if((m->cpuidcx & (Xsave|Avx)) == (Xsave|Avx) && getconf("*noavx") == nil){
+		cr4 |= CR4Oxsave;
+		/* CR4 must be written before XCR0 is programmed */
+		putcr4(cr4);
+		m->xcr0 = 7; /* x87, sse, avx */
+		putxcr0(m->xcr0);
+		/* regs[0] (AX) carries the Xsaves/Xsaveopt feature bits tested below */
+		cpuid(0xd, 1, regs);
+		if(regs[0] & Xsaves){
+			fpsave = fpxsaves;
+			fprestore = fpxrestores;
+		} else {
+			if(regs[0] & Xsaveopt)
+				fpsave = fpxsaveopt;
+			else
+				fpsave = fpxsave;
+			fprestore = fpxrestore;
+		}
+	} else {
+		putcr4(cr4);
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+	}
+}
+
+/*
+ * Reset p's FPU state to FPinit and set the TS flag (_stts), so
+ * the FPU is set up lazily by mathemu() on first use.
+ */
+void
+fpuprocsetup(Proc *p)
+{
+	p->fpstate = FPinit;
+	_stts();
+}
+
+/*
+ * Give the new process p a copy of the current process's FPU
+ * state from slot 0.  If the current state is live in the FPU it
+ * is first saved to memory.  Runs with interrupts disabled
+ * (splhi/splx).  States not listed in the switch (e.g. FPinit)
+ * leave p untouched.
+ */
+void
+fpuprocfork(Proc *p)
+{
+	int s;
+
+	/* save floating point state */
+	s = splhi();
+	switch(up->fpstate & ~FPillegal){
+	case FPactive	| FPpush:
+		_clts();
+		/* fall through */
+	case FPactive:
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive | (up->fpstate & FPpush);
+		/* fall through */
+	case FPactive	| FPkernel:
+	case FPinactive	| FPkernel:
+	case FPinactive	| FPpush:
+	case FPinactive:
+		/* retry until the allocation succeeds */
+		while(p->fpslot[0] == nil)
+			p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+		/* the child's current save area is its slot 0, filled from the parent's slot 0 */
+		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+		p->fpstate = FPinactive;
+	}
+	splx(s);
+}
+
+/*
+ * Save p's FPU state if it is currently live in the FPU.  For a
+ * Moribund process nothing is saved: pending exceptions are
+ * cleared and TS is set again.  Any other state is left alone.
+ */
+void
+fpuprocsave(Proc *p)
+{
+	switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
+	case FPactive	| FPpush:
+		_clts();
+		/* fall through */
+	case FPactive:
+		if(p->state == Moribund){
+			_fnclex();
+			_stts();
+			break;
+		}
+		/*
+		 * Fpsave() stores without handling pending
+		 * unmasked exceptions. Postnote() can't be called
+		 * so the handling of pending exceptions is delayed
+		 * until the process runs again and generates an
+		 * emulation fault to activate the FPU.
+		 */
+		fpsave(p->fpsave);
+		p->fpstate = FPinactive | (p->fpstate & ~FPactive);
+		break;
+	}
+}
+
+/*
+ * Intentionally empty: saved state is restored lazily by
+ * mathemu() (its FPinactive case calls fprestore).
+ */
+void
+fpuprocrestore(Proc*)
+{
+}
+
+
+/*
+ * Fpusave and fpurestore lazily save and restore FPU state across
+ * system calls and the pagefault handler so that we can take
+ * advantage of SSE instructions such as AES-NI in the kernel.
+ *
+ * fpusave() returns the previous up->fpstate, which the caller
+ * must hand back to fpurestore().  Nothing is written to memory
+ * here: if the FPU was active, TS is set so a later FPU use traps
+ * to mathemu(), and FPpush is recorded in up->fpstate (FPillegal
+ * is dropped).
+ */
+int
+fpusave(void)
+{
+	int ostate = up->fpstate;
+	if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+		_stts();
+	up->fpstate = FPpush | (ostate & ~FPillegal);
+	return ostate;
+}
+/*
+ * Undo fpusave(): ostate is the value fpusave() returned.
+ * If up->fpstate is still exactly what fpusave() left, the FPU
+ * was not touched in between and only TS needs clearing again.
+ * Otherwise the FPU was used: switch back to the save slot that
+ * ostate refers to and, if ostate was active, record it as
+ * inactive so that its contents are reloaded from memory later.
+ */
+void
+fpurestore(int ostate)
+{
+	int astate = up->fpstate;
+	if(astate == (FPpush | (ostate & ~FPillegal))){
+		if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+			_clts();
+	} else {
+		if(astate == FPinit)	/* don't restore on procexec()/procsetup() */
+			return;
+		if((astate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
+			_stts();
+		/* select the slot encoded in the index bits of ostate */
+		up->fpsave = up->fpslot[ostate>>FPindexs];
+		if(ostate & FPactive)
+			ostate = FPinactive | (ostate & ~FPactive);
+	}
+	up->fpstate = ostate;
+}
--- /dev/null
+++ b/os/pc64/inferno.main.c
@@ -1,0 +1,460 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+
+extern int main_pool_pcnt;
+extern int heap_pool_pcnt;
+extern int image_pool_pcnt;
+int	pckdebug;
+
+static  uchar *sp;	/* stack pointer for /boot */
+
+char bootdisk[KNAMELEN];
+char *confname[MAXCONF];
+char *confval[MAXCONF];
+int nconf;
+
+/*
+ * Parse the boot arguments left at BOOTARGS into the confname[]
+ * and confval[] tables.  The text is edited in place: carriage
+ * returns are dropped, tabs become spaces, and each "name=value"
+ * line is split at the first '='.  Lines starting with '#' or
+ * lacking '=' are ignored.
+ */
+static void
+options(void)
+{
+	char *args, *src, *dst, *eq, *line[MAXCONF];
+	long k, nline;
+
+	/*
+	 *  parse configuration args from dos file plan9.ini
+	 */
+	args = BOOTARGS;	/* where b.com leaves its config */
+	args[BOOTARGSLEN-1] = 0;
+
+	/*
+	 * Strip out '\r', change '\t' -> ' '.
+	 */
+	dst = args;
+	for(src = args; *src != 0; src++){
+		if(*src == '\r')
+			continue;
+		if(*src == '\t')
+			*src = ' ';
+		*dst++ = *src;
+	}
+	*dst = 0;
+
+	nline = getfields(args, line, MAXCONF, 1, "\n");
+	for(k = 0; k < nline; k++){
+		if(*line[k] == '#')
+			continue;
+		eq = strchr(line[k], '=');
+		if(eq == nil)
+			continue;
+		/* terminate the name; the value starts right after '=' */
+		*eq = '\0';
+		confname[nconf] = line[k];
+		confval[nconf] = eq + 1;
+		nconf++;
+	}
+}
+
+/*
+ * Boot-time progress marker: print "m..." and then spin in an
+ * empty counting loop as a crude delay.
+ */
+static void
+doc(char *m)
+{
+	int i;
+	print("%s...\n", m);
+	/* i is bumped both here and in the loop header, so it advances by 2 per pass */
+	for(i = 0; i < 100*1024*1024; i++)
+		i++;
+}
+
+/*
+ * Kernel entry point, called from the assembly start-up code.
+ * Brings the subsystems up in order (machine state, boot options,
+ * console, cpu identification, memory, traps, mmu, interrupts,
+ * timers, FPU traps, keyboard, clock, processes, devices), creates
+ * the first process with userinit() and enters the scheduler.
+ * doc() is called before the later steps to print progress.
+ */
+void
+main(void)
+{
+	outb(0x3F2, 0x00);			/* botch: turn off the floppy motor */
+
+	mach0init();
+	options();
+	/* on 9front: trapinit0(); */
+	ioinit();
+	i8250console();
+	quotefmtinstall();
+	screeninit();
+	print("\nInferno\n");
+	kbdinit();
+	i8253init();
+	cpuidentify();
+	confinit();
+	archinit();
+	xinit();
+	poolsizeinit();
+	trapinit();
+	printinit();
+	cpuidprint();
+	mmuinit();
+	eve = strdup("inferno");
+	if(arch->intrinit){	/* launches other processors on an mp */
+		doc("intrinit");
+		arch->intrinit();
+	}
+	doc("timersinit");
+	timersinit();
+	doc("mathinit");
+	mathinit();
+	doc("kbdenable");
+	kbdenable();
+	if(arch->clockenable){
+		doc("clockinit");
+		arch->clockenable();
+	}
+	doc("procinit");
+	procinit();
+	doc("links");
+	links();
+	doc("chandevreset");
+	chandevreset();
+	doc("userinit");
+	userinit();
+	doc("schedinit");
+	active.thunderbirdsarego = 1;
+	schedinit();
+	
+}
+
+/*
+ * Set up the Mach structure for the boot processor: point
+ * MACHP(0) at CPU0MACH, record the fixed PML4 and GDT addresses,
+ * run the common machinit() and mark cpu 0 as the only active
+ * machine.
+ */
+void
+mach0init(void)
+{
+	conf.nmach = 1;
+
+	MACHP(0) = (Mach*)CPU0MACH;
+
+	/* these three fields are preserved by machinit() across its memset */
+	m->machno = 0;
+	m->pml4 = (u64*)CPU0PML4;
+	m->gdt = (Segdesc*)CPU0GDT;
+
+	machinit();
+
+	active.machs = 1;
+	active.exiting = 0;
+}
+
+/*
+ * Clear the current Mach structure, preserving only machno, pml4
+ * and gdt, and install a provisional delay-loop constant.
+ */
+void
+machinit(void)
+{
+	int machno;
+	Segdesc *gdt;
+	uintptr *pml4;
+
+	/* stash the fields that must survive the memset */
+	machno = m->machno;
+	pml4 = m->pml4;
+	gdt = m->gdt;
+	memset(m, 0, sizeof(Mach));
+	m->machno = machno;
+	m->pml4 = pml4;
+	m->gdt = gdt;
+	/* m->perf.period = 1; TODO 9front uses it */
+
+	/*
+	 * For polled uart output at boot, need
+	 * a default delay constant. 100000 should
+	 * be enough for a while. Cpuidentify will
+	 * calculate the real value later.
+	 */
+	m->loopconst = 100000;
+}
+
+/*
+ * Body of the first process (its sched.pc is set to this function
+ * by userinit()).  Enables interrupts, gives the process a root
+ * and current directory, initialises the devices, exports the
+ * boot configuration as environment variables and finally starts
+ * the Dis interpreter on /osinit.dis.
+ */
+void
+init0(void)
+{
+	Osenv *o;
+	int i;
+	char buf[2*KNAMELEN];
+
+	up->nerrlab = 0;
+
+	spllo();
+	if(waserror())
+		panic("init0: %r");
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	o = up->env;
+	o->pgrp->slash = namec("#/", Atodir, 0, 0);
+	cnameclose(o->pgrp->slash->name);
+	o->pgrp->slash->name = newcname("/");
+	o->pgrp->dot = cclone(o->pgrp->slash);
+
+	chandevinit();
+
+	/* environment setup runs under its own error label */
+	if(!waserror()){
+		ksetenv("cputype", "386", 0);
+		snprint(buf, sizeof(buf), "386 %s", conffile);
+		ksetenv("terminal", buf, 0);
+		for(i = 0; i < nconf; i++){
+			/* names starting with '*' only get the second ksetenv call (last argument 1) */
+			if(confname[i][0] != '*')
+				ksetenv(confname[i], confval[i], 0);
+			ksetenv(confname[i], confval[i], 1);
+		}
+		poperror();
+	}
+
+	poperror();
+
+	disinit("/osinit.dis");
+}
+
+/*
+ * Create the first process: allocate it, give it a file group,
+ * a process group and the user name in eve, name it "interp",
+ * arrange for it to start executing init0() on its own kernel
+ * stack, and make it ready to run.
+ */
+void
+userinit(void)
+{
+	Proc *p;
+	Osenv *o;
+
+	p = newproc();
+	o = p->env;
+
+	o->fgrp = newfgrp(nil);
+
+	o->pgrp = newpgrp();
+	kstrdup(&o->user, eve);
+
+	strcpy(p->text, "interp");
+
+	p->fpstate = FPINIT;
+	fpoff();
+
+	/*
+	 * Kernel Stack
+	 *
+	 * N.B. make sure there's
+	 *	room (one uintptr) for gotolabel's return PC
+	 */
+	p->sched.pc = (uintptr)init0;
+	p->sched.sp = (uintptr)p->kstack+KSTACK-sizeof(uintptr);
+
+	ready(p);
+}
+
+Conf	conf;
+
+/*
+ * Return the value of boot configuration variable `name'
+ * (compared case-insensitively), or 0 if it is not set.  The
+ * first matching entry wins.
+ */
+char*
+getconf(char *name)
+{
+	int k;
+
+	k = 0;
+	while(k < nconf){
+		if(cistrcmp(confname[k], name) == 0)
+			return confval[k];
+		k++;
+	}
+	return 0;
+}
+
+/*
+ * Size memory and derive the basic configuration limits.
+ * Honours two boot variables:
+ *	*maxmem		passed to meminit() (0 when unset);
+ *	*kernelpercent	pcnt becomes 100 minus this value.
+ * A pcnt below 10 (including the unset case) is replaced by 70.
+ */
+void
+confinit(void)
+{
+	char *p;
+	int pcnt;
+	ulong maxmem;
+
+	if(p = getconf("*maxmem"))
+		maxmem = strtoul(p, 0, 0);
+	else
+		maxmem = 0;
+	if(p = getconf("*kernelpercent"))
+		pcnt = 100 - strtol(p, 0, 0);
+	else
+		pcnt = 0;
+
+	meminit(maxmem);
+
+	conf.npage = conf.npage0 + conf.npage1;
+	if(pcnt < 10)
+		pcnt = 70;
+	/* half of the (100-pcnt)% share of all pages, expressed in bytes */
+	conf.ialloc = (((conf.npage*(100-pcnt))/100)/2)*BY2PG;
+
+	/* 100 processes plus 5 per megabyte of memory */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+}
+
+/*
+ * Set the maximum size of the main, heap and image pools to the
+ * configured percentage (main_pool_pcnt, heap_pool_pcnt,
+ * image_pool_pcnt) of physical memory.
+ *
+ * The percentage is applied in 64-bit arithmetic: with a 32-bit
+ * ulong, nb*pcnt wraps as soon as the product reaches 2^32
+ * (e.g. 256MB at 32%), which would yield a far too small pool.
+ * The quotient is converted back to ulong so the value handed to
+ * poolsize() has the same type as before.
+ */
+void
+poolsizeinit(void)
+{
+	ulong nb;
+
+	nb = conf.npage*BY2PG;
+	poolsize(mainmem, (ulong)(((u64)nb*main_pool_pcnt)/100), 0);
+	poolsize(heapmem, (ulong)(((u64)nb*heap_pool_pcnt)/100), 0);
+	poolsize(imagmem, (ulong)(((u64)nb*image_pool_pcnt)/100), 1);
+}
+
+/*
+ * Messages for matherror(), indexed by the bit number (0-7) of
+ * the first bit found set in the saved FPU status word.
+ */
+static char *mathmsg[] =
+{
+	"invalid operation",
+	"denormalized operand",
+	"division by zero",
+	"numeric overflow",
+	"numeric underflow",
+	"precision loss",
+	"stack",
+	"error",
+};
+
+/*
+ *  math coprocessor error
+ *
+ *  Handler installed by mathinit() for VectorCERR (and IRQ 13 on
+ *  family 3 cpus).  Saves the FPU environment, picks the message
+ *  for the lowest status bit that is set and raises it with
+ *  error(); arg is unused.
+ */
+void
+matherror(Ureg* ureg, void* arg)
+{
+	ulong status;
+	int i;
+	char *msg;
+	char note[ERRMAX];
+
+	USED(arg);
+
+	/*
+	 *  a write cycle to port 0xF0 clears the interrupt latch attached
+	 *  to the error# line from the 387
+	 */
+	if(!(m->cpuiddx & 0x01))
+		outb(0xF0, 0xFF);
+
+	/*
+	 *  save floating point state to check out error
+	 */
+	FPsave(&up->fpsave.env);
+	status = up->fpsave.env.status;
+
+	msg = 0;
+	/* report the lowest-numbered status bit that is set */
+	for(i = 0; i < 8; i++)
+		if((1<<i) & status){
+			msg = mathmsg[i];
+			sprint(note, "sys: fp: %s fppc=0x%zux", msg, up->fpsave.env.pc);
+			error(note);
+			break;
+		}
+	if(msg == 0){
+		sprint(note, "sys: fp: unknown fppc=0x%zux", up->fpsave.env.pc);
+		error(note);
+	}
+	/*
+	 * NOTE(review): every path above calls error(); if error() does
+	 * not return, this kernel-pc check is never reached -- confirm.
+	 */
+	if(ureg->pc & KZERO)
+		panic("fp: status %lux fppc=0x%zux pc=0x%zux", status,
+			up->fpsave.env.pc, ureg->pc);
+}
+
+/*
+ *  math coprocessor emulation fault
+ *
+ *  Activates the FPU for the current process: a fresh (FPINIT)
+ *  process gets an initialised FPU, an FPINACTIVE one gets its
+ *  saved state restored.  Taking this fault while already
+ *  FPACTIVE is a kernel bug.  Any other state is ignored.
+ */
+void
+mathemu(Ureg* ureg, void* arg)
+{
+	int state;
+
+	USED(ureg, arg);
+	state = up->fpstate;
+	if(state == FPINIT){
+		fpinit();
+		up->fpstate = FPACTIVE;
+	}else if(state == FPINACTIVE){
+		fprestore(&up->fpsave);
+		up->fpstate = FPACTIVE;
+	}else if(state == FPACTIVE)
+		panic("math emu");
+}
+
+/*
+ *  math coprocessor segment overrun
+ *
+ *  Handler installed by mathinit() for VectorCSO: logs the pc and
+ *  pid, then calls pexit("math overrun", 0).  arg is unused.
+ */
+void
+mathover(Ureg* ureg, void* arg)
+{
+	USED(arg);
+	print("sys: fp: math overrun pc 0x%zux pid %d\n", ureg->pc, up->pid);
+	pexit("math overrun", 0);
+}
+
+/*
+ * Register the FPU-related handlers: coprocessor error, device
+ * not available (mathemu) and segment overrun.
+ */
+void
+mathinit(void)
+{
+	trapenable(VectorCERR, matherror, 0, "matherror");
+	/* family 3 cpus also get matherror attached to IRQ 13 */
+	if(X86FAMILY(m->cpuidax) == 3)
+		intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror");
+	trapenable(VectorCNA, mathemu, 0, "mathemu");
+	trapenable(VectorCSO, mathover, 0, "mathover");
+}
+
+/*
+ *  Save the mach dependent part of the process state.
+ *
+ *  Only the FPU needs attention, and only while it is active:
+ *  a Moribund process just has the FPU switched off, any other
+ *  process has its state saved.  Either way the process is left
+ *  FPINACTIVE.
+ */
+void
+procsave(Proc *p)
+{
+	if(p->fpstate != FPACTIVE)
+		return;
+	if(p->state != Moribund)
+		fpsave(&up->fpsave);
+	else
+		fpoff();
+	p->fpstate = FPINACTIVE;
+}
+
+/*
+ * Shut the system down: drop the current process pointer, shut
+ * down the devices and reset the machine through arch->reset().
+ * ispanic is accepted but not used.
+ */
+void
+exit(int ispanic)
+{
+	USED(ispanic);
+
+	up = 0;
+	print("exiting\n");
+
+	/* Shutdown running devices */
+	chandevshutdown();
+
+	arch->reset();
+}
+
+/*
+ * Reboot is implemented as a plain exit(0), which ends in
+ * arch->reset().
+ */
+void
+reboot(void)
+{
+	exit(0);
+}
+
+/*
+ * Fill in *isa from the boot configuration entry named
+ * "<class><ctlrno>" (e.g. class "ether", ctlrno 0 -> "ether0").
+ * The value is split into options; the recognised ones
+ * (type=, port=, irq=, dma=, mem=, size=, freq=; matched without
+ * regard to case) are decoded into the corresponding fields, and
+ * every option is left in isa->opt[] for the driver to inspect.
+ * Returns 1 if the entry exists, 0 otherwise.
+ */
+int
+isaconfig(char *class, int ctlrno, ISAConf *isa)
+{
+	char key[32], *val, *opt;
+	int n;
+
+	snprint(key, sizeof key, "%s%d", class, ctlrno);
+	val = getconf(key);
+	if(val == nil)
+		return 0;
+
+	isa->nopt = tokenize(val, isa->opt, NISAOPT);
+	for(n = 0; n < isa->nopt; n++){
+		opt = isa->opt[n];
+		if(cistrncmp(opt, "type=", 5) == 0){
+			isa->type = opt + 5;
+			continue;
+		}
+		if(cistrncmp(opt, "port=", 5) == 0){
+			isa->port = strtoul(opt+5, &opt, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "irq=", 4) == 0){
+			isa->irq = strtoul(opt+4, &opt, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "dma=", 4) == 0){
+			isa->dma = strtoul(opt+4, &opt, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "mem=", 4) == 0){
+			isa->mem = strtoul(opt+4, &opt, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "size=", 5) == 0){
+			isa->size = strtoul(opt+5, &opt, 0);
+			continue;
+		}
+		if(cistrncmp(opt, "freq=", 5) == 0)
+			isa->freq = strtoul(opt+5, &opt, 0);
+	}
+	return 1;
+}
+
+/*
+ *  put the processor in the halt state if we've no processes to run.
+ *  an interrupt will get us going again.
+ *  only done when conf.nmach == 1; with more processors configured
+ *  this returns immediately.
+ */
+void
+idlehands(void)
+{
+	if(conf.nmach == 1)
+		halt();
+}
--- /dev/null
+++ b/os/pc64/io.h
@@ -1,0 +1,187 @@
+enum {
+	VectorDE	= 1,		/* debug exception */
+	VectorNMI	= 2,		/* non-maskable interrupt */
+	VectorBPT	= 3,		/* breakpoint */
+	VectorUD	= 6,		/* invalid opcode exception */
+	VectorCNA	= 7,		/* coprocessor not available */
+	Vector2F	= 8,		/* double fault */
+	VectorCSO	= 9,		/* coprocessor segment overrun */
+	VectorSNP	= 11,		/* segment not present */
+	VectorGPF	= 13,		/* general protection fault */
+	VectorPF	= 14,		/* page fault */
+	Vector15	= 15,		/* reserved */
+	VectorCERR	= 16,		/* coprocessor error */
+	VectorAC	= 17,		/* alignment check */
+	VectorMC	= 18,		/* machine check */
+	VectorSIMD	= 19,		/* simd error */
+
+	VectorPIC	= 32,		/* external i8259 interrupts */
+	IrqCLOCK	= 0,
+	IrqKBD		= 1,
+	IrqUART1	= 3,
+	IrqUART0	= 4,
+	IrqPCMCIA	= 5,
+	IrqFLOPPY	= 6,
+	IrqLPT		= 7,
+	IrqIRQ7		= 7,
+	IrqAUX		= 12,		/* PS/2 port */
+	IrqIRQ13	= 13,		/* coprocessor on 386 */
+	IrqATA0		= 14,
+	IrqATA1		= 15,
+	MaxIrqPIC	= 15,
+
+	VectorLAPIC	= VectorPIC+16,	/* local APIC interrupts */
+	IrqLINT0	= 16,		/* LINT[01] must be offsets 0 and 1 */
+	IrqLINT1	= 17,
+	IrqTIMER	= 18,
+	IrqERROR	= 19,
+	IrqPCINT	= 20,
+	IrqSPURIOUS	= 31,		/* must have bits [3-0] == 0x0F */
+	MaxIrqLAPIC	= 31,
+
+	VectorSYSCALL	= 64,
+
+	VectorAPIC	= 65,		/* external APIC interrupts */
+	MaxVectorAPIC	= 255,
+};
+
+/*
+ * One registered handler for an interrupt or trap vector.
+ * Handlers sharing a vector are chained through next.
+ */
+typedef struct Vctl {
+	Vctl	*next;			/* handlers on this vector */
+
+	void	(*f)(Ureg*, void*);	/* handler to call */
+	void	*a;			/* argument to call it with */
+
+	int	(*isr)(int);		/* get isr bit for this irq */
+	int	(*eoi)(int);		/* eoi */
+
+	int	(*enable)(Vctl*, int);
+	int	(*disable)(Vctl*, int);
+	void	*aux;
+
+	int	irq;
+	int	tbdf;
+	int	vno;
+	int	cpu;
+	int	local;
+
+	char	name[KNAMELEN];		/* of driver */
+	/* inferno specific */
+	int	isintr;			/* interrupt or fault/trap */
+} Vctl;
+
+enum {
+	MaxEISA		= 16,
+	CfgEISA		= 0xC80,
+};
+
+#define PCIWINDOW	0
+#define PCIWADDR(va)	(PADDR(va)+PCIWINDOW)
+#define ISAWINDOW	0
+#define ISAWADDR(va)	(PADDR(va)+ISAWINDOW)
+
+#define	BUSUNKNOWN	(-1)
+
+/* SMBus transactions */
+enum
+{
+	SMBquick,		/* sends address only */
+
+	/* write */
+	SMBsend,		/* sends address and cmd */
+	SMBbytewrite,		/* sends address and cmd and 1 byte */
+	SMBwordwrite,		/* sends address and cmd and 2 bytes */
+
+	/* read */
+	SMBrecv,		/* sends address, recvs 1 byte */
+	SMBbyteread,		/* sends address and cmd, recv's byte */
+	SMBwordread,		/* sends address and cmd, recv's 2 bytes */
+};
+
+typedef struct SMBus SMBus;
+struct SMBus {
+	QLock;		/* mutex */
+	Rendez	r;	/* rendezvous point for completion interrupts */
+	void	*arg;	/* implementation dependent */
+	ulong	base;	/* port or memory base of smbus */
+	int	busy;
+	void	(*transact)(SMBus*, int, int, int, uchar*);
+};
+
+/*
+ * PCMCIA support code.
+ */
+
+typedef struct PCMslot		PCMslot;
+typedef struct PCMconftab	PCMconftab;
+
+/*
+ * Map between ISA memory space and PCMCIA card memory space.
+ */
+struct PCMmap {
+	ulong	ca;			/* card address */
+	ulong	cea;			/* card end address */
+	ulong	isa;			/* ISA address */
+	int	len;			/* length of the ISA area */
+	int	attr;			/* attribute memory */
+	int	ref;
+};
+
+/* configuration table entry */
+struct PCMconftab
+{
+	int	index;
+	ushort	irqs;		/* legal irqs */
+	uchar	irqtype;
+	uchar	bit16;		/* true for 16 bit access */
+	struct {
+		ulong	start;
+		ulong	len;
+	} io[16];
+	int	nio;
+	uchar	vpp1;
+	uchar	vpp2;
+	uchar	memwait;
+	ulong	maxwait;
+	ulong	readywait;
+	ulong	otherwait;
+};
+
+/* a card slot */
+struct PCMslot
+{
+	Lock;
+	int	ref;
+
+	void	*cp;		/* controller for this slot */
+	long	memlen;		/* memory length */
+	uchar	base;		/* index register base */
+	uchar	slotno;		/* slot number */
+
+	/* status */
+	uchar	special;	/* in use for a special device */
+	uchar	already;	/* already inited */
+	uchar	occupied;
+	uchar	battery;
+	uchar	wrprot;
+	uchar	powered;
+	uchar	configed;
+	uchar	enabled;
+	uchar	busy;
+
+	/* cis info */
+	ulong	msec;		/* time of last slotinfo call */
+	char	verstr[512];	/* version string */
+	int	ncfg;		/* number of configurations */
+	struct {
+		ushort	cpresent;	/* config registers present */
+		ulong	caddr;		/* relative address of config registers */
+	} cfg[8];
+	int	nctab;		/* number of config table entries */
+	PCMconftab	ctab[8];
+	PCMconftab	*def;	/* default conftab */
+
+	/* memory maps */
+	Lock	mlock;		/* lock down the maps */
+	int	time;
+	PCMmap	mmap[4];	/* maps, last is always for the kernel */
+};
--- /dev/null
+++ b/os/pc64/l.s
@@ -1,0 +1,1259 @@
+#include "mem.h"
+
+MODE $32
+
+#define DELAY		BYTE $0xEB; BYTE $0x00	/* JMP .+2 */
+
+#define pFARJMP32(s, o)	BYTE $0xea;		/* far jump to ptr32:16 */\
+			LONG $o; WORD $s
+
+/*
+ * Enter here in 32-bit protected mode. Welcome to 1982.
+ * Make sure the GDT is set as it should be:
+ *	disable interrupts;
+ *	load the GDT with the table in _gdt32p;
+ *	load all the data segments
+ *	load the code segment via a far jump.
+ */
+TEXT _protected<>(SB), 1, $-4
+	CLI
+
+	MOVL	$_gdtptr32p<>-KZERO(SB), AX
+	MOVL	(AX), GDTR
+
+	MOVL	$SELECTOR(2, SELGDT, 0), AX
+	MOVW	AX, DS
+	MOVW	AX, ES
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS
+
+	pFARJMP32(SELECTOR(3, SELGDT, 0), _warp64<>-KZERO(SB))
+
+	BYTE	$0x90	/* align */
+
+/*
+ * Must be 4-byte aligned.
+ */
+TEXT _multibootheader<>(SB), 1, $-4
+	LONG	$0x1BADB002			/* magic */
+	LONG	$0x00010007			/* flags */
+	LONG	$-(0x1BADB002 + 0x00010007)	/* checksum */
+	LONG	$_multibootheader<>-KZERO(SB)	/* header_addr */
+	LONG	$_protected<>-KZERO(SB)		/* load_addr */
+	LONG	$edata-KZERO(SB)		/* load_end_addr */
+	LONG	$end-KZERO(SB)			/* bss_end_addr */
+	LONG	$_multibootentry<>-KZERO(SB)	/* entry_addr */
+	LONG	$0				/* mode_type */
+	LONG	$0				/* width */
+	LONG	$0				/* height */
+	LONG	$32				/* depth */
+
+/* 
+ * the kernel expects the data segment to be page-aligned
+ * multiboot bootloaders put the data segment right behind text
+ */
+TEXT _multibootentry<>(SB), 1, $-4
+	MOVL	$etext-KZERO(SB), SI
+	MOVL	SI, DI
+	ADDL	$(BY2PG-1), DI
+	ANDL	$~(BY2PG-1), DI
+	MOVL	$edata-KZERO(SB), CX
+	SUBL	DI, CX
+	ADDL	CX, SI
+	ADDL	CX, DI
+	INCL	CX	/* one more for post decrement */
+	STD
+	REP; MOVSB
+	MOVL	BX, multibootptr-KZERO(SB)
+	MOVL	$_protected<>-KZERO(SB), AX
+	JMP*	AX
+
+/* multiboot structure pointer (physical address) */
+TEXT multibootptr(SB), 1, $-4
+	LONG	$0
+
+TEXT _gdt<>(SB), 1, $-4
+	/* null descriptor */
+	LONG	$0
+	LONG	$0
+
+	/* (KESEG) 64 bit long mode exec segment */
+	LONG	$(0xFFFF)
+	LONG	$(SEGL|SEGG|SEGP|(0xF<<16)|SEGPL(0)|SEGEXEC|SEGR)
+
+	/* 32 bit data segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(0)|SEGDATA|SEGW)
+
+	/* 32 bit exec segment descriptor for 4 gigabytes (PL 0) */
+	LONG	$(0xFFFF)
+	LONG	$(SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(0)|SEGEXEC|SEGR)
+
+
+TEXT _gdtptr32p<>(SB), 1, $-4
+	WORD	$(4*8-1)
+	LONG	$_gdt<>-KZERO(SB)
+
+TEXT _gdtptr64p<>(SB), 1, $-4
+	WORD	$(4*8-1)
+	QUAD	$_gdt<>-KZERO(SB)
+
+TEXT _gdtptr64v<>(SB), 1, $-4
+	WORD	$(4*8-1)
+	QUAD	$_gdt<>(SB)
+
+/*
+ * Macros for accessing page table entries; change the
+ * C-style array-index macros into a page table byte offset
+ * It is O (as in offset) not 0 (zero)
+ */
+#undef PDO
+#undef PTO
+#define PML4O(v)	((PTLX((v), 3))<<3)
+#define PDPO(v)		((PTLX((v), 2))<<3)
+#define PDO(v)		((PTLX((v), 1))<<3)
+#define PTO(v)		((PTLX((v), 0))<<3)
+
+TEXT _warp64<>(SB), 1, $-4
+
+	/* clear global data tables - IDT, GDT, Page tables, Mach */
+	MOVL	$((KTZERO-IDTADDR)>>2), CX
+	MOVL	$IDTADDR, DI
+	XORL	AX, AX
+	CLD
+	REP;	STOSL
+
+	MOVL	$PML4ADDR, SI
+	MOVL	SI, AX				/* PML4 */
+	MOVL	AX, DX
+	ADDL	$(PTSZ|PTEWRITE|PTEVALID), DX	/* PDP at PML4 + PTSZ */
+	MOVL	DX, (AX)			/* PML4E for KZERO = 0 */
+
+	ADDL	$PTSZ, AX			/* PDP at PML4 + PTSZ */
+	ADDL	$PTSZ, DX			/* PD0 at PML4 + 2*PTSZ */
+	MOVL	DX, (AX)			/* PDPE for KZERO = 0 */
+
+	/*
+	 * map from KZERO = 0 to KTZERO using 4096 pages
+	 */
+	ADDL	$PTSZ, AX			/* PD0 at PML4 + 2*PTSZ */
+	ADDL	$PTSZ, DX			/* PT0 at PML4 + 3*PTSZ */
+	/* each page uses 4096 bytes = 0x1000 => (>>12) to find the number of pages */
+	MOVL	$((KTZERO-PT0ADDR)>>PGSHIFT), CX
+	ANDL	$(512-1), CX			/* not more than a table full */
+pdeloop:
+	MOVL	DX, (AX)			/* PDE from 0 .. */
+	ADDL	$8, AX
+	ADDL	$PTSZ, DX
+	LOOP	pdeloop
+
+	MOVL	$(PT0ADDR), AX
+	MOVL	$(PTEWRITE|PTEVALID), DX
+	MOVL	$((KTZERO-PT0ADDR)>>3), CX
+pteloop:
+	MOVL	DX, (AX)			/* PTE from 0 */
+	ADDL	$8, AX
+	ADDL	$PTSZ, DX
+	LOOP	pteloop
+
+	MOVL	$end-KZERO(SB), CX
+	ADDL	$(16*1024), CX			/* qemu puts multiboot data after the kernel */
+	ADDL	$(PGLSZ(0)-1), CX
+	ANDL	$~(PGLSZ(0)-1), CX
+	MOVL	CX, MemMin-KZERO(SB)		/* see memory.c */
+
+/*
+ * Enable and activate Long Mode. From the manual:
+ * 	make sure Page Size Extensions are off, and Page Global
+ *	Extensions and Physical Address Extensions are on in CR4;
+ *	set Long Mode Enable in the Extended Feature Enable MSR;
+ *	set Paging Enable in CR0;
+ *	make an inter-segment jump to the Long Mode code.
+ * It's all in 32-bit mode until the jump is made.
+ */
+TEXT _lme<>(SB), 1, $-4
+	MOVL	SI, CR3			/* load the mmu */
+	DELAY
+
+	MOVL	CR4, AX
+	ANDL	$~0x00000010, AX	/* Page Size Extension off (Bit 4) */
+	ORL	$0x000000A0, AX		/* Page Global, Phys. Address */
+	MOVL	AX, CR4
+
+	MOVL	$0xc0000080, CX		/* Extended Feature Enable */
+	RDMSR
+	ORL	$0x00000100, AX		/* Long Mode Enable */
+	WRMSR
+
+	MOVL	CR0, DX
+	ANDL	$~0x6000000a, DX
+	ORL	$0x80010000, DX		/* Paging Enable, Write Protect */
+	MOVL	DX, CR0
+
+	pFARJMP32(SELECTOR(KESEG, SELGDT, 0), _identity<>-KZERO(SB))
+
+/*
+ * Long mode. Welcome to 2003.
+ * Jump out of the identity map space;
+ * load a proper long mode GDT.
+ */
+MODE $64
+
+TEXT _identity<>(SB), 1, $-4
+	MOVQ	$_start64v<>(SB), AX
+	JMP*	AX
+
+TEXT _start64v<>(SB), 1, $-4
+	MOVQ	$_gdtptr64v<>(SB), AX
+	MOVL	(AX), GDTR
+
+	XORQ	AX, AX
+	MOVW	AX, DS				/* not used in long mode */
+	MOVW	AX, ES				/* not used in long mode */
+	MOVW	AX, FS
+	MOVW	AX, GS
+	MOVW	AX, SS				/* not used in long mode */
+
+	MOVW	AX, LDTR
+
+	MOVQ	$CPU0SP, SP
+	MOVQ	$CPU0MACH, RMACH
+	MOVQ	AX, RUSER			/* up = 0; */
+
+_clearbss:
+	MOVQ	$end(SB), CX
+	MOVQ	$edata(SB), DI
+	SUBQ	DI, CX				/* end-edata bytes */
+	SHRQ	$2, CX				/* end-edata doublewords */
+
+	CLD
+	REP;	STOSL				/* clear BSS */
+
+	PUSHQ	AX				/* clear flags */
+	POPFQ
+
+	CALL	main(SB)
+
+/*
+ * Park a processor. Should never fall through a return from main to here,
+ * should only be called by application processors when shutting down.
+ */
+TEXT idle(SB), 1, $-4
+_idle:
+	STI
+	HLT
+	JMP	_idle
+
+/*
+ * The CPUID instruction is always supported on the amd64.
+ */
+TEXT cpuid(SB), $-4
+	MOVL	RARG, AX			/* function in AX */
+	MOVL	cx+8(FP), CX			/* sub-level in CX */
+	CPUID
+
+	MOVQ	info+16(FP), BP
+	MOVL	AX, 0(BP)
+	MOVL	BX, 4(BP)
+	MOVL	CX, 8(BP)
+	MOVL	DX, 12(BP)
+	RET
+
+/*
+ * Port I/O.
+ */
+TEXT inb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	XORL	AX, AX
+	INB
+	RET
+
+TEXT insb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP;	INSB
+	RET
+
+TEXT ins(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	XORL	AX, AX
+	INW
+	RET
+
+TEXT inss(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP;	INSW
+	RET
+
+TEXT inl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	INL
+	RET
+
+TEXT insl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), DI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; INSL
+	RET
+
+/*
+ * outb(port, byte): write the low byte of the second argument to
+ * the I/O port given by the first.  The frame size is $-4 like
+ * every other leaf routine in this file (it was $-1, presumably a
+ * typo).
+ */
+TEXT outb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	byte+8(FP), AX
+	OUTB
+	RET
+
+TEXT outsb(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSB
+	RET
+
+TEXT outs(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	short+8(FP), AX
+	OUTW
+	RET
+
+TEXT outss(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSW
+	RET
+
+TEXT outl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVL	long+8(FP), AX
+	OUTL
+	RET
+
+TEXT outsl(SB), 1, $-4
+	MOVL	RARG, DX			/* MOVL	port+0(FP), DX */
+	MOVQ	address+8(FP), SI
+	MOVL	count+16(FP), CX
+	CLD
+	REP; OUTSL
+	RET
+
+/*
+ * there's a macro in fns.h but libinterp can't see it
+ *
+ * getcallerpc(&firstarg): the argument is the address of the
+ * calling function's first parameter; that function's return PC
+ * lives in the word just below it.  The pointer arrives in RARG,
+ * so fetch -8(RARG).  (The previous code loaded a+0(FP), the
+ * stack slot of the first argument, which this routine never
+ * fills and which is not a return PC in any case.)
+ */
+TEXT getcallerpc(SB), $0
+	MOVQ	-8(RARG), AX
+	RET
+
+TEXT getgdt(SB), 1, $-4
+	MOVQ	RARG, AX
+	MOVL	GDTR, (AX)			/* Note: 10 bytes returned */
+	RET
+
+TEXT lgdt(SB), $0				/* GDTR - global descriptor table */
+	MOVQ	RARG, AX
+	MOVL	(AX), GDTR
+	RET
+
+TEXT lidt(SB), $0				/* IDTR - interrupt descriptor table */
+	MOVQ	RARG, AX
+	MOVL	(AX), IDTR
+	RET
+
+TEXT ltr(SB), 1, $-4
+	MOVW	RARG, AX
+	MOVW	AX, TASK
+	RET
+
+/*
+ * Read/write various system registers.
+ */
+TEXT getcr0(SB), 1, $-4				/* Processor Control */
+	MOVQ	CR0, AX
+	RET
+
+TEXT putcr0(SB), 1, $-4
+	MOVQ	RARG, CR0
+	RET
+
+TEXT getcr2(SB), 1, $-4				/* #PF Linear Address */
+	MOVQ	CR2, AX
+	RET
+
+TEXT putcr2(SB), 1, $-4
+	MOVQ	BP, CR2
+	RET
+
+TEXT getcr3(SB), 1, $-4				/* PML4 Base */
+	MOVQ	CR3, AX
+	RET
+
+TEXT putcr3(SB), 1, $-4
+	MOVQ	RARG, CR3
+	RET
+
+TEXT getcr4(SB), 1, $-4				/* Extensions */
+	MOVQ	CR4, AX
+	RET
+
+TEXT putcr4(SB), 1, $-4
+	MOVQ	RARG, CR4
+	RET
+
+TEXT getxcr0(SB), 1, $-4			/* XCR0 - extended control */
+	XORQ CX, CX
+	WORD $0x010f; BYTE $0xd0	// XGETBV
+	SHLQ $32, DX
+	ORQ DX, AX
+	RET
+
+/*
+ * putxcr0(v): load the 64-bit value v into XCR0.
+ * XSETBV takes the register number in CX and the value in DX:AX,
+ * so DX must receive the upper 32 bits of v.  The full 64-bit
+ * argument is copied with MOVQ before shifting; a 32-bit MOVL
+ * followed by SHRQ $32 would always leave DX zero.
+ */
+TEXT putxcr0(SB), 1, $-4
+	XORQ CX, CX			/* XCR number 0 */
+	MOVQ RARG, DX
+	SHRQ $32, DX			/* DX = high 32 bits */
+	MOVL RARG, AX			/* AX = low 32 bits */
+	WORD $0x010f; BYTE $0xd1	// XSETBV
+	RET
+
+TEXT mb386(SB), 1, $-4				/* hack */
+TEXT mb586(SB), 1, $-4
+	XORL	AX, AX
+	CPUID
+	RET
+
+/*
+ * BIOS32.
+ */
+TEXT bios32call(SB), 1, $-4
+	XORL	AX, AX
+	INCL	AX
+	RET
+
+/*
+ * Basic timing loop to determine CPU frequency.
+ */
+TEXT aamloop(SB), 1, $-4
+	MOVL	RARG, CX
+_aamloop:
+	LOOP	_aamloop
+	RET
+
+TEXT _cycles(SB), 1, $-4			/* time stamp counter */
+	RDTSC
+	MOVL	AX, 0(RARG)			/* lo */
+	MOVL	DX, 4(RARG)			/* hi */
+	RET
+
+/*
+ * rdmsr(index, &vlong): read the model-specific register `index'
+ * and store its low and high halves through the second argument.
+ * Returns the value of BP, which is cleared before the RDMSR
+ * instruction.  The instruction carries its own label
+ * (_rdmsrinst) -- presumably so the trap code can recognise a
+ * fault at this PC and set BP to -1; confirm in the trap handler.
+ */
+TEXT rdmsr(SB), 1, $-4				/* Model-Specific Register */
+	MOVL	RARG, CX
+	MOVQ	$0, BP
+TEXT _rdmsrinst(SB), $0
+	RDMSR
+	MOVQ	vlong+8(FP), CX			/* &vlong */
+	MOVL	AX, 0(CX)			/* lo */
+	MOVL	DX, 4(CX)			/* hi */
+	MOVQ	BP, AX				/* BP set to -1 if trapped */
+	RET
+	
+/*
+ * wrmsr(index, value): write the 64-bit value, taken from the
+ * stack as its low (lo+8) and high (hi+12) halves, to the
+ * model-specific register `index'.  Returns the value of BP,
+ * which is cleared before the WRMSR instruction.  The instruction
+ * carries its own label (_wrmsrinst) -- presumably so the trap
+ * code can recognise a fault at this PC and set BP to -1; confirm
+ * in the trap handler.
+ */
+TEXT wrmsr(SB), 1, $-4
+	MOVL	RARG, CX
+	MOVL	lo+8(FP), AX
+	MOVL	hi+12(FP), DX
+	MOVQ	$0, BP
+TEXT _wrmsrinst(SB), $0
+	WRMSR
+	MOVQ	BP, AX				/* BP set to -1 if trapped */
+	RET
+
+/* fault-proof memcpy */
+TEXT peek(SB), 1, $-4
+	MOVQ	RARG, SI
+	MOVQ	dst+8(FP), DI
+	MOVL	cnt+16(FP), CX
+	CLD
+TEXT _peekinst(SB), $0
+	REP; MOVSB
+	MOVL	CX, AX
+	RET
+	
+
+TEXT invlpg(SB), 1, $-4
+	INVLPG	(RARG)
+	RET
+
+TEXT wbinvd(SB), 1, $-4
+	WBINVD
+	RET
+
+/*
+ * Serialisation.
+ */
+TEXT lfence(SB), 1, $-4
+	LFENCE
+	RET
+
+TEXT mfence(SB), 1, $-4
+	MFENCE
+	RET
+
+TEXT sfence(SB), 1, $-4
+	SFENCE
+	RET
+
+/*
+ * Note: CLI and STI are not serialising instructions.
+ * Is that assumed anywhere?
+ */
+TEXT splhi(SB), 1, $-4
+_splhi:
+	PUSHFQ
+	POPQ	AX
+	TESTQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	JZ	_alreadyhi			/* use CMOVLEQ etc. here? */
+
+	MOVQ	(SP), BX
+	MOVQ	BX, 8(RMACH) 			/* save PC in m->splpc */
+
+_alreadyhi:
+	CLI
+	RET
+
+TEXT spllo(SB), 1, $-4
+_spllo:
+	PUSHFQ
+	POPQ	AX
+	TESTQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	JNZ	_alreadylo			/* use CMOVLEQ etc. here? */
+
+	MOVQ	$0, 8(RMACH)			/* clear m->splpc */
+
+_alreadylo:
+	STI
+	RET
+
+TEXT splx(SB), 1, $-4
+	TESTQ	$0x200, RARG			/* 0x200 - Interrupt Flag */
+	JNZ	_spllo
+	JMP	_splhi
+
+TEXT spldone(SB), 1, $-4
+	RET
+
+TEXT islo(SB), 1, $-4
+	PUSHFQ
+	POPQ	AX
+	ANDQ	$0x200, AX			/* 0x200 - Interrupt Flag */
+	RET
+
+/*
+ * Synchronisation
+ */
+TEXT tas(SB), 1, $-4
+TEXT _tas(SB), 1, $-4
+	MOVL	$0xdeaddead, AX
+	XCHGL	AX, (RARG)			/*  */
+	RET
+
+/*
+ * cas(addr, exp, new) (also entered as cmpswap486): 32-bit atomic
+ * compare-and-swap.  If the long at addr equals exp it is
+ * replaced by new.  Returns 1 if the swap happened, 0 otherwise.
+ */
+TEXT cmpswap486(SB), 1, $-4
+TEXT cas(SB), 1, $-4
+	MOVL	exp+8(FP), AX
+	MOVL	new+16(FP), BX
+	LOCK; CMPXCHGL BX, (RARG)
+	MOVL	$1, AX				/* use CMOVLEQ etc. here? */
+	JNZ	_cas32r0			/* ZF clear: the compare failed */
+_cas32r1:
+	RET
+_cas32r0:
+	DECL	AX				/* 1 -> 0 */
+	RET
+
+/*
+ * Label consists of a stack pointer and a programme counter
+ */
+TEXT gotolabel(SB), 1, $-4
+	MOVQ	0(RARG), SP			/* restore SP */
+	MOVQ	8(RARG), AX			/* put return PC on the stack */
+	MOVQ	AX, 0(SP)
+	MOVL	$1, AX				/* return 1 */
+	RET
+
+TEXT setlabel(SB), 1, $-4
+	MOVQ	SP, 0(RARG)			/* store SP */
+	MOVQ	0(SP), BX			/* store return PC */
+	MOVQ	BX, 8(RARG)
+	MOVL	$0, AX				/* return 0 */
+	RET
+
+/*
+ * halt(): idle the processor until the next interrupt, unless
+ * something is already runnable.  nrdy is tested with interrupts
+ * disabled; if it is non-zero, interrupts are re-enabled and the
+ * routine returns at once.  Otherwise STI is immediately followed
+ * by HLT, so an interrupt cannot slip in between the test and
+ * the halt.
+ */
+TEXT halt(SB), 1, $-4
+	CLI
+	CMPL	nrdy(SB), $0
+	JEQ	_nothingready
+	STI
+	RET
+_nothingready:
+	STI
+	HLT
+	RET
+
+TEXT mwait(SB), 1, $-4
+	MOVQ	RARG, AX
+	MOVL	(AX), CX
+	ORL	CX, CX
+	JNZ	_mwaitdone
+	XORL	DX, DX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xc8	/* MONITOR */
+	MOVL	(AX), CX
+	ORL	CX, CX
+	JNZ	_mwaitdone
+	XORL	AX, AX
+	BYTE $0x0f; BYTE $0x01; BYTE $0xc9	/* MWAIT */
+_mwaitdone:
+	RET
+
+/*
+ * SIMD Floating Point.
+ * Note: for x87 instructions which have both a 'wait' and 'nowait' version,
+ * 8a only knows the 'wait' mnemonic but does NOT insert the WAIT prefix byte
+ * (i.e. they act like their FNxxx variations) so WAIT instructions must be
+ * explicitly placed in the code if necessary.
+ */
+TEXT _clts(SB), 1, $-4
+	CLTS
+	RET
+
+TEXT _fldcw(SB), 1, $-4				/* Load x87 FPU Control Word */
+	MOVQ	RARG, cw+0(FP)
+	FLDCW	cw+0(FP)
+	RET
+
+TEXT _fnclex(SB), 1, $-4
+	FCLEX
+	RET
+
+TEXT _fninit(SB), 1, $-4
+	FINIT					/* no WAIT */
+	RET
+
+TEXT _fxrstor(SB), 1, $-4
+	FXRSTOR64 (RARG)
+	RET
+
+TEXT _fxsave(SB), 1, $-4
+	FXSAVE64 (RARG)
+	RET
+
+TEXT _xrstor(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x6d; BYTE $0x00 // XRSTOR (RARG)
+	RET
+
+TEXT _xrstors(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x5d; BYTE $0x00 // XRSTORS (RARG)
+	RET
+
+TEXT _xsave(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x65; BYTE $0x00 // XSAVE (RARG)
+	RET
+
+TEXT _xsaveopt(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x75; BYTE $0x00 // XSAVEOPT (RARG)
+	RET
+
+TEXT _xsaves(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x6d; BYTE $0x00 // XSAVES (RARG)
+	RET
+
+TEXT _fwait(SB), 1, $-4
+	WAIT
+	RET
+
+TEXT _ldmxcsr(SB), 1, $-4			/* Load MXCSR */
+	MOVQ	RARG, mxcsr+0(FP)
+	LDMXCSR	mxcsr+0(FP)
+	RET
+
+TEXT _stts(SB), 1, $-4
+	MOVQ	CR0, AX
+	ORQ	$8, AX				/* Ts */
+	MOVQ	AX, CR0
+	RET
+
+TEXT mul64fract(SB), 1, $-4
+	MOVQ	a+8(FP), AX
+	MULQ	b+16(FP)			/* a*b */
+	SHRQ	$32, AX:DX
+	MOVQ	AX, (RARG)
+	RET
+
+#define	RDRANDAX	BYTE $0x0f; BYTE $0xc7; BYTE $0xf0
+#define	RDRAND64AX	BYTE $0x48; BYTE $0x0f; BYTE $0xc7;  BYTE $0xf0
+
+TEXT rdrand32(SB), $-4
+loop32:
+	RDRANDAX
+	JCC		loop32
+	RET
+
+TEXT rdrand64(SB), $-4
+loop64:
+	RDRAND64AX
+	JCC		loop64
+	RET
+
+TEXT rdrandbuf(SB), $0
+	MOVQ	RARG, DX
+
+	MOVLQZX	cnt+8(FP), CX
+	SHRQ	$3, CX
+eights:
+	CMPL	CX, $0
+	JLE	f1
+	CALL	rdrand64(SB)
+	MOVQ	AX, 0(DX)
+	ADDQ	$8, DX
+	SUBL	$1, CX
+	JMP	eights
+
+f1:
+	MOVLQZX	cnt+8(FP), CX
+	ANDL	$7, CX
+	SHRQ	$2, CX
+fours:
+	CMPL	CX, $0
+	JLE	f2
+	CALL	rdrand32(SB)
+	MOVL	AX, 0(DX)
+	ADDQ	$4, DX
+	SUBL	$1, CX
+	JMP	fours
+
+f2:
+	MOVLQZX	cnt+8(FP), CX
+	ANDL	$3, CX
+ones:
+	CMPL	CX, $0
+	JLE	f3
+	CALL	rdrand32(SB)
+	MOVB	AX, 0(DX)
+	ADDQ	$1, DX
+	SUBL	$1, CX
+	JMP	ones
+
+f3:
+	RET
+
+/* debug register access */
+
+/*
+ * putdr: load the debug registers from the table of 8-byte values
+ * addressed by BP, where entry n (offset 8*n) holds the value for DRn.
+ * putdr writes DR7 from entry 7 and then deliberately falls through
+ * into putdr01236, which writes DR0-DR3 and DR6.
+ * putdr01236 may also be called on its own to leave DR7 untouched.
+ */
+TEXT putdr(SB), 1, $-4
+	MOVQ	56(BP), AX
+	MOVQ	AX, DR7
+	/* wet floor */
+TEXT putdr01236(SB), 1, $-4
+	MOVQ	0(BP), AX
+	MOVQ	AX, DR0
+	MOVQ	8(BP), AX
+	MOVQ	AX, DR1
+	MOVQ	16(BP), AX
+	MOVQ	AX, DR2
+	MOVQ	24(BP), AX
+	MOVQ	AX, DR3
+	MOVQ	48(BP), AX
+	MOVQ	AX, DR6
+	RET
+
+/* getdr6: return the current value of debug register DR6 in AX. */
+TEXT getdr6(SB), 1, $-4
+	MOVQ	DR6, AX
+	RET
+
+/* putdr6: write the value in BP (the argument register) to DR6. */
+TEXT putdr6(SB), 1, $-4
+	MOVQ	BP, DR6
+	RET
+
+/* putdr7: write the value in BP (the argument register) to DR7. */
+TEXT putdr7(SB), 1, $-4
+	MOVQ	BP, DR7
+	RET
+
+/* VMX instructions */
+/*
+ * vmxon: execute VMXON with the value passed in BP as its 64-bit operand.
+ * VMXON takes a memory operand, so BP is first stored at 8(SP).
+ * The result is reported through _vmout (0, -1 or -2).
+ */
+TEXT vmxon(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMXON 8(SP) */
+	BYTE	$0xf3; BYTE $0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP	_vmout
+
+/* vmxoff: execute VMXOFF (0f 01 c4); the result is reported through _vmout. */
+TEXT vmxoff(SB), 1, $-4
+	BYTE	$0x0f; BYTE $0x01; BYTE $0xc4
+	JMP	_vmout
+
+/*
+ * vmclear: execute VMCLEAR with the value passed in BP as its 64-bit operand.
+ * The operand must be in memory, so BP is first stored at 8(SP).
+ * The result is reported through _vmout.
+ */
+TEXT vmclear(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMCLEAR 8(SP) */
+	BYTE	$0x66;	BYTE $0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP	_vmout
+
+/*
+ * vmlaunch: enter the guest described by the register block at BP
+ * (first argument, saved as ureg+0(FP)); the second argument `resume'
+ * selects VMRESUME (non-zero) or VMLAUNCH (zero).
+ *
+ * VMCS fields 0x6C14 and 0x6C16 (host RSP and host RIP in the Intel
+ * encoding) are set to the current SP and to vmrestore, so a VM exit
+ * continues in vmrestore on this stack.  If either VMWRITE or the
+ * entry instruction itself fails, control reaches _vmout, which
+ * turns the flags into a return value.
+ */
+TEXT vmlaunch(SB), 1, $-4
+	MOVL	$0x6C14, DI
+	MOVQ	SP, DX
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xfa /* VMWRITE DX, DI */
+	JBE	_vmout				/* CF or ZF set: VMWRITE failed */
+	MOVL	$0x6C16, DI
+	MOVQ	$vmrestore(SB), DX
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xfa /* VMWRITE DX, DI */
+	JBE	_vmout				/* CF or ZF set: VMWRITE failed */
+	
+	MOVQ	BP, ureg+0(FP)			/* vmrestore reloads the pointer from here */
+	MOVL	resume+8(FP), AX
+	TESTL	AX, AX				/* flags survive the MOVQs below until JNE */
+	MOVQ	0x00(BP), AX
+	MOVQ	0x08(BP), BX
+	MOVQ	0x10(BP), CX
+	MOVQ	0x18(BP), DX
+	MOVQ	0x20(BP), SI
+	MOVQ	0x28(BP), DI
+	MOVQ	0x38(BP), R8
+	MOVQ	0x40(BP), R9
+	MOVQ	0x48(BP), R10
+	MOVQ	0x50(BP), R11
+	MOVQ	0x58(BP), R12
+	MOVQ	0x60(BP), R13
+	MOVQ	0x68(BP), R14
+	MOVQ	0x70(BP), R15
+	MOVQ	0x30(BP), BP			/* BP last: it was the base pointer */
+	JNE	_vmresume
+	BYTE	$0x0f; BYTE $0x01; BYTE	$0xc2 /* VMLAUNCH */
+	JMP	_vmout				/* only reached if the entry failed */
+_vmresume:
+	BYTE	$0x0f; BYTE $0x01; BYTE $0xc3 /* VMRESUME */
+	JMP _vmout				/* only reached if the entry failed */
+	
+/*
+ * vmrestore: VM-exit landing point (vmlaunch installs it as the host RIP).
+ * Saves the guest's general registers into the register block whose
+ * address vmlaunch stored in ureg+0(FP), reloads the kernel's RMACH
+ * (from GS:0) and RUSER (from 16(RMACH)), and returns 0 in AX.
+ * The register-block offsets mirror the loads in vmlaunch.
+ */
+TEXT vmrestore(SB), 1, $-4
+	PUSHQ	BP				/* guest BP; BP is needed as base pointer */
+	MOVQ	ureg+0(FP), BP
+	MOVQ	AX, 0x00(BP)
+	MOVQ	BX, 0x08(BP)
+	MOVQ	CX, 0x10(BP)
+	MOVQ	DX, 0x18(BP)
+	MOVQ	SI, 0x20(BP)
+	MOVQ	DI, 0x28(BP)
+	POPQ	0x30(BP)			/* guest BP pushed above */
+	MOVQ	R8, 0x38(BP)
+	MOVQ	R9, 0x40(BP)
+	MOVQ	R10, 0x48(BP)
+	MOVQ	R11, 0x50(BP)
+	MOVQ	R12, 0x58(BP)
+	MOVQ	R13, 0x60(BP)
+	MOVQ	R14, 0x68(BP)
+	MOVQ	R15, 0x70(BP)
+	
+	BYTE	$0x65; MOVQ 0, RMACH /* MOVQ GS:(0), RMACH */
+	MOVQ	16(RMACH), RUSER
+	XORL	AX, AX
+	RET
+
+/*
+ * vmptrld: execute VMPTRLD with the value passed in BP as its 64-bit operand.
+ * The operand must be in memory, so BP is first stored at 8(SP).
+ * The result is reported through _vmout.
+ */
+TEXT vmptrld(SB), 1, $-4
+	MOVQ	BP, 8(SP)
+	/* VMPTRLD 8(SP) */
+	BYTE	$0x0f; BYTE $0xc7; BYTE $0x74; BYTE $0x24; BYTE $0x08
+	JMP _vmout
+
+/*
+ * vmwrite: write val (second argument) to the VMCS field whose
+ * encoding is in BP (first argument).  Result via _vmout.
+ */
+TEXT vmwrite(SB), 1, $-4
+	MOVQ	val+8(FP), DX
+	/* VMWRITE DX, BP */
+	BYTE	$0x0f; BYTE $0x79; BYTE $0xea
+	JMP _vmout
+
+/*
+ * vmread: read the VMCS field whose encoding is in BP (first argument)
+ * and store it through the pointer valp (second argument).  Result via _vmout.
+ */
+TEXT vmread(SB), 1, $-4
+	MOVQ	valp+8(FP), DI
+	/* VMREAD BP, (DI) */
+	BYTE	$0x0f; BYTE $0x78; BYTE $0x2f
+	JMP _vmout
+
+/*
+ * invept: execute INVEPT with the invalidation type in BP (first argument)
+ * and the descriptor read directly from the stack at 16(SP); presumably
+ * the caller's remaining arguments form the 128-bit descriptor there.
+ * Result via _vmout.
+ */
+TEXT invept(SB), 1, $-4
+	/* INVEPT BP, 16(SP) */
+	BYTE	$0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x80; BYTE $0x6c; BYTE $0x24; BYTE $0x10
+	JMP _vmout
+
+/*
+ * invvpid: execute INVVPID with the invalidation type in BP (first argument)
+ * and the descriptor read directly from the stack at 16(SP); presumably
+ * the caller's remaining arguments form the 128-bit descriptor there.
+ * Result via _vmout.
+ */
+TEXT invvpid(SB), 1, $-4
+	/* INVVPID BP, 16(SP) */
+	BYTE	$0x66; BYTE $0x0f; BYTE $0x38; BYTE $0x81; BYTE $0x6c; BYTE $0x24; BYTE $0x10
+	JMP _vmout
+
+/*
+ * _vmout: common exit for the VMX wrappers above.  Converts the flags
+ * left by the VMX instruction into the return value in AX:
+ *	carry set	-1
+ *	zero set	-2
+ *	neither		 0
+ */
+_vmout:
+	JC	_vmout1
+	JZ	_vmout2
+	XORL	AX, AX
+	RET
+_vmout1:
+	MOVQ	$-1, AX
+	RET
+_vmout2:
+	MOVQ	$-2, AX
+	RET
+
+/*
+ * forkret: leave the kernel with SYSRETQ using the saved register
+ * frame that starts at 8(SP).  Only the return value (AX), R14/R15
+ * (RUSER/RMACH), the instruction pointer (via CX), the flags (via R11)
+ * and the stack pointer are reloaded from the frame; the slot numbers
+ * below are 8-byte indices from SP.
+ */
+TEXT forkret(SB), 1, $-4
+	CLI
+	SWAPGS
+
+	MOVQ	8(SP), AX			/* return value */
+
+	MOVQ	(15*8)(SP), RMACH		/* r15 */
+	MOVQ	(14*8)(SP), RUSER		/* r14 */
+
+	MOVQ	(19*8)(SP), CX			/* ip */
+	MOVQ	(21*8)(SP), R11			/* flags */
+	MOVQ	(22*8)(SP), SP			/* sp */
+
+	BYTE $0x48; SYSRET			/* SYSRETQ */
+
+/*
+ * Interrupt/exception handling.
+ */
+
+/*
+ * Common interrupt entry and exit.
+ *
+ * Every vectortable entry is "CALL _strayintr[x]; BYTE vector", so the
+ * return PC pushed by that CALL points at the vector-number byte.
+ * _strayintr is used when the CPU pushed no error code: the pushed PC
+ * is left in place to occupy the error-code slot.  _strayintrx is used
+ * when an error code is already on the stack.  Both end up with
+ *	0(SP) = vector number, 8(SP) = error code (or the stale PC)
+ * and AX restored, then build the rest of the register frame and
+ * call trap() with a pointer to it.  Execution falls through from the
+ * CALL into _intrr, which unwinds the frame and returns with IRETQ.
+ *
+ * RUSER/RMACH are saved, GS swapped and m/up reloaded only when the
+ * interrupted code was not running on the kernel code segment (KESEL).
+ */
+TEXT _strayintr(SB), 1, $-4			/* no error code pushed */
+	PUSHQ	AX				/* save AX */
+	MOVQ	8(SP), AX			/* vectortable(SB) PC */
+	JMP	_intrcommon
+
+TEXT _strayintrx(SB), 1, $-4			/* error code pushed */
+	XCHGQ	AX, (SP)			/* AX = vectortable PC, saved AX on stack */
+_intrcommon:
+	MOVBQZX	(AX), AX			/* vector number byte after the CALL */
+	XCHGQ	AX, (SP)			/* vector number on stack, AX restored */
+
+	SUBQ	$24, SP				/* R1[45], [DEFG]S */
+	CMPW	48(SP), $KESEL			/* old CS */
+	JEQ	_intrnested
+
+	MOVQ	RUSER, 0(SP)
+	MOVQ	RMACH, 8(SP)
+
+	SWAPGS
+	BYTE $0x65; MOVQ 0, RMACH		/* m-> (MOVQ GS:0x0, R15) */
+	MOVQ	16(RMACH), RUSER		/* up */
+
+_intrnested:
+	PUSHQ	R13
+	PUSHQ	R12
+	PUSHQ	R11
+	PUSHQ	R10
+	PUSHQ	R9
+	PUSHQ	R8
+	PUSHQ	BP
+	PUSHQ	DI
+	PUSHQ	SI
+	PUSHQ	DX
+	PUSHQ	CX
+	PUSHQ	BX
+	PUSHQ	AX
+
+	MOVQ	SP, RARG			/* argument: pointer to the saved registers */
+	PUSHQ	SP
+	CALL	trap(SB)
+
+TEXT _intrr(SB), 1, $-4
+_intrestore:
+	POPQ	AX				/* discard the pointer pushed before CALL trap */
+
+	POPQ	AX
+	POPQ	BX
+	POPQ	CX
+	POPQ	DX
+	POPQ	SI
+	POPQ	DI
+	POPQ	BP
+	POPQ	R8
+	POPQ	R9
+	POPQ	R10
+	POPQ	R11
+	POPQ	R12
+	POPQ	R13
+
+	CMPQ	48(SP), $KESEL			/* old CS, as on entry */
+	JEQ	_iretnested
+
+	SWAPGS
+
+	MOVQ	8(SP), RMACH
+	MOVQ	0(SP), RUSER
+
+_iretnested:
+	ADDQ	$40, SP				/* skip the 24-byte save area, vector and error code */
+	IRETQ
+
+/* noteret: disable interrupts and unwind through the common _intrestore path. */
+TEXT noteret(SB), 1, $-4
+	CLI
+	JMP	_intrestore
+
+/*
+ * vectortable: one entry per interrupt vector, 0x00 through 0xFF.
+ * Each entry is a CALL followed by a byte holding the vector number;
+ * the handler recovers the number by reading the byte at the return
+ * PC pushed by the CALL.  Vectors for which the CPU pushes an error
+ * code go to _strayintrx, all others to _strayintr.
+ */
+TEXT vectortable(SB), $0
+	CALL _strayintr(SB); BYTE $0x00		/* divide error */
+	CALL _strayintr(SB); BYTE $0x01		/* debug exception */
+	CALL _strayintr(SB); BYTE $0x02		/* NMI interrupt */
+	CALL _strayintr(SB); BYTE $0x03		/* breakpoint */
+	CALL _strayintr(SB); BYTE $0x04		/* overflow */
+	CALL _strayintr(SB); BYTE $0x05		/* bound */
+	CALL _strayintr(SB); BYTE $0x06		/* invalid opcode */
+	CALL _strayintr(SB); BYTE $0x07		/* no coprocessor available */
+	CALL _strayintrx(SB); BYTE $0x08	/* double fault */
+	CALL _strayintr(SB); BYTE $0x09		/* coprocessor segment overflow */
+	CALL _strayintrx(SB); BYTE $0x0A	/* invalid TSS */
+	CALL _strayintrx(SB); BYTE $0x0B	/* segment not available */
+	CALL _strayintrx(SB); BYTE $0x0C	/* stack exception */
+	CALL _strayintrx(SB); BYTE $0x0D	/* general protection error */
+	CALL _strayintrx(SB); BYTE $0x0E	/* page fault */
+	CALL _strayintr(SB); BYTE $0x0F		/* reserved */
+	CALL _strayintr(SB); BYTE $0x10		/* coprocessor error */
+	CALL _strayintrx(SB); BYTE $0x11	/* alignment check */
+	CALL _strayintr(SB); BYTE $0x12		/* machine check */
+	CALL _strayintr(SB); BYTE $0x13		/* simd error */
+	CALL _strayintr(SB); BYTE $0x14
+	CALL _strayintr(SB); BYTE $0x15
+	CALL _strayintr(SB); BYTE $0x16
+	CALL _strayintr(SB); BYTE $0x17
+	CALL _strayintr(SB); BYTE $0x18
+	CALL _strayintr(SB); BYTE $0x19
+	CALL _strayintr(SB); BYTE $0x1A
+	CALL _strayintr(SB); BYTE $0x1B
+	CALL _strayintr(SB); BYTE $0x1C
+	CALL _strayintr(SB); BYTE $0x1D
+	CALL _strayintr(SB); BYTE $0x1E
+	CALL _strayintr(SB); BYTE $0x1F
+	CALL _strayintr(SB); BYTE $0x20		/* VectorLAPIC */
+	CALL _strayintr(SB); BYTE $0x21
+	CALL _strayintr(SB); BYTE $0x22
+	CALL _strayintr(SB); BYTE $0x23
+	CALL _strayintr(SB); BYTE $0x24
+	CALL _strayintr(SB); BYTE $0x25
+	CALL _strayintr(SB); BYTE $0x26
+	CALL _strayintr(SB); BYTE $0x27
+	CALL _strayintr(SB); BYTE $0x28
+	CALL _strayintr(SB); BYTE $0x29
+	CALL _strayintr(SB); BYTE $0x2A
+	CALL _strayintr(SB); BYTE $0x2B
+	CALL _strayintr(SB); BYTE $0x2C
+	CALL _strayintr(SB); BYTE $0x2D
+	CALL _strayintr(SB); BYTE $0x2E
+	CALL _strayintr(SB); BYTE $0x2F
+	CALL _strayintr(SB); BYTE $0x30
+	CALL _strayintr(SB); BYTE $0x31
+	CALL _strayintr(SB); BYTE $0x32
+	CALL _strayintr(SB); BYTE $0x33
+	CALL _strayintr(SB); BYTE $0x34
+	CALL _strayintr(SB); BYTE $0x35
+	CALL _strayintr(SB); BYTE $0x36
+	CALL _strayintr(SB); BYTE $0x37
+	CALL _strayintr(SB); BYTE $0x38
+	CALL _strayintr(SB); BYTE $0x39
+	CALL _strayintr(SB); BYTE $0x3A
+	CALL _strayintr(SB); BYTE $0x3B
+	CALL _strayintr(SB); BYTE $0x3C
+	CALL _strayintr(SB); BYTE $0x3D
+	CALL _strayintr(SB); BYTE $0x3E
+	CALL _strayintr(SB); BYTE $0x3F
+	CALL _strayintr(SB); BYTE $0x40		/* was VectorSYSCALL */
+	CALL _strayintr(SB); BYTE $0x41
+	CALL _strayintr(SB); BYTE $0x42
+	CALL _strayintr(SB); BYTE $0x43
+	CALL _strayintr(SB); BYTE $0x44
+	CALL _strayintr(SB); BYTE $0x45
+	CALL _strayintr(SB); BYTE $0x46
+	CALL _strayintr(SB); BYTE $0x47
+	CALL _strayintr(SB); BYTE $0x48
+	CALL _strayintr(SB); BYTE $0x49
+	CALL _strayintr(SB); BYTE $0x4A
+	CALL _strayintr(SB); BYTE $0x4B
+	CALL _strayintr(SB); BYTE $0x4C
+	CALL _strayintr(SB); BYTE $0x4D
+	CALL _strayintr(SB); BYTE $0x4E
+	CALL _strayintr(SB); BYTE $0x4F
+	CALL _strayintr(SB); BYTE $0x50
+	CALL _strayintr(SB); BYTE $0x51
+	CALL _strayintr(SB); BYTE $0x52
+	CALL _strayintr(SB); BYTE $0x53
+	CALL _strayintr(SB); BYTE $0x54
+	CALL _strayintr(SB); BYTE $0x55
+	CALL _strayintr(SB); BYTE $0x56
+	CALL _strayintr(SB); BYTE $0x57
+	CALL _strayintr(SB); BYTE $0x58
+	CALL _strayintr(SB); BYTE $0x59
+	CALL _strayintr(SB); BYTE $0x5A
+	CALL _strayintr(SB); BYTE $0x5B
+	CALL _strayintr(SB); BYTE $0x5C
+	CALL _strayintr(SB); BYTE $0x5D
+	CALL _strayintr(SB); BYTE $0x5E
+	CALL _strayintr(SB); BYTE $0x5F
+	CALL _strayintr(SB); BYTE $0x60
+	CALL _strayintr(SB); BYTE $0x61
+	CALL _strayintr(SB); BYTE $0x62
+	CALL _strayintr(SB); BYTE $0x63
+	CALL _strayintr(SB); BYTE $0x64
+	CALL _strayintr(SB); BYTE $0x65
+	CALL _strayintr(SB); BYTE $0x66
+	CALL _strayintr(SB); BYTE $0x67
+	CALL _strayintr(SB); BYTE $0x68
+	CALL _strayintr(SB); BYTE $0x69
+	CALL _strayintr(SB); BYTE $0x6A
+	CALL _strayintr(SB); BYTE $0x6B
+	CALL _strayintr(SB); BYTE $0x6C
+	CALL _strayintr(SB); BYTE $0x6D
+	CALL _strayintr(SB); BYTE $0x6E
+	CALL _strayintr(SB); BYTE $0x6F
+	CALL _strayintr(SB); BYTE $0x70
+	CALL _strayintr(SB); BYTE $0x71
+	CALL _strayintr(SB); BYTE $0x72
+	CALL _strayintr(SB); BYTE $0x73
+	CALL _strayintr(SB); BYTE $0x74
+	CALL _strayintr(SB); BYTE $0x75
+	CALL _strayintr(SB); BYTE $0x76
+	CALL _strayintr(SB); BYTE $0x77
+	CALL _strayintr(SB); BYTE $0x78
+	CALL _strayintr(SB); BYTE $0x79
+	CALL _strayintr(SB); BYTE $0x7A
+	CALL _strayintr(SB); BYTE $0x7B
+	CALL _strayintr(SB); BYTE $0x7C
+	CALL _strayintr(SB); BYTE $0x7D
+	CALL _strayintr(SB); BYTE $0x7E
+	CALL _strayintr(SB); BYTE $0x7F
+	CALL _strayintr(SB); BYTE $0x80		/* Vector[A]PIC */
+	CALL _strayintr(SB); BYTE $0x81
+	CALL _strayintr(SB); BYTE $0x82
+	CALL _strayintr(SB); BYTE $0x83
+	CALL _strayintr(SB); BYTE $0x84
+	CALL _strayintr(SB); BYTE $0x85
+	CALL _strayintr(SB); BYTE $0x86
+	CALL _strayintr(SB); BYTE $0x87
+	CALL _strayintr(SB); BYTE $0x88
+	CALL _strayintr(SB); BYTE $0x89
+	CALL _strayintr(SB); BYTE $0x8A
+	CALL _strayintr(SB); BYTE $0x8B
+	CALL _strayintr(SB); BYTE $0x8C
+	CALL _strayintr(SB); BYTE $0x8D
+	CALL _strayintr(SB); BYTE $0x8E
+	CALL _strayintr(SB); BYTE $0x8F
+	CALL _strayintr(SB); BYTE $0x90
+	CALL _strayintr(SB); BYTE $0x91
+	CALL _strayintr(SB); BYTE $0x92
+	CALL _strayintr(SB); BYTE $0x93
+	CALL _strayintr(SB); BYTE $0x94
+	CALL _strayintr(SB); BYTE $0x95
+	CALL _strayintr(SB); BYTE $0x96
+	CALL _strayintr(SB); BYTE $0x97
+	CALL _strayintr(SB); BYTE $0x98
+	CALL _strayintr(SB); BYTE $0x99
+	CALL _strayintr(SB); BYTE $0x9A
+	CALL _strayintr(SB); BYTE $0x9B
+	CALL _strayintr(SB); BYTE $0x9C
+	CALL _strayintr(SB); BYTE $0x9D
+	CALL _strayintr(SB); BYTE $0x9E
+	CALL _strayintr(SB); BYTE $0x9F
+	CALL _strayintr(SB); BYTE $0xA0
+	CALL _strayintr(SB); BYTE $0xA1
+	CALL _strayintr(SB); BYTE $0xA2
+	CALL _strayintr(SB); BYTE $0xA3
+	CALL _strayintr(SB); BYTE $0xA4
+	CALL _strayintr(SB); BYTE $0xA5
+	CALL _strayintr(SB); BYTE $0xA6
+	CALL _strayintr(SB); BYTE $0xA7
+	CALL _strayintr(SB); BYTE $0xA8
+	CALL _strayintr(SB); BYTE $0xA9
+	CALL _strayintr(SB); BYTE $0xAA
+	CALL _strayintr(SB); BYTE $0xAB
+	CALL _strayintr(SB); BYTE $0xAC
+	CALL _strayintr(SB); BYTE $0xAD
+	CALL _strayintr(SB); BYTE $0xAE
+	CALL _strayintr(SB); BYTE $0xAF
+	CALL _strayintr(SB); BYTE $0xB0
+	CALL _strayintr(SB); BYTE $0xB1
+	CALL _strayintr(SB); BYTE $0xB2
+	CALL _strayintr(SB); BYTE $0xB3
+	CALL _strayintr(SB); BYTE $0xB4
+	CALL _strayintr(SB); BYTE $0xB5
+	CALL _strayintr(SB); BYTE $0xB6
+	CALL _strayintr(SB); BYTE $0xB7
+	CALL _strayintr(SB); BYTE $0xB8
+	CALL _strayintr(SB); BYTE $0xB9
+	CALL _strayintr(SB); BYTE $0xBA
+	CALL _strayintr(SB); BYTE $0xBB
+	CALL _strayintr(SB); BYTE $0xBC
+	CALL _strayintr(SB); BYTE $0xBD
+	CALL _strayintr(SB); BYTE $0xBE
+	CALL _strayintr(SB); BYTE $0xBF
+	CALL _strayintr(SB); BYTE $0xC0
+	CALL _strayintr(SB); BYTE $0xC1
+	CALL _strayintr(SB); BYTE $0xC2
+	CALL _strayintr(SB); BYTE $0xC3
+	CALL _strayintr(SB); BYTE $0xC4
+	CALL _strayintr(SB); BYTE $0xC5
+	CALL _strayintr(SB); BYTE $0xC6
+	CALL _strayintr(SB); BYTE $0xC7
+	CALL _strayintr(SB); BYTE $0xC8
+	CALL _strayintr(SB); BYTE $0xC9
+	CALL _strayintr(SB); BYTE $0xCA
+	CALL _strayintr(SB); BYTE $0xCB
+	CALL _strayintr(SB); BYTE $0xCC
+	CALL _strayintr(SB); BYTE $0xCD
+	CALL _strayintr(SB); BYTE $0xCE
+	CALL _strayintr(SB); BYTE $0xCF
+	CALL _strayintr(SB); BYTE $0xD0
+	CALL _strayintr(SB); BYTE $0xD1
+	CALL _strayintr(SB); BYTE $0xD2
+	CALL _strayintr(SB); BYTE $0xD3
+	CALL _strayintr(SB); BYTE $0xD4
+	CALL _strayintr(SB); BYTE $0xD5
+	CALL _strayintr(SB); BYTE $0xD6
+	CALL _strayintr(SB); BYTE $0xD7
+	CALL _strayintr(SB); BYTE $0xD8
+	CALL _strayintr(SB); BYTE $0xD9
+	CALL _strayintr(SB); BYTE $0xDA
+	CALL _strayintr(SB); BYTE $0xDB
+	CALL _strayintr(SB); BYTE $0xDC
+	CALL _strayintr(SB); BYTE $0xDD
+	CALL _strayintr(SB); BYTE $0xDE
+	CALL _strayintr(SB); BYTE $0xDF
+	CALL _strayintr(SB); BYTE $0xE0
+	CALL _strayintr(SB); BYTE $0xE1
+	CALL _strayintr(SB); BYTE $0xE2
+	CALL _strayintr(SB); BYTE $0xE3
+	CALL _strayintr(SB); BYTE $0xE4
+	CALL _strayintr(SB); BYTE $0xE5
+	CALL _strayintr(SB); BYTE $0xE6
+	CALL _strayintr(SB); BYTE $0xE7
+	CALL _strayintr(SB); BYTE $0xE8
+	CALL _strayintr(SB); BYTE $0xE9
+	CALL _strayintr(SB); BYTE $0xEA
+	CALL _strayintr(SB); BYTE $0xEB
+	CALL _strayintr(SB); BYTE $0xEC
+	CALL _strayintr(SB); BYTE $0xED
+	CALL _strayintr(SB); BYTE $0xEE
+	CALL _strayintr(SB); BYTE $0xEF
+	CALL _strayintr(SB); BYTE $0xF0
+	CALL _strayintr(SB); BYTE $0xF1
+	CALL _strayintr(SB); BYTE $0xF2
+	CALL _strayintr(SB); BYTE $0xF3
+	CALL _strayintr(SB); BYTE $0xF4
+	CALL _strayintr(SB); BYTE $0xF5
+	CALL _strayintr(SB); BYTE $0xF6
+	CALL _strayintr(SB); BYTE $0xF7
+	CALL _strayintr(SB); BYTE $0xF8
+	CALL _strayintr(SB); BYTE $0xF9
+	CALL _strayintr(SB); BYTE $0xFA
+	CALL _strayintr(SB); BYTE $0xFB
+	CALL _strayintr(SB); BYTE $0xFC
+	CALL _strayintr(SB); BYTE $0xFD
+	CALL _strayintr(SB); BYTE $0xFE
+	CALL _strayintr(SB); BYTE $0xFF
--- /dev/null
+++ b/os/pc64/main.c
@@ -1,0 +1,363 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+#include	"ureg.h"
+
+#define X86STEPPING(x)	((x) & 0x0F)
+#define X86MODEL(x)	(((x)>>4) & 0x0F)
+#define X86FAMILY(x)	(((x)>>8) & 0x0F)
+
+Conf conf;
+int idle_spin;
+
+extern void bootscreeninit(void);
+extern int main_pool_pcnt;
+extern int heap_pool_pcnt;
+extern int image_pool_pcnt;
+int	pckdebug;
+
+static  uchar *sp;	/* stack pointer for /boot */
+
+char bootdisk[KNAMELEN];
+
+/*
+ * Boot progress marker: print the name of the initialisation
+ * step that is about to run, followed by "...".
+ */
+static void
+doc(char *m)
+{
+	print("%s...\n", m);
+}
+
+/*
+ * Colour numbers used to build the attribute byte of a VGA text cell
+ * (foreground in the low nibble, background shifted left by 4, as
+ * writemsg does), and the number of cells on the 80x25 text screen.
+ */
+enum {
+	VGA_COLOR_BLACK = 0,
+	VGA_COLOR_BLUE = 1,
+	VGA_COLOR_GREEN = 2,
+	VGA_COLOR_CYAN = 3,
+	VGA_COLOR_RED = 4,
+	VGA_COLOR_MAGENTA = 5,
+	VGA_COLOR_BROWN = 6,
+	VGA_COLOR_LIGHT_GREY = 7,
+	VGA_COLOR_DARK_GREY = 8,
+	VGA_COLOR_LIGHT_BLUE = 9,
+	VGA_COLOR_LIGHT_GREEN = 10,
+	VGA_COLOR_LIGHT_CYAN = 11,
+	VGA_COLOR_LIGHT_RED = 12,
+	VGA_COLOR_LIGHT_MAGENTA = 13,
+	VGA_COLOR_LIGHT_BROWN = 14,
+	VGA_COLOR_WHITE = 15,
+	Nvideo = 80*25,		/* character cells on screen; each is 2 bytes */
+};
+
+extern u64 MemMin;
+u32 nchars = 0;
+
+/*
+ * Early-boot debug output: append msglen characters of msg to the
+ * VGA text buffer at physical address 0xB8000, light grey on black.
+ * nchars counts the cells already used.  The first call clears the
+ * screen; once the buffer would fill up, the oldest characters are
+ * dropped by sliding the contents towards the start (no line
+ * structure is kept).
+ *
+ * A message longer than the screen is truncated to its last Nvideo
+ * characters, and no more than the nchars cells in use are ever
+ * dropped, so nchars cannot underflow and no write lands outside
+ * the Nvideo cells.
+ */
+void
+writemsg(char *msg, int msglen)
+{
+	u8 *video = (u8*)0xB8000;
+	u8 colour = VGA_COLOR_LIGHT_GREY | (VGA_COLOR_BLACK<<4);
+	u32 i, len, drop;
+
+	if(msglen <= 0)
+		return;
+	len = msglen;
+	if(len > Nvideo){
+		/* only the tail of an oversized message can be shown */
+		msg += len - Nvideo;
+		len = Nvideo;
+	}
+
+	if(nchars == 0){
+		memset(video, 0, Nvideo*2);
+	}else if(nchars + len >= Nvideo){
+		/* make room: discard the oldest cells, at most all of them */
+		drop = len < nchars ? len : nchars;
+		memmove(video, video+(2*drop), 2*(nchars-drop));
+		nchars -= drop;
+	}
+	video += nchars*2;
+	for(i = 0; i < len; i++){
+		*video++ = *msg++;
+		*video++ = colour;
+	}
+	nchars += len;
+}
+
+/*
+ * Debug aid: look up the page-table entries that map pa at the
+ * PML4, PDP and PD levels (without creating missing tables) and
+ * print where each entry lives together with its index relative to
+ * m->pml4, PDPADDR and PD0ADDR respectively.
+ *
+ * All three indices are computed the same way, as a byte offset
+ * divided by the entry size.  mmuwalk can return nil when a level is
+ * not mapped; the index printed for that level is then meaningless.
+ * NOTE(review): the PDP/PD indices are only meaningful for entries
+ * that live in the boot tables at PDPADDR/PD0ADDR.
+ */
+void
+ptedebug(uintptr pa)
+{
+	uintptr *pml4e, *pdpe, *pde;
+
+	pml4e = mmuwalk((uintptr*)PML4ADDR, pa, 3, 0);
+	pdpe = mmuwalk((uintptr*)PML4ADDR, pa, 2, 0);
+	pde = mmuwalk((uintptr*)PML4ADDR, pa, 1, 0);
+	print("pml4 @ 0x%p pa 0x%zux page is \n"
+		"\tpml4 entry @ 0x%p i %d\n"
+		"\tpdp entry @ 0x%p i %d\n"
+		"\tpd entry @ 0x%p i %d\n",
+		m->pml4, pa,
+		pml4e, (int)(((intptr)pml4e-(intptr)m->pml4)/sizeof(intptr)),
+		pdpe, (int)(((intptr)pdpe-(intptr)PDPADDR)/sizeof(intptr)),
+		pde, (int)(((intptr)pde-(intptr)PD0ADDR)/sizeof(intptr)));
+}
+
+/*
+ * Debug aid: print the fixed addresses from mem.h that the startup
+ * code relies on, then dump the page-table entries covering a few
+ * sample addresses (1MiB, 2MiB, 1GiB and 4GiB).
+ *
+ * Every address is cast to uintptr so that the argument matches %p;
+ * KDZERO in particular is a plain int constant.
+ */
+void
+setupdebug(void)
+{
+	print("kdzero 0x%p confaddr 0x%p apbootstrap 0x%p idtaddr 0x%p\n"
+		"\tcpu0mach 0x%p cpu0sp 0x%p cpu0gdt 0x%p\n"
+		"\tcpu0pml4 0x%p cpu0pdp 0x%p  cpu0pd 0x%p\n"
+		"\tcpu0end 0x%p\n",
+		(uintptr)KDZERO, (uintptr)CONFADDR, (uintptr)APBOOTSTRAP,
+		(uintptr)IDTADDR, (uintptr)CPU0MACH, (uintptr)CPU0SP, (uintptr)GDTADDR,
+		(uintptr)PML4ADDR, (uintptr)PDPADDR, (uintptr)PD0ADDR, (uintptr)CPU0END);
+	ptedebug(1*MiB);
+	ptedebug(2*MiB);
+	ptedebug(1*GiB);
+	ptedebug(4ull*GiB);
+}
+
+/* to check from acid on whether the data segment is being trashed */
+/* static int globaldatatest = 0x12345678; */
+/*
+ * C entry point of the kernel (presumably entered from the startup
+ * code in l.s on the bootstrap processor).  Brings the machine up
+ * step by step and finally hands control to the scheduler;
+ * schedinit() is not expected to return.
+ *
+ * The order of the calls matters: the console and the trap table
+ * come first so that later steps can print and fault safely, memory
+ * is discovered before the allocators are sized, and devices are
+ * reset before the first process is created.  From intrinit onwards
+ * each step is announced with doc() to make a hang easy to locate.
+ */
+void
+main(void)
+{
+	outb(0x3F2, 0x00);		/* botch: turn off the floppy motor */
+
+	mach0init();
+	bootargsinit();
+	trapinit0();			/* set up idt, check notes on why so early */
+	ioinit();
+	i8250console();
+	quotefmtinstall();
+	screeninit();
+	print("\nInferno\n");
+	setupdebug();
+	cpuidentify();
+	meminit0();			/* builds the memmap */
+	archinit();
+	if(arch->clockinit)
+		arch->clockinit();
+	meminit();			/* builds the conf.mem entries */
+	confinit();
+	xinit();
+	kbdinit();
+	i8253init();
+	/* TODO 9front if(i8237alloc != nil)
+		i8237alloc(); */
+	pcicfginit();
+	bootscreeninit();	/* vga maps pages for the frame buffer TODO bug causes an i8042 system reset in poolsizeinit() */
+	trapinit();
+	printinit();
+	cpuidprint();
+	mmuinit();		/* builds the page tables, lgdt, lidt */
+	print("after mmuinit\n");
+	poolsizeinit();
+	memmapdump();
+	eve = strdup("inferno");
+	if(arch->intrinit){	/* launches other processors on an mp */
+		doc("intrinit");
+		arch->intrinit();
+	}
+	doc("timersinit");
+	timersinit();
+	doc("mathinit");
+	mathinit();
+	doc("kbdenable");
+	kbdenable();
+	if(arch->clockenable){
+		doc("clockenable");	/* name the step that actually runs */
+		arch->clockenable();
+	}
+	doc("procinit");
+	procinit();
+	doc("links");
+	links();
+	doc("chandevreset");
+	chandevreset();
+	doc("userinit");
+	userinit();
+	doc("schedinit");
+	active.thunderbirdsarego = 1;
+	schedinit();
+}
+
+/*
+ * Set up the Mach structure of the bootstrap processor at its fixed
+ * address CPU0MACH, point it at the boot page tables and GDT, and
+ * mark processor 0 as the only active one.
+ */
+void
+mach0init(void)
+{
+	MACHP(0) = (Mach*)CPU0MACH;
+
+	/* machinit() preserves exactly these three fields */
+	m->machno = 0;
+	m->pml4 = (u64*)PML4ADDR;
+	m->gdt = (Segdesc*)GDTADDR;
+	machinit();
+
+	conf.nmach = 1;
+	active.exiting = 0;
+	active.machs[0] = 1;
+}
+
+/*
+ * Reset the current processor's Mach structure to a known state.
+ * Everything is zeroed except the processor number, the page-table
+ * root and the GDT pointer, which the caller must have filled in.
+ */
+void
+machinit(void)
+{
+	int no;
+	uintptr *root;
+	Segdesc *segs;
+
+	/* remember what has to survive the wipe */
+	segs = m->gdt;
+	root = m->pml4;
+	no = m->machno;
+
+	memset(m, 0, sizeof(Mach));
+
+	m->machno = no;
+	m->pml4 = root;
+	m->gdt = segs;
+	m->perf.period = 1;
+
+	/*
+	 * Polled uart output at boot needs some delay constant
+	 * before cpuidentify has measured the real one;
+	 * 100000 should be enough for a while.
+	 */
+	m->loopconst = 100000;
+}
+
+/*
+ * Body of the first kernel process (scheduled by userinit).
+ * Gives the process a root and current directory, initialises the
+ * device drivers, publishes the basic environment variables and then
+ * starts the Dis module /osinit.dis.
+ *
+ * Any error raised before the environment is set up is fatal;
+ * errors while setting the environment are ignored.
+ */
+void
+init0(void)
+{
+	Osenv *o;
+	char buf[2*KNAMELEN];
+
+	up->nerrlab = 0;
+
+	spllo();
+	if(waserror())
+		panic("init0: %r");
+	/*
+	 * These are o.k. because rootinit is null.
+	 * Then early kproc's will have a root and dot.
+	 */
+	o = up->env;
+	o->pgrp->slash = namec("#/", Atodir, 0, 0);
+	cnameclose(o->pgrp->slash->name);
+	o->pgrp->slash->name = newcname("/");
+	o->pgrp->dot = cclone(o->pgrp->slash);
+
+	chandevinit();
+
+	if(!waserror()){
+		/* architecture name, spelled as in "terminal" below */
+		ksetenv("cputype", "amd64", 0);
+		snprint(buf, sizeof(buf), "amd64 %s", conffile);
+		ksetenv("terminal", buf, 0);
+		setconfenv();
+		poperror();
+	}
+
+	poperror();
+
+	disinit("/osinit.dis");
+}
+
+/*
+ * Create the first process: give it a file descriptor group, a
+ * process group and the user name eve, arrange for it to start
+ * executing init0 on its own kernel stack, and make it runnable.
+ */
+void
+userinit(void)
+{
+	Proc *np;
+	Osenv *oe;
+	uintptr ktop;
+
+	np = newproc();
+	oe = np->env;
+
+	oe->fgrp = newfgrp(nil);
+	oe->pgrp = newpgrp();
+	kstrdup(&oe->user, eve);
+
+	strcpy(np->text, "interp");
+
+	/*
+	 * Kernel stack: start one pointer-sized slot below the
+	 * top so there is room for gotolabel's return PC.
+	 */
+	ktop = (uintptr)np->kstack+KSTACK;
+	np->sched.sp = ktop-sizeof(uintptr);
+	np->sched.pc = (uintptr)init0;
+
+	ready(np);
+}
+
+/*
+ * Fill in the derived configuration values: conf.npage becomes the
+ * sum of the page counts of all conf.mem banks, and conf.nproc is
+ * currently fixed at 10.
+ * The *maxmem boot parameter is parsed but its value is not used yet.
+ */
+void
+confinit(void)
+{
+	char *s;
+	u64 maxmem;
+	int bank;
+
+	maxmem = 0;
+	s = getconf("*maxmem");
+	if(s != nil)
+		maxmem = strtoull(s, 0, 0);
+
+	conf.npage = 0;
+	for(bank = 0; bank < nelem(conf.mem); bank++)
+		conf.npage += conf.mem[bank].npage;
+	print("conf.npage %ld\n", conf.npage);
+
+	conf.nproc = 10 /*+ ((conf.npage*BY2PG)/MiB)*5*/;
+}
+
+/*
+ * Size the three allocation pools as percentages of the total
+ * memory counted in conf.npage.  The percentages come from
+ * main_pool_pcnt, heap_pool_pcnt and image_pool_pcnt.
+ */
+void
+poolsizeinit(void)
+{
+	u64 total, mainsz, heapsz, imagsz;
+
+	total = conf.npage*BY2PG;
+	print("poolsizeinit nb 0x%zx conf.npage %d\n", total, conf.npage);
+
+	mainsz = (total*main_pool_pcnt)/100;
+	heapsz = (total*heap_pool_pcnt)/100;
+	imagsz = (total*image_pool_pcnt)/100;
+
+	poolsize(mainmem, mainsz, 0);
+	poolsize(heapmem, heapsz, 0);
+	poolsize(imagmem, imagsz, 1);
+}
+
+/*
+ *  Save the machine dependent part of the state of process p
+ *  as it is switched out: debug registers, FPU state and TLB.
+ */
+void
+procsave(Proc *p)
+{
+	/* a dying process must not leave breakpoints behind */
+	if(p->state == Moribund)
+		p->dr[7] = 0;
+
+	/* turn off any hardware breakpoints active on this processor */
+	if(m->dr7 != 0){
+		m->dr7 = 0;
+		putdr7(0);
+	}
+
+	fpuprocsave(p);
+
+	/*
+	 * The process may run on another processor and exit while
+	 * this processor sits in the scheduler.  Its page tables
+	 * would then go back to the free list, get reallocated and
+	 * overwritten, and this processor would crash the next time
+	 * it fetched an entry from them.  Flushing here avoids that.
+	 *
+	 * With a single processor this cannot happen.  Skipping the
+	 * flush in that case looks like a win, especially on VMware,
+	 * but it turns out not to matter.
+	 */
+	mmuflushtlb();
+}
+
+/*
+ * Shut the system down: detach from the current process, tell all
+ * devices to shut down and reset the machine through the
+ * architecture hook.  The ispanic argument is ignored.
+ */
+void
+exit(int ispanic)
+{
+	USED(ispanic);
+
+	up = nil;
+	print("exiting\n");
+
+	chandevshutdown();	/* shut down running devices */
+	arch->reset();
+}
+
+/* reboot: restart the machine by taking the normal exit path (not a panic). */
+void
+reboot(void)
+{
+	exit(0);
+}
--- /dev/null
+++ b/os/pc64/mem.h
@@ -1,0 +1,189 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 0x0000000040000000 */
+#define TiB		1099511627776ull	/* Tebi 0x0000010000000000 */
+#define PiB		1125899906842624ull	/* Pebi 0x0004000000000000 */
+#define EiB		1152921504606846976ull	/* Exbi 0x1000000000000000 */
+
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
+
+#define ALIGNED(p, a)	(!(((uintptr)(p)) & ((a)-1)))
+
+/*
+ * Sizes
+ */
+#define	BI2BY		8			/* bits per byte */
+#define	BI2WD		32			/* bits per word */
+#define	BY2WD		8			/* bytes per word */
+#define	BY2V		8			/* bytes per double word */
+#define	BY2PG		(0x1000ull)		/* bytes per page */
+#define	WD2PG		(BY2PG/BY2WD)		/* words per page */
+#define	PGSHIFT		12			/* log(BY2PG) */
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
+#define	PGROUND(s)	ROUND(s, BY2PG)
+#define	BLOCKALIGN	8
+#define	FPalign		64
+
+#define	MAXMACH		128			/* max # cpus system can run */
+#define	KSTACK		(32*KiB)		/* Size of kernel stack */
+#define	MACHSIZE	(2*KSTACK)
+
+/*
+ * Time
+ */
+#define	HZ		(100)			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+#define	MS2TK(t)	((((uintptr)(t))*HZ)/1000)	/* milliseconds to ticks */
+
+/*
+ *  Address spaces. Kernel, sorted by address.
+ */
+#define KZERO		(0)			/* with identity mapping, KZERO = 0 */
+/* The first MiB (0 to 1MiB-1) is left to the BIOS.
+ * The second MiB (1MiB to 2MiB-1, i.e. KDZERO up to KTZERO) holds the
+ * global data tables and the other data structures set up by l.s.
+ * KTZERO is at 2MiB because acid expects Plan 9 program text at 2MiB.
+ */
+#define KDZERO		(0x100000)
+#define KTZERO		(0x200000)
+
+/*
+ * Fundamental addresses
+ */
+#define	REBOOTADDR	(0x11000)		/* reboot code - physical address */
+#define	CONFADDR	(KDZERO+ 0x1200ull)	/* info passed from boot loader */
+#define	APBOOTSTRAP	(KDZERO+ 0x7000ull)	/* AP bootstrap code */
+#define	IDTADDR		(KDZERO+0x10000ull)	/* idt */
+#define GDTADDR		(KDZERO+0x11000ull)	/* gdt */
+#define	CPU0MACH	(KDZERO+0x12000ull)	/* Mach for bootstrap processor */
+#define CPU0END		(KDZERO+0x22000ull)	/* CPU0MACH + (MACHSIZE = 64 KiB = 0x10 000) */
+#define CPU0SP		(KDZERO+0x22000ull)
+/* One PD table has 512 entries and
+ * each entry maps a 2MiB page, so
+ * one table maps 1GiB and occupies 512*8 = 4096 bytes.
+ * TODO: drop the CPU0 prefix for the page tables and gdt;
+ * call them BOOTPML4 and PML4.
+ */
+#define PML4ADDR	(KDZERO+0x23000ull)
+#define PDPADDR		(KDZERO+0x24000ull)
+#define PD0ADDR		(KDZERO+0x25000ull)	/* KZERO=0 .. 1GiB */
+#define PT0ADDR		(KDZERO+0x26000ull)	/* KZERO=0 .. 2MiB */
+#define PT1ADDR		(KDZERO+0x27000ull)	/* 2MiB .. 4MiB */
+#define PT2ADDR		(KDZERO+0x28000ull)	/* 4MiB .. 6MiB */
+#define PT3ADDR		(KDZERO+0x29000ull)	/* 6MiB .. 8MiB */
+						/* fill with page tables until KTZERO */
+
+/*
+ * Where configuration info is left for the loaded programme.
+ * There are 24064 bytes available at CONFADDR.
+ */
+#define BOOTLINE	((char*)CONFADDR)
+#define BOOTLINELEN	64
+#define BOOTARGS	((char*)(CONFADDR+BOOTLINELEN))
+#define BOOTARGSLEN	(0x6000-0x200-BOOTLINELEN)
+
+/*
+ *  known x86 segments (in GDT) and their selectors
+ */
+#define	NULLSEG	0	/* null segment */
+#define	KESEG	1	/* kernel executable */
+#define KDSEG	2	/* kernel data */
+#define UE32SEG	3	/* user executable 32bit */
+#define	UDSEG	4	/* user data/stack */
+#define	UESEG	5	/* user executable 64bit */
+#define	TSSSEG	8	/* task segment (two descriptors) */
+
+#define	NGDT	10	/* number of GDT entries required */
+
+#define	SELGDT	(0<<2)	/* selector is in gdt */
+#define	SELLDT	(1<<2)	/* selector is in ldt */
+
+#define	SELECTOR(i, t, p)	(((i)<<3) | (t) | (p))
+
+#define	NULLSEL	SELECTOR(NULLSEG, SELGDT, 0)
+#define KDSEL	SELECTOR(KDSEG, SELGDT, 0)
+#define	KESEL	SELECTOR(KESEG, SELGDT, 0)
+#define	UE32SEL	SELECTOR(UE32SEG, SELGDT, 3)
+#define	UDSEL	SELECTOR(UDSEG, SELGDT, 3)
+#define	UESEL	SELECTOR(UESEG, SELGDT, 3)
+#define	TSSSEL	SELECTOR(TSSSEG, SELGDT, 0)
+
+/*
+ *  fields in segment descriptors
+ */
+#define	SEGDATA	(0x10<<8)	/* data/stack segment */
+#define	SEGEXEC	(0x18<<8)	/* executable segment */
+#define	SEGTSS	(0x9<<8)	/* TSS segment */
+#define	SEGCG	(0x0C<<8)	/* call gate */
+#define	SEGIG	(0x0E<<8)	/* interrupt gate */
+#define	SEGTG	(0x0F<<8)	/* trap gate */
+#define	SEGLDT	(0x02<<8)	/* local descriptor table */
+#define	SEGTYPE	(0x1F<<8)
+
+#define	SEGP	(1<<15)		/* segment present */
+#define	SEGPL(x) ((x)<<13)	/* priority level */
+#define	SEGB	(1<<22)		/* granularity 1==4k (for expand-down) */
+#define	SEGD	(1<<22)		/* default 1==32bit (for code) */
+#define	SEGE	(1<<10)		/* expand down */
+#define	SEGW	(1<<9)		/* writable (for data/stack) */
+#define	SEGR	(1<<9)		/* readable (for code) */
+#define SEGL	(1<<21)		/* 64 bit */
+#define	SEGG	(1<<23)		/* granularity 1==4k (for other) */
+
+/*
+ *  virtual MMU
+ */
+#define	PTEMAPMEM	(1ull*MiB)	
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	65536
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~(1ull<<63 | BY2PG-1))
+
+/*
+ *  physical MMU
+ */
+#define	PTEVALID	(1ull<<0)
+#define	PTEWT		(1ull<<3)
+#define	PTEUNCACHED	(1ull<<4)
+#define	PTECACHED	(0ull<<4)
+#define	PTEWRITE	(1ull<<1)
+#define	PTERONLY	(0ull<<1)
+#define	PTEKERNEL	(0ull<<2)
+#define	PTEUSER		(1ull<<2)
+#define	PTESIZE		(1ull<<7)
+#define	PTEGLOBAL	(1ull<<8)
+#define	PTENOEXEC	((uvlong)m->havenx<<63)
+
+/*
+ * Hierarchical Page Tables.
+ * For example, traditional IA-32 paging structures have 2 levels,
+ * level 1 is the PD, and level 0 the PT pages; with IA-32e paging,
+ * level 3 is the PML4(!), level 2 the PDP, level 1 the PD,
+ * and level 0 the PT pages. The PTLX macro gives an index into the
+ * page-table page at level 'l' for the virtual address 'v'.
+ */
+#define PTSZ		(4*KiB)			/* page table page size */
+#define PTSHIFT		9			/*  */
+
+#define PTLX(v, l)	(((v)>>(((l)*PTSHIFT)+PGSHIFT)) & ((1<<PTSHIFT)-1))
+#define PGLSZ(l)	(1ull<<(((l)*PTSHIFT)+PGSHIFT))
+
+/*
+ * Macros for calculating offsets within the page directory base
+ * and page tables. 
+ */
+#define	PDX(va)		((((uintptr)(va))>>22) & 0x03FF)
+#define	PTX(va)		((((uintptr)(va))>>12) & 0x03FF)
+
+#define	getpgcolor(a)	0
+
+/* PAT entry used for write combining */
+#define PATWC	7
+
+#define RMACH		R15			/* m-> */
+#define RUSER		R14			/* up-> */
--- /dev/null
+++ b/os/pc64/memory.c
@@ -1,0 +1,686 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+#define DP if(1){}else print
+
+enum {
+	MemUPA		= 0,	/* unbacked physical address */
+	MemUMB		= 1,	/* upper memory block (<16MB) */
+	MemRAM		= 2,	/* physical memory */
+	MemACPI		= 3,	/* ACPI tables */
+	MemReserved	= 4,	/* don't allocate */
+
+	KB = 1024,
+};
+
+u64	MemMin;		/* set by l.s */
+
+/* rampage: hand out one page-aligned page of RAM (used for page tables).
+ * TODO just use xspanalloc; unclear whether memmapalloc() works here.
+ */
+void*
+rampage(void)
+{
+	uintptr addr;
+
+	/* once conf.mem[0] has pages, the ordinary allocator is used instead */
+	if(conf.mem[0].npage == 0){
+		/* allocate from the MemRAM map directly */
+		print("before calling rampage\n");
+		memmapdump();
+		addr = memmapalloc(-1, BY2PG, BY2PG, MemRAM);
+		if(addr == -1)
+			panic("rampage: out of memory\n");
+		print("rampage returned 0x%p\n", addr);
+		return (void*)addr;
+	}
+	return xspanalloc(BY2PG, BY2PG, 0);
+}
+
+static void
+mapkzero(uintptr base, u64 len, int type)
+{
+	uintptr attr, n;
+
+	/*
+	 * Map [base, base+len) via pmap() with attributes chosen by type:
+	 * MemRAM at or above MemMin is mapped writable, MemUMB writable
+	 * and uncached; every other type is ignored.
+	 */
+	DP("mapkzero base 0x%p len %llud 0x%llux type 0x%x\n",
+		base, len, len, type);
+	/* a range straddling MemMin is handled as two pieces: below it, then from MemMin up */
+	if(base < MemMin && base+len > MemMin){
+		mapkzero(base, MemMin-base, type);
+		len -= MemMin-base;
+		base = MemMin;
+	}
+
+	if(type == MemUMB)
+		attr = PTEWRITE|PTEUNCACHED|PTEVALID;
+	else if(type == MemRAM && base >= MemMin)
+		attr = PTEWRITE|PTEVALID;
+	else
+		return;
+	pmap(base, attr, len);
+}
+
+/*
+ * Walk the pages from KTZERO up to etext and make the kernel text
+ * read-only and global.  NOTE(review): no-execute is not set here.
+ */
+static void
+kernelro(void)
+{
+	uintptr *pte, psz, va;
+	/* every page in the range must already have a valid level-0 entry, else panic */
+	for(va = KTZERO; va <= (uintptr)etext; va += psz){
+		psz = PGLSZ(0);
+		pte = mmuwalk((uintptr*)PML4ADDR, va, 0, 0);	/* lookup only, never creates tables */
+		if(pte == nil){
+			print("kernelro va 0x%p\n", va);
+			panic("kernelro");
+		}
+		if((*pte & PTEVALID) == 0){
+			print("kernelro invalid page va 0x%p pte 0x%zux *pte 0x%zux\n",
+				va, pte, *pte);
+			panic("kernelro invalid page\n");
+		}
+		if(va >= KTZERO && va < (uintptr)etext){	/* false only for va == etext */
+			*pte &= ~PTEWRITE;
+			*pte |= PTEGLOBAL;
+		}
+	}
+	mmuflushtlb();
+}
+
+static uintptr
+ebdaseg(void)
+{
+	uchar *bda = KADDR(0x400);	/* BIOS data area */
+
+	/* the segment word at bda+0x0e is only used when the "EISA" marker is present */
+	if(memcmp(KADDR(0xfffd9), "EISA", 4) != 0)
+		return 0;
+	return (bda[0x0e] | (bda[0x0f]<<8)) << 4;	/* segment value shifted into an address */
+}
+
+static uintptr
+convmemsize(void)
+{
+	uchar *bda;
+	uintptr limit;
+
+	/* 16-bit size in KB, read from BIOS data area offsets 0x13/0x14 */
+	bda = KADDR(0x400);
+	limit = (bda[0x13] | (bda[0x14]<<8))*KB;
+
+	/* sanity: anything outside 64KB..640KB falls back to 640KB */
+	if(!(limit >= 64*KB && limit <= 640*KB))
+		limit = 640*KB;
+
+	/* the top 1KB is reserved for BIOS tables */
+	return limit - 1*KB;
+}
+
+static void
+lowraminit(void)
+{
+	uintptr base, pa, len;
+	uchar *p;
+
+	/*
+	 * Discover the memory bank information for conventional memory
+	 * (i.e. less than 640KB). The base is the first location after the
+	 * bootstrap processor MMU information and the limit is obtained from
+	 * the BIOS data area.
+	 */
+	base = PADDR(CPU0END);
+	pa = convmemsize();
+	if(base < pa)
+		memmapadd(base, pa-base, MemRAM);
+
+	/* Reserve BIOS tables (the 1KB convmemsize() left out of the RAM range) */
+	memmapadd(pa, 1*KB, MemReserved);
+
+	/* Reserve EBDA */
+	if((pa = ebdaseg()) != 0)
+		memmapadd(pa, 1*KB, MemReserved);
+	memmapadd(0xA0000-1*KB, 1*KB, MemReserved);	/* last KB below 0xA0000, also scanned by sigsearch() */
+
+	/* Reserve the VGA frame buffer */
+	umballoc(0xA0000, 128*KB, 0);
+
+	/* Reserve VGA ROM */
+	memmapadd(0xC0000, 64*KB, MemReserved);
+
+	/*
+	 * Scan the Upper Memory Blocks (0xD0000->0xF0000) for device BIOS ROMs.
+	 * This should start with a two-byte header of 0x55 0xAA, followed by a
+	 * byte giving the size of the ROM in 512-byte chunks.
+	 * These ROM's must start on a 2KB boundary.
+	 */
+	for(p = (uchar*)KADDR(0xD0000); p < (uchar*)KADDR(0xF0000); p += len){
+		len = 2*KB;	/* default stride when no ROM header is found here */
+		if(p[0] == 0x55 && p[1] == 0xAA){
+			if(p[2] != 0)	/* a zero size byte keeps the 2KB default */
+				len = p[2]*512;
+			memmapadd(PADDR(p), len, MemReserved);
+			len = ROUND(len, 2*KB);	/* keep the scan on 2KB boundaries */
+		}
+	}
+
+	/* Reserve BIOS ROM */
+	memmapadd(0xF0000, 64*KB, MemReserved);
+}
+
+int
+checksum(void *v, int n)
+{
+	uchar *bytes = v, sum = 0;
+	int i;
+
+	/* 8-bit wrapping sum of the n bytes at v */
+	for(i = 0; i < n; i++)
+		sum += bytes[i];
+	return sum;
+}
+
+static void*
+sigscan(uchar *addr, int len, char *sig, int size, int step)
+{
+	uchar *last, *cur;
+	int siglen, need;
+
+	/* a candidate needs room for the signature and for the checksummed area */
+	siglen = strlen(sig);
+	need = size > siglen ? size : siglen;
+	last = addr+len-need;
+
+	/* first position, in steps of 'step', matching sig whose checksum (if size != 0) is 0 */
+	for(cur = addr; cur <= last; cur += step)
+		if(memcmp(cur, sig, siglen) == 0 && (size == 0 || checksum(cur, size) == 0))
+			return cur;
+	return nil;
+}
+
+void*
+sigsearch(char* signature, int size)
+{
+	uintptr p;
+	void *r;
+	/*
+	 * Each area is scanned on 16-byte boundaries; the first match wins.
+	 * Search for the data structure:
+	 * 1) within the first KiB of the Extended BIOS Data Area (EBDA), or
+	 * 2) within the last KiB of system base memory if the EBDA segment
+	 *    is undefined, or
+	 * 3) within the BIOS ROM address space between 0xf0000 and 0xfffff
+	 *    (but will actually check 0xe0000 to 0xfffff).
+	 */
+	if((p = ebdaseg()) != 0){
+		if((r = sigscan(KADDR(p), 1*KB, signature, size, 16)) != nil)
+			return r;
+	}
+	if((r = sigscan(KADDR(convmemsize()), 1*KB, signature, size, 16)) != nil)	/* tried even when an EBDA exists */
+		return r;
+
+	/* hack for virtualbox: look in KiB below 0xa0000 */
+	if((r = sigscan(KADDR(0xA0000-1*KB), 1*KB, signature, size, 16)) != nil)
+		return r;
+
+	return sigscan(KADDR(0xE0000), 128*KB, signature, size, 16);	/* nil if nothing matched anywhere */
+}
+
+void*
+rsdsearch(void)
+{
+	static char signature[] = "RSD PTR ";
+	uintptr base, size;
+	uchar *v, *p;
+	/* first the BIOS areas, with a 36-byte and then a 20-byte checksummed length */
+	if((p = sigsearch(signature, 36)) != nil)
+		return p;
+	if((p = sigsearch(signature, 20)) != nil)
+		return p;
+	/* then each MemACPI range; NOTE(review): vmap() in mmu.c is a stub returning 0, so nothing is found here yet */
+	for(base = memmapnext(-1, MemACPI); base != -1; base = memmapnext(base, MemACPI)){
+		size = memmapsize(base, 0);
+		if(size == 0 || size > 0x7fffffff)	/* sigscan() takes an int length */
+			continue;
+		if((v = vmap(base, size)) != nil){
+			p = sigscan(v, size, signature, 36, 4);
+			if(p == nil)
+				p = sigscan(v, size, signature, 20, 4);
+			vunmap(v, size);
+			if(p != nil)
+				return vmap(base + (p - v), 64);	/* remap just 64 bytes at the match */
+		}
+	}
+	return nil;
+}
+
+/*
+ * Give out otherwise-unused physical address space
+ * for use in configuring devices.  Note that upaalloc
+ * does not map the physical address into virtual memory.
+ * Call vmap to do that.
+ */
+u64
+upaalloc(u64 pa, u32 size, u32 align)
+{
+	print("before memmapalloc pa 0x%p size 0x%x %d\n",	/* trace printed on every call */
+		pa, size, size);
+	// memmapdump();
+	return memmapalloc(pa, size, align, MemUPA);	/* callers in this file compare the result with -1 */
+}
+
+u64
+upamalloc(u64 pa, u32 size, u32 align)
+{
+	return memmapalloc(pa, size, align, MemUPA);	/* same allocation as upaalloc(), without its trace print */
+}
+
+u64
+upaallocwin(u64 pa, u32 win, u32 size, u32 align)
+{
+	uvlong a, base, top = pa + win;
+	/* try each MemUPA range that reaches into [pa, pa+win) until one allocation succeeds */
+	for(base = memmapnext(-1, MemUPA); base != -1 && base < top; base = memmapnext(base, MemUPA)){
+		if(base < pa){
+			if(pa >= base + memmapsize(base, 0))	/* range ends before the window starts */
+				continue;
+			base = pa;	/* range straddles pa: start at the window base */
+		}
+		a = upaalloc(base, size, align);
+		if(a != -1)
+			return a;
+	}
+	return -1ULL;	/* nothing in the window could satisfy the request */
+}
+
+void
+upafree(u64 pa, u32 size)
+{
+	memmapfree(pa, size, MemUPA);	/* hand the range back to the MemUPA map */
+}
+
+/*
+ * Allocate memory from the upper memory blocks.
+ */
+uintptr
+umballoc(uintptr pa, u32 size, u32 align)
+{
+	return (uintptr)memmapalloc(pa == -1UL ? -1ULL : (uvlong)pa, size, align, MemUMB);	/* pa of -1UL is widened to -1ULL before the call */
+}
+
+void
+umbfree(uintptr pa, u32 size)
+{
+	memmapfree(pa, size, MemUMB);	/* hand the range back to the MemUMB map */
+}
+
+static void
+umbexclude(void)
+{
+	uintptr pa, end, size;
+	char *op, *p, *rptr;
+
+	/* "umbexclude" is a comma-separated list of inclusive start-end ranges */
+	if((p = getconf("umbexclude")) == nil)
+		return;
+	while(p && *p != '\0' && *p != '\n'){
+		op = p;
+		pa = strtoul(p, &rptr, 0);
+		if(rptr == nil || rptr == p || *rptr != '-'){
+			print("umbexclude: invalid argument <%s>\n", op);
+			break;
+		}
+		p = rptr+1;
+		end = strtoul(p, &rptr, 0);
+		size = end - pa + 1;
+		/* size is unsigned, so reject a missing end, end < start and wrap to 0 explicitly */
+		if(rptr == p || end < pa || size == 0){
+			print("umbexclude: bad range <%s>\n", op);
+			break;
+		}
+		if(rptr != nil && *rptr == ',')
+			*rptr++ = '\0';
+		p = rptr;
+		memmapalloc(pa, size, 0, MemUMB);	/* allocate the range out of the MemUMB map */
+	}
+}
+
+static void
+mtrrexclude(int type, char *expect)
+{
+	uintptr base, top, next, pa;
+	char *attr;
+	/* reserve each piece of a 'type' range whose MTRR attribute is known but is not 'expect' */
+	for(base = memmapnext(-1, type); base != -1; base = memmapnext(base, type)){
+		top = base + memmapsize(base, 0);
+		for(pa = base; pa < top; pa = next){
+			next = top;	/* presumably mtrrattr() moves this to where the attribute changes — confirm */
+			attr = mtrrattr(pa, &next);
+			if(attr != nil && strcmp(attr, expect) != 0){
+				if(next > top)
+					next = top;
+				memmapadd(pa, next - pa, MemReserved);
+			}
+			base = pa;	/* the outer memmapnext() continues from the last piece examined */
+		}
+	}
+}
+
+static int
+e820scan(void)
+{
+	uintptr base, top, size;	/* NOTE(review): size is never used */
+	int type;
+	char *s;
+
+	/* passed by bootloader */
+	if((s = getconf("*e820")) == nil)
+		if((s = getconf("e820")) == nil)
+			return -1;	/* no map available; meminit0() falls back to ramscan() */
+
+	print("e820scan %s\n", s);
+	for(;;){
+		while(*s == ' ')
+			s++;
+		if(*s == 0)
+			break;
+		type = 1;	/* without a type digit the entry is treated as type 1 (RAM) */
+		if(s[1] == ' '){	/* new format */
+			type = s[0] - '0';
+			s += 2;
+		}
+		base = strtoull(s, &s, 16);
+		if(*s != ' ')
+			break;
+		top  = strtoull(s, &s, 16);
+		if(*s != ' ' && *s != 0)
+			break;
+		if(base >= top)	/* empty or inverted range: ignored */
+			continue;
+		switch(type){
+		case 1:
+			memmapadd(base, top - base, MemRAM);
+			break;
+		case 3:
+			memmapadd(base, top - base, MemACPI);
+			break;
+		default:
+			memmapadd(base, top - base, MemReserved);
+		}
+	}
+
+	/* RAM needs to be writeback */
+	mtrrexclude(MemRAM, "wb");
+
+	return 0;
+}
+
+/* Probe [pa, top) one page at a time for working write-back RAM.  TODO untested */
+static void
+ramscan(uintptr pa, uintptr top)
+{
+	uintptr save, pat, seed, chunk, *v, *k0, *pte;
+	int i, n, w;
+	char *attr;
+
+	/* chunk must be address-sized: a u32 mask would clear the high bits of pa and top */
+	chunk = PGLSZ(0);
+	pa += chunk-1;
+	pa &= ~(chunk-1);
+	top &= ~(chunk-1);
+
+	n = chunk/sizeof(*v);
+	w = BY2PG/sizeof(*v);
+
+	k0 = KADDR(0);
+	save = *k0;
+
+	DP("ramscan\n");
+	pat = 0x12345678UL;
+	for(; pa < top; pa += chunk){
+		DP("ramscan pa 0x%p\n", pa);
+		attr = mtrrattr(pa, nil);
+		if(attr != nil && strcmp(attr, "wb") != 0)
+			goto Skip;
+
+		/* write pattern */
+		seed = pat;
+		if((pte = mmuwalk((uintptr*)PML4ADDR, pa, 0, 1)) == nil)
+			continue;
+		*pte = pa|PTEWRITE|PTEVALID;
+		v = (uintptr*)pa;	/* was never set: the entry just installed maps address pa onto itself */
+		for(i = 0; i < n; i += w){
+			pat += 0x3141526UL;
+			v[i] = pat;
+			*k0 = ~pat;
+			if(v[i] != pat)
+				goto Bad;
+		}
+
+		/* verify pattern */
+		pat = seed;
+		for(i = 0; i < n; i += w){
+			pat += 0x3141526UL;
+			if(v[i] != pat)
+				goto Bad;
+		}
+
+		memmapadd(pa, chunk, MemRAM);
+		mapkzero(pa, chunk, MemRAM);
+		continue;
+
+	Bad:
+		*pte = 0;	/* drop the probe mapping again */
+	Skip:
+		if(pa+chunk <= 16*MB)
+			memmapadd(pa, chunk, MemUMB);
+
+		/*
+		 * If we encounter a chunk of missing memory
+		 * at a sufficiently high offset, call it the end of
+		 * memory.  Otherwise we run the risk of thinking
+		 * that video memory is real RAM.
+		 */
+		if(pa >= 32*MB)
+			break;
+	}
+
+	*k0 = save;
+}
+
+void
+showpagetables(uintptr *pml4)
+{
+	uintptr *epml4, pml4e, *pdp, *epdp, pdpe, *pd, *epd, pde, *pt, *ept, pte, cr3, pa;
+	/* debugging aid: print CR3 and every non-zero entry reachable from pml4 */
+	cr3 = getcr3();
+	print("CR3 0x%zux cpu0pml4 0x%p\n"
+		"\tpml4 base address 0x%zux\n"
+		"\tpage-level writethrough bit 0x%zux"
+		" page-level cache disable bit 0x%zux\n",
+		cr3, PML4ADDR, cr3&(~0xFFF),
+		(cr3>>3)&1,	/* write-through: same bit position as PTEWT */
+		(cr3>>4)&1);	/* cache disable: same bit position as PTEUNCACHED */
+	epml4 = pml4+512;
+	for(; pml4 != epml4; pml4++){
+		if(*pml4 == 0)
+			continue;
+		pml4e = *pml4;
+		pdp = (uintptr*)(pml4e&(~0xFFF));	/* table base with the low 12 flag bits masked off */
+		print("pml4 0x%p has 0x%zx pdp base address 0x%p\n", pml4, pml4e, pdp);
+		epdp = pdp + 512;
+		for(; pdp != epdp; pdp++){
+			if(*pdp == 0)
+				continue;
+			pdpe = *pdp;
+			pd = (uintptr*)(pdpe&(~0xFFF));
+			epd = pd + 512;
+			print("\tpdp 0x%p has 0x%zx pd base address 0x%p\n",
+				pdp, pdpe, pd);
+			for(; pd != epd; pd++){
+				if(*pd == 0)
+					continue;
+				pde = *pd;
+				pt = (uintptr*)(pde&(~0xFFF));
+				print("\t\tpd 0x%p has 0x%zx page base address 0x%zx\n",
+					pd, pde, pt);
+				ept = pt + 512;
+				for(; pt != ept; pt++){
+					if(*pt == 0)
+						continue;
+					pte = *pt;
+					pa = (uintptr)(pte&(~0xFFF));
+					print("\t\t\tpt 0x%p has 0x%zx address 0x%zx\n",
+						pt, pte, pa);
+				}
+			}
+		}
+	}
+}
+
+/*
+ * Sort out initial memory map and discover RAM.
+ */
+void
+meminit0(void)
+{
+	uintptr prevbase = 0, base, size = 0;	/* NOTE(review): none of these locals are used below */
+
+	print("MemMin 0x%llux end 0x%p KZERO 0x%x KDZERO 0x%p\n"
+		"\tKTZERO 0x%x etext 0x%zux\n\tCPU0END 0x%llux\n"
+		"\tPADDR(PGROUND((uintptr)end)) 0x%zux MemMin-PADDR(PGROUND((uintptr)end)) 0x%zux\n",
+		MemMin, end, KZERO, KDZERO, KTZERO, etext, (uintptr)CPU0END,
+		PADDR(PGROUND((uintptr)end)), MemMin-PADDR(PGROUND((uintptr)end)));
+	/*
+	 * Add the already mapped memory after the kernel.
+	 */
+	if(MemMin < PADDR(PGROUND((uintptr)end)))
+		panic("kernel too big");
+	memmapadd(PADDR(PGROUND((uintptr)end)), MemMin-PADDR(PGROUND((uintptr)end)), MemRAM);
+
+	/*
+	 * Memory below MemMin is reserved for the kernel.
+	 * Also, set the kernel text pages read only
+	 */
+	memreserve(PADDR(KDZERO), PADDR(PGROUND((uintptr)MemMin))-PADDR(KDZERO));
+	kernelro();
+
+	/*
+	 * Addresses below 16MB default to be upper
+	 * memory blocks usable for ISA devices.
+	 */
+	memmapadd(0, 16*MB, MemUMB);
+
+	/*
+	 * Everything between 16MB and 4GB defaults
+	 * to unbacked physical addresses usable for
+	 * device mappings.
+	 */
+	memmapadd(16*MB, (u32)-16*MB, MemUPA);	/* presumably 4GB-16MB via 32-bit wraparound — confirm MB's type */
+
+		print("------before lowraminit -----\n");
+		memmapdump();
+		print("-----------\n");
+	/*
+	 * Discover conventional RAM, ROMs and UMBs.
+	 */
+	lowraminit();
+
+		print("------after lowraminit -----\n");
+		memmapdump();
+		print("-----------\n");
+	/*
+	 * Discover more RAM and map to KZERO.
+	 */
+	if(e820scan() < 0)	/* no e820 string from the bootloader */
+		ramscan(MemMin, -((uintptr)MemMin));
+
+	/*
+	 * Exclude UMB's and UPA's with unusual cache attributes.
+	 */
+	mtrrexclude(MemUMB, "uc");
+	mtrrexclude(MemUPA, "uc");
+}
+
+/*
+ * Until the memory map is finalized by meminit(),
+ * archinit() should reserve memory of discovered BIOS
+ * and ACPI tables by calling memreserve() to prevent
+ * them from getting allocated and trashed.
+ * This is due to the UEFI and BIOS memory map being
+ * unreliable and sometimes marking these ranges as RAM.
+ */
+void
+memreserve(uintptr pa, uintptr size)
+{
+	assert(conf.mem[0].npage == 0);	/* only legal before meminit() fills conf.mem[] */
+	/* NOTE(review): size is rounded DOWN to whole pages, so a trailing partial page is not reserved — confirm */
+	size += (pa & BY2PG-1);
+	size &= ~(BY2PG-1);
+	pa &= ~(BY2PG-1);
+	memmapadd(pa, size, MemReserved);
+}
+
+/*
+ * Finalize the memory map:
+ *  (re-)map the upper memory blocks
+ *  allocate all usable ram to the conf.mem[] banks
+ *  memory is allocated to memory maps -> conf.mem[] -> xlists.hole
+ */
+void
+meminit(void)
+{
+	uintptr base, size;
+	Confmem *cm;
+
+		print("------ before umbexclude -----\n");
+		memmapdump();
+		print("-----------\n");
+	umbexclude();
+		print("------ after umbexclude -----\n");
+		memmapdump();
+		print("-----------\n");
+	for(base = memmapnext(-1, MemUMB); base != -1; base = memmapnext(base, MemUMB)){
+		size = memmapsize(base, BY2PG) & ~(BY2PG-1);	/* whole pages only */
+		if(size != 0)
+			mapkzero(PGROUND(base), size, MemUMB);
+	}
+
+	cm = &conf.mem[0];
+	for(base = memmapnext(-1, MemRAM); base != -1; base = memmapnext(base, MemRAM)){
+		size = memmapsize(base, BY2PG) & ~(BY2PG-1);
+		if(size == 0)
+			continue;
+		if(cm >= &conf.mem[nelem(conf.mem)]){
+			print("meminit: out of entries, loosing: %#p (%llud)\n", base, (uvlong)size);
+			continue;
+		}
+		if(base < MemMin){
+			print("meminit: ignoring RAM below MemMin base 0x%p size 0x%zux\n", base, size);	/* was 0x%d: wrong verb for a uintptr */
+			continue;
+		}
+		cm->base = memmapalloc(base, size, BY2PG, MemRAM);
+		if(cm->base == -1)
+			continue;
+		base = cm->base;	/* continue the walk from the address actually allocated */
+		cm->npage = size/BY2PG;
+		cm++;
+	}
+
+	print("-----------\n");
+	if(1) memmapdump();
+	print("-----------\n");
+	/* uncomment to dump the page tables after the map is final: */
+	//showpagetables((uintptr*)PML4ADDR);
+}
--- /dev/null
+++ b/os/pc64/mkfile
@@ -1,0 +1,110 @@
+<../../mkconfig
+
+#Configurable parameters
+
+CONF=pc64			#default configuration
+CONFLIST=pc64
+CLEANCONFLIST=pc64
+
+ROOT=/mnt/term/home/j/local/plan9/custom/inferno-os
+SYSTARG=Inferno # $OSTARG
+OBJTYPE=amd64 # 386
+INSTALLDIR=$ROOT/Inferno/$OBJTYPE/bin	#path of directory where kernel is installed
+#INSTALLDIR=/$OBJTYPE
+
+# must match mem.h
+KTZERO=0x200000
+APBOOTSTRAP=0x107000
+REBOOTADDR=0x11000
+
+#end configurable parameters
+
+<$ROOT/mkfiles/mkfile-$SYSTARG-$OBJTYPE	#set vars based on target system
+
+<| $SHELLNAME ../port/mkdevlist $CONF	#sets $IP, $DEVS, $ETHERS, $VGAS, $PORT, $MISC, $LIBS, $OTHERS
+
+OBJ=\
+	l.$O\
+	fpu.$O\
+	portclock.$O\
+	tod.$O\
+	i8253.$O\
+	i8259.$O\
+	main.$O\
+	kbd.$O\
+	memmap.$O\
+	memory.$O\
+	mmu.$O\
+	mtrr.$O\
+	trap.$O\
+	bootargs.$O\
+	$CONF.root.$O\
+	$IP\
+	$DEVS\
+	$ETHERS\
+	$LINKS\
+	$PORT\
+	$MISC\
+	$OTHERS\
+
+LIBNAMES=${LIBS:%=lib%.a}
+
+HFILES=\
+	mem.h\
+	dat.h\
+	fns.h\
+	io.h\
+
+CFLAGS=-wFVT -I$ROOT/Inferno/$OBJTYPE/include -I$ROOT/include -I$ROOT/libinterp -I../port
+KERNDATE=`{$NDATE}
+
+default:V: i$CONF
+
+ETHERS=`{cd ../pc; echo devether.c ether*.c | sed 's/\.c/.'$O'/g'}
+AUDIO=`{cd ../pc; echo devaudio.c audio*.c | sed 's/\.c/.'$O'/g'}
+VGA=`{cd ../pc; echo devvga.c screen.c vga*.c | sed 's/\.c/.'$O'/g'}
+SDEV=`{cd ../pc; echo devsd.c sd*.c | sed 's/\.c/.'$O'/g'}
+
+i$CONF: $OBJ $CONF.c $CONF.root.h $LIBNAMES
+	$CC $CFLAGS '-DKERNDATE='$KERNDATE $CONF.c
+	$LD -o $target -T$KTZERO -R4096 -l $OBJ $CONF.$O $LIBFILES
+	$KSIZE $target
+
+install:V: i$CONF
+	cp i$CONF $INSTALLDIR/i$CONF
+
+# copies generated by the rule below
+PCHEADERS=usbehci.h screen.h mp.h io.h ahci.h \
+	yukdump.h vga.h audio.h
+
+REPCH=`{echo $PCHEADERS | sed 's/\.h//g; s/ /|/g'}
+^($REPCH)\.h:R:	../pc/\1.h
+	cp $prereq .
+
+REPCC=`{../port/mkfilelist ../pc}
+^($REPCC)\.$O:R:	'../pc/\1.c'
+	$CC $CFLAGS -I. -. ../pc/$stem1.c
+
+<../port/portmkfile
+
+clock.$O:	$ROOT/Inferno/$OBJTYPE/include/ureg.h
+devether.$O:	$ROOT/Inferno/$OBJTYPE/include/ureg.h
+fault386.$O:	$ROOT/Inferno/$OBJTYPE/include/ureg.h
+main.$O:	$ROOT/Inferno/$OBJTYPE/include/ureg.h
+trap.$O:	$ROOT/Inferno/$OBJTYPE/include/ureg.h
+
+devether.$O $ETHERS:	etherif.h ../port/netif.h
+$IP devip.$O:		../ip/ip.h
+$VGA mouse.$O:		screen.h ../../include/memdraw.h
+
+mp.$O:				mp.h apbootstrap.i
+apic.$O squidboy.$O:		mp.h
+archmp.$O archacpi.$O:		mp.h
+
+# to be moved to port/interp 
+bench.h:D: ../../module/bench.m
+	rm -f $target && limbo -a -I../../module ../../module/bench.m > $target
+benchmod.h:D:  ../../module/bench.m
+	rm -f $target && limbo -t Bench -I../../module ../../module/bench.m > $target
+devbench.$O: bench.h benchmod.h
+$VGA screen.$O:	screen.h vga.h
--- /dev/null
+++ b/os/pc64/mmu.c
@@ -1,0 +1,310 @@
+#include	"u.h"
+#include	"../port/lib.h"
+#include	"mem.h"
+#include	"dat.h"
+#include	"fns.h"
+#include	"io.h"
+
+#define DP	if(1){}else print
+/*
+ * Simple segment descriptors with no translation.
+ */
+#define	EXECSEGM(p) 	{ 0, SEGL|SEGP|SEGPL(p)|SEGEXEC }
+#define	DATASEGM(p) 	{ 0, SEGB|SEGG|SEGP|SEGPL(p)|SEGDATA|SEGW }
+
+Segdesc gdt[NGDT] =
+{
+[NULLSEG]	{ 0, 0},		/* null descriptor */
+[KESEG]		EXECSEGM(0),		/* code - kernel privilege for all */
+[KDSEG]		DATASEGM(0),		/* data - kernel privilege for all */
+};
+
+enum {
+	/* level */
+	PML4E	= 3,
+	PDPE	= 2,
+	PDE	= 1,
+	PTE	= 0,
+};
+
+static void
+loadptr(u16int lim, uintptr off, void (*load)(void*))
+{
+	u64int b[2], *o;
+	u16int *s;
+	/* build a 16-bit limit immediately followed by a 64-bit offset inside b, then pass it to load */
+	o = &b[1];
+	s = ((u16int*)o)-1;	/* the 2 bytes directly before b[1] */
+
+	*s = lim;
+	*o = off;
+
+	(*load)(s);	/* called with lgdt and lidt by mmuinit() */
+}
+
+static void
+taskswitch(uintptr stack)
+{
+	Tss *t;
+	u32 lo, hi;
+	/* rsp0, rsp1 and rsp2 all receive the same stack, split into 32-bit halves */
+	t = m->tss;
+	lo = (u32)stack;
+	hi = stack >> 32;
+	t->rsp0[0] = lo; t->rsp0[1] = hi;
+	t->rsp1[0] = lo; t->rsp1[1] = hi;
+	t->rsp2[0] = lo; t->rsp2[1] = hi;
+	mmuflushtlb();
+}
+
+void
+mmuinit(void)
+{
+	uintptr x;
+	int i;
+	/* per-processor setup: TSS, GDT/IDT loads, FS/GS base MSRs, then map every conf.mem[] bank */
+	/* move kernelro here to mimic 9front? */
+
+	m->tss = mallocz(sizeof(Tss), 1);
+	if(m->tss == nil){
+		print("mmuinit: no memory for Tss");
+		panic("mmuinit: no memory for Tss");
+	}
+	m->tss->iomap = 0xDFFF;
+	/* the IST is not used by the interrupt descriptors.
+	 * Putting the existing stack address instead of
+	 * leaving the fields empty.
+	 */
+	for(i=0; i<14; i+=2){
+		x = (uintptr)m + MACHSIZE;
+		m->tss->ist[i] = x;	/* stored as two halves: x here, x>>32 in the next slot */
+		m->tss->ist[i+1] = x>>32;
+	}
+
+	/*
+	 * We used to keep the GDT in the Mach structure, but it
+	 * turns out that that slows down access to the rest of the
+	 * page.  Since the Mach structure is accessed quite often,
+	 * it pays off anywhere from a factor of 1.25 to 2 on real
+	 * hardware to separate them (the AMDs are more sensitive
+	 * than Intels in this regard).  Under VMware it pays off
+	 * a factor of about 10 to 100.
+	 */
+	memmove(m->gdt, gdt, sizeof gdt);
+
+	x = (uintptr)m->tss;	/* the TSS descriptor spans two GDT slots */
+	m->gdt[TSSSEG+0].d0 = (x<<16)|(sizeof(Tss)-1);
+	m->gdt[TSSSEG+0].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
+	m->gdt[TSSSEG+1].d0 = x>>32;
+	m->gdt[TSSSEG+1].d1 = 0;
+
+	loadptr(sizeof(gdt)-1, (uintptr)m->gdt, lgdt);
+	loadptr(sizeof(Segdesc)*512-1, (uintptr)IDTADDR, lidt);
+	taskswitch((uintptr)m + MACHSIZE);
+	ltr(TSSSEL);
+
+	wrmsr(FSbase, 0ull);
+	wrmsr(GSbase, (u64)&machp[m->machno]);
+	/* SWAPGS is not needed for inferno.
+	 * leaving it the same as GSbase if unintentional SWAPGS's are used
+	 */
+	wrmsr(KernelGSbase, (u64)&machp[m->machno]);
+
+	/* pre allocate pages */
+	Confmem *cm;
+	ulong np, nt;	/* NOTE(review): np is only assigned and nt is never used */
+
+	np = 0;
+	for(i=0; i<nelem(conf.mem); i++){
+		cm = &conf.mem[i];
+		if(cm->npage == 0)
+			continue;
+		DP("i %d base 0x%p npage 0x%d\n", i, cm->base, cm->npage);
+		pmap(cm->base, PTEGLOBAL|PTEWRITE|PTENOEXEC|PTEVALID, cm->npage*BY2PG);	/* data banks: writable, global, no-execute when available */
+
+	}
+}
+
+int
+mmukmapsync(uintptr va)
+{
+return 0;	/* stub: va is ignored */
+}
+
+uintptr
+mmukmap(uintptr pa, uintptr va, int size)
+{
+return 0;	/* stub: nothing is mapped, arguments are ignored */
+}
+
+void*
+vmap(uintptr pa, int size)
+{
+return 0;	/* stub: always fails, so callers testing for nil (e.g. rsdsearch) take their failure path */
+}
+
+void
+vunmap(void *va, int size)
+{	/* stub: nothing to undo while vmap() maps nothing */
+}
+
+s32
+segflush(void*, u32)
+{
+	return 0;	/* stub: both arguments are unnamed and unused */
+}
+
+static uintptr*
+mmucreate(uintptr *table, uintptr pa, int level, int index)
+{
+	uintptr *page;
+
+	/* allocate and zero a table page; pa and level appear only in the debug output */
+	DP("mmucreate table 0x%p pa 0x%p level %d index %d\n",
+		table, pa, level, index);
+	page = (uintptr*)rampage();
+	DP("mmucreate new page 0x%p PTSZ 0x%x %d BY2PG 0x%x %d\n",
+		page, PTSZ, PTSZ, BY2PG, BY2PG);
+	memset(page, 0, PTSZ);
+	/* hook the new page into its parent slot as valid and writable */
+	table[index] = PADDR(page) | PTEWRITE|PTEVALID;
+	return page;
+}
+
+/* Return the level-'level' entry for pa below table, or nil; 1GiB/2MiB pages are not followed */
+uintptr*
+mmuwalk(uintptr* table, uintptr pa, int level, int create)
+{
+	uintptr pte;
+	int i, x;
+
+	DP("mmuwalk table 0x%p pa 0x%p level %d create %d\n",
+		table, pa, level, create);
+	x = PTLX(pa, 3);
+	DP("\tpml4 index %d\n", x);
+	for(i = 2; i >= level; i--){
+		pte = table[x];
+		if(pte & PTEVALID){
+			if(pte & PTESIZE)	/* large page: its frame is data, not a table */
+				return nil;
+			table = (void*)PPN(pte);	/* the next table is reached through its physical address */
+		} else {
+			if(!create)
+				return nil;
+			table = mmucreate(table, pa, i, x);
+		}
+		x = PTLX(pa, i);
+		DP("\tlevel %d index %d\n", i, x);
+	}
+	return &table[x];
+}
+
+static int
+ptecount(uintptr pa, s32 level)
+{
+	return (1<<PTSHIFT) - (pa & PGLSZ(level+1)-1) / PGLSZ(level);	/* entries from pa's slot to the end of its table */
+}
+
+/* splits a large (PGLSZ(1)) page entry into a table of 512 PGLSZ(0) pages;
+ * probably not useful for inferno
+ */
+static void
+ptesplit(uintptr* table, uintptr va)
+{
+	uintptr *pte, pa, off;
+	/* nothing to do unless va lies strictly inside a large page */
+	pte = mmuwalk(table, va, 1, 0);
+	if(pte == nil || (*pte & PTESIZE) == 0 || (va & PGLSZ(1)-1) == 0)
+		return;
+	table = (uintptr*)rampage();	/* not zeroed: the loop below writes every slot */
+	va &= -PGLSZ(1);
+	pa = *pte & ~PTESIZE;	/* keeps the other flag bits of the large entry */
+	for(off = 0; off < PGLSZ(1); off += PGLSZ(0))
+		table[PTLX(va + off, 0)] = pa + off;
+	*pte = PADDR(table) | PTEVALID|PTEWRITE;
+	invlpg(va);
+}
+
+void
+pmap(uintptr pa, u64 flags, s64 size)
+{
+	uintptr *pte, *ptee;
+	s32 z, l;
+	/* enter pa|flags for every 4K page of [pa, pa+size), walking with the same address (identity) */
+	if(size <= 0)
+		panic("pmap: pa=%#p size=%lld", pa, size);
+	DP("pmap pa 0x%p-0x%zux flags 0x%llux size %llud 0x%llux\n",
+		pa, (uintptr)pa+size, flags, size, size);
+	pa = PPN(pa);	/* clears the in-page offset bits and bit 63 */
+	DP("\tpa 0x%p\n", pa);
+	while(size > 0){
+		/* reducing complexity, use 4096 byte pages all through */
+		l = 0;
+		z = PGLSZ(0);
+		pte = mmuwalk((uintptr*)PML4ADDR, pa, l, 1);	/* creates missing tables */
+		if(pte == nil){
+			panic("pmap: pa=%#p size=%lld", pa, size);
+		}
+		ptee = pte + ptecount(pa, l);	/* end of the table page holding pte */
+		while(size > 0 && pte < ptee){
+			*pte++ = pa | flags;
+			pa += z;
+			size -= z;
+		}
+	}
+}
+
+void
+punmap(uintptr pa, vlong size)
+{
+	uintptr *pte;
+	int l;
+	/* clear the entries covering [pa, pa+size), splitting a large page when only part of it goes */
+	pa = PPN(pa);
+	while(size > 0){
+		if((pa % PGLSZ(1)) != 0 || size < PGLSZ(1))
+			ptesplit((uintptr*)PML4ADDR, pa);
+		l = 0;
+		pte = mmuwalk((uintptr*)PML4ADDR,pa, l, 0);
+		if(pte == nil && (pa % PGLSZ(1)) == 0 && size >= PGLSZ(1))
+			pte = mmuwalk((uintptr*)PML4ADDR, pa, ++l, 0);	/* retry one level up for an aligned, full-sized piece */
+		if(pte){
+			*pte = 0;
+			invlpg(pa);
+		}
+		pa += PGLSZ(l);	/* advance by the size of the level actually used */
+		size -= PGLSZ(l);
+	}
+}
+
+/*
+ * mark pages as write combining (used for framebuffer)
+ */
+void
+patwc(void *a, int n)
+{
+	uintptr *pte, mask, attr, pa;
+	int z, l;
+	vlong v;
+
+	/* check if pat is usable */
+	if((MACHP(0)->cpuiddx & Pat) == 0
+	|| rdmsr(0x277, &v) == -1
+	|| ((v >> PATWC*8) & 7) != 1)	/* byte PATWC of the MSR value must hold type 1 */
+		return;
+
+	/* set the bits for all pages in range */
+	for(pa = (uintptr)a; n > 0; n -= z, pa += z){
+		l = 0;
+		pte = mmuwalk((uintptr*)PML4ADDR, pa, l, 0);
+		if(pte == 0)
+			pte = mmuwalk((uintptr*)PML4ADDR, pa, ++l, 0);	/* retry one level up */
+		if(pte == 0 || (*pte & PTEVALID) == 0)
+			panic("patwc: pa=%#p", pa);
+		z = PGLSZ(l);
+		z -= pa & (z-1);	/* bytes left in this page from pa */
+		mask = l == 0 ? 3<<3 | 1<<7 : 3<<3 | 1<<12;	/* bits 3-4 plus bit 7 at level 0, bit 12 above */
+		attr = (((PATWC&3)<<3) | ((PATWC&4)<<5) | ((PATWC&4)<<10));	/* PATWC's low two bits to bits 3-4, its bit 2 to bits 7 and 12 */
+		*pte = (*pte & ~mask) | (attr & mask);
+	}
+}
--- /dev/null
+++ b/os/pc64/mp.h
@@ -1,0 +1,262 @@
+/*
+ * MultiProcessor Specification Version 1.[14].
+ */
+typedef struct {			/* floating pointer */
+	uchar	signature[4];		/* "_MP_" */
+	long	physaddr;		/* physical address of MP configuration table */
+	uchar	length;			/* 1 */
+	uchar	specrev;		/* [14] */
+	uchar	checksum;		/* all bytes must add up to 0 */
+	uchar	type;			/* MP system configuration type */
+	uchar	imcrp;
+	uchar	reserved[3];
+} _MP_;
+
+#define _MP_sz			(4+4+1+1+1+1+1+3)
+
+typedef struct {			/* configuration table header */
+	uchar	signature[4];		/* "PCMP" */
+	ushort	length;			/* total table length */
+	uchar	version;		/* [14] */
+	uchar	checksum;		/* all bytes must add up to 0 */
+	uchar	product[20];		/* product id */
+	ulong	oemtable;		/* OEM table pointer */
+	ushort	oemlength;		/* OEM table length */
+	ushort	entry;			/* entry count */
+	ulong	lapicbase;		/* address of local APIC */
+	ushort	xlength;		/* extended table length */
+	uchar	xchecksum;		/* extended table checksum */
+	uchar	reserved;
+} PCMP;
+
+#define PCMPsz			(4+2+1+1+20+4+2+2+4+2+1+1)
+
+typedef struct {			/* processor table entry */
+	uchar	type;			/* entry type (0) */
+	uchar	apicno;			/* local APIC id */
+	uchar	version;		/* local APIC version */
+	uchar	flags;			/* CPU flags */
+	uchar	signature[4];		/* CPU signature */
+	ulong	feature;		/* feature flags from CPUID instruction */
+	uchar	reserved[8];
+} PCMPprocessor;
+
+#define PCMPprocessorsz		(1+1+1+1+4+4+8)
+
+typedef struct {			/* bus table entry */
+	uchar	type;			/* entry type (1) */
+	uchar	busno;			/* bus id */
+	char	string[6];		/* bus type string */
+} PCMPbus;
+
+#define PCMPbussz		(1+1+6)
+
+typedef struct {			/* I/O APIC table entry */
+	uchar	type;			/* entry type (2) */
+	uchar	apicno;			/* I/O APIC id */
+	uchar	version;		/* I/O APIC version */
+	uchar	flags;			/* I/O APIC flags */
+	ulong	addr;			/* I/O APIC address */
+} PCMPioapic;
+
+#define PCMPioapicsz		(1+1+1+1+4)
+
+typedef struct {			/* interrupt table entry */
+	uchar	type;			/* entry type ([34]) */
+	uchar	intr;			/* interrupt type */
+	ushort	flags;			/* interrupt flag */
+	uchar	busno;			/* source bus id */
+	uchar	irq;			/* source bus irq */
+	uchar	apicno;			/* destination APIC id */
+	uchar	intin;			/* destination APIC [L]INTIN# */
+} PCMPintr;
+
+#define PCMPintrsz		(1+1+2+1+1+1+1)
+
+typedef struct {			/* system address space mapping entry */
+	uchar	type;			/* entry type (128) */
+	uchar	length;			/* of this entry (20) */
+	uchar	busno;			/* bus id */
+	uchar	addrtype;
+	ulong	addrbase[2];
+	ulong	addrlength[2];
+} PCMPsasm;
+
+#define PCMPsasmsz		(1+1+1+1+8+8)
+
+typedef struct {			/* bus hierarchy descriptor entry */
+	uchar	type;			/* entry type (129) */
+	uchar	length;			/* of this entry (8) */
+	uchar	busno;			/* bus id */
+	uchar	info;			/* bus info */
+	uchar	parent;			/* parent bus */
+	uchar	reserved[3];
+} PCMPhierarchy;
+
+#define PCMPhirarchysz		(1+1+1+1+1+3)
+
+typedef struct {			/* compatibility bus address space modifier entry */
+	uchar	type;			/* entry type (130) */
+	uchar	length;			/* of this entry (8) */
+	uchar	busno;			/* bus id */
+	uchar	modifier;		/* address modifier */
+	ulong	range;			/* predefined range list */
+} PCMPcbasm;
+
+#define PCMPcbasmsz		(1+1+1+1+4)
+
+enum {					/* table entry types */
+	PcmpPROCESSOR	= 0x00,		/* one entry per processor */
+	PcmpBUS		= 0x01,		/* one entry per bus */
+	PcmpIOAPIC	= 0x02,		/* one entry per I/O APIC */
+	PcmpIOINTR	= 0x03,		/* one entry per bus interrupt source */
+	PcmpLINTR	= 0x04,		/* one entry per system interrupt source */
+
+	PcmpSASM	= 0x80,
+	PcmpHIERARCHY	= 0x81,
+	PcmpCBASM	= 0x82,
+
+					/* PCMPprocessor and PCMPioapic flags */
+	PcmpEN		= 0x01,		/* enabled */
+	PcmpBP		= 0x02,		/* bootstrap processor */
+
+					/* PCMPiointr and PCMPlintr flags */
+	PcmpPOMASK	= 0x03,		/* polarity conforms to specifications of bus */
+	PcmpHIGH	= 0x01,		/* active high */
+	PcmpLOW		= 0x03,		/* active low */
+	PcmpELMASK	= 0x0C,		/* trigger mode of APIC input signals */
+	PcmpEDGE	= 0x04,		/* edge-triggered */
+	PcmpLEVEL	= 0x0C,		/* level-triggered */
+
+					/* PCMPiointr and PCMPlintr interrupt type */
+	PcmpINT		= 0x00,		/* vectored interrupt from APIC Rdt */
+	PcmpNMI		= 0x01,		/* non-maskable interrupt */
+	PcmpSMI		= 0x02,		/* system management interrupt */
+	PcmpExtINT	= 0x03,		/* vectored interrupt from external PIC */
+
+					/* PCMPsasm addrtype */
+	PcmpIOADDR	= 0x00,		/* I/O address */
+	PcmpMADDR	= 0x01,		/* memory address */
+	PcmpPADDR	= 0x02,		/* prefetch address */
+
+					/* PCMPhierarchy info */
+	PcmpSD		= 0x01,		/* subtractive decode bus */
+
+					/* PCMPcbasm modifier */
+	PcmpPR		= 0x01,		/* predefined range list */
+};
+
+/*
+ * Condensed form of the MP Configuration Table.
+ * This is created during a single pass through the MP Configuration
+ * table.
+ */
+typedef struct Aintr Aintr;
+typedef struct Bus Bus;
+typedef struct Apic Apic;
+
+typedef struct Bus {
+	uchar	type;
+	uchar	busno;
+	uchar	po;
+	uchar	el;
+
+	Aintr*	aintr;			/* interrupts tied to this bus */
+	Bus*	next;
+} Bus;
+
+typedef struct Aintr {
+	PCMPintr* intr;			/* presumably the MP table entry this record was built from — confirm */
+	Apic*	apic;
+	Bus*	bus;
+	Aintr*	next;			/* presumably the next entry on Bus.aintr's list — confirm */
+};					/* NOTE(review): no declarator after '}', unlike Bus and Apic; the name comes from the earlier typedef */
+
+typedef struct Apic {
+	int	type;
+	int	apicno;
+	ulong*	addr;			/* register base address */
+	ulong	paddr;
+	int	flags;			/* PcmpBP|PcmpEN */
+
+	Lock;				/* I/O APIC: register access */
+	int	mre;			/* I/O APIC: maximum redirection entry */
+	int	gsibase;		/* I/O APIC: global system interrupt base (acpi) */
+
+	int	lintr[2];		/* Local APIC */
+	int	machno;
+
+	int	online;
+} Apic;
+
+enum {
+	MaxAPICNO	= 254,		/* 255 is physical broadcast */
+};
+
+enum {					/* I/O APIC registers */
+	IoapicID	= 0x00,		/* ID */
+	IoapicVER	= 0x01,		/* version */
+	IoapicARB	= 0x02,		/* arbitration ID */
+	IoapicRDT	= 0x10,		/* redirection table */
+};
+
+/*
+ * Common bits for
+ *	I/O APIC Redirection Table Entry;
+ *	Local APIC Local Interrupt Vector Table;
+ *	Local APIC Inter-Processor Interrupt;
+ *	Local APIC Timer Vector Table.
+ */
+enum {
+	ApicFIXED	= 0x00000000,	/* [10:8] Delivery Mode */
+	ApicLOWEST	= 0x00000100,	/* Lowest priority */
+	ApicSMI		= 0x00000200,	/* System Management Interrupt */
+	ApicRR		= 0x00000300,	/* Remote Read */
+	ApicNMI		= 0x00000400,
+	ApicINIT	= 0x00000500,	/* INIT/RESET */
+	ApicSTARTUP	= 0x00000600,	/* Startup IPI */
+	ApicExtINT	= 0x00000700,
+
+	ApicPHYSICAL	= 0x00000000,	/* [11] Destination Mode (RW) */
+	ApicLOGICAL	= 0x00000800,
+
+	ApicDELIVS	= 0x00001000,	/* [12] Delivery Status (RO) */
+	ApicHIGH	= 0x00000000,	/* [13] Interrupt Input Pin Polarity (RW) */
+	ApicLOW		= 0x00002000,
+	ApicRemoteIRR	= 0x00004000,	/* [14] Remote IRR (RO) */
+	ApicEDGE	= 0x00000000,	/* [15] Trigger Mode (RW) */
+	ApicLEVEL	= 0x00008000,
+	ApicIMASK	= 0x00010000,	/* [16] Interrupt Mask */
+};
+
+extern void ioapicinit(Apic*, int);
+extern void ioapicrdtr(Apic*, int, int*, int*);
+extern void ioapicrdtw(Apic*, int, int, int);
+
+extern void lapicclock(Ureg*, void*);
+extern int lapiceoi(int);
+extern void lapicerror(Ureg*, void*);
+extern void lapicicrw(ulong, ulong);
+extern void lapicinit(Apic*);
+extern int lapicintroff(int);
+extern int lapicintron(Vctl*);
+extern int lapicisr(int);
+extern void lapicnmidisable(void);
+extern void lapicnmienable(void);
+extern void lapiconline(void);
+extern void lapicspurious(Ureg*, void*);
+extern void lapicstartap(Apic*, int);
+extern void lapictimerset(uvlong);
+
+extern int mpintrinit(Bus*, PCMPintr*, int, int);
+extern void mpinit(void);
+extern int mpintrassign(Vctl*);
+extern void mpshutdown(void);
+extern void mpstartap(Apic*);
+
+extern Bus* mpbus;
+extern Bus* mpbuslast;
+extern int mpisabus;
+extern int mpeisabus;
+extern Apic *mpioapic[];
+extern Apic *mpapic[];
--- /dev/null
+++ b/os/pc64/pc64
@@ -1,0 +1,969 @@
+# pc64 - amd64 pc terminal with local disk
+dev
+	root
+	cons
+	arch
+	env
+	mnt
+	pipe
+	prog
+	rtc
+	srv
+	dup
+	ssl
+	cap
+
+	draw	screen vga vgax cga
+	pointer
+	vga	pci
+
+	ip	bootp ip ipv6 ipaux iproute arp netlog ptclbsum iprouter plan9 nullmedium pktmedium
+	ether		netif netaux ethermedium
+
+#	ata
+	audio		dma
+	uart
+	sd
+#	floppy	dma
+	tinyfs
+#	mouse		mouse
+#	dbg	x86break
+ip
+	tcp
+	udp
+#	rudp
+	ipifc
+	icmp
+	icmp6
+#	gre
+	ipmux
+#	esp
+#	il
+lib
+	interp
+	keyring
+	draw 
+	memlayer
+	memdraw
+	tk
+	sec
+	mp
+	math
+	kern
+
+link
+##	ether82557	pci
+#	ether83815	pci
+#	etherelnk3	pci
+#	ps2mouse
+#	ethermedium
+##	pppmedium ppp compress
+#	below list from 9front
+#	ether2000	ether8390
+	ether2114x	pci
+#	ether589	etherelnk3
+#	ether79c970	pci
+#	ether8003	ether8390
+#	ether8139	pci
+#	ether8169	pci ethermii
+# should be obsoleted by igbe
+#	ether82543gc	pci
+#	ether82557	pci
+#	ether82563	pci
+#	ether82598	pci
+#	etherx550	pci
+#	ether83815	pci
+#	etherbcm        pci
+#	etherdp83820	pci ethermii
+#	etherec2t	ether8390
+#	etherelnk3	pci
+#	etherga620	pci
+#	etherigbe	pci ethermii
+#	ethervgbe	pci ethermii
+#	ethervt6102	pci ethermii
+#	ethervt6105m	pci ethermii
+#	ethersink
+#	ethersmc	devi82365 cis
+#	etheryuk	pci
+#	etherwavelan	wavelan devi82365 cis pci
+#	etheriwl	pci wifi
+#	etherwpi	pci wifi
+#	etherrt2860	pci wifi
+	ethervirtio	pci
+#	ethermedium
+
+misc
+	archgeneric
+#	archacpi	mp apic ec hpet
+	archmp		mp apic squidboy
+#	mtrr
+	bios32
+	pcipc
+	cga
+	uarti8250
+
+# below from 9front
+#	sdiahci		pci sdscsi led
+	sdvirtio	pci sdscsi
+
+	vgasoft		=cur swcursor
+#	vga3dfx		+cur
+#	vgaark2000pv	+cur
+#	vgabt485	=cur
+#	vgaclgd542x	+cur
+#	vgaclgd546x	+cur
+#	vgact65545	+cur
+#	vgacyber938x	+cur
+#	vgaet4000	+cur
+#	vgageode	+cur
+#	vgahiqvideo	+cur
+#	vgai81x		+cur
+#	vgaigfx		+cur
+#	vgamach64xx	+cur
+#	vgamga2164w	+cur
+#	vgamga4xx	+cur
+#	vganeomagic	+cur
+#	vganvidia	+cur
+#	vgaradeon	+cur
+#	vgargb524	=cur
+#	vgas3		+cur vgasavage
+#	vgat2r4		+cur
+#	vgatvp3020	=cur
+#	vgatvp3026	=cur
+	vgavesa
+#	vgavmware	+cur
+
+mod
+	sys
+	draw
+	tk
+	keyring
+	math
+
+init
+	shell
+	#wminit
+
+code
+	int kernel_pool_pcnt = 10;
+	int main_pool_pcnt = 40;
+	int heap_pool_pcnt = 20;
+	int image_pool_pcnt = 40;
+	int cflag=0;
+	int swcursor=0;
+	int consoleprint=1;
+
+port
+	alarm
+	alloc
+	allocb
+	chan
+	dev
+	dial
+	dis
+	discall
+	exception
+	exportfs
+	inferno
+	iomap
+	latin1
+	nocache
+	nodynld
+	parse
+	pgrp
+	print
+	proc
+	qio
+	qlock
+	random
+	swcursor
+	sysfile
+	taslock
+	xalloc
+
+root
+	/chan	/
+	/dev	/
+	/dis
+	/env	/
+	/fd	/
+	/n
+	/n/remote
+	/net	/
+	/nvfs	/
+	/prog	/
+	/dis/lib
+	/dis/svc
+	/dis/wm
+	/osinit.dis
+	/dis/sh.dis
+	/dis/ls.dis
+	/dis/cat.dis
+	/dis/bind.dis
+	/dis/mount.dis
+	/dis/pwd.dis
+	/dis/echo.dis
+	/dis/cd.dis
+	/dis/lib/bufio.dis
+	/dis/lib/string.dis
+	/dis/lib/readdir.dis
+	/dis/lib/workdir.dis
+	/dis/lib/daytime.dis
+	/dis/lib/auth.dis
+	/dis/lib/ssl.dis
+	/dis/lib/arg.dis
+	/dis/lib/filepat.dis
+	/dis/grid/demo/block.dis
+	/dis/grid/demo/blur.dis
+	/dis/grid/lib/announce.dis
+	/dis/grid/lib/browser.dis
+	/dis/grid/lib/fbrowse.dis
+	/dis/grid/lib/srvbrowse.dis
+	/dis/grid/regstyxlisten.dis
+	/dis/grid/blurdemo.dis
+	/dis/grid/cpupool.dis
+	/dis/grid/find.dis
+	/dis/grid/jpg2bit.dis
+	/dis/grid/query.dis
+	/dis/grid/readjpg.dis
+	/dis/grid/register.dis
+	/dis/grid/reglisten.dis
+	/dis/grid/usercreatesrv.dis
+	/dis/grid/remotelogon.dis
+	/dis/grid/spreesrv
+	/dis/install/applylog.dis
+	/dis/install/arch.dis
+	/dis/install/archfs.dis
+	/dis/install/ckproto.dis
+	/dis/install/create.dis
+	/dis/install/eproto.dis
+	/dis/install/info.dis
+	/dis/install/insbylog
+	/dis/install/inst.dis
+	/dis/install/install.dis
+	/dis/install/log.dis
+	/dis/install/logs.dis
+	/dis/install/mergelog.dis
+	/dis/install/mkproto.dis
+	/dis/install/proto.dis
+	/dis/install/proto2list.dis
+	/dis/install/updatelog.dis
+	/dis/install/wdiff.dis
+	/dis/install/wfind.dis
+	/dis/install/wrap.dis
+	/dis/install/wrap2list.dis
+	/dis/ip/ppp/pppclient.dis
+	/dis/ip/ppp/modem.dis
+	/dis/ip/ppp/pppdial.dis
+	/dis/ip/ppp/pppgui.dis
+	/dis/ip/ppp/ppptest.dis
+	/dis/ip/ppp/script.dis
+	/dis/ip/bootpd.dis
+	/dis/ip/dhcp.dis
+	/dis/ip/ping.dis
+	/dis/ip/rip.dis
+	/dis/ip/sntp.dis
+	/dis/ip/tftpd.dis
+	/dis/ip/virgild.dis
+	/dis/lego/clockface.dis
+	/dis/lego/clock.dis
+	/dis/lego/firmdl.dis
+	/dis/lego/link.dis
+	/dis/lego/rcxsend.dis
+	/dis/lego/send.dis
+	/dis/lego/timers.dis
+	/dis/lib/convcs/big5_btos.dis
+	/dis/lib/convcs/convcs.dis
+	/dis/lib/convcs/big5_stob.dis
+	/dis/lib/convcs/cp_stob.dis
+	/dis/lib/convcs/cp932_btos.dis
+	/dis/lib/convcs/euc-jp_btos.dis
+	/dis/lib/convcs/8bit_stob.dis
+	/dis/lib/convcs/gb2312_btos.dis
+	/dis/lib/convcs/utf8_stob.dis
+	/dis/lib/convcs/utf16_btos.dis
+	/dis/lib/convcs/utf8_btos.dis
+	/dis/lib/convcs/utf16_stob.dis
+	/dis/lib/convcs/cp_btos.dis
+	/dis/lib/crypt/sslsession.dis
+	/dis/lib/crypt/pkcs.dis
+	/dis/lib/crypt/x509.dis
+	/dis/lib/crypt/ssl3.dis
+	/dis/lib/encoding/base16.dis
+	/dis/lib/encoding/base32.dis
+	/dis/lib/encoding/base64.dis
+	/dis/lib/encoding/base32a.dis
+	/dis/lib/ftree/cptree.dis
+	/dis/lib/ftree/ftree.dis
+	/dis/lib/ftree/items.dis
+	/dis/lib/ida/ida.dis
+	/dis/lib/ida/idatab.dis
+	/dis/lib/mash/builtins.dis
+	/dis/lib/mash/history.dis
+	/dis/lib/mash/make.dis
+	/dis/lib/print/hp_driver.dis
+	/dis/lib/print/print.dis
+	/dis/lib/print/scaler.dis
+	/dis/lib/spki/spki.dis
+	/dis/lib/spki/verifier.dis
+	/dis/lib/strokes/buildstrokes.dis
+	/dis/lib/strokes/readstrokes.dis
+	/dis/lib/strokes/writestrokes.dis
+	/dis/lib/strokes/strokes.dis
+	/dis/lib/styxconv/ostyx.dis
+	/dis/lib/styxconv/old2new.dis
+	/dis/lib/styxconv/new2old.dis
+	/dis/lib/usb/usbmct.dis
+	/dis/lib/usb/usb.dis
+	/dis/lib/usb/usbmouse.dis
+	/dis/lib/usb/usbmass.dis
+	/dis/lib/w3c/css.dis
+	/dis/lib/w3c/uris.dis
+	/dis/lib/w3c/xpointers.dis
+	/dis/lib/asn1.dis
+	/dis/lib/arg.dis
+	/dis/lib/attrhash.dis
+	/dis/lib/attrdb.dis
+	/dis/lib/bloomfilter.dis
+	/dis/lib/auth.dis
+	/dis/lib/auth9.dis
+	/dis/lib/chanfill.dis
+	/dis/lib/crc.dis
+	/dis/lib/bufio.dis
+	/dis/lib/cfg.dis
+	/dis/lib/cfgfile.dis
+	/dis/lib/complete.dis
+	/dis/lib/csv.dis
+	/dis/lib/daytime.dis
+	/dis/lib/db.dis
+	/dis/lib/dbm.dis
+	/dis/lib/dbsrv.dis
+	/dis/lib/debug.dis
+	/dis/lib/deflate.dis
+	/dis/lib/devpointer.dis
+	/dis/lib/dhcpclient.dis
+	/dis/lib/dial.dis
+	/dis/lib/dialog.dis
+	/dis/lib/dict.dis
+	/dis/lib/dis.dis
+	/dis/lib/diskblocks.dis
+	/dis/lib/disks.dis
+	/dis/lib/dividers.dis
+	/dis/lib/drawmux.dis
+	/dis/lib/ecmascript.dis
+	/dis/lib/env.dis
+	/dis/lib/ether.dis
+	/dis/lib/exception.dis
+	/dis/lib/factotum.dis
+	/dis/lib/filepat.dis
+	/dis/lib/format.dis
+	/dis/lib/fsfilter.dis
+	/dis/lib/fslib.dis
+	/dis/lib/ip.dis
+	/dis/lib/fsproto.dis
+	/dis/lib/hash.dis
+	/dis/lib/html.dis
+	/dis/lib/imageremap.dis
+	/dis/lib/inflate.dis
+	/dis/lib/ipattr.dis
+	/dis/lib/ir.dis
+	/dis/lib/irsage.dis
+	/dis/lib/irsim.dis
+	/dis/lib/itslib.dis
+	/dis/lib/json.dis
+	/dis/lib/keyset.dis
+	/dis/lib/libc.dis
+	/dis/lib/libc0.dis
+	/dis/lib/lists.dis
+	/dis/lib/lock.dis
+	/dis/lib/login.dis
+	/dis/lib/man.dis
+	/dis/lib/mashlib.dis
+	/dis/lib/mashparse.dis
+	/dis/lib/memfs.dis
+	/dis/lib/mpeg.dis
+	/dis/lib/msgio.dis
+	/dis/lib/ssl.dis
+	/dis/lib/names.dis
+	/dis/lib/url.dis
+	/dis/lib/nametree.dis
+	/dis/lib/newns.dis
+	/dis/lib/ninep.dis
+	/dis/lib/oldauth.dis
+	/dis/lib/palm.dis
+	/dis/lib/palmdb.dis
+	/dis/lib/palmfile.dis
+	/dis/lib/parseman.dis
+	/dis/lib/plumbing.dis
+	/dis/lib/plumbmsg.dis
+	/dis/lib/pop3.dis
+	/dis/lib/popup.dis
+	/dis/lib/powerman.dis
+	/dis/lib/profile.dis
+	/dis/lib/pslib.dis
+	/dis/lib/quicktime.dis
+	/dis/lib/rabin.dis
+	/dis/lib/rand.dis
+	/dis/lib/random.dis
+	/dis/lib/readdir.dis
+	/dis/lib/readgif.dis
+	/dis/lib/readjpg.dis
+	/dis/lib/readpicfile.dis
+	/dis/lib/readpng.dis
+	/dis/lib/readxbitmap.dis
+	/dis/lib/regex.dis
+	/dis/lib/registries.dis
+	/dis/lib/rfc822.dis
+	/dis/lib/riff.dis
+	/dis/lib/scoretable.dis
+	/dis/lib/scsiio.dis
+	/dis/lib/secstore.dis
+	/dis/lib/selectfile.dis
+	/dis/lib/sets.dis
+	/dis/lib/sets32.dis
+	/dis/lib/sexprs.dis
+	/dis/lib/slip.dis
+	/dis/lib/smtp.dis
+	/dis/lib/sort.dis
+	/dis/lib/string.dis
+	/dis/lib/strinttab.dis
+	/dis/lib/styx.dis
+	/dis/lib/styxflush.dis
+	/dis/lib/styxlib.dis
+	/dis/lib/styxpersist.dis
+	/dis/lib/styxservers.dis
+	/dis/lib/tables.dis
+	/dis/lib/vac.dis
+	/dis/lib/tabs.dis
+	/dis/lib/tcl_calc.dis
+	/dis/lib/tcl_core.dis
+	/dis/lib/tcl_inthash.dis
+	/dis/lib/tcl_io.dis
+	/dis/lib/tcl_list.dis
+	/dis/lib/tcl_modhash.dis
+	/dis/lib/tcl_stack.dis
+	/dis/lib/tcl_strhash.dis
+	/dis/lib/tcl_string.dis
+	/dis/lib/tcl_symhash.dis
+	/dis/lib/tcl_utils.dis
+	/dis/lib/tftp.dis
+	/dis/lib/timers.dis
+	/dis/lib/titlebar.dis
+	/dis/lib/tkclient.dis
+	/dis/lib/translate.dis
+	/dis/lib/ubfa.dis
+	/dis/lib/venti.dis
+	/dis/lib/virgil.dis
+	/dis/lib/volume.dis
+	/dis/lib/wait.dis
+	/dis/lib/wmlib.dis
+	/dis/lib/watchvars.dis
+	/dis/lib/winplace.dis
+	/dis/lib/wmclient.dis
+	/dis/lib/wmsrv.dis
+	/dis/lib/workdir.dis
+	/dis/lib/writegif.dis
+	/dis/lib/xml.dis
+	/dis/math/factor.dis
+	/dis/math/ack.dis
+	/dis/math/crackerbarrel.dis
+	/dis/math/ffts.dis
+	/dis/math/fibonacci.dis
+	/dis/math/fit.dis
+	/dis/math/genprimes.dis
+	/dis/math/geodesy.dis
+	/dis/math/gr.dis
+	/dis/math/graph0.dis
+	/dis/math/hist0.dis
+	/dis/math/linalg.dis
+	/dis/math/linbench.dis
+	/dis/math/mersenne.dis
+	/dis/math/parts.dis
+	/dis/math/perms.dis
+	/dis/math/pi.dis
+	/dis/math/polyfill.dis
+	/dis/math/polyhedra.dis
+	/dis/math/powers.dis
+	/dis/math/primes.dis
+	/dis/math/sieve.dis
+	/dis/mpc/qconfig.dis
+	/dis/mpc/qflash.dis
+	/dis/mpeg/makergbvmap.dis
+	/dis/mpeg/decode.dis
+	/dis/mpeg/decode4.dis
+	/dis/mpeg/fixidct.dis
+	/dis/mpeg/fltidct.dis
+	/dis/mpeg/mpeg.dis
+	/dis/mpeg/mpegio.dis
+	/dis/mpeg/refidct.dis
+	/dis/mpeg/remap.dis
+	/dis/mpeg/remap1.dis
+	/dis/mpeg/remap2.dis
+	/dis/mpeg/remap24.dis
+	/dis/mpeg/remap4.dis
+	/dis/mpeg/remap8.dis
+	/dis/mpeg/scidct.dis
+	/dis/mpeg/vlc.dis
+	/dis/ndb/csquery.dis
+	/dis/ndb/cs.dis
+	/dis/ndb/dnsquery.dis
+	/dis/ndb/dns.dis
+	/dis/ndb/registry.dis
+	/dis/ndb/mkhash.dis
+	/dis/ndb/query.dis
+	/dis/ndb/regquery.dis
+	/dis/sh/echo.dis
+	/dis/sh/arg.dis
+	/dis/sh/csv.dis
+	/dis/sh/file2chan.dis
+	/dis/sh/expr.dis
+	/dis/sh/mload.dis
+	/dis/sh/mpexpr.dis
+	/dis/sh/regex.dis
+	/dis/sh/sexprs.dis
+	/dis/sh/sh.dis
+	/dis/sh/std.dis
+	/dis/sh/string.dis
+	/dis/sh/test.dis
+	/dis/sh/tk.dis
+	/dis/spki/verify.dis
+	/dis/spree/clients/cards.dis
+	/dis/spree/clients/gather.dis
+	/dis/spree/clients/lobby.dis
+	/dis/spree/clients/othello.dis
+	/dis/spree/engines/canfield.dis
+	/dis/spree/engines/afghan.dis
+	/dis/spree/engines/bounce.dis
+	/dis/spree/engines/freecell.dis
+	/dis/spree/engines/gather.dis
+	/dis/spree/engines/lobby.dis
+	/dis/spree/engines/othello.dis
+	/dis/spree/engines/racingdemon.dis
+	/dis/spree/engines/spider.dis
+	/dis/spree/engines/spit.dis
+	/dis/spree/engines/whist.dis
+	/dis/spree/lib/commandline.dis
+	/dis/spree/lib/allow.dis
+	/dis/spree/lib/cardlib.dis
+	/dis/spree/lib/objstore.dis
+	/dis/spree/lib/tricks.dis
+	/dis/spree/archives.dis
+	/dis/spree/join.dis
+	/dis/spree/spree.dis
+	/dis/svc/httpd/cgiparse.dis
+	/dis/svc/httpd/alarms.dis
+	/dis/svc/httpd/cache.dis
+	/dis/svc/httpd/contents.dis
+	/dis/svc/httpd/date.dis
+	/dis/svc/httpd/echo.dis
+	/dis/svc/httpd/httpd.dis
+	/dis/svc/httpd/imagemap.dis
+	/dis/svc/httpd/parser.dis
+	/dis/svc/httpd/redirect.dis
+	/dis/svc/httpd/stats.dis
+	/dis/svc/webget/image2enc.dis
+	/dis/svc/webget/date.dis
+	/dis/svc/webget/file.dis
+	/dis/svc/webget/ftp.dis
+	/dis/svc/webget/http.dis
+	/dis/svc/webget/message.dis
+	/dis/svc/webget/webget.dis
+	/dis/svc/webget/wgutils.dis
+	/dis/svc/registry
+	/dis/svc/auth
+	/dis/svc/net
+	/dis/svc/rstyx
+	/dis/svc/styx
+	/dis/tiny/broke.dis
+	/dis/tiny/kill.dis
+	/dis/tiny/rm.dis
+	/dis/tiny/sh.dis
+	/dis/usb/usbd.dis
+	/dis/wm/brutus/excerpt.dis
+	/dis/wm/brutus/image.dis
+	/dis/wm/brutus/mod.dis
+	/dis/wm/brutus/table.dis
+	/dis/wm/calendar.dis
+	/dis/wm/about.dis
+	/dis/wm/avi.dis
+	/dis/wm/bounce.dis
+	/dis/wm/brutus.dis
+	/dis/wm/c4.dis
+	/dis/wm/filename.dis
+	/dis/wm/clock.dis
+	/dis/wm/coffee.dis
+	/dis/wm/collide.dis
+	/dis/wm/colors.dis
+	/dis/wm/cprof.dis
+	/dis/wm/date.dis
+	/dis/wm/deb.dis
+	/dis/wm/debdata.dis
+	/dis/wm/debsrc.dis
+	/dis/wm/dir.dis
+	/dis/wm/dmview.dis
+	/dis/wm/dmwm.dis
+	/dis/wm/edit.dis
+	/dis/wm/ftree.dis
+	/dis/wm/getauthinfo.dis
+	/dis/wm/keyboard.dis
+	/dis/wm/logon.dis
+	/dis/wm/logwindow.dis
+	/dis/wm/man.dis
+	/dis/wm/mand.dis
+	/dis/wm/mash.dis
+	/dis/wm/memory.dis
+	/dis/wm/mprof.dis
+	/dis/wm/pen.dis
+	/dis/wm/polyhedra.dis
+	/dis/wm/prof.dis
+	/dis/wm/readmail.dis
+	/dis/wm/remotelogon.dis
+	/dis/wm/reversi.dis
+	/dis/wm/rmtdir.dis
+	/dis/wm/rt.dis
+	/dis/wm/sendmail.dis
+	/dis/wm/sh.dis
+	/dis/wm/smenu.dis
+	/dis/wm/snake.dis
+	/dis/wm/stopwatch.dis
+	/dis/wm/sweeper.dis
+	/dis/wm/task.dis
+	/dis/wm/telnet.dis
+	/dis/wm/tetris.dis
+	/dis/wm/toolbar.dis
+	/dis/wm/unibrowse.dis
+	/dis/wm/view.dis
+	/dis/wm/vt.dis
+	/dis/wm/wish.dis
+	/dis/wm/wm.dis
+	/dis/wm/wmplay.dis
+	/dis/acme.dis
+	/dis/broke
+	/dis/charon.dis
+	/dis/lc
+	/dis/lookman
+	/dis/man
+	/dis/mash.dis
+	/dis/sh.dis
+	/dis/shutdown
+	/dis/sig
+	/dis/basename.dis
+	/dis/9660srv.dis
+	/dis/B.dis
+	/dis/9export.dis
+	/dis/9srvfs.dis
+	/dis/9win.dis
+	/dis/demo/camera/tkinterface.dis
+	/dis/demo/camera/camera.dis
+	/dis/demo/camera/camload.bit
+	/dis/demo/camera/camproc.bit
+	/dis/demo/camera/runcam
+	/dis/demo/chat/chat.dis
+	/dis/demo/chat/chatclient
+	/dis/demo/chat/chatsrv.dis
+	/dis/demo/cpupool/regpoll.dis
+	/dis/demo/cpupool/runrstyx
+	/dis/demo/lego/clockface.dis
+	/dis/demo/lego/clockreg
+	/dis/demo/lego/firmdl.dis
+	/dis/demo/lego/legolink.dis
+	/dis/demo/lego/rcxsend.dis
+	/dis/demo/lego/styx.srec
+	/dis/demo/lego/timers.dis
+	/dis/demo/ns/ns.dis
+	/dis/demo/ns/runns
+	/dis/demo/odbc/odbcmnt.dis
+	/dis/demo/odbc/runodbc
+	/dis/demo/spree/spreeclient
+	/dis/demo/whiteboard/wbsrv.dis
+	/dis/demo/whiteboard/runwb
+	/dis/demo/whiteboard/whiteboard.dis
+	/dis/ar.dis
+	/dis/archfs.dis
+	/dis/auhdr.dis
+	/dis/auplay.dis
+	/dis/cleanname.dis
+	/dis/bind.dis
+	/dis/disk/calc.tab.dis
+	/dis/disk/fdisk.dis
+	/dis/disk/format.dis
+	/dis/disk/ftl.dis
+	/dis/disk/kfs.dis
+	/dis/disk/kfscmd.dis
+	/dis/disk/mbr.dis
+	/dis/disk/mkext.dis
+	/dis/disk/mkfs.dis
+	/dis/disk/pedit.dis
+	/dis/disk/prep.dis
+	/dis/bytes.dis
+	/dis/cal.dis
+	/dis/calc.dis
+	/dis/cat.dis
+	/dis/cd.dis
+	/dis/cddb.dis
+	/dis/ebook/cssparser.dis
+	/dis/ebook/cssfont.dis
+	/dis/ebook/mimeimage.dis
+	/dis/ebook/ebook.dis
+	/dis/ebook/oebpackage.dis
+	/dis/ebook/reader.dis
+	/dis/ebook/strmap.dis
+	/dis/ebook/stylesheet.dis
+	/dis/ebook/table.dis
+	/dis/ebook/units.dis
+	/dis/chgrp.dis
+	/dis/chmod.dis
+	/dis/comm.dis
+	/dis/cmp.dis
+	/dis/getauthinfo.dis
+	/dis/fs/bundle.dis
+	/dis/fs/and.dis
+	/dis/fs/mergewrite.dis
+	/dis/fs/chstat.dis
+	/dis/fs/compose.dis
+	/dis/fs/depth.dis
+	/dis/fs/entries.dis
+	/dis/fs/eval.dis
+	/dis/fs/exec.dis
+	/dis/fs/filter.dis
+	/dis/fs/ls.dis
+	/dis/fs/match.dis
+	/dis/fs/merge.dis
+	/dis/fs/unbundle.dis
+	/dis/fs/mode.dis
+	/dis/fs/not.dis
+	/dis/fs/or.dis
+	/dis/fs/path.dis
+	/dis/fs/pipe.dis
+	/dis/fs/print.dis
+	/dis/fs/proto.dis
+	/dis/fs/query.dis
+	/dis/fs/run.dis
+	/dis/fs/select.dis
+	/dis/fs/setroot.dis
+	/dis/fs/size.dis
+	/dis/fs/void.dis
+	/dis/fs/walk.dis
+	/dis/fs/write.dis
+	/dis/cook.dis
+	/dis/cp.dis
+	/dis/cprof.dis
+	/dis/cpu.dis
+	/dis/avr/burn.dis
+	/dis/crypt.dis
+	/dis/date.dis
+	/dis/dbfs.dis
+	/dis/dd.dis
+	/dis/dial.dis
+	/dis/diff.dis
+	/dis/disdep.dis
+	/dis/disdump.dis
+	/dis/dossrv.dis
+	/dis/du.dis
+	/dis/echo.dis
+	/dis/ed.dis
+	/dis/emuinit.dis
+	/dis/env.dis
+	/dis/export.dis
+	/dis/fc.dis
+	/dis/fcp.dis
+	/dis/fmt.dis
+	/dis/fortune.dis
+	/dis/freq.dis
+	/dis/fs.dis
+	/dis/ftest.dis
+	/dis/ftpfs.dis
+	/dis/itreplay.dis
+	/dis/gettar.dis
+	/dis/grep.dis
+	/dis/gunzip.dis
+	/dis/gzip.dis
+	/dis/idea.dis
+	/dis/import.dis
+	/dis/iostats.dis
+	/dis/itest.dis
+	/dis/man2html.dis
+	/dis/kill.dis
+	/dis/limbo.dis
+	/dis/listen.dis
+	/dis/lockfs.dis
+	/dis/logfile.dis
+	/dis/look.dis
+	/dis/charon/cookiesrv.dis
+	/dis/charon/build.dis
+	/dis/charon/chutils.dis
+	/dis/charon/ctype.dis
+	/dis/charon/date.dis
+	/dis/charon/event.dis
+	/dis/charon/file.dis
+	/dis/charon/ftp.dis
+	/dis/charon/gui.dis
+	/dis/charon/http.dis
+	/dis/charon/img.dis
+	/dis/charon/jscript.dis
+	/dis/charon/layout.dis
+	/dis/charon/lex.dis
+	/dis/charon/url.dis
+	/dis/ls.dis
+	/dis/lstar.dis
+	/dis/m4.dis
+	/dis/man2txt.dis
+	/dis/collab/clients/whiteboard.dis
+	/dis/collab/clients/chat.dis
+	/dis/collab/clients/poll.dis
+	/dis/collab/clients/poller.dis
+	/dis/collab/lib/messages.dis
+	/dis/collab/servers/memfssrv.dis
+	/dis/collab/servers/chatsrv.dis
+	/dis/collab/servers/wbsrv.dis
+	/dis/collab/servers/mpx.dis
+	/dis/collab/collabsrv.dis
+	/dis/collab/connect.dis
+	/dis/collab/proxy.dis
+	/dis/collab/srvmgr.dis
+	/dis/mc.dis
+	/dis/md5sum.dis
+	/dis/mdb.dis
+	/dis/memfs.dis
+	/dis/mk.dis
+	/dis/randpass.dis
+	/dis/metamorph.dis
+	/dis/mv.dis
+	/dis/mkdir.dis
+	/dis/mntgen.dis
+	/dis/mount.dis
+	/dis/mouse.dis
+	/dis/mprof.dis
+	/dis/netkey.dis
+	/dis/netstat.dis
+	/dis/newer.dis
+	/dis/ns.dis
+	/dis/nsbuild.dis
+	/dis/os.dis
+	/dis/p.dis
+	/dis/pause.dis
+	/dis/plumb.dis
+	/dis/plumber.dis
+	/dis/prof.dis
+	/dis/ps.dis
+	/dis/puttar.dis
+	/dis/pwd.dis
+	/dis/ramfile.dis
+	/dis/raw2iaf.dis
+	/dis/rawdbfs.dis
+	/dis/rcmd.dis
+	/dis/tee.dis
+	/dis/tr.dis
+	/dis/rdp.dis
+	/dis/read.dis
+	/dis/rioimport.dis
+	/dis/rm.dis
+	/dis/runas.dis
+	/dis/sed.dis
+	/dis/sendmail.dis
+	/dis/sha1sum.dis
+	/dis/auxi/cpuslave.dis
+	/dis/auxi/digest.dis
+	/dis/auxi/fpgaload.dis
+	/dis/auxi/mangaload.dis
+	/dis/auxi/pcmcia.dis
+	/dis/auxi/rdbgsrv.dis
+	/dis/auxi/rstyxd.dis
+	/dis/sleep.dis
+	/dis/sort.dis
+	/dis/src.dis
+	/dis/stack.dis
+	/dis/stackv.dis
+	/dis/stream.dis
+	/dis/strings.dis
+	/dis/styxchat.dis
+	/dis/styxlisten.dis
+	/dis/styxmon.dis
+	/dis/sum.dis
+	/dis/tail.dis
+	/dis/tarfs.dis
+	/dis/tclsh.dis
+	/dis/tcs.dis
+	/dis/telnet.dis
+	/dis/test.dis
+	/dis/time.dis
+	/dis/timestamp.dis
+	/dis/tkcmd.dis
+	/dis/tokenize.dis
+	/dis/touch.dis
+	/dis/touchcal.dis
+	/dis/trfs.dis
+	/dis/tsort.dis
+	/dis/unicode.dis
+	/dis/uniq.dis
+	/dis/units.dis
+	/dis/unmount.dis
+	/dis/uudecode.dis
+	/dis/uuencode.dis
+	/dis/vacfs.dis
+	/dis/vacget.dis
+	/dis/vacput.dis
+	/dis/wav2iaf.dis
+	/dis/wc.dis
+	/dis/webgrab.dis
+	/dis/wish.dis
+	/dis/acme/acme.dis
+	/dis/acme/buff.dis
+	/dis/acme/col.dis
+	/dis/acme/dat.dis
+	/dis/acme/disk.dis
+	/dis/acme/ecmd.dis
+	/dis/acme/edit.dis
+	/dis/acme/elog.dis
+	/dis/acme/exec.dis
+	/dis/acme/file.dis
+	/dis/acme/frame.dis
+	/dis/acme/fsys.dis
+	/dis/acme/graph.dis
+	/dis/acme/gui.dis
+	/dis/acme/look.dis
+	/dis/acme/regx.dis
+	/dis/acme/row.dis
+	/dis/acme/scrl.dis
+	/dis/acme/styxaux.dis
+	/dis/acme/text.dis
+	/dis/acme/time.dis
+	/dis/acme/util.dis
+	/dis/acme/wind.dis
+	/dis/acme/xfid.dis
+	/dis/auth/proto/authquery.dis
+	/dis/auth/proto/p9any.dis
+	/dis/auth/proto/pass.dis
+	/dis/auth/proto/rsa.dis
+	/dis/auth/changelogin.dis
+	/dis/auth/aescbc.dis
+	/dis/auth/ai2key.dis
+	/dis/auth/countersigner.dis
+	/dis/auth/convpasswd.dis
+	/dis/auth/createsignerkey.dis
+	/dis/auth/dsagen.dis
+	/dis/auth/factotum.dis
+	/dis/auth/feedkey.dis
+	/dis/auth/getpk.dis
+	/dis/auth/keyfs.dis
+	/dis/auth/keysrv.dis
+	/dis/auth/logind.dis
+	/dis/auth/mkauthinfo.dis
+	/dis/auth/passwd.dis
+	/dis/auth/rpc.dis
+	/dis/auth/rsagen.dis
+	/dis/auth/secstore.dis
+	/dis/auth/signer.dis
+	/dis/auth/verify.dis
+	/dis/wmexport.dis
+	/dis/wmimport.dis
+	/dis/xd.dis
+	/dis/xargs.dis
+	/dis/yacc.dis
+	/dis/zeros.dis
+
+bootdir
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
+	boot
--- /dev/null
+++ b/os/pc64/pc64.c
@@ -1,0 +1,140 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/error.h"
+#include "interp.h"
+
+#include "pc64.root.h"
+
+ulong ndevs = 29;	/* equals the declared size of devtab[] (29), not the 21 devices listed in it */
+extern Dev rootdevtab;
+extern Dev consdevtab;
+extern Dev archdevtab;
+extern Dev envdevtab;
+extern Dev mntdevtab;
+extern Dev pipedevtab;
+extern Dev progdevtab;
+extern Dev rtcdevtab;
+extern Dev srvdevtab;
+extern Dev dupdevtab;
+extern Dev ssldevtab;
+extern Dev capdevtab;
+extern Dev drawdevtab;
+extern Dev pointerdevtab;
+extern Dev vgadevtab;
+extern Dev ipdevtab;
+extern Dev etherdevtab;
+extern Dev audiodevtab;
+extern Dev uartdevtab;
+extern Dev sddevtab;
+extern Dev tinyfsdevtab;
+Dev* devtab[29]={	/* one entry per device in the "dev" section of the pc64 config, in the same order */
+	&rootdevtab,
+	&consdevtab,
+	&archdevtab,
+	&envdevtab,
+	&mntdevtab,
+	&pipedevtab,
+	&progdevtab,
+	&rtcdevtab,
+	&srvdevtab,
+	&dupdevtab,
+	&ssldevtab,
+	&capdevtab,
+	&drawdevtab,
+	&pointerdevtab,
+	&vgadevtab,
+	&ipdevtab,
+	&etherdevtab,
+	&audiodevtab,
+	&uartdevtab,
+	&sddevtab,
+	&tinyfsdevtab,
+	nil,	/* ends the configured devices; the 7 remaining slots are implicitly zero */
+};
+
+extern void ether2114xlink(void);
+extern void ethervirtiolink(void);
+void links(void){	/* calls the link routine of each driver enabled in the config's "link" section */
+	ether2114xlink();
+	ethervirtiolink();
+}
+
+extern void sysmodinit(void);
+extern void drawmodinit(void);
+extern void tkmodinit(void);
+extern void keyringmodinit(void);
+extern void mathmodinit(void);
+void modinit(void){	/* calls the init routine of each module in the config's "mod" section, in listed order */
+	sysmodinit();
+	drawmodinit();
+	tkmodinit();
+	keyringmodinit();
+	mathmodinit();
+}
+
+extern PCArch archgeneric;
+extern PCArch archmp;
+PCArch* knownarch[] = {	/* nil-terminated; entries come from the config's "misc" lines archgeneric and archmp */
+	&archgeneric,
+	&archmp,
+	nil,
+};
+
+#include "../port/sd.h"
+extern SDifc sdvirtioifc;
+SDifc* sdifc[] = {	/* nil-terminated; sdvirtio is the only sd* interface left enabled in the config */
+	&sdvirtioifc,
+	nil,
+};
+
+extern PhysUart i8250physuart;
+PhysUart* physuart[] = {	/* nil-terminated; entry comes from the config's "misc" line uarti8250 */
+	&i8250physuart,
+	nil,
+};
+
+#include <draw.h>
+#include <memdraw.h>
+#include "screen.h"
+extern VGAdev vgavesadev;
+VGAdev* vgadev[] = {	/* nil-terminated; vgavesa is the only VGA device driver enabled in the config */
+	&vgavesadev,
+	nil,
+};
+
+extern VGAcur vgasoftcur;
+VGAcur* vgacur[] = {	/* nil-terminated; entry comes from the config line "vgasoft =cur swcursor" */
+	&vgasoftcur,
+	nil,
+};
+
+#include "../ip/ip.h"
+extern void tcpinit(Fs*);
+extern void udpinit(Fs*);
+extern void ipifcinit(Fs*);
+extern void icmpinit(Fs*);
+extern void icmp6init(Fs*);
+extern void ipmuxinit(Fs*);
+void (*ipprotoinit[])(Fs*) = {	/* nil-terminated; one init function per protocol enabled in the config's "ip" section, same order */
+	tcpinit,
+	udpinit,
+	ipifcinit,
+	icmpinit,
+	icmp6init,
+	ipmuxinit,
+	nil,
+};
+
+	int kernel_pool_pcnt = 10;	/* this line and the six below match the config's "code" section verbatim */
+	int main_pool_pcnt = 40;
+	int heap_pool_pcnt = 20;
+	int image_pool_pcnt = 40;
+	int cflag=0;
+	int swcursor=0;
+	int consoleprint=1;
+char* conffile = "pc64";	/* same name as the configuration file this diff adds (os/pc64/pc64) */
+ulong kerndate = KERNDATE;	/* KERNDATE is not defined in this file; presumably supplied by the build -- confirm */
--- /dev/null
+++ b/os/pc64/pc64.root.h
@@ -1,0 +1,3141 @@
+extern unsigned char root14code[];
+extern int root14len;
+extern unsigned char root15code[];
+extern int root15len;
+extern unsigned char root16code[];
+extern int root16len;
+extern unsigned char root17code[];
+extern int root17len;
+extern unsigned char root18code[];
+extern int root18len;
+extern unsigned char root19code[];
+extern int root19len;
+extern unsigned char root20code[];
+extern int root20len;
+extern unsigned char root21code[];
+extern int root21len;
+extern unsigned char root22code[];
+extern int root22len;
+extern unsigned char root23code[];
+extern int root23len;
+extern unsigned char root24code[];
+extern int root24len;
+extern unsigned char root25code[];
+extern int root25len;
+extern unsigned char root26code[];
+extern int root26len;
+extern unsigned char root27code[];
+extern int root27len;
+extern unsigned char root28code[];
+extern int root28len;
+extern unsigned char root29code[];
+extern int root29len;
+extern unsigned char root30code[];
+extern int root30len;
+extern unsigned char root31code[];
+extern int root31len;
+extern unsigned char root34code[];
+extern int root34len;
+extern unsigned char root35code[];
+extern int root35len;
+extern unsigned char root37code[];
+extern int root37len;
+extern unsigned char root38code[];
+extern int root38len;
+extern unsigned char root39code[];
+extern int root39len;
+extern unsigned char root40code[];
+extern int root40len;
+extern unsigned char root41code[];
+extern int root41len;
+extern unsigned char root42code[];
+extern int root42len;
+extern unsigned char root43code[];
+extern int root43len;
+extern unsigned char root44code[];
+extern int root44len;
+extern unsigned char root45code[];
+extern int root45len;
+extern unsigned char root46code[];
+extern int root46len;
+extern unsigned char root47code[];
+extern int root47len;
+extern unsigned char root48code[];
+extern int root48len;
+extern unsigned char root49code[];
+extern int root49len;
+extern unsigned char root50code[];
+extern int root50len;
+extern unsigned char root51code[];
+extern int root51len;
+extern unsigned char root52code[];
+extern int root52len;
+extern unsigned char root54code[];
+extern int root54len;
+extern unsigned char root55code[];
+extern int root55len;
+extern unsigned char root56code[];
+extern int root56len;
+extern unsigned char root57code[];
+extern int root57len;
+extern unsigned char root58code[];
+extern int root58len;
+extern unsigned char root59code[];
+extern int root59len;
+extern unsigned char root60code[];
+extern int root60len;
+extern unsigned char root61code[];
+extern int root61len;
+extern unsigned char root62code[];
+extern int root62len;
+extern unsigned char root63code[];
+extern int root63len;
+extern unsigned char root64code[];
+extern int root64len;
+extern unsigned char root65code[];
+extern int root65len;
+extern unsigned char root66code[];
+extern int root66len;
+extern unsigned char root67code[];
+extern int root67len;
+extern unsigned char root68code[];
+extern int root68len;
+extern unsigned char root69code[];
+extern int root69len;
+extern unsigned char root70code[];
+extern int root70len;
+extern unsigned char root71code[];
+extern int root71len;
+extern unsigned char root72code[];
+extern int root72len;
+extern unsigned char root73code[];
+extern int root73len;
+extern unsigned char root74code[];
+extern int root74len;
+extern unsigned char root77code[];
+extern int root77len;
+extern unsigned char root78code[];
+extern int root78len;
+extern unsigned char root79code[];
+extern int root79len;
+extern unsigned char root80code[];
+extern int root80len;
+extern unsigned char root81code[];
+extern int root81len;
+extern unsigned char root82code[];
+extern int root82len;
+extern unsigned char root83code[];
+extern int root83len;
+extern unsigned char root84code[];
+extern int root84len;
+extern unsigned char root85code[];
+extern int root85len;
+extern unsigned char root86code[];
+extern int root86len;
+extern unsigned char root87code[];
+extern int root87len;
+extern unsigned char root88code[];
+extern int root88len;
+extern unsigned char root89code[];
+extern int root89len;
+extern unsigned char root91code[];
+extern int root91len;
+extern unsigned char root92code[];
+extern int root92len;
+extern unsigned char root93code[];
+extern int root93len;
+extern unsigned char root94code[];
+extern int root94len;
+extern unsigned char root95code[];
+extern int root95len;
+extern unsigned char root96code[];
+extern int root96len;
+extern unsigned char root97code[];
+extern int root97len;
+extern unsigned char root99code[];
+extern int root99len;
+extern unsigned char root100code[];
+extern int root100len;
+extern unsigned char root101code[];
+extern int root101len;
+extern unsigned char root102code[];
+extern int root102len;
+extern unsigned char root103code[];
+extern int root103len;
+extern unsigned char root104code[];
+extern int root104len;
+extern unsigned char root105code[];
+extern int root105len;
+extern unsigned char root106code[];
+extern int root106len;
+extern unsigned char root107code[];
+extern int root107len;
+extern unsigned char root108code[];
+extern int root108len;
+extern unsigned char root109code[];
+extern int root109len;
+extern unsigned char root110code[];
+extern int root110len;
+extern unsigned char root111code[];
+extern int root111len;
+extern unsigned char root113code[];
+extern int root113len;
+extern unsigned char root114code[];
+extern int root114len;
+extern unsigned char root115code[];
+extern int root115len;
+extern unsigned char root116code[];
+extern int root116len;
+extern unsigned char root118code[];
+extern int root118len;
+extern unsigned char root119code[];
+extern int root119len;
+extern unsigned char root120code[];
+extern int root120len;
+extern unsigned char root121code[];
+extern int root121len;
+extern unsigned char root123code[];
+extern int root123len;
+extern unsigned char root124code[];
+extern int root124len;
+extern unsigned char root125code[];
+extern int root125len;
+extern unsigned char root127code[];
+extern int root127len;
+extern unsigned char root128code[];
+extern int root128len;
+extern unsigned char root130code[];
+extern int root130len;
+extern unsigned char root131code[];
+extern int root131len;
+extern unsigned char root132code[];
+extern int root132len;
+extern unsigned char root134code[];
+extern int root134len;
+extern unsigned char root135code[];
+extern int root135len;
+extern unsigned char root136code[];
+extern int root136len;
+extern unsigned char root138code[];
+extern int root138len;
+extern unsigned char root139code[];
+extern int root139len;
+extern unsigned char root141code[];
+extern int root141len;
+extern unsigned char root142code[];
+extern int root142len;
+extern unsigned char root143code[];
+extern int root143len;
+extern unsigned char root144code[];
+extern int root144len;
+extern unsigned char root146code[];
+extern int root146len;
+extern unsigned char root147code[];
+extern int root147len;
+extern unsigned char root148code[];
+extern int root148len;
+extern unsigned char root150code[];
+extern int root150len;
+extern unsigned char root151code[];
+extern int root151len;
+extern unsigned char root152code[];
+extern int root152len;
+extern unsigned char root153code[];
+extern int root153len;
+extern unsigned char root155code[];
+extern int root155len;
+extern unsigned char root156code[];
+extern int root156len;
+extern unsigned char root157code[];
+extern int root157len;
+extern unsigned char root158code[];
+extern int root158len;
+extern unsigned char root159code[];
+extern int root159len;
+extern unsigned char root160code[];
+extern int root160len;
+extern unsigned char root161code[];
+extern int root161len;
+extern unsigned char root162code[];
+extern int root162len;
+extern unsigned char root163code[];
+extern int root163len;
+extern unsigned char root164code[];
+extern int root164len;
+extern unsigned char root165code[];
+extern int root165len;
+extern unsigned char root166code[];
+extern int root166len;
+extern unsigned char root167code[];
+extern int root167len;
+extern unsigned char root168code[];
+extern int root168len;
+extern unsigned char root169code[];
+extern int root169len;
+extern unsigned char root170code[];
+extern int root170len;
+extern unsigned char root171code[];
+extern int root171len;
+extern unsigned char root172code[];
+extern int root172len;
+extern unsigned char root173code[];
+extern int root173len;
+extern unsigned char root174code[];
+extern int root174len;
+extern unsigned char root175code[];
+extern int root175len;
+extern unsigned char root176code[];
+extern int root176len;
+extern unsigned char root177code[];
+extern int root177len;
+extern unsigned char root178code[];
+extern int root178len;
+extern unsigned char root179code[];
+extern int root179len;
+extern unsigned char root180code[];
+extern int root180len;
+extern unsigned char root181code[];
+extern int root181len;
+extern unsigned char root182code[];
+extern int root182len;
+extern unsigned char root183code[];
+extern int root183len;
+extern unsigned char root184code[];
+extern int root184len;
+extern unsigned char root185code[];
+extern int root185len;
+extern unsigned char root186code[];
+extern int root186len;
+extern unsigned char root187code[];
+extern int root187len;
+extern unsigned char root188code[];
+extern int root188len;
+extern unsigned char root189code[];
+extern int root189len;
+extern unsigned char root190code[];
+extern int root190len;
+extern unsigned char root191code[];
+extern int root191len;
+extern unsigned char root192code[];
+extern int root192len;
+extern unsigned char root193code[];
+extern int root193len;
+extern unsigned char root194code[];
+extern int root194len;
+extern unsigned char root195code[];
+extern int root195len;
+extern unsigned char root196code[];
+extern int root196len;
+extern unsigned char root197code[];
+extern int root197len;
+extern unsigned char root198code[];
+extern int root198len;
+extern unsigned char root199code[];
+extern int root199len;
+extern unsigned char root200code[];
+extern int root200len;
+extern unsigned char root201code[];
+extern int root201len;
+extern unsigned char root202code[];
+extern int root202len;
+extern unsigned char root203code[];
+extern int root203len;
+extern unsigned char root204code[];
+extern int root204len;
+extern unsigned char root205code[];
+extern int root205len;
+extern unsigned char root206code[];
+extern int root206len;
+extern unsigned char root207code[];
+extern int root207len;
+extern unsigned char root208code[];
+extern int root208len;
+extern unsigned char root209code[];
+extern int root209len;
+extern unsigned char root210code[];
+extern int root210len;
+extern unsigned char root211code[];
+extern int root211len;
+extern unsigned char root212code[];
+extern int root212len;
+extern unsigned char root213code[];
+extern int root213len;
+extern unsigned char root214code[];
+extern int root214len;
+extern unsigned char root215code[];
+extern int root215len;
+extern unsigned char root216code[];
+extern int root216len;
+extern unsigned char root217code[];
+extern int root217len;
+extern unsigned char root218code[];
+extern int root218len;
+extern unsigned char root219code[];
+extern int root219len;
+extern unsigned char root220code[];
+extern int root220len;
+extern unsigned char root221code[];
+extern int root221len;
+extern unsigned char root222code[];
+extern int root222len;
+extern unsigned char root223code[];
+extern int root223len;
+extern unsigned char root224code[];
+extern int root224len;
+extern unsigned char root225code[];
+extern int root225len;
+extern unsigned char root226code[];
+extern int root226len;
+extern unsigned char root227code[];
+extern int root227len;
+extern unsigned char root228code[];
+extern int root228len;
+extern unsigned char root229code[];
+extern int root229len;
+extern unsigned char root230code[];
+extern int root230len;
+extern unsigned char root231code[];
+extern int root231len;
+extern unsigned char root232code[];
+extern int root232len;
+extern unsigned char root233code[];
+extern int root233len;
+extern unsigned char root234code[];
+extern int root234len;
+extern unsigned char root235code[];
+extern int root235len;
+extern unsigned char root236code[];
+extern int root236len;
+extern unsigned char root237code[];
+extern int root237len;
+extern unsigned char root238code[];
+extern int root238len;
+extern unsigned char root239code[];
+extern int root239len;
+extern unsigned char root240code[];
+extern int root240len;
+extern unsigned char root241code[];
+extern int root241len;
+extern unsigned char root242code[];
+extern int root242len;
+extern unsigned char root243code[];
+extern int root243len;
+extern unsigned char root244code[];
+extern int root244len;
+extern unsigned char root245code[];
+extern int root245len;
+extern unsigned char root246code[];
+extern int root246len;
+extern unsigned char root247code[];
+extern int root247len;
+extern unsigned char root248code[];
+extern int root248len;
+extern unsigned char root249code[];
+extern int root249len;
+extern unsigned char root250code[];
+extern int root250len;
+extern unsigned char root251code[];
+extern int root251len;
+extern unsigned char root252code[];
+extern int root252len;
+extern unsigned char root253code[];
+extern int root253len;
+extern unsigned char root254code[];
+extern int root254len;
+extern unsigned char root255code[];
+extern int root255len;
+extern unsigned char root256code[];
+extern int root256len;
+extern unsigned char root257code[];
+extern int root257len;
+extern unsigned char root258code[];
+extern int root258len;
+extern unsigned char root259code[];
+extern int root259len;
+extern unsigned char root260code[];
+extern int root260len;
+extern unsigned char root261code[];
+extern int root261len;
+extern unsigned char root262code[];
+extern int root262len;
+extern unsigned char root263code[];
+extern int root263len;
+extern unsigned char root264code[];
+extern int root264len;
+extern unsigned char root265code[];
+extern int root265len;
+extern unsigned char root266code[];
+extern int root266len;
+extern unsigned char root267code[];
+extern int root267len;
+extern unsigned char root268code[];
+extern int root268len;
+extern unsigned char root269code[];
+extern int root269len;
+extern unsigned char root270code[];
+extern int root270len;
+extern unsigned char root271code[];
+extern int root271len;
+extern unsigned char root272code[];
+extern int root272len;
+extern unsigned char root273code[];
+extern int root273len;
+extern unsigned char root274code[];
+extern int root274len;
+extern unsigned char root275code[];
+extern int root275len;
+extern unsigned char root276code[];
+extern