git: 9front

Download patch

ref: 29e68949b480d86710c7b436fa38580bbb90ab80
parent: 8d7e1908b930ab7e6364ae874fa1054775689cee
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Sun May 26 18:41:40 EDT 2013

keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format

we now always use the new FXSAVE format in FPsave structure and fpregs
file, converting back and forth in fpx87save() and fpx87restore().

document that fprestore() is a destructive operation now.

change fp register definition in libmach and adapt fpr() acid function.

avoid unnecessary copy of fpstate and fpsave in sysfork(). functions
including syscalls do not preserve the fp registers, and copying fpstate
from the current process would mean we had to fpsave(&up->fpsave) first.
so we simply do not copy it; the new process starts in FPinit state.

--- a/sys/lib/acid/386
+++ b/sys/lib/acid/386
@@ -69,22 +69,14 @@
 	print("F5\t",  *F5, "\n");
 	print("F6\t",  *F6, "\n");
 	print("F7\t",  *F7, "\n");
-	print("control\t", *fmt(E0, 'x'), "\n");
-	print("status\t", *fmt(E1, 'x'), "\n");
-	print("tag\t", *fmt(E2, 'x'), "\n");
-	print("ip offset\t", *fmt(E3, 'X'), "\n");
-	print("cs selector\t", *fmt(E4, 'x'), "\n");
-	print("opcode\t", *fmt(E4>>8, 'x'), "\n");
-	print("data operand offset\t", *fmt(E5, 'x'), "\n");
-	print("operand selector\t", *fmt(E6, 'x'), "\n");
-}
-
-defn mmregs()
-{
-	print("MM0\t", *MM0, " MM1\t", *MM1, "\n");
-	print("MM2\t", *MM2, " MM3\t", *MM3, "\n");
-	print("MM4\t", *MM4, " MM5\t", *MM5, "\n");
-	print("MM6\t", *MM6, " MM7\t", *MM7, "\n");
+	print("control\t", *FCW, "\n");
+	print("status\t", *FSW, "\n");
+	print("tag\t", *FTW, "\n");
+	print("ip\t", *FIP, "\n");
+	print("cs selector\t", *FCS, "\n");
+	print("opcode\t", *FOP, "\n");
+	print("data operand\t", *FDP, "\n");
+	print("operand selector\t", *FDS, "\n");
 }
 
 defn pstop(pid)
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -76,10 +76,10 @@
 	ushort	r3;
 	ulong	pc;
 	ushort	selector;
-	ushort	r4;
+	ushort	opcode;
 	ulong	operand;
 	ushort	oselector;
-	ushort	r5;
+	ushort	r4;
 	uchar	regs[80];	/* floating point registers */
 };
 
@@ -91,10 +91,10 @@
 	ushort	fop;		/* opcode */
 	ulong	fpuip;		/* pc */
 	ushort	cs;		/* pc segment */
-	ushort	r1;		/* reserved */
+	ushort	rsrvd1;		/* reserved */
 	ulong	fpudp;		/* data pointer */
 	ushort	ds;		/* data pointer segment */
-	ushort	r2;
+	ushort	rsrvd2;
 	ulong	mxcsr;		/* MXCSR register state */
 	ulong	mxcsr_mask;	/* MXCSR mask register */
 	uchar	xregs[480];	/* extended registers */
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -41,7 +41,9 @@
 void	fpssesave0(FPsave*);
 ulong	fpstatus(void);
 void	fpx87restore(FPsave*);
+void	fpx87restore0(FPsave*);
 void	fpx87save(FPsave*);
+void	fpx87save0(FPsave*);
 ulong	getcr0(void);
 ulong	getcr2(void);
 ulong	getcr3(void);
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -657,13 +657,13 @@
 	WAIT
 	RET
 
-TEXT fpx87save(SB), $0				/* save state and disable */
+TEXT fpx87save0(SB), $0				/* save state and disable */
 	MOVL	p+0(FP), AX
 	FSAVE	0(AX)				/* no WAIT */
 	FPOFF(l2)
 	RET
 
-TEXT fpx87restore(SB), $0				/* enable and restore state */
+TEXT fpx87restore0(SB), $0				/* enable and restore state */
 	FPON
 	MOVL	p+0(FP), AX
 	FRSTOR	0(AX)
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -469,6 +469,121 @@
 	}
 }
 
+/*
+ * we keep FPsave structure in sse format emulating FXSAVE / FXRSTOR
+ * instructions for legacy x87 fpu.
+ *
+ * Note that fpx87restore() and fpsserestore() do modify the FPsave
+ * data structure for conversion / realignment shuffling. this means
+ * that p->fpsave is only valid when p->fpstate == FPinactive.
+ */
+void
+fpx87save(FPsave *fps)
+{
+	fpx87save0(fps);
+
+	/* NOP fps->fcw = fps->control; */
+	fps->fsw = fps->status;
+	fps->ftw = fps->tag;
+	fps->fop = fps->opcode;
+	fps->fpuip = fps->pc;
+	fps->cs = fps->selector;
+	fps->fpudp = fps->operand;
+	fps->ds = fps->oselector;
+
+#define MOVA(d,s) \
+	*((ushort*)(d+8)) = *((ushort*)(s+8)), \
+	*((ulong*)(d+4)) = *((ulong*)(s+4)), \
+	*((ulong*)(d)) = *((ulong*)(s))
+
+	MOVA(fps->xregs+0x70, fps->regs+70);
+	MOVA(fps->xregs+0x60, fps->regs+60);
+	MOVA(fps->xregs+0x50, fps->regs+50);
+	MOVA(fps->xregs+0x40, fps->regs+40);
+	MOVA(fps->xregs+0x30, fps->regs+30);
+	MOVA(fps->xregs+0x20, fps->regs+20);
+	MOVA(fps->xregs+0x10, fps->regs+10);
+	MOVA(fps->xregs+0x00, fps->regs+00);
+
+#undef MOVA
+
+#define CLR6(d)	\
+	*((ulong*)(d)) = 0, \
+	*((ushort*)(d+4)) = 0
+
+	CLR6(fps->xregs+0x70+10);
+	CLR6(fps->xregs+0x60+10);
+	CLR6(fps->xregs+0x50+10);
+	CLR6(fps->xregs+0x40+10);
+	CLR6(fps->xregs+0x30+10);
+	CLR6(fps->xregs+0x20+10);
+	CLR6(fps->xregs+0x10+10);
+	CLR6(fps->xregs+0x00+10);
+
+#undef CLR6
+
+	fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
+}
+
+void
+fpx87restore(FPsave *fps)
+{
+#define MOVA(d,s) \
+	*((ulong*)(d)) = *((ulong*)(s)), \
+	*((ulong*)(d+4)) = *((ulong*)(s+4)), \
+	*((ushort*)(d+8)) = *((ushort*)(s+8))
+
+	MOVA(fps->regs+00, fps->xregs+0x00);
+	MOVA(fps->regs+10, fps->xregs+0x10);
+	MOVA(fps->regs+20, fps->xregs+0x20);
+	MOVA(fps->regs+30, fps->xregs+0x30);
+	MOVA(fps->regs+40, fps->xregs+0x40);
+	MOVA(fps->regs+50, fps->xregs+0x50);
+	MOVA(fps->regs+60, fps->xregs+0x60);
+	MOVA(fps->regs+70, fps->xregs+0x70);
+
+#undef MOVA
+
+	fps->oselector = fps->ds;
+	fps->operand = fps->fpudp;
+	fps->opcode = (fps->fop & 0x7ff);
+	fps->selector = fps->cs;
+	fps->pc = fps->fpuip;
+	fps->tag = fps->ftw;
+	fps->status = fps->fsw;
+	/* NOP fps->control = fps->fcw;  */
+
+	fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
+
+	fpx87restore0(fps);
+}
+
+/*
+ * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
+ * so we shuffle the data up and down as needed or make copies.
+ */
+void
+fpssesave(FPsave *fps)
+{
+	FPsave *afps;
+
+	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+	fpssesave0(afps);
+	if(fps != afps)  /* not aligned? shuffle down from aligned buffer */
+		memmove(fps, afps, sizeof(FPssestate) - FPalign);
+}
+
+void
+fpsserestore(FPsave *fps)
+{
+	FPsave *afps;
+
+	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+	if(fps != afps)  /* shuffle up to make aligned */
+		memmove(afps, fps, sizeof(FPssestate) - FPalign);
+	fpsserestore0(afps);
+}
+
 static char* mathmsg[] =
 {
 	nil,	/* handled below */
@@ -511,61 +626,6 @@
 }
 
 /*
- * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
- * so we shuffle the data up and down as needed or make copies.
- */
-void
-fpssesave(FPsave *fps)
-{
-	FPsave *afps;
-
-	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
-	fpssesave0(afps);
-	if(fps != afps)  /* not aligned? shuffle down from aligned buffer */
-		memmove(fps, afps, sizeof(FPssestate) - FPalign);
-}
-
-void
-fpsserestore(FPsave *fps)
-{
-	FPsave *afps;
-
-	afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
-	if(fps != afps)  /* shuffle up to make aligned */
-		memmove(afps, fps, sizeof(FPssestate) - FPalign);
-	fpsserestore0(afps);
-	if(fps != afps)  /* shuffle regs back down when unaligned */
-		memmove(fps, afps, sizeof(FPssestate) - FPalign);
-}
-
-/*
- * extract control, status and fppc from process
- * floating point state independent of format.
- */
-static void
-mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
-{
-	ulong sts, fpc, ctl;
-	FPsave *f = &up->fpsave;
-
-	if(fpsave == fpx87save){
-		sts = f->status;
-		fpc = f->pc;
-		ctl = f->control;
-	} else {
-		sts = f->fsw;
-		fpc = f->fpuip;
-		ctl = f->fcw;
-	}
-	if(stsp)
-		*stsp = sts;
-	if(pcp)
-		*pcp = fpc;
-	if(ctlp)
-		*ctlp = ctl;
-}
-
-/*
  *  math coprocessor error
  */
 static void
@@ -591,7 +651,7 @@
 static void
 mathemu(Ureg *ureg, void*)
 {
-	ulong status, control, pc;
+	ulong status, control;
 
 	if(up->fpstate & FPillegal){
 		/* someone did floating point in a note handler */
@@ -611,9 +671,10 @@
 		 * More attention should probably be paid here to the
 		 * exception masks and error summary.
 		 */
-		mathstate(&status, &pc, &control);
+		status = up->fpsave.fsw;
+		control = up->fpsave.fcw;
 		if((status & ~control) & 0x07F){
-			mathnote(status, pc);
+			mathnote(status, up->fpsave.fpuip);
 			break;
 		}
 		fprestore(&up->fpsave);
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -84,7 +84,6 @@
 
 	p = newproc();
 
-	p->fpsave = up->fpsave;
 	p->scallnr = up->scallnr;
 	p->s = up->s;
 	p->nerrlab = 0;
@@ -180,7 +179,6 @@
 	if((flag&RFNOTEG) == 0)
 		p->noteid = up->noteid;
 
-	p->fpstate = up->fpstate;
 	pid = p->pid;
 	memset(p->time, 0, sizeof(p->time));
 	p->time[TReal] = MACHP(0)->ticks;
--- a/sys/src/libmach/8.c
+++ b/sys/src/libmach/8.c
@@ -14,9 +14,11 @@
 #define	AX		REGOFF(ax)
 
 #define	REGSIZE		sizeof(struct Ureg)
+#define FP_CTLS(x)	(REGSIZE+2*(x))
 #define FP_CTL(x)	(REGSIZE+4*(x))
-#define FP_REG(x)	(FP_CTL(7)+10*(x))
-#define	FPREGSIZE	(7*4+8*10)
+#define FP_REG(x)	(FP_CTL(8)+16*(x))
+#define XM_REG(x)	(FP_CTL(8)+8*16+16*(x))
+#define	FPREGSIZE	512
 
 Reglist i386reglist[] = {
 	{"DI",		REGOFF(di),	RINT, 'X'},
@@ -38,21 +40,44 @@
 	{"SP",		SP,		RINT, 'X'},
 	{"SS",		REGOFF(ss),	RINT, 'X'},
 
-	{"E0",		FP_CTL(0),	RFLT, 'X'},
-	{"E1",		FP_CTL(1),	RFLT, 'X'},
-	{"E2",		FP_CTL(2),	RFLT, 'X'},
-	{"E3",		FP_CTL(3),	RFLT, 'X'},
-	{"E4",		FP_CTL(4),	RFLT, 'X'},
-	{"E5",		FP_CTL(5),	RFLT, 'X'},
-	{"E6",		FP_CTL(6),	RFLT, 'X'},
-	{"F0",		FP_REG(0),	RFLT, '3'},
-	{"F1",		FP_REG(1),	RFLT, '3'},
-	{"F2",		FP_REG(2),	RFLT, '3'},
-	{"F3",		FP_REG(3),	RFLT, '3'},
-	{"F4",		FP_REG(4),	RFLT, '3'},
-	{"F5",		FP_REG(5),	RFLT, '3'},
-	{"F6",		FP_REG(6),	RFLT, '3'},
-	{"F7",		FP_REG(7),	RFLT, '3'},
+	{"FCW",		FP_CTLS(0),	RFLT, 'x'},
+	{"FSW",		FP_CTLS(1),	RFLT, 'x'},
+	{"FTW",		FP_CTLS(2),	RFLT, 'b'},
+	{"FOP",		FP_CTLS(3),	RFLT, 'x'},
+	{"FIP",		FP_CTL(2),	RFLT, 'X'},
+	{"FCS",		FP_CTLS(6),	RFLT, 'x'},
+	{"FDP",		FP_CTL(4),	RFLT, 'X'},
+	{"FDS",		FP_CTLS(10),	RFLT, 'x'},
+	{"MXCSR",	FP_CTL(6),	RFLT, 'X'},
+	{"MXCSRMASK",	FP_CTL(7),	RFLT, 'X'},
+
+	{"M0",		FP_REG(0),	RFLT, 'F'},	/* assumes double */
+	{"M1",		FP_REG(1),	RFLT, 'F'},
+	{"M2",		FP_REG(2),	RFLT, 'F'},
+	{"M3",		FP_REG(3),	RFLT, 'F'},
+	{"M4",		FP_REG(4),	RFLT, 'F'},
+	{"M5",		FP_REG(5),	RFLT, 'F'},
+	{"M6",		FP_REG(6),	RFLT, 'F'},
+	{"M7",		FP_REG(7),	RFLT, 'F'},
+
+	{"X0",		XM_REG(0),	RFLT, 'F'},	/* assumes double */
+	{"X1",		XM_REG(1),	RFLT, 'F'},
+	{"X2",		XM_REG(2),	RFLT, 'F'},
+	{"X3",		XM_REG(3),	RFLT, 'F'},
+	{"X4",		XM_REG(4),	RFLT, 'F'},
+	{"X5",		XM_REG(5),	RFLT, 'F'},
+	{"X6",		XM_REG(6),	RFLT, 'F'},
+	{"X7",		XM_REG(7),	RFLT, 'F'},
+
+	{"F0",		FP_REG(7),	RFLT, '3'},
+	{"F1",		FP_REG(6),	RFLT, '3'},
+	{"F2",		FP_REG(5),	RFLT, '3'},
+	{"F3",		FP_REG(4),	RFLT, '3'},
+	{"F4",		FP_REG(3),	RFLT, '3'},
+	{"F5",		FP_REG(2),	RFLT, '3'},
+	{"F6",		FP_REG(1),	RFLT, '3'},
+	{"F7",		FP_REG(0),	RFLT, '3'},
+
 	{  0 }
 };
 
--