ref: 29e68949b480d86710c7b436fa38580bbb90ab80
parent: 8d7e1908b930ab7e6364ae874fa1054775689cee
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Sun May 26 18:41:40 EDT 2013
keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format. we now always use the new FXSAVE format in FPsave structure and fpregs file, converting back and forth in fpx87save() and fpx87restore(). document that fprestore() is a destructive operation now. change fp register definition in libmach and adapt fpr() acid function. avoid unnecessary copy of fpstate and fpsave in sysfork(). functions including syscalls do not preserve the fp registers and copying fpstate from the current process would mean we had to fpsave(&up->fpsave); first. simply not doing it, new process starts in FPinit state.
--- a/sys/lib/acid/386
+++ b/sys/lib/acid/386
@@ -69,22 +69,14 @@
print("F5\t", *F5, "\n"); print("F6\t", *F6, "\n"); print("F7\t", *F7, "\n");- print("control\t", *fmt(E0, 'x'), "\n");- print("status\t", *fmt(E1, 'x'), "\n");- print("tag\t", *fmt(E2, 'x'), "\n");- print("ip offset\t", *fmt(E3, 'X'), "\n");- print("cs selector\t", *fmt(E4, 'x'), "\n");- print("opcode\t", *fmt(E4>>8, 'x'), "\n");- print("data operand offset\t", *fmt(E5, 'x'), "\n");- print("operand selector\t", *fmt(E6, 'x'), "\n");-}
-
-defn mmregs()
-{- print("MM0\t", *MM0, " MM1\t", *MM1, "\n");- print("MM2\t", *MM2, " MM3\t", *MM3, "\n");- print("MM4\t", *MM4, " MM5\t", *MM5, "\n");- print("MM6\t", *MM6, " MM7\t", *MM7, "\n");+ print("control\t", *FCW, "\n");+ print("status\t", *FSW, "\n");+ print("tag\t", *FTW, "\n");+ print("ip\t", *FIP, "\n");+ print("cs selector\t", *FCS, "\n");+ print("opcode\t", *FOP, "\n");+ print("data operand\t", *FDP, "\n");+ print("operand selector\t", *FDS, "\n");}
defn pstop(pid)
--- a/sys/src/9/pc/dat.h
+++ b/sys/src/9/pc/dat.h
@@ -76,10 +76,10 @@
ushort r3;
ulong pc;
ushort selector;
- ushort r4;
+ ushort opcode;
ulong operand;
ushort oselector;
- ushort r5;
+ ushort r4;
uchar regs[80]; /* floating point registers */
};
@@ -91,10 +91,10 @@
ushort fop; /* opcode */
ulong fpuip; /* pc */
ushort cs; /* pc segment */
- ushort r1; /* reserved */
+ ushort rsrvd1; /* reserved */
ulong fpudp; /* data pointer */
ushort ds; /* data pointer segment */
- ushort r2;
+ ushort rsrvd2;
ulong mxcsr; /* MXCSR register state */
ulong mxcsr_mask; /* MXCSR mask register */
uchar xregs[480]; /* extended registers */
--- a/sys/src/9/pc/fns.h
+++ b/sys/src/9/pc/fns.h
@@ -41,7 +41,9 @@
void fpssesave0(FPsave*);
ulong fpstatus(void);
void fpx87restore(FPsave*);
+void fpx87restore0(FPsave*);
void fpx87save(FPsave*);
+void fpx87save0(FPsave*);
ulong getcr0(void);
ulong getcr2(void);
ulong getcr3(void);
--- a/sys/src/9/pc/l.s
+++ b/sys/src/9/pc/l.s
@@ -657,13 +657,13 @@
WAIT
RET
-TEXT fpx87save(SB), $0 /* save state and disable */
+TEXT fpx87save0(SB), $0 /* save state and disable */
MOVL p+0(FP), AX
FSAVE 0(AX) /* no WAIT */
FPOFF(l2)
RET
-TEXT fpx87restore(SB), $0 /* enable and restore state */
+TEXT fpx87restore0(SB), $0 /* enable and restore state */
FPON
MOVL p+0(FP), AX
FRSTOR 0(AX)
--- a/sys/src/9/pc/main.c
+++ b/sys/src/9/pc/main.c
@@ -469,6 +469,121 @@
}
}
+/*
+ * we keep FPsave structure in sse format emulating FXSAVE / FXRSTOR
+ * instructions for legacy x87 fpu.
+ *
+ * Note that fpx87restore() and fpsserestore() do modify the FPsave
+ * data structure for conversion / realignment shuffling. This means
+ * that p->fpsave is only valid when p->fpstate == FPinactive.
+ */
+void
+fpx87save(FPsave *fps)
+{+ fpx87save0(fps);
+
+ /* NOP fps->fcw = fps->control; */
+ fps->fsw = fps->status;
+ fps->ftw = fps->tag;
+ fps->fop = fps->opcode;
+ fps->fpuip = fps->pc;
+ fps->cs = fps->selector;
+ fps->fpudp = fps->operand;
+ fps->ds = fps->oselector;
+
+#define MOVA(d,s) \
+ *((ushort*)(d+8)) = *((ushort*)(s+8)), \
+ *((ulong*)(d+4)) = *((ulong*)(s+4)), \
+ *((ulong*)(d)) = *((ulong*)(s))
+
+ MOVA(fps->xregs+0x70, fps->regs+70);
+ MOVA(fps->xregs+0x60, fps->regs+60);
+ MOVA(fps->xregs+0x50, fps->regs+50);
+ MOVA(fps->xregs+0x40, fps->regs+40);
+ MOVA(fps->xregs+0x30, fps->regs+30);
+ MOVA(fps->xregs+0x20, fps->regs+20);
+ MOVA(fps->xregs+0x10, fps->regs+10);
+ MOVA(fps->xregs+0x00, fps->regs+00);
+
+#undef MOVA
+
+#define CLR6(d) \
+ *((ulong*)(d)) = 0, \
+ *((ushort*)(d+4)) = 0
+
+ CLR6(fps->xregs+0x70+10);
+ CLR6(fps->xregs+0x60+10);
+ CLR6(fps->xregs+0x50+10);
+ CLR6(fps->xregs+0x40+10);
+ CLR6(fps->xregs+0x30+10);
+ CLR6(fps->xregs+0x20+10);
+ CLR6(fps->xregs+0x10+10);
+ CLR6(fps->xregs+0x00+10);
+
+#undef CLR6
+
+ fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
+}
+
+void
+fpx87restore(FPsave *fps)
+{+#define MOVA(d,s) \
+ *((ulong*)(d)) = *((ulong*)(s)), \
+ *((ulong*)(d+4)) = *((ulong*)(s+4)), \
+ *((ushort*)(d+8)) = *((ushort*)(s+8))
+
+ MOVA(fps->regs+00, fps->xregs+0x00);
+ MOVA(fps->regs+10, fps->xregs+0x10);
+ MOVA(fps->regs+20, fps->xregs+0x20);
+ MOVA(fps->regs+30, fps->xregs+0x30);
+ MOVA(fps->regs+40, fps->xregs+0x40);
+ MOVA(fps->regs+50, fps->xregs+0x50);
+ MOVA(fps->regs+60, fps->xregs+0x60);
+ MOVA(fps->regs+70, fps->xregs+0x70);
+
+#undef MOVA
+
+ fps->oselector = fps->ds;
+ fps->operand = fps->fpudp;
+ fps->opcode = (fps->fop & 0x7ff);
+ fps->selector = fps->cs;
+ fps->pc = fps->fpuip;
+ fps->tag = fps->ftw;
+ fps->status = fps->fsw;
+ /* NOP fps->control = fps->fcw; */
+
+ fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
+
+ fpx87restore0(fps);
+}
+
+/*
+ * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
+ * so we shuffle the data up and down as needed or make copies.
+ */
+void
+fpssesave(FPsave *fps)
+{+ FPsave *afps;
+
+ afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+ fpssesave0(afps);
+ if(fps != afps) /* not aligned? shuffle down from aligned buffer */
+ memmove(fps, afps, sizeof(FPssestate) - FPalign);
+}
+
+void
+fpsserestore(FPsave *fps)
+{+ FPsave *afps;
+
+ afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
+ if(fps != afps) /* shuffle up to make aligned */
+ memmove(afps, fps, sizeof(FPssestate) - FPalign);
+ fpsserestore0(afps);
+}
+
static char* mathmsg[] =
{nil, /* handled below */
@@ -511,61 +626,6 @@
}
/*
- * sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
- * so we shuffle the data up and down as needed or make copies.
- */
-void
-fpssesave(FPsave *fps)
-{- FPsave *afps;
-
- afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
- fpssesave0(afps);
- if(fps != afps) /* not aligned? shuffle down from aligned buffer */
- memmove(fps, afps, sizeof(FPssestate) - FPalign);
-}
-
-void
-fpsserestore(FPsave *fps)
-{- FPsave *afps;
-
- afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
- if(fps != afps) /* shuffle up to make aligned */
- memmove(afps, fps, sizeof(FPssestate) - FPalign);
- fpsserestore0(afps);
- if(fps != afps) /* shuffle regs back down when unaligned */
- memmove(fps, afps, sizeof(FPssestate) - FPalign);
-}
-
-/*
- * extract control, status and fppc from process
- * floating point state independent of format.
- */
-static void
-mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
-{- ulong sts, fpc, ctl;
- FPsave *f = &up->fpsave;
-
- if(fpsave == fpx87save){- sts = f->status;
- fpc = f->pc;
- ctl = f->control;
- } else {- sts = f->fsw;
- fpc = f->fpuip;
- ctl = f->fcw;
- }
- if(stsp)
- *stsp = sts;
- if(pcp)
- *pcp = fpc;
- if(ctlp)
- *ctlp = ctl;
-}
-
-/*
* math coprocessor error
*/
static void
@@ -591,7 +651,7 @@
static void
mathemu(Ureg *ureg, void*)
{- ulong status, control, pc;
+ ulong status, control;
if(up->fpstate & FPillegal){/* someone did floating point in a note handler */
@@ -611,9 +671,10 @@
* More attention should probably be paid here to the
* exception masks and error summary.
*/
- mathstate(&status, &pc, &control);
+ status = up->fpsave.fsw;
+ control = up->fpsave.fcw;
if((status & ~control) & 0x07F){- mathnote(status, pc);
+ mathnote(status, up->fpsave.fpuip);
break;
}
fprestore(&up->fpsave);
--- a/sys/src/9/port/sysproc.c
+++ b/sys/src/9/port/sysproc.c
@@ -84,7 +84,6 @@
p = newproc();
- p->fpsave = up->fpsave;
p->scallnr = up->scallnr;
p->s = up->s;
p->nerrlab = 0;
@@ -180,7 +179,6 @@
if((flag&RFNOTEG) == 0)
p->noteid = up->noteid;
- p->fpstate = up->fpstate;
pid = p->pid;
memset(p->time, 0, sizeof(p->time));
p->time[TReal] = MACHP(0)->ticks;
--- a/sys/src/libmach/8.c
+++ b/sys/src/libmach/8.c
@@ -14,9 +14,11 @@
#define AX REGOFF(ax)
#define REGSIZE sizeof(struct Ureg)
+#define FP_CTLS(x) (REGSIZE+2*(x))
#define FP_CTL(x) (REGSIZE+4*(x))
-#define FP_REG(x) (FP_CTL(7)+10*(x))
-#define FPREGSIZE (7*4+8*10)
+#define FP_REG(x) (FP_CTL(8)+16*(x))
+#define XM_REG(x) (FP_CTL(8)+8*16+16*(x))
+#define FPREGSIZE 512
Reglist i386reglist[] = { {"DI", REGOFF(di), RINT, 'X'},@@ -38,21 +40,44 @@
{"SP", SP, RINT, 'X'}, {"SS", REGOFF(ss), RINT, 'X'},- {"E0", FP_CTL(0), RFLT, 'X'},- {"E1", FP_CTL(1), RFLT, 'X'},- {"E2", FP_CTL(2), RFLT, 'X'},- {"E3", FP_CTL(3), RFLT, 'X'},- {"E4", FP_CTL(4), RFLT, 'X'},- {"E5", FP_CTL(5), RFLT, 'X'},- {"E6", FP_CTL(6), RFLT, 'X'},- {"F0", FP_REG(0), RFLT, '3'},- {"F1", FP_REG(1), RFLT, '3'},- {"F2", FP_REG(2), RFLT, '3'},- {"F3", FP_REG(3), RFLT, '3'},- {"F4", FP_REG(4), RFLT, '3'},- {"F5", FP_REG(5), RFLT, '3'},- {"F6", FP_REG(6), RFLT, '3'},- {"F7", FP_REG(7), RFLT, '3'},+ {"FCW", FP_CTLS(0), RFLT, 'x'},+ {"FSW", FP_CTLS(1), RFLT, 'x'},+ {"FTW", FP_CTLS(2), RFLT, 'b'},+ {"FOP", FP_CTLS(3), RFLT, 'x'},+ {"FIP", FP_CTL(2), RFLT, 'X'},+ {"FCS", FP_CTLS(6), RFLT, 'x'},+ {"FDP", FP_CTL(4), RFLT, 'X'},+ {"FDS", FP_CTLS(10), RFLT, 'x'},+ {"MXCSR", FP_CTL(6), RFLT, 'X'},+ {"MXCSRMASK", FP_CTL(7), RFLT, 'X'},+
+ {"M0", FP_REG(0), RFLT, 'F'}, /* assumes double */+ {"M1", FP_REG(1), RFLT, 'F'},+ {"M2", FP_REG(2), RFLT, 'F'},+ {"M3", FP_REG(3), RFLT, 'F'},+ {"M4", FP_REG(4), RFLT, 'F'},+ {"M5", FP_REG(5), RFLT, 'F'},+ {"M6", FP_REG(6), RFLT, 'F'},+ {"M7", FP_REG(7), RFLT, 'F'},+
+ {"X0", XM_REG(0), RFLT, 'F'}, /* assumes double */+ {"X1", XM_REG(1), RFLT, 'F'},+ {"X2", XM_REG(2), RFLT, 'F'},+ {"X3", XM_REG(3), RFLT, 'F'},+ {"X4", XM_REG(4), RFLT, 'F'},+ {"X5", XM_REG(5), RFLT, 'F'},+ {"X6", XM_REG(6), RFLT, 'F'},+ {"X7", XM_REG(7), RFLT, 'F'},+
+ {"F0", FP_REG(7), RFLT, '3'},+ {"F1", FP_REG(6), RFLT, '3'},+ {"F2", FP_REG(5), RFLT, '3'},+ {"F3", FP_REG(4), RFLT, '3'},+ {"F4", FP_REG(3), RFLT, '3'},+ {"F5", FP_REG(2), RFLT, '3'},+ {"F6", FP_REG(1), RFLT, '3'},+ {"F7", FP_REG(0), RFLT, '3'},+
{ 0 }};
--
⑨