ref: 1f89efba4ad60c8c794e7e78a3f1bd4ece8c9540
parent: f20373b1bf87d26f1f261ac968f8760c51f37df0
author: jpathy <jpathy@mail.nanosouffle.net>
date: Tue May 21 19:15:13 EDT 2013
add 6(a|l) sse support to 8(a|l)
--- a/sys/src/cmd/8a/a.y
+++ b/sys/src/cmd/8a/a.y
@@ -20,9 +20,9 @@
%left '+' '-'
%left '*' '/' '%'
%token <lval> LTYPE0 LTYPE1 LTYPE2 LTYPE3 LTYPE4
-%token <lval> LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEG
+%token <lval> LTYPEC LTYPED LTYPEN LTYPER LTYPET LTYPES LTYPEM LTYPEI LTYPEG LTYPEXC LTYPEX
%token <lval> LCONST LFP LPC LSB
-%token <lval> LBREG LLREG LSREG LFREG
+%token <lval> LBREG LLREG LSREG LFREG LMREG LXREG
%token <dval> LFCONST
%token <sval> LSCONST LSP
%token <sym> LNAME LLAB LVAR
@@ -30,7 +30,7 @@
%type <con2> con2
%type <gen> mem imm imm2 reg nam rel rem rim rom omem nmem
%type <gen2> nonnon nonrel nonrem rimnon rimrem remrim
-%type <gen2> spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8
+%type <gen2> spec1 spec2 spec3 spec4 spec5 spec6 spec7 spec8 spec9 spec10
%%
prog:
| prog line
@@ -79,6 +79,8 @@
| LTYPEM spec6 { outcode($1, &$2); } | LTYPEI spec7 { outcode($1, &$2); } | LTYPEG spec8 { outcode($1, &$2); }+| LTYPEXC spec9 { outcode($1, &$2); }+| LTYPEX spec10 { outcode($1, &$2); }nonnon:
{@@ -237,7 +239,24 @@
$$.from.scale = $3;
$$.to = $5;
}
+spec9: /* CMPPS/CMPPD */
+ reg ',' rem ',' con
 {+ $$.from = $1;
 $$.to = $3;
 $$.to.offset = $5;	/* imm8 predicate: 8l's Zm_r_i_xm emits p->to.offset, as spec10 does for shuffles */
+ }
+spec10: /* shufl */
+ imm ',' rem ',' reg
+ {+ $$.from = $3;
+ $$.to = $5;
+ if($1.type != D_CONST)
+ yyerror("illegal constant");+ $$.to.offset = $1.offset;
+ }
+
rem:
reg
| mem
@@ -301,6 +320,11 @@
$$ = nullgen;
$$.type = $1;
}
+| LMREG
+ {+ $$ = nullgen;
+ $$.type = $1;
+ }
| LSP
{$$ = nullgen;
@@ -307,6 +331,11 @@
$$.type = D_SP;
}
| LSREG
+ {+ $$ = nullgen;
+ $$.type = $1;
+ }
+| LXREG
{$$ = nullgen;
$$.type = $1;
--- a/sys/src/cmd/8a/lex.c
+++ b/sys/src/cmd/8a/lex.c
@@ -192,6 +192,24 @@
"F6", LFREG, D_F0+6,
"F7", LFREG, D_F0+7,
+ "M0", LMREG, D_M0+0,
+ "M1", LMREG, D_M0+1,
+ "M2", LMREG, D_M0+2,
+ "M3", LMREG, D_M0+3,
+ "M4", LMREG, D_M0+4,
+ "M5", LMREG, D_M0+5,
+ "M6", LMREG, D_M0+6,
+ "M7", LMREG, D_M0+7,
+
+ "X0", LXREG, D_X0+0,
+ "X1", LXREG, D_X0+1,
+ "X2", LXREG, D_X0+2,
+ "X3", LXREG, D_X0+3,
+ "X4", LXREG, D_X0+4,
+ "X5", LXREG, D_X0+5,
+ "X6", LXREG, D_X0+6,
+ "X7", LXREG, D_X0+7,
+
"CS", LSREG, D_CS,
"SS", LSREG, D_SS,
"DS", LSREG, D_DS,
@@ -277,6 +295,7 @@
"CMPXCHGB", LTYPE3, ACMPXCHGB,
"CMPXCHGL", LTYPE3, ACMPXCHGL,
"CMPXCHGW", LTYPE3, ACMPXCHGW,
+ "CPUID", LTYPE0, ACPUID,
"DAA", LTYPE0, ADAA,
"DAS", LTYPE0, ADAS,
"DATA", LTYPED, ADATA,
@@ -638,6 +657,188 @@
"FXTRACT", LTYPE0, AFXTRACT,
"FYL2X", LTYPE0, AFYL2X,
"FYL2XP1", LTYPE0, AFYL2XP1,
+
+ "ADDPD", LTYPE3, AADDPD,
+ "ADDPS", LTYPE3, AADDPS,
+ "ADDSD", LTYPE3, AADDSD,
+ "ADDSS", LTYPE3, AADDSS,
+ "ANDNPD", LTYPE3, AANDNPD,
+ "ANDNPS", LTYPE3, AANDNPS,
+ "ANDPD", LTYPE3, AANDPD,
+ "ANDPS", LTYPE3, AANDPS,
+ "CMPPD", LTYPEXC,ACMPPD,
+ "CMPPS", LTYPEXC,ACMPPS,
+ "CMPSD", LTYPEXC,ACMPSD,
+ "CMPSS", LTYPEXC,ACMPSS,
+ "COMISD", LTYPE3, ACOMISD,
+ "COMISS", LTYPE3, ACOMISS,
+ "CVTPL2PD", LTYPE3, ACVTPL2PD,
+ "CVTPL2PS", LTYPE3, ACVTPL2PS,
+ "CVTPD2PL", LTYPE3, ACVTPD2PL,
+ "CVTPD2PS", LTYPE3, ACVTPD2PS,
+ "CVTPS2PL", LTYPE3, ACVTPS2PL,
+ "PF2IW", LTYPE3, APF2IW,
+ "PF2IL", LTYPE3, APF2IL,
+ "PF2ID", LTYPE3, APF2IL, /* syn */
+ "PI2FL", LTYPE3, API2FL,
+ "PI2FD", LTYPE3, API2FL, /* syn */
+ "PI2FW", LTYPE3, API2FW,
+ "CVTPS2PD", LTYPE3, ACVTPS2PD,
+ "CVTSD2SL", LTYPE3, ACVTSD2SL,
+ "CVTSD2SS", LTYPE3, ACVTSD2SS,
+ "CVTSL2SD", LTYPE3, ACVTSL2SD,
+ "CVTSL2SS", LTYPE3, ACVTSL2SS,
+ "CVTSS2SD", LTYPE3, ACVTSS2SD,
+ "CVTSS2SL", LTYPE3, ACVTSS2SL,
+ "CVTTPD2PL", LTYPE3, ACVTTPD2PL,
+ "CVTTPS2PL", LTYPE3, ACVTTPS2PL,
+ "CVTTSD2SL", LTYPE3, ACVTTSD2SL,
+ "CVTTSS2SL", LTYPE3, ACVTTSS2SL,
+ "DIVPD", LTYPE3, ADIVPD,
+ "DIVPS", LTYPE3, ADIVPS,
+ "DIVSD", LTYPE3, ADIVSD,
+ "DIVSS", LTYPE3, ADIVSS,
+ "FXRSTOR", LTYPE2, AFXRSTOR,
+ "FXSAVE", LTYPE1, AFXSAVE,
+ "LDMXCSR", LTYPE2, ALDMXCSR,
+ "MASKMOVOU", LTYPE3, AMASKMOVOU,
+ "MASKMOVDQU", LTYPE3, AMASKMOVOU, /* syn */
+ "MAXPD", LTYPE3, AMAXPD,
+ "MAXPS", LTYPE3, AMAXPS,
+ "MAXSD", LTYPE3, AMAXSD,
+ "MAXSS", LTYPE3, AMAXSS,
+ "MINPD", LTYPE3, AMINPD,
+ "MINPS", LTYPE3, AMINPS,
+ "MINSD", LTYPE3, AMINSD,
+ "MINSS", LTYPE3, AMINSS,
+ "MOVAPD", LTYPE3, AMOVAPD,
+ "MOVAPS", LTYPE3, AMOVAPS,
+ "MOVO", LTYPE3, AMOVO,
+ "MOVOA", LTYPE3, AMOVO, /* syn */
+ "MOVOU", LTYPE3, AMOVOU,
+ "MOVHLPS", LTYPE3, AMOVHLPS,
+ "MOVHPD", LTYPE3, AMOVHPD,
+ "MOVHPS", LTYPE3, AMOVHPS,
+ "MOVLHPS", LTYPE3, AMOVLHPS,
+ "MOVLPD", LTYPE3, AMOVLPD,
+ "MOVLPS", LTYPE3, AMOVLPS,
+ "MOVMSKPD", LTYPE3, AMOVMSKPD,
+ "MOVMSKPS", LTYPE3, AMOVMSKPS,
+ "MOVNTO", LTYPE3, AMOVNTO,
+ "MOVNTDQ", LTYPE3, AMOVNTO, /* syn */
+ "MOVNTPD", LTYPE3, AMOVNTPD,
+ "MOVNTPS", LTYPE3, AMOVNTPS,
+ "MOVNTQ", LTYPE3, AMOVNTQ,
+ "MOVQOZX", LTYPE3, AMOVQOZX,
+ "MOVSD", LTYPE3, AMOVSD,
+ "MOVSS", LTYPE3, AMOVSS,
+ "MOVUPD", LTYPE3, AMOVUPD,
+ "MOVUPS", LTYPE3, AMOVUPS,
+ "MULPD", LTYPE3, AMULPD,
+ "MULPS", LTYPE3, AMULPS,
+ "MULSD", LTYPE3, AMULSD,
+ "MULSS", LTYPE3, AMULSS,
+ "ORPD", LTYPE3, AORPD,
+ "ORPS", LTYPE3, AORPS,
+ "PACKSSLW", LTYPE3, APACKSSLW,
+ "PACKSSWB", LTYPE3, APACKSSWB,
+ "PACKUSWB", LTYPE3, APACKUSWB,
+ "PADDB", LTYPE3, APADDB,
+ "PADDL", LTYPE3, APADDL,
+ "PADDQ", LTYPE3, APADDQ,
+ "PADDSB", LTYPE3, APADDSB,
+ "PADDSW", LTYPE3, APADDSW,
+ "PADDUSB", LTYPE3, APADDUSB,
+ "PADDUSW", LTYPE3, APADDUSW,
+ "PADDW", LTYPE3, APADDW,
+ "PAND", LTYPE3, APAND,
+ "PANDB", LTYPE3, APANDB,
+ "PANDL", LTYPE3, APANDL,
+ "PANDSB", LTYPE3, APANDSB,
+ "PANDSW", LTYPE3, APANDSW,
+ "PANDUSB", LTYPE3, APANDUSB,
+ "PANDUSW", LTYPE3, APANDUSW,
+ "PANDW", LTYPE3, APANDW,
+ "PANDN", LTYPE3, APANDN,
+ "PAVGB", LTYPE3, APAVGB,
+ "PAVGW", LTYPE3, APAVGW,
+ "PCMPEQB", LTYPE3, APCMPEQB,
+ "PCMPEQL", LTYPE3, APCMPEQL,
+ "PCMPEQW", LTYPE3, APCMPEQW,
+ "PCMPGTB", LTYPE3, APCMPGTB,
+ "PCMPGTL", LTYPE3, APCMPGTL,
+ "PCMPGTW", LTYPE3, APCMPGTW,
+ "PEXTRW", LTYPEX, APEXTRW,
+ "PINSRW", LTYPEX, APINSRW,
+ "PMADDWL", LTYPE3, APMADDWL,
+ "PMAXSW", LTYPE3, APMAXSW,
+ "PMAXUB", LTYPE3, APMAXUB,
+ "PMINSW", LTYPE3, APMINSW,
+ "PMINUB", LTYPE3, APMINUB,
+ "PMOVMSKB", LTYPE3, APMOVMSKB,
+ "PMULHRW", LTYPE3, APMULHRW,
+ "PMULHUW", LTYPE3, APMULHUW,
+ "PMULHW", LTYPE3, APMULHW,
+ "PMULLW", LTYPE3, APMULLW,
+ "PMULULQ", LTYPE3, APMULULQ,
+ "POR", LTYPE3, APOR,
+ "PSADBW", LTYPE3, APSADBW,
+ "PSHUFHW", LTYPEX, APSHUFHW,
+ "PSHUFL", LTYPEX, APSHUFL,
+ "PSHUFLW", LTYPEX, APSHUFLW,
+ "PSHUFW", LTYPEX, APSHUFW,
+ "PSLLO", LTYPE3, APSLLO,
+ "PSLLDQ", LTYPE3, APSLLO, /* syn */
+ "PSLLL", LTYPE3, APSLLL,
+ "PSLLQ", LTYPE3, APSLLQ,
+ "PSLLW", LTYPE3, APSLLW,
+ "PSRAL", LTYPE3, APSRAL,
+ "PSRAW", LTYPE3, APSRAW,
+ "PSRLO", LTYPE3, APSRLO,
+ "PSRLDQ", LTYPE3, APSRLO, /* syn */
+ "PSRLL", LTYPE3, APSRLL,
+ "PSRLQ", LTYPE3, APSRLQ,
+ "PSRLW", LTYPE3, APSRLW,
+ "PSUBB", LTYPE3, APSUBB,
+ "PSUBL", LTYPE3, APSUBL,
+ "PSUBQ", LTYPE3, APSUBQ,
+ "PSUBSB", LTYPE3, APSUBSB,
+ "PSUBSW", LTYPE3, APSUBSW,
+ "PSUBUSB", LTYPE3, APSUBUSB,
+ "PSUBUSW", LTYPE3, APSUBUSW,
+ "PSUBW", LTYPE3, APSUBW,
+ "PUNPCKHBW", LTYPE3, APUNPCKHBW,
+ "PUNPCKHLQ", LTYPE3, APUNPCKHLQ,
+ "PUNPCKHQDQ", LTYPE3, APUNPCKHQDQ,
+ "PUNPCKHWL", LTYPE3, APUNPCKHWL,
+ "PUNPCKLBW", LTYPE3, APUNPCKLBW,
+ "PUNPCKLLQ", LTYPE3, APUNPCKLLQ,
+ "PUNPCKLQDQ", LTYPE3, APUNPCKLQDQ,
+ "PUNPCKLWL", LTYPE3, APUNPCKLWL,
+ "PXOR", LTYPE3, APXOR,
+ "RCPPS", LTYPE3, ARCPPS,
+ "RCPSS", LTYPE3, ARCPSS,
+ "RSQRTPS", LTYPE3, ARSQRTPS,
+ "RSQRTSS", LTYPE3, ARSQRTSS,
+ "SHUFPD", LTYPEX, ASHUFPD,
+ "SHUFPS", LTYPEX, ASHUFPS,
+ "SQRTPD", LTYPE3, ASQRTPD,
+ "SQRTPS", LTYPE3, ASQRTPS,
+ "SQRTSD", LTYPE3, ASQRTSD,
+ "SQRTSS", LTYPE3, ASQRTSS,
+ "STMXCSR", LTYPE1, ASTMXCSR,
+ "SUBPD", LTYPE3, ASUBPD,
+ "SUBPS", LTYPE3, ASUBPS,
+ "SUBSD", LTYPE3, ASUBSD,
+ "SUBSS", LTYPE3, ASUBSS,
+ "UCOMISD", LTYPE3, AUCOMISD,
+ "UCOMISS", LTYPE3, AUCOMISS,
+ "UNPCKHPD", LTYPE3, AUNPCKHPD,
+ "UNPCKHPS", LTYPE3, AUNPCKHPS,
+ "UNPCKLPD", LTYPE3, AUNPCKLPD,
+ "UNPCKLPS", LTYPE3, AUNPCKLPS,
+ "XORPD", LTYPE3, AXORPD,
+ "XORPS", LTYPE3, AXORPS,
0
};
--- a/sys/src/cmd/8c/8.out.h
+++ b/sys/src/cmd/8c/8.out.h
@@ -361,6 +361,7 @@
ACMPXCHGB,
ACMPXCHGL,
ACMPXCHGW,
+ ACPUID,
/* conditional move */
ACMOVLCC,
@@ -405,6 +406,185 @@
AFCMOVNU,
AFCMOVUN,
+ /* media */
+ AADDPD,
+ AADDPS,
+ AADDSD,
+ AADDSS,
+ AANDNPD,
+ AANDNPS,
+ AANDPD,
+ AANDPS,
+ ACMPPD,
+ ACMPPS,
+ ACMPSD,
+ ACMPSS,
+ ACOMISD,
+ ACOMISS,
+ ACVTPD2PL,
+ ACVTPD2PS,
+ ACVTPL2PD,
+ ACVTPL2PS,
+ ACVTPS2PD,
+ ACVTPS2PL,
+ ACVTSD2SL,
+ ACVTSD2SS,
+ ACVTSL2SD,
+ ACVTSL2SS,
+ ACVTSS2SD,
+ ACVTSS2SL,
+ ACVTTPD2PL,
+ ACVTTPS2PL,
+ ACVTTSD2SL,
+ ACVTTSS2SL,
+ ADIVPD,
+ ADIVPS,
+ ADIVSD,
+ ADIVSS,
+ AFXRSTOR,
+ AFXSAVE,
+ ALDMXCSR,
+ AMASKMOVOU,
+ AMASKMOVQ,
+ AMAXPD,
+ AMAXPS,
+ AMAXSD,
+ AMAXSS,
+ AMINPD,
+ AMINPS,
+ AMINSD,
+ AMINSS,
+ AMOVAPD,
+ AMOVAPS,
+ AMOVOU,
+ AMOVHLPS,
+ AMOVHPD,
+ AMOVHPS,
+ AMOVLHPS,
+ AMOVLPD,
+ AMOVLPS,
+ AMOVMSKPD,
+ AMOVMSKPS,
+ AMOVNTO,
+ AMOVNTPD,
+ AMOVNTPS,
+ AMOVNTQ,
+ AMOVO,
+ AMOVQOZX,
+ AMOVSD,
+ AMOVSS,
+ AMOVUPD,
+ AMOVUPS,
+ AMULPD,
+ AMULPS,
+ AMULSD,
+ AMULSS,
+ AORPD,
+ AORPS,
+ APACKSSLW,
+ APACKSSWB,
+ APACKUSWB,
+ APADDB,
+ APADDL,
+ APADDQ,
+ APADDSB,
+ APADDSW,
+ APADDUSB,
+ APADDUSW,
+ APADDW,
+ APANDB,
+ APANDL,
+ APANDSB,
+ APANDSW,
+ APANDUSB,
+ APANDUSW,
+ APANDW,
+ APAND,
+ APANDN,
+ APAVGB,
+ APAVGW,
+ APCMPEQB,
+ APCMPEQL,
+ APCMPEQW,
+ APCMPGTB,
+ APCMPGTL,
+ APCMPGTW,
+ APEXTRW,
+ APINSRW,
+ APMADDWL,
+ APMAXSW,
+ APMAXUB,
+ APMINSW,
+ APMINUB,
+ APMOVMSKB,
+ APMULHRW,
+ APMULHUW,
+ APMULHW,
+ APMULLW,
+ APMULULQ,
+ APOR,
+ APSADBW,
+ APSHUFHW,
+ APSHUFL,
+ APSHUFLW,
+ APSHUFW,
+ APSLLO,
+ APSLLL,
+ APSLLQ,
+ APSLLW,
+ APSRAL,
+ APSRAW,
+ APSRLO,
+ APSRLL,
+ APSRLQ,
+ APSRLW,
+ APSUBB,
+ APSUBL,
+ APSUBQ,
+ APSUBSB,
+ APSUBSW,
+ APSUBUSB,
+ APSUBUSW,
+ APSUBW,
+ APSWAPL,
+ APUNPCKHBW,
+ APUNPCKHLQ,
+ APUNPCKHQDQ,
+ APUNPCKHWL,
+ APUNPCKLBW,
+ APUNPCKLLQ,
+ APUNPCKLQDQ,
+ APUNPCKLWL,
+ APXOR,
+ ARCPPS,
+ ARCPSS,
+ ARSQRTPS,
+ ARSQRTSS,
+ ASHUFPD,
+ ASHUFPS,
+ ASQRTPD,
+ ASQRTPS,
+ ASQRTSD,
+ ASQRTSS,
+ ASTMXCSR,
+ ASUBPD,
+ ASUBPS,
+ ASUBSD,
+ ASUBSS,
+ AUCOMISD,
+ AUCOMISS,
+ AUNPCKHPD,
+ AUNPCKHPS,
+ AUNPCKLPD,
+ AUNPCKLPS,
+ AXORPD,
+ AXORPS,
+
+ APF2IW,
+ APF2IL,
+ API2FW,
+ API2FL,
+
/* add new operations here. nowhere else. here. */
ALAST
};
@@ -470,6 +650,10 @@
D_CONST2 = D_INDIR+D_INDIR,
D_SIZE, /* 8l internal */
+
+ D_M0,
+ D_X0 = D_M0 + 8,
+ D_XNONE = D_X0 + 8,
T_TYPE = 1<<0,
T_INDEX = 1<<1,
--- a/sys/src/cmd/8c/enam.c
+++ b/sys/src/cmd/8c/enam.c
@@ -340,6 +340,7 @@
"CMPXCHGB",
"CMPXCHGL",
"CMPXCHGW",
+ "CPUID",
"CMOVLCC",
"CMOVLCS",
"CMOVLEQ",
@@ -380,5 +381,181 @@
"FCMOVNE",
"FCMOVNU",
"FCMOVUN",
+ "ADDPD",
+ "ADDPS",
+ "ADDSD",
+ "ADDSS",
+ "ANDNPD",
+ "ANDNPS",
+ "ANDPD",
+ "ANDPS",
+ "CMPPD",
+ "CMPPS",
+ "CMPSD",
+ "CMPSS",
+ "COMISD",
+ "COMISS",
+ "CVTPD2PL",
+ "CVTPD2PS",
+ "CVTPL2PD",
+ "CVTPL2PS",
+ "CVTPS2PD",
+ "CVTPS2PL",
+ "CVTSD2SL",
+ "CVTSD2SS",
+ "CVTSL2SD",
+ "CVTSL2SS",
+ "CVTSS2SD",
+ "CVTSS2SL",
+ "CVTTPD2PL",
+ "CVTTPS2PL",
+ "CVTTSD2SL",
+ "CVTTSS2SL",
+ "DIVPD",
+ "DIVPS",
+ "DIVSD",
+ "DIVSS",
+ "FXRSTOR",
+ "FXSAVE",
+ "LDMXCSR",
+ "MASKMOVOU",
+ "MASKMOVQ",
+ "MAXPD",
+ "MAXPS",
+ "MAXSD",
+ "MAXSS",
+ "MINPD",
+ "MINPS",
+ "MINSD",
+ "MINSS",
+ "MOVAPD",
+ "MOVAPS",
+ "MOVOU",
+ "MOVHLPS",
+ "MOVHPD",
+ "MOVHPS",
+ "MOVLHPS",
+ "MOVLPD",
+ "MOVLPS",
+ "MOVMSKPD",
+ "MOVMSKPS",
+ "MOVNTO",
+ "MOVNTPD",
+ "MOVNTPS",
+ "MOVNTQ",
+ "MOVO",
+ "MOVQOZX",
+ "MOVSD",
+ "MOVSS",
+ "MOVUPD",
+ "MOVUPS",
+ "MULPD",
+ "MULPS",
+ "MULSD",
+ "MULSS",
+ "ORPD",
+ "ORPS",
+ "PACKSSLW",
+ "PACKSSWB",
+ "PACKUSWB",
+ "PADDB",
+ "PADDL",
+ "PADDQ",
+ "PADDSB",
+ "PADDSW",
+ "PADDUSB",
+ "PADDUSW",
+ "PADDW",
+ "PANDB",
+ "PANDL",
+ "PANDSB",
+ "PANDSW",
+ "PANDUSB",
+ "PANDUSW",
+ "PANDW",
+ "PAND",
+ "PANDN",
+ "PAVGB",
+ "PAVGW",
+ "PCMPEQB",
+ "PCMPEQL",
+ "PCMPEQW",
+ "PCMPGTB",
+ "PCMPGTL",
+ "PCMPGTW",
+ "PEXTRW",
+ "PINSRW",
+ "PMADDWL",
+ "PMAXSW",
+ "PMAXUB",
+ "PMINSW",
+ "PMINUB",
+ "PMOVMSKB",
+ "PMULHRW",
+ "PMULHUW",
+ "PMULHW",
+ "PMULLW",
+ "PMULULQ",
+ "POR",
+ "PSADBW",
+ "PSHUFHW",
+ "PSHUFL",
+ "PSHUFLW",
+ "PSHUFW",
+ "PSLLO",
+ "PSLLL",
+ "PSLLQ",
+ "PSLLW",
+ "PSRAL",
+ "PSRAW",
+ "PSRLO",
+ "PSRLL",
+ "PSRLQ",
+ "PSRLW",
+ "PSUBB",
+ "PSUBL",
+ "PSUBQ",
+ "PSUBSB",
+ "PSUBSW",
+ "PSUBUSB",
+ "PSUBUSW",
+ "PSUBW",
+ "PSWAPL",
+ "PUNPCKHBW",
+ "PUNPCKHLQ",
+ "PUNPCKHQDQ",
+ "PUNPCKHWL",
+ "PUNPCKLBW",
+ "PUNPCKLLQ",
+ "PUNPCKLQDQ",
+ "PUNPCKLWL",
+ "PXOR",
+ "RCPPS",
+ "RCPSS",
+ "RSQRTPS",
+ "RSQRTSS",
+ "SHUFPD",
+ "SHUFPS",
+ "SQRTPD",
+ "SQRTPS",
+ "SQRTSD",
+ "SQRTSS",
+ "STMXCSR",
+ "SUBPD",
+ "SUBPS",
+ "SUBSD",
+ "SUBSS",
+ "UCOMISD",
+ "UCOMISS",
+ "UNPCKHPD",
+ "UNPCKHPS",
+ "UNPCKLPD",
+ "UNPCKLPS",
+ "XORPD",
+ "XORPS",
+ "PF2IW",
+ "PF2IL",
+ "PI2FW",
+ "PI2FL",
"LAST",
};
--- a/sys/src/cmd/8l/l.h
+++ b/sys/src/cmd/8l/l.h
@@ -90,7 +90,7 @@
short as;
uchar* ytab;
uchar prefix;
- uchar op[10];
+ uchar op[20];
};
enum
@@ -142,6 +142,8 @@
Ycr0, Ycr1, Ycr2, Ycr3, Ycr4, Ycr5, Ycr6, Ycr7,
Ydr0, Ydr1, Ydr2, Ydr3, Ydr4, Ydr5, Ydr6, Ydr7,
Ytr0, Ytr1, Ytr2, Ytr3, Ytr4, Ytr5, Ytr6, Ytr7,
+ Ymr, Ymm,
+ Yxr, Yxm,
Ymax,
Zxxx = 0,
@@ -153,6 +155,7 @@
Zib_,
Zib_rp,
Zibo_m,
+ Zibo_m_xm,
Zil_,
Zil_rp,
Zilo_m,
@@ -160,10 +163,16 @@
Zloop,
Zm_o,
Zm_r,
+ Zm_r_xm,
+ Zm_r_i_xm,
+ Zm_r_3d,
+ Zibm_r, /* mmx1,mmx2/mem64,imm8 */
Zaut_r,
Zo_m,
Zpseudo,
Zr_m,
+ Zr_m_xm,
+ Zr_m_i_xm,
Zrp_,
Z_ib,
Z_il,
@@ -181,6 +190,8 @@
Pm = 0x0f, /* 2byte opcode escape */
Pq = 0xff, /* both escape */
Pb = 0xfe, /* byte operands */
+ Pf2 = 0xf2, /* xmm escape 1 */
+ Pf3 = 0xf3, /* xmm escape 2 */
Roffset = 22, /* no. bits for offset in relocation address */
Rindex = 10, /* no. bits for index in relocation address */
@@ -250,7 +261,7 @@
EXTERN char ycover[Ymax*Ymax];
EXTERN uchar* andptr;
EXTERN uchar and[30];
-EXTERN char reg[D_NONE];
+EXTERN char reg[D_XNONE];
EXTERN Prog* lastp;
EXTERN long lcsize;
EXTERN int nerrors;
@@ -279,6 +290,7 @@
#define UP (&undefp)
extern Optab optab[];
+extern Optab* opindex[];
extern char* anames[];
int Aconv(Fmt*);
--- a/sys/src/cmd/8l/list.c
+++ b/sys/src/cmd/8l/list.c
@@ -61,7 +61,7 @@
a = va_arg(fp->args, Adr*);
i = a->type;
- if(i >= D_INDIR) {+ if(i >= D_INDIR && i < D_M0) {if(a->offset)
snprint(str, sizeof(str), "%ld(%R)", a->offset, i-D_INDIR);
else
@@ -208,6 +208,24 @@
"TR7",
"NONE", /* [D_NONE] */
+
+[D_M0] "M0", /* [D_M0] */
+ "M1",
+ "M2",
+ "M3",
+ "M4",
+ "M5",
+ "M6",
+ "M7",
+
+[D_X0] "X0", /* [D_X0] */
+ "X1",
+ "X2",
+ "X3",
+ "X4",
+ "X5",
+ "X6",
+ "X7",
};
int
@@ -217,7 +235,7 @@
int r;
r = va_arg(fp->args, int);
- if(r >= D_AL && r <= D_NONE)
+ if((r >= D_AL && r <= D_NONE) || (r >= D_M0 && r <= D_X0+7))
snprint(str, sizeof(str), "%s", regstr[r-D_AL]);
else
snprint(str, sizeof(str), "gok(%d)", r);
--- a/sys/src/cmd/8l/obj.c
+++ b/sys/src/cmd/8l/obj.c
@@ -195,11 +195,14 @@
Bprint(&bso, "HEADER = -H0x%ld -T0x%lux -D0x%lux -R0x%lux\n",
HEADTYPE, INITTEXT, INITDAT, INITRND);
Bflush(&bso);
- for(i=1; optab[i].as; i++)
- if(i != optab[i].as) {- diag("phase error in optab: %d", i);+ for(i=1; optab[i].as; i++) {+ c = optab[i].as;
+ if(opindex[c] != nil) {+ diag("phase error in optab: %d (%A)", i, c);errorexit();
}
+ opindex[c] = &optab[i];
+ }
for(i=0; i<Ymax; i++)
ycover[i*Ymax + i] = 1;
@@ -240,7 +243,13 @@
ycover[Yrl*Ymax + Yml] = 1;
ycover[Ym*Ymax + Yml] = 1;
- for(i=0; i<D_NONE; i++) {+ ycover[Ym*Ymax + Ymm] = 1;
+ ycover[Ymr*Ymax + Ymm] = 1;
+
+ ycover[Ym*Ymax + Yxm] = 1;
+ ycover[Yxr*Ymax + Yxm] = 1;
+
+ for(i=0; i<D_XNONE; i++) {reg[i] = -1;
if(i >= D_AL && i <= D_BH)
reg[i] = (i-D_AL) & 7;
@@ -248,6 +257,10 @@
reg[i] = (i-D_AX) & 7;
if(i >= D_F0 && i <= D_F0+7)
reg[i] = (i-D_F0) & 7;
+ if(i >= D_M0 && i <= D_M0+7)
+ reg[i] = (i-D_M0) & 7;
+ if(i >= D_X0 && i <= D_X0+7)
+ reg[i] = (i-D_X0) & 7;
}
zprg.link = P;
@@ -988,6 +1001,13 @@
case AFDIVRF:
case AFCOMF:
case AFCOMFP:
+ case AMOVSS:
+ case AADDSS:
+ case ASUBSS:
+ case AMULSS:
+ case ADIVSS:
+ case ACOMISS:
+ case AUCOMISS:
if(skip)
goto casdef;
if(p->from.type == D_FCONST) {@@ -1026,6 +1046,13 @@
case AFDIVRD:
case AFCOMD:
case AFCOMDP:
+ case AMOVSD:
+ case AADDSD:
+ case ASUBSD:
+ case AMULSD:
+ case ADIVSD:
+ case ACOMISD:
+ case AUCOMISD:
if(skip)
goto casdef;
if(p->from.type == D_FCONST) {--- a/sys/src/cmd/8l/optab.c
+++ b/sys/src/cmd/8l/optab.c
@@ -15,8 +15,10 @@
Ynone, Ynone, Zpseudo,1,
Ynone, Yml, Zpseudo,1,
Ynone, Yrf, Zpseudo,1,
+ Ynone, Yxr, Zpseudo,1,
Yml, Ynone, Zpseudo,1,
Yrf, Ynone, Zpseudo,1,
+ Yxr, Ynone, Zpseudo,1,
0
};
uchar yxorb[] =
@@ -120,6 +122,10 @@
// Yi0, Yml, Zibo_m, 2, // shorter but slower AND $0,dst
Yi32, Yrl, Zil_rp, 1,
Yi32, Yml, Zilo_m, 2,
+ Yml, Ymr, Zm_r_xm, 1, // MMX MOVD
+ Ymr, Yml, Zr_m_xm, 1, // MMX MOVD
+ Yml, Yxr, Zm_r_xm, 2, // XMM MOVD (32 bit)
+ Yxr, Yml, Zr_m_xm, 2, // XMM MOVD (32 bit)
Yiauto, Yrl, Zaut_r, 2,
0
};
@@ -306,6 +312,134 @@
Ym, Ynone, Zm_o, 2,
0
};
+uchar ymm[] =
+{+ Ymm, Ymr, Zm_r_xm, 1,
+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxm[] =
+{+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxcvm1[] =
+{+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxcvm2[] =
+{+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxmq[] =
+{+ Yxm, Yxr, Zm_r_xm, 2,
+ 0
+};
+uchar yxr[] =
+{+ Yxr, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxr_ml[] =
+{+ Yxr, Yml, Zr_m_xm, 1,
+ 0
+};
+uchar ymr[] =
+{+ Ymr, Ymr, Zm_r, 1,
+ 0
+};
+uchar ymr_ml[] =
+{+ Ymr, Yml, Zr_m_xm, 1,
+ 0
+};
+uchar yxcmp[] =
+{+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yxcmpi[] =
+{+ Yxm, Yxr, Zm_r_i_xm, 2,
+ 0
+};
+uchar yxmov[] =
+{+ Yxm, Yxr, Zm_r_xm, 1,
+ Yxr, Yxm, Zr_m_xm, 1,
+ 0
+};
+uchar yxcvfl[] =
+{+ Yxm, Yrl, Zm_r_xm, 1,
+ 0
+};
+uchar yxcvlf[] =
+{+ Yml, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar yps[] =
+{+ Ymm, Ymr, Zm_r_xm, 1,
+ Yi8, Ymr, Zibo_m_xm, 2,
+ Yxm, Yxr, Zm_r_xm, 2,
+ Yi8, Yxr, Zibo_m_xm, 3,
+ 0
+};
+uchar yxrrl[] =
+{+ Yxr, Yrl, Zm_r, 1,
+ 0
+};
+uchar ymfp[] =
+{+ Ymm, Ymr, Zm_r_3d, 1,
+ 0,
+};
+uchar ymrxr[] =
+{+ Ymr, Yxr, Zm_r, 1,
+ Yxm, Yxr, Zm_r_xm, 1,
+ 0
+};
+uchar ymshuf[] =
+{+ Ymm, Ymr, Zibm_r, 1,
+ 0
+};
+uchar yxshuf[] =
+{+ Yxm, Yxr, Zibm_r, 1,
+ 0
+};
+uchar yextrw[] =
+{+ Yxr, Yrl, Zibm_r, 1,
+ 0
+};
+uchar ypsdq[] =
+{+ Yi8, Yxr, Zibo_m, 2,
+ 0
+};
+uchar ymskb[] =
+{+ Yxr, Yrl, Zm_r_xm, 2,
+ Ymr, Yrl, Zm_r_xm, 1,
+ 0
+};
+uchar yxaes[] =
+{+ Yxm, Yxr, Zm_r_xm, 2,
+ Yxm, Yxr, Zm_r_i_xm, 2,
+ 0
+};
Optab optab[] =
/* as, ytab, andproto, opcode */
@@ -320,10 +454,18 @@
{ AADCW, yxorl, Pe, 0x83,(02),0x15,0x81,(02),0x11,0x13 }, { AADDB, yxorb, Px, 0x04,0x80,(00),0x00,0x02 }, { AADDL, yaddl, Px, 0x83,(00),0x05,0x81,(00),0x01,0x03 },+ { AADDPD, yxm, Pq, 0x58 },+ { AADDPS, yxm, Pm, 0x58 },+ { AADDSD, yxm, Pf2, 0x58 },+ { AADDSS, yxm, Pf3, 0x58 }, { AADDW, yaddl, Pe, 0x83,(00),0x05,0x81,(00),0x01,0x03 }, { AADJSP }, { AANDB, yxorb, Pb, 0x24,0x80,(04),0x20,0x22 }, { AANDL, yxorl, Px, 0x83,(04),0x25,0x81,(04),0x21,0x23 },+ { AANDNPD, yxm, Pq, 0x55 },+ { AANDNPS, yxm, Pm, 0x55 },+ { AANDPD, yxm, Pq, 0x54 },+ { AANDPS, yxm, Pq, 0x54 }, { AANDW, yxorl, Pe, 0x83,(04),0x25,0x81,(04),0x21,0x23 }, { AARPL, yrl_ml, Px, 0x63 }, { ABOUNDL, yrl_m, Px, 0x62 },@@ -349,9 +491,32 @@
{ ACMC, ynone, Px, 0xf5 }, { ACMPB, ycmpb, Pb, 0x3c,0x80,(07),0x38,0x3a }, { ACMPL, ycmpl, Px, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },+ { ACMPPD, yxcmpi, Px, Pe,0xc2 },+ { ACMPPS, yxcmpi, Pm, 0xc2,0 }, { ACMPW, ycmpl, Pe, 0x83,(07),0x3d,0x81,(07),0x39,0x3b },+ { ACOMISD, yxcmp, Pe, 0x2f },+ { ACOMISS, yxcmp, Pm, 0x2f },+ { ACPUID, ynone, Pm, 0xa2 },+ { ACVTPL2PD, yxcvm2, Px, Pf3,0xe6 },+ { ACVTPL2PS, yxcvm2, Pm, 0x5b },+ { ACVTPD2PL, yxcvm1, Px, Pf2,0xe6 },+ { ACVTPD2PS, yxm, Pe, 0x5a },+ { ACVTPS2PL, yxcvm1, Px, Pe,0x5b },+ { ACVTPS2PD, yxm, Pm, 0x5a },+ { ACVTSD2SL, yxcvfl, Pf2, 0x2d },+ { ACVTSD2SS, yxm, Pf2, 0x5a },+ { ACVTSL2SD, yxcvlf, Pf2, 0x2a },+ { ACVTSL2SS, yxcvlf, Pf3, 0x2a },+ { ACVTSS2SD, yxm, Pf3, 0x5a },+ { ACVTSS2SL, yxcvfl, Pf3, 0x2d },+ { ACVTTPD2PL, yxcvm1, Px, Pe,0xe6 },+ { ACVTTPS2PL, yxcvm1, Px, Pf3,0x5b },+ { ACVTTSD2SL, yxcvfl, Pf2, 0x2c },+ { ACVTTSS2SL, yxcvfl, Pf3, 0x2c }, { ACMPSB, ynone, Pb, 0xa6 },+ { ACMPSD, yxcmpi, Px, Pf2,0xc2 }, { ACMPSL, ynone, Px, 0xa7 },+ { ACMPSS, yxcmpi, Px, Pf3,0xc2 }, { ACMPSW, ynone, Pe, 0xa7 }, { ADAA, ynone, Px, 0x27 }, { ADAS, ynone, Px, 0x2f },@@ -361,8 +526,14 @@
{ ADECW, yincl, Pe, 0x48,0xff,(01) }, { ADIVB, ydivb, Pb, 0xf6,(06) }, { ADIVL, ydivl, Px, 0xf7,(06) },+ { ADIVPD, yxm, Pe, 0x5e },+ { ADIVPS, yxm, Pm, 0x5e },+ { ADIVSD, yxm, Pf2, 0x5e },+ { ADIVSS, yxm, Pf3, 0x5e }, { ADIVW, ydivl, Pe, 0xf7,(06) }, { AENTER }, /* botch */+ { AFXRSTOR, ysvrs, Pm, 0xae,(01),0xae,(01) },+ { AFXSAVE, ysvrs, Pm, 0xae,(00),0xae,(00) }, { AGLOBL }, { AGOK }, { AHISTORY },@@ -407,6 +578,7 @@
{ ALAHF, ynone, Px, 0x9f }, { ALARL, yml_rl, Pm, 0x02 }, { ALARW, yml_rl, Pq, 0x02 },+ { ALDMXCSR, ysvrs, Pm, 0xae,(02),0xae,(02) }, { ALEAL, ym_rl, Px, 0x8d }, { ALEAW, ym_rl, Pe, 0x8d }, { ALEAVEL, ynone, Px, 0xc9 },@@ -421,8 +593,20 @@
{ ALOOPNE, yloop, Px, 0xe0 }, { ALSLL, yml_rl, Pm, 0x03 }, { ALSLW, yml_rl, Pq, 0x03 },+ { AMASKMOVOU, yxr, Pe, 0xf7 },+ { AMASKMOVQ, ymr, Pm, 0xf7 },+ { AMAXPD, yxm, Pe, 0x5f },+ { AMAXPS, yxm, Pm, 0x5f },+ { AMAXSD, yxm, Pf2, 0x5f },+ { AMAXSS, yxm, Pf3, 0x5f },+ { AMINPD, yxm, Pe, 0x5d },+ { AMINPS, yxm, Pm, 0x5d },+ { AMINSD, yxm, Pf2, 0x5d },+ { AMINSS, yxm, Pf3, 0x5d },+ { AMOVAPD, yxmov, Pe, 0x28,0x29 },+ { AMOVAPS, yxmov, Pm, 0x28,0x29 }, { AMOVB, ymovb, Pb, 0x88,0x8a,0xb0,0xc6,(00) },- { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00) },+ { AMOVL, ymovl, Px, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00),0x6e,0x7e,Pe,0x6e,Pe,0x7e }, { AMOVW, ymovl, Pe, 0x89,0x8b,0x31,0x83,(04),0xb8,0xc7,(00) }, { AMOVBLSX, ymb_rl, Pm, 0xbe }, { AMOVBLZX, ymb_rl, Pm, 0xb6 },@@ -430,11 +614,34 @@
{ AMOVBWZX, ymb_rl, Pq, 0xb6 }, { AMOVWLSX, yml_rl, Pm, 0xbf }, { AMOVWLZX, yml_rl, Pm, 0xb7 },+ { AMOVO, yxmov, Pe, 0x6f,0x7f },+ { AMOVOU, yxmov, Pf3, 0x6f,0x7f },+ { AMOVHLPS, yxr, Pm, 0x12 },+ { AMOVHPD, yxmov, Pe, 0x16,0x17 },+ { AMOVHPS, yxmov, Pm, 0x16,0x17 },+ { AMOVLHPS, yxr, Pm, 0x16 },+ { AMOVLPD, yxmov, Pe, 0x12,0x13 },+ { AMOVLPS, yxmov, Pm, 0x12,0x13 },+ { AMOVMSKPD, yxrrl, Pq, 0x50 },+ { AMOVMSKPS, yxrrl, Pm, 0x50 },+ { AMOVNTO, yxr_ml, Pe, 0xe7 },+ { AMOVNTPD, yxr_ml, Pe, 0x2b },+ { AMOVNTPS, yxr_ml, Pm, 0x2b },+ { AMOVNTQ, ymr_ml, Pm, 0xe7 },+ { AMOVQOZX, ymrxr, Pf3, 0xd6,0x7e }, { AMOVSB, ynone, Pb, 0xa4 },+ { AMOVSD, yxmov, Pf2, 0x10,0x11 }, { AMOVSL, ynone, Px, 0xa5 },+ { AMOVSS, yxmov, Pf3, 0x10,0x11 }, { AMOVSW, ynone, Pe, 0xa5 },+ { AMOVUPD, yxmov, Pe, 0x10,0x11 },+ { AMOVUPS, yxmov, Pm, 0x10,0x11 }, { AMULB, ydivb, Pb, 0xf6,(04) }, { AMULL, ydivl, Px, 0xf7,(04) },+ { AMULPD, yxm, Pe, 0x59 },+ { AMULPS, yxm, Ym, 0x59 },+ { AMULSD, yxm, Pf2, 0x59 },+ { AMULSS, yxm, Pf3, 0x59 }, { AMULW, ydivl, Pe, 0xf7,(04) }, { ANAME }, { ANEGB, yscond, Px, 0xf6,(03) },@@ -446,6 +653,8 @@
{ ANOTW, yscond, Pe, 0xf7,(02) }, { AORB, yxorb, Pb, 0x0c,0x80,(01),0x08,0x0a }, { AORL, yxorl, Px, 0x83,(01),0x0d,0x81,(01),0x09,0x0b },+ { AORPD, yxm, Pq, 0x56 },+ { AORPS, yxm, Pm, 0x56 }, { AORW, yxorl, Pe, 0x83,(01),0x0d,0x81,(01),0x09,0x0b }, { AOUTB, yin, Pb, 0xe6,0xee }, { AOUTL, yin, Px, 0xe7,0xef },@@ -453,6 +662,44 @@
{ AOUTSB, ynone, Pb, 0x6e }, { AOUTSL, ynone, Px, 0x6f }, { AOUTSW, ynone, Pe, 0x6f },+ { APACKSSLW, ymm, Px, 0x6b,Pe,0x6b },+ { APACKSSWB, ymm, Px, 0x63,Pe,0x63 },+ { APACKUSWB, ymm, Px, 0x67,Pe,0x67 },+ { APADDB, ymm, Px, 0xfc,Pe,0xfc },+ { APADDL, ymm, Px, 0xfe,Pe,0xfe },+ { APADDQ, yxm, Pe, 0xd4 },+ { APADDSB, ymm, Px, 0xec,Pe,0xec },+ { APADDSW, ymm, Px, 0xed,Pe,0xed },+ { APADDUSB, ymm, Px, 0xdc,Pe,0xdc },+ { APADDUSW, ymm, Px, 0xdd,Pe,0xdd },+ { APADDW, ymm, Px, 0xfd,Pe,0xfd },+ { APAND, ymm, Px, 0xdb,Pe,0xdb },+ { APANDN, ymm, Px, 0xdf,Pe,0xdf },+ { APAVGB, ymm, Px, 0xe0,Pe,0xe0 },+ { APAVGW, ymm, Px, 0xe3,Pe,0xe3 },+ { APCMPEQB, ymm, Px, 0x74,Pe,0x74 },+ { APCMPEQL, ymm, Px, 0x76,Pe,0x76 },+ { APCMPEQW, ymm, Px, 0x75,Pe,0x75 },+ { APCMPGTB, ymm, Px, 0x64,Pe,0x64 },+ { APCMPGTL, ymm, Px, 0x66,Pe,0x66 },+ { APCMPGTW, ymm, Px, 0x65,Pe,0x65 },+ { APEXTRW, yextrw, Pq, 0xc5 },+ { APF2IL, ymfp, Px, 0x1d },+ { APF2IW, ymfp, Px, 0x1c },+ { API2FL, ymfp, Px, 0x0d },+ { API2FW, ymfp, Px, 0x0c },+ { APINSRW, yextrw, Pq, 0xc4 },+ { APMADDWL, ymm, Px, 0xf5,Pe,0xf5 },+ { APMAXSW, yxm, Pe, 0xee },+ { APMAXUB, yxm, Pe, 0xde },+ { APMINSW, yxm, Pe, 0xea },+ { APMINUB, yxm, Pe, 0xda },+ { APMOVMSKB, ymskb, Px, Pe,0xd7,0xd7 },+ { APMULHRW, ymfp, Px, 0xb7 },+ { APMULHUW, ymm, Px, 0xe4,Pe,0xe4 },+ { APMULHW, ymm, Px, 0xe5,Pe,0xe5 },+ { APMULLW, ymm, Px, 0xd5,Pe,0xd5 },+ { APMULULQ, ymm, Px, 0xf4,Pe,0xf4 }, { APOPAL, ynone, Px, 0x61 }, { APOPAW, ynone, Pe, 0x61 }, { APOPFL, ynone, Px, 0x9d },@@ -459,6 +706,38 @@
{ APOPFW, ynone, Pe, 0x9d }, { APOPL, ypopl, Px, 0x58,0x8f,(00) }, { APOPW, ypopl, Pe, 0x58,0x8f,(00) },+ { APOR, ymm, Px, 0xeb,Pe,0xeb },+ { APSADBW, yxm, Pq, 0xf6 },+ { APSHUFHW, yxshuf, Pf3, 0x70 },+ { APSHUFL, yxshuf, Pq, 0x70 },+ { APSHUFLW, yxshuf, Pf2, 0x70 },+ { APSHUFW, ymshuf, Pm, 0x70 },+ { APSLLO, ypsdq, Pq, 0x73,(07) },+ { APSLLL, yps, Px, 0xf2, 0x72,(06), Pe,0xf2, Pe,0x72,(06) },+ { APSLLQ, yps, Px, 0xf3, 0x73,(06), Pe,0xf3, Pe,0x7e,(06) },+ { APSLLW, yps, Px, 0xf1, 0x71,(06), Pe,0xf1, Pe,0x71,(06) },+ { APSRAL, yps, Px, 0xe2, 0x72,(04), Pe,0xe2, Pe,0x72,(04) },+ { APSRAW, yps, Px, 0xe1, 0x71,(04), Pe,0xe1, Pe,0x71,(04) },+ { APSRLO, ypsdq, Pq, 0x73,(03) },+ { APSRLL, yps, Px, 0xd2, 0x72,(02), Pe,0xd2, Pe,0x72,(02) },+ { APSRLQ, yps, Px, 0xd3, 0x73,(02), Pe,0xd3, Pe,0x73,(02) },+ { APSRLW, yps, Px, 0xd1, 0x71,(02), Pe,0xe1, Pe,0x71,(02) },+ { APSUBB, yxm, Pe, 0xf8 },+ { APSUBL, yxm, Pe, 0xfa },+ { APSUBQ, yxm, Pe, 0xfb },+ { APSUBSB, yxm, Pe, 0xe8 },+ { APSUBSW, yxm, Pe, 0xe9 },+ { APSUBUSB, yxm, Pe, 0xd8 },+ { APSUBUSW, yxm, Pe, 0xd9 },+ { APSUBW, yxm, Pe, 0xf9 },+ { APUNPCKHBW, ymm, Px, 0x68,Pe,0x68 },+ { APUNPCKHLQ, ymm, Px, 0x6a,Pe,0x6a },+ { APUNPCKHQDQ, yxm, Pe, 0x6d },+ { APUNPCKHWL, ymm, Px, 0x69,Pe,0x69 },+ { APUNPCKLBW, ymm, Px, 0x60,Pe,0x60 },+ { APUNPCKLLQ, ymm, Px, 0x62,Pe,0x62 },+ { APUNPCKLQDQ, yxm, Pe, 0x6c },+ { APUNPCKLWL, ymm, Px, 0x61,Pe,0x61 }, { APUSHAL, ynone, Px, 0x60 }, { APUSHAW, ynone, Pe, 0x60 }, { APUSHFL, ynone, Px, 0x9c },@@ -465,9 +744,12 @@
{ APUSHFW, ynone, Pe, 0x9c }, { APUSHL, ypushl, Px, 0x50,0xff,(06),0x6a,0x68 }, { APUSHW, ypushl, Pe, 0x50,0xff,(06),0x6a,0x68 },+ { APXOR, ymm, Px, 0xef,Pe,0xef }, { ARCLB, yshb, Pb, 0xd0,(02),0xc0,(02),0xd2,(02) }, { ARCLL, yshl, Px, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) }, { ARCLW, yshl, Pe, 0xd1,(02),0xc1,(02),0xd3,(02),0xd3,(02) },+ { ARCPPS, yxm, Pm, 0x53 },+ { ARCPSS, yxm, Pf3, 0x53 }, { ARCRB, yshb, Pb, 0xd0,(03),0xc0,(03),0xd2,(03) }, { ARCRL, yshl, Px, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) }, { ARCRW, yshl, Pe, 0xd1,(03),0xc1,(03),0xd3,(03),0xd3,(03) },@@ -480,6 +762,8 @@
{ ARORB, yshb, Pb, 0xd0,(01),0xc0,(01),0xd2,(01) }, { ARORL, yshl, Px, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) }, { ARORW, yshl, Pe, 0xd1,(01),0xc1,(01),0xd3,(01),0xd3,(01) },+ { ARSQRTPS, yxm, Pm, 0x52 },+ { ARSQRTSS, yxm, Pf3, 0x52 }, { ASAHF, ynone, Px, 0x9e }, { ASALB, yshb, Pb, 0xd0,(04),0xc0,(04),0xd2,(04) }, { ASALL, yshl, Px, 0xd1,(04),0xc1,(04),0xd3,(04),0xd3,(04) },@@ -517,14 +801,25 @@
{ ASHRB, yshb, Pb, 0xd0,(05),0xc0,(05),0xd2,(05) }, { ASHRL, yshl, Px, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) }, { ASHRW, yshl, Pe, 0xd1,(05),0xc1,(05),0xd3,(05),0xd3,(05) },+ { ASHUFPD, yxshuf, Pq, 0xc6 },+ { ASHUFPS, yxshuf, Pm, 0xc6 },+ { ASQRTPD, yxm, Pe, 0x51 },+ { ASQRTPS, yxm, Pm, 0x51 },+ { ASQRTSD, yxm, Pf2, 0x51 },+ { ASQRTSS, yxm, Pf3, 0x51 }, { ASTC, ynone, Px, 0xf9 }, { ASTD, ynone, Px, 0xfd }, { ASTI, ynone, Px, 0xfb },+ { ASTMXCSR, ysvrs, Pm, 0xae,(03),0xae,(03) }, { ASTOSB, ynone, Pb, 0xaa }, { ASTOSL, ynone, Px, 0xab }, { ASTOSW, ynone, Pe, 0xab }, { ASUBB, yxorb, Pb, 0x2c,0x80,(05),0x28,0x2a }, { ASUBL, yaddl, Px, 0x83,(05),0x2d,0x81,(05),0x29,0x2b },+ { ASUBPD, yxm, Pe, 0x5c },+ { ASUBPS, yxm, Pm, 0x5c },+ { ASUBSD, yxm, Pf2, 0x5c },+ { ASUBSS, yxm, Pf3, 0x5c }, { ASUBW, yaddl, Pe, 0x83,(05),0x2d,0x81,(05),0x29,0x2b }, { ASYSCALL, ynone, Px, 0xcd,100 }, { ATESTB, ytestb, Pb, 0xa8,0xf6,(00),0x84,0x84 },@@ -531,6 +826,12 @@
{ ATESTL, ytestl, Px, 0xa9,0xf7,(00),0x85,0x85 }, { ATESTW, ytestl, Pe, 0xa9,0xf7,(00),0x85,0x85 }, { ATEXT, ytext, Px },+ { AUCOMISD, yxcmp, Pe, 0x2e },+ { AUCOMISS, yxcmp, Pm, 0x2e },+ { AUNPCKHPD, yxm, Pe, 0x15 },+ { AUNPCKHPS, yxm, Pm, 0x15 },+ { AUNPCKLPD, yxm, Pe, 0x14 },+ { AUNPCKLPS, yxm, Pm, 0x14 }, { AVERR, ydivl, Pm, 0x00,(04) }, { AVERW, ydivl, Pm, 0x00,(05) }, { AWAIT, ynone, Px, 0x9b },@@ -541,6 +842,8 @@
{ AXLAT, ynone, Px, 0xd7 }, { AXORB, yxorb, Pb, 0x34,0x80,(06),0x30,0x32 }, { AXORL, yxorl, Px, 0x83,(06),0x35,0x81,(06),0x31,0x33 },+ { AXORPD, yxm, Pe, 0x57 },+ { AXORPS, yxm, Pm, 0x57 }, { AXORW, yxorl, Pe, 0x83,(06),0x35,0x81,(06),0x31,0x33 }, { AFMOVB, yfmvx, Px, 0xdf,(04) },@@ -649,6 +952,9 @@
{ AFXTRACT, ynone, Px, 0xd9, 0xf4 }, { AFYL2X, ynone, Px, 0xd9, 0xf1 }, { AFYL2XP1, ynone, Px, 0xd9, 0xf9 },+
{ AEND },0
};
+
+Optab* opindex[ALAST+1];
--- a/sys/src/cmd/8l/span.c
+++ b/sys/src/cmd/8l/span.c
@@ -326,7 +326,7 @@
{long v;
- if(a->type >= D_INDIR || a->index != D_NONE) {+ if((a->type >= D_INDIR && a->type < D_M0) || a->index != D_NONE) { if(a->index != D_NONE && a->scale == 0) { if(a->type == D_ADDR) { switch(a->index) {@@ -387,6 +387,26 @@
case D_F0+7:
return Yrf;
+ case D_M0+0:
+ case D_M0+1:
+ case D_M0+2:
+ case D_M0+3:
+ case D_M0+4:
+ case D_M0+5:
+ case D_M0+6:
+ case D_M0+7:
+ return Ymr;
+
+ case D_X0+0:
+ case D_X0+1:
+ case D_X0+2:
+ case D_X0+3:
+ case D_X0+4:
+ case D_X0+5:
+ case D_X0+6:
+ case D_X0+7:
+ return Yxr;
+
case D_NONE:
return Ynone;
@@ -576,7 +596,7 @@
v = a->offset;
t = a->type;
if(a->index != D_NONE) {- if(t >= D_INDIR) {+ if(t >= D_INDIR && t < D_M0) {t -= D_INDIR;
if(t == D_NONE) {*andptr++ = (0 << 6) | (4 << 0) | (r << 3);
@@ -624,7 +644,13 @@
*andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
return;
}
- if(t >= D_INDIR) {+ if(t >= D_M0 && t <= D_X0+7) {+ if(v)
+ goto bad;
+ *andptr++ = (3 << 6) | (reg[t] << 0) | (r << 3);
+ return;
+ }
+ if(t >= D_INDIR && t < D_M0) {t -= D_INDIR;
if(t == D_NONE || D_CS <= t && t <= D_GS) {*andptr++ = (0 << 6) | (5 << 0) | (r << 3);
@@ -835,6 +861,30 @@
print("%P\n", p);}
+static int	/* returns z, advanced by one when a prefix byte was taken from o->op[] */
+mediaop(Optab *o, int op, int osize, int z)	/* emit escape/prefix bytes plus the opcode byte for an MMX/SSE table entry */
+{+ switch(op){+ case Pm:
+ case Pe:
+ case Pf2:
+ case Pf3:
+ if(osize != 1){+ if(op != Pm)	/* multi-byte entry: op is a prefix, not the opcode */
+ *andptr++ = op;
+ *andptr++ = Pm;
+ op = o->op[++z];	/* the real opcode is the next table byte */
+ break;
+ }	/* osize == 1: op is itself the opcode; falls through to default */
+ default:
+ if(andptr == and || andptr[-1] != Pm)	/* add the 0x0f escape unless it was the last byte written */
+ *andptr++ = Pm;
+ break;
+ }
+ *andptr++ = op;
+ return z;
+}
+
void
doasm(Prog *p)
{@@ -851,7 +901,7 @@
if(pre)
*andptr++ = pre;
- o = &optab[p->as];
+ o = opindex[p->as]; if(o == nil) { diag("doasm: no optab entry for %A", p->as); return; }	/* opindex[] is sparse: opcodes without an optab row stay nil */
ft = oclass(&p->from) * Ymax;
tt = oclass(&p->to) * Ymax;
t = o->ytab;
@@ -872,6 +922,12 @@
*andptr++ = Pm;
break;
+ case Pf2: /* xmm opcode escape */
+ case Pf3:
+ *andptr++ = o->prefix;
+ *andptr++ = Pm;
+ break;
+
case Pm: /* opcode escape */
*andptr++ = Pm;
break;
@@ -903,6 +959,30 @@
asmand(&p->from, reg[p->to.type]);
break;
+ case Zm_r_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->from, reg[p->to.type]);
+ break;
+
+ case Zm_r_i_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = p->to.offset;
+ break;
+
+ case Zm_r_3d:
+ *andptr++ = 0x0f;
+ *andptr++ = 0x0f;
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = op;
+ break;
+
+ case Zibm_r:
+ *andptr++ = op;
+ asmand(&p->from, reg[p->to.type]);
+ *andptr++ = p->to.offset;
+ break;
+
case Zaut_r:
*andptr++ = 0x8d; /* leal */
if(p->from.type != D_ADDR)
@@ -924,6 +1004,17 @@
asmand(&p->to, reg[p->from.type]);
break;
+ case Zr_m_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->to, reg[p->from.type]);
+ break;
+
+ case Zr_m_i_xm:
+ mediaop(o, op, t[3], z);
+ asmand(&p->to, reg[p->from.type]);
+ *andptr++ = p->from.offset;
+ break;
+
case Zo_m:
*andptr++ = op;
asmand(&p->to, o->op[z+1]);
@@ -941,6 +1032,12 @@
asmand(&p->to, o->op[z+1]);
*andptr++ = v;
break;
+
+ case Zibo_m_xm:
+ z = mediaop(o, op, t[3], z);
+ asmand(&p->to, o->op[z+1]);
+ *andptr++ = vaddr(&p->from);	/* imm8 operand; this case never assigns v, so read it directly */
+ break;
case Z_ib:
v = vaddr(&p->to);
--
⑨