code: 9ferno

ref: cbf559e618ec9c384302bf6004ad6101e860109a
dir: /libsec/9front-amd64/aesni.s/

View raw version
/*
 * AESOP(o, r1, r2) emits the register-to-register form of an AES-NI
 * instruction as raw bytes: 66 0F 38 <o> <ModRM>.
 * The ModRM byte is built with mod=11, reg=r2 and rm=r1, so r2 is the
 * number of the destination XMM register and r1 the number of the
 * source.  No REX prefix is emitted, so only X0-X7 can be named.
 * The arguments are parenthesized so that an expression passed as r1
 * or r2 cannot be regrouped by operator precedence.
 */
#define	AESOP(o,r1,r2) \
	BYTE	$0x66; \
	BYTE	$0x0F; \
	BYTE	$0x38; \
	BYTE	$(o); \
	BYTE	$(0xC0 | ((r2)<<3) | (r1))

/*
 * Mnemonics for the AES-NI instructions, each a thin wrapper that
 * passes its opcode byte to AESOP.  r1 and r2 are XMM register
 * numbers; AESOP puts r2 in the ModRM reg field and r1 in the rm field.
 */
#define	AESENC(r1,r2)		AESOP(0xDC,r1,r2)
#define	AESENCLAST(r1,r2)	AESOP(0xDD,r1,r2)
#define	AESDEC(r1,r2)		AESOP(0xDE,r1,r2)
#define	AESDECLAST(r1,r2)	AESOP(0xDF,r1,r2)
#define	AESIMC(r1,r2)		AESOP(0xDB,r1,r2)

/*
 * AESKEYGENASSIST(i, r1, r2) emits the register-to-register form of
 * the key generation assist instruction as raw bytes:
 * 66 0F 3A DF <ModRM> <i>.
 * i is the immediate byte (the round constant), r1 the number of the
 * source XMM register (ModRM rm field) and r2 the number of the
 * destination (ModRM reg field).  No REX prefix is emitted, so only
 * X0-X7 can be named.
 * The register arguments are parenthesized so that an expression
 * passed as r1 or r2 cannot be regrouped by operator precedence.
 */
#define	AESKEYGENASSIST(i,r1,r2) \
	BYTE	$0x66; \
	BYTE	$0x0F; \
	BYTE	$0x3A; \
	BYTE	$0xDF; \
	BYTE	$(0xC0 | ((r2)<<3) | (r1)); \
	BYTE	$(i)

/*
 * aesni_init: probe for AES-NI and, when it is present, install the
 * accelerated block routines.
 * CPUID leaf 1 is queried and bit 25 of CX is tested.  If the bit is
 * clear, 0 is returned in AX and nothing is changed.  Otherwise
 * aes_encrypt and aes_decrypt are pointed at AESencrypt and AESdecrypt
 * below, and the address of AESsetup is returned in AX.
 */
TEXT aesni_init(SB), 0, $0
	MOVL	$1, AX
	CPUID
	/* AX = 0 is the return value when the feature bit is clear */
	XORL	AX, AX
	ANDL	$(1<<25), CX
	JZ	noaesni

	/* point the aes function pointers at the AES-NI versions */
	MOVQ	$AESdecrypt<>(SB), BX
	MOVQ	BX, aes_decrypt(SB)
	MOVQ	$AESencrypt<>(SB), BX
	MOVQ	BX, aes_encrypt(SB)

	/* hand the key setup routine back to the caller */
	MOVQ	$AESsetup<>(SB), AX
noaesni:
	RET

/*
 * AESencrypt(rk, Nr, pt, ct): encrypt the 16-byte block at pt into ct.
 * rk (in RARG) points at Nr+1 round keys of 16 bytes each; they are
 * read with MOVO, so rk must be 16-byte aligned.  pt and ct are
 * accessed with MOVOU and need no alignment.
 * Nr below 12 runs 10 rounds, Nr equal to 12 runs 12, anything larger
 * runs 14.  The state is kept in X0; X1-X7 hold round keys.
 */
TEXT AESencrypt<>(SB), 0, $0
	MOVQ	ct+24(FP), DI
	MOVQ	pt+16(FP), SI
	MOVL	Nr+8(FP), CX

	/* state = pt ^ rk[0], then step RARG to rk[1] */
	MOVOU	(SI), X0
	MOVO	(RARG), X7
	PXOR	X7, X0
	ADDQ	$16, RARG

	CMPL	CX, $12
	JEQ	enc_nr12
	JLT	enc_nr10
enc_nr14:
	/* two extra rounds for 14-round keys */
	MOVO	0(RARG), X1
	AESENC(1, 0)
	MOVO	16(RARG), X2
	AESENC(2, 0)
	ADDQ	$32, RARG
enc_nr12:
	/* two extra rounds for 12- and 14-round keys */
	MOVO	0(RARG), X3
	AESENC(3, 0)
	MOVO	16(RARG), X4
	AESENC(4, 0)
	ADDQ	$32, RARG
enc_nr10:
	/* common tail: nine full rounds and the final round */
	MOVO	0(RARG), X1
	MOVO	16(RARG), X2
	MOVO	32(RARG), X3
	AESENC(1, 0)
	AESENC(2, 0)
	AESENC(3, 0)

	MOVO	48(RARG), X4
	MOVO	64(RARG), X5
	MOVO	80(RARG), X6
	MOVO	96(RARG), X7
	AESENC(4, 0)
	AESENC(5, 0)
	AESENC(6, 0)
	AESENC(7, 0)

	MOVO	112(RARG), X1
	MOVO	128(RARG), X2
	MOVO	144(RARG), X3
	AESENC(1, 0)
	AESENC(2, 0)
	AESENCLAST(3, 0)

	MOVOU	X0, (DI)
	RET

/*
 * AESdecrypt(rk, Nr, ct, pt): decrypt the 16-byte block at ct into pt.
 * rk (in RARG) points at Nr+1 decryption round keys of 16 bytes each,
 * in the order AESsetup writes them to drk; they are read with MOVO,
 * so rk must be 16-byte aligned.  ct and pt are accessed with MOVOU
 * and need no alignment.
 * Nr below 12 runs 10 rounds, Nr equal to 12 runs 12, anything larger
 * runs 14.  The state is kept in X0; X1-X7 hold round keys.
 */
TEXT AESdecrypt<>(SB), 0, $0
	MOVQ	pt+24(FP), DI
	MOVQ	ct+16(FP), SI
	MOVL	Nr+8(FP), CX

	/* state = ct ^ rk[0], then step RARG to rk[1] */
	MOVOU	(SI), X0
	MOVO	(RARG), X7
	PXOR	X7, X0
	ADDQ	$16, RARG

	CMPL	CX, $12
	JEQ	dec_nr12
	JLT	dec_nr10
dec_nr14:
	/* two extra rounds for 14-round keys */
	MOVO	0(RARG), X1
	AESDEC(1, 0)
	MOVO	16(RARG), X2
	AESDEC(2, 0)
	ADDQ	$32, RARG
dec_nr12:
	/* two extra rounds for 12- and 14-round keys */
	MOVO	0(RARG), X3
	AESDEC(3, 0)
	MOVO	16(RARG), X4
	AESDEC(4, 0)
	ADDQ	$32, RARG
dec_nr10:
	/* common tail: nine full rounds and the final round */
	MOVO	0(RARG), X1
	MOVO	16(RARG), X2
	MOVO	32(RARG), X3
	AESDEC(1, 0)
	AESDEC(2, 0)
	AESDEC(3, 0)

	MOVO	48(RARG), X4
	MOVO	64(RARG), X5
	MOVO	80(RARG), X6
	MOVO	96(RARG), X7
	AESDEC(4, 0)
	AESDEC(5, 0)
	AESDEC(6, 0)
	AESDEC(7, 0)

	MOVO	112(RARG), X1
	MOVO	128(RARG), X2
	MOVO	144(RARG), X3
	AESDEC(1, 0)
	AESDEC(2, 0)
	AESDECLAST(3, 0)

	MOVOU	X0, (DI)
	RET

/*
 * AESsetup(erk, drk, key, nkey): build both key schedules.
 * erk (in RARG) receives the encryption round keys, written by
 * setupEnc128, setupEnc192 or setupEnc256 according to nkey (16, 24 or
 * 32 bytes).  drk then receives the decryption round keys:
 *	drk[0]  = erk[Nr]
 *	drk[i]  = AESIMC(erk[Nr-i])	for 0 < i < Nr
 *	drk[Nr] = erk[0]
 * Returns the round count Nr in AX (10, 12 or 14), or 0 when nkey is
 * none of the supported lengths, in which case neither schedule is
 * touched.
 * The 16-byte frame passes key to the setupEnc routine at 8(SP); erk
 * is passed on in RARG.  Both schedules are accessed with MOVO and so
 * must be 16-byte aligned.
 */
TEXT AESsetup<>(SB), 0, $16
	/* save erk in its argument slot; it is reloaded after the call */
	MOVQ	RARG, erk+0(FP)
	MOVL	nkey+24(FP), BX
	MOVQ	key+16(FP), DX
	MOVQ	DX, 8(SP)

	CMPL	BX, $16
	JNE	setup_not128
	CALL	setupEnc128<>(SB)
	JMP	setup_invert
setup_not128:
	CMPL	BX, $24
	JNE	setup_not192
	CALL	setupEnc192<>(SB)
	JMP	setup_invert
setup_not192:
	CMPL	BX, $32
	JNE	setup_badkey
	CALL	setupEnc256<>(SB)
setup_invert:
	/* AX = Nr; BX = 16*Nr; SI = &erk[Nr]; DI = &drk[0] */
	MOVQ	drk+8(FP), DI
	MOVQ	erk+0(FP), SI
	MOVL	AX, BX
	SHLL	$4, BX
	ADDQ	BX, SI

	/* drk[0] = erk[Nr] */
	MOVO	(SI), X0
	MOVO	X0, (DI)

	/* drk[1..4] */
	MOVO	-64(SI), X4
	MOVO	-48(SI), X3
	MOVO	-32(SI), X2
	MOVO	-16(SI), X1
	AESIMC(4, 4)
	AESIMC(3, 3)
	AESIMC(2, 2)
	AESIMC(1, 1)
	MOVO	X4, 64(DI)
	MOVO	X3, 48(DI)
	MOVO	X2, 32(DI)
	MOVO	X1, 16(DI)

	/* drk[5..8] */
	MOVO	-128(SI), X4
	MOVO	-112(SI), X3
	MOVO	-96(SI), X2
	MOVO	-80(SI), X1
	AESIMC(4, 4)
	AESIMC(3, 3)
	AESIMC(2, 2)
	AESIMC(1, 1)
	MOVO	X4, 128(DI)
	MOVO	X3, 112(DI)
	MOVO	X2, 96(DI)
	MOVO	X1, 80(DI)

	/* drk[9] */
	MOVO	-144(SI), X1
	AESIMC(1, 1)
	MOVO	X1, 144(DI)

	CMPL	AX, $10
	JEQ	setup_last

	/* drk[10..11], for 12 and 14 rounds */
	MOVO	-176(SI), X2
	MOVO	-160(SI), X1
	AESIMC(2, 2)
	AESIMC(1, 1)
	MOVO	X2, 176(DI)
	MOVO	X1, 160(DI)

	CMPL	AX, $12
	JEQ	setup_last

	/* drk[12..13], for 14 rounds */
	MOVO	-208(SI), X2
	MOVO	-192(SI), X1
	AESIMC(2, 2)
	AESIMC(1, 1)
	MOVO	X2, 208(DI)
	MOVO	X1, 192(DI)
setup_last:
	/* drk[Nr] = erk[0] */
	ADDQ	BX, DI
	SUBQ	BX, SI
	MOVO	(SI), X0
	MOVO	X0, (DI)
	RET
setup_badkey:
	XORL	AX, AX
	RET

/*
 * RK128STEP(rc, off): one AES-128 expansion step.  Runs
 * AESKEYGENASSIST with round constant rc on the current round key in
 * X1 (result in X0), lets rk128 turn X1 into the next round key, and
 * stores that key at off(RARG).
 */
#define	RK128STEP(rc,off) \
	AESKEYGENASSIST(rc, 1, 0); \
	CALL	rk128<>(SB); \
	MOVO	X1, off(RARG)

/*
 * setupEnc128(erk, key): expand the 16-byte key at key into the 11
 * round keys of AES-128, stored at 0..160(RARG).  key is read with
 * MOVOU; the schedule is written with MOVO, so RARG must be 16-byte
 * aligned.  Returns the round count 10 in AX.
 */
TEXT setupEnc128<>(SB), 0, $0
	MOVQ	key+8(FP), SI
	MOVOU	(SI), X1
	MOVO	X1, (RARG)
	RK128STEP(0x01, 16)
	RK128STEP(0x02, 32)
	RK128STEP(0x04, 48)
	RK128STEP(0x08, 64)
	RK128STEP(0x10, 80)
	RK128STEP(0x20, 96)
	RK128STEP(0x40, 112)
	RK128STEP(0x80, 128)
	RK128STEP(0x1b, 144)
	RK128STEP(0x36, 160)
	MOVL	$10, AX
	RET
/*
 * rk128: finish one AES-128 key expansion step.
 * In:  X1 = previous round key, X0 = AESKEYGENASSIST result for it.
 * Out: X1 = next round key.  X0 and X2 are overwritten.
 */
TEXT rk128<>(SB), 0, $0
	/* X1 = k ^ k<<32 ^ k<<64 ^ k<<96, shifting by 4 bytes at a time */
	MOVO	X1, X2
	PSLLO	$4, X2
	PXOR	X2, X1
	PSLLO	$4, X2
	PXOR	X2, X1
	PSLLO	$4, X2
	PXOR	X2, X1

	/* broadcast dword 3 of the assist result and fold it in */
	PSHUFL	$0xff, X0, X0
	PXOR	X0, X1
	RET
	
/*
 * setupEnc192(erk, key): expand a 24-byte key into the 13 round keys
 * of AES-192, stored at 0..192(RARG).  Returns the round count 12 in AX.
 *
 * Each rk192 call advances the schedule by six words: it replaces X1
 * with the next four words and the low half of X2 with the two words
 * after those.  Round keys are 16 bytes, so every other pair of them
 * has to be spliced together from halves with SHUFPD:
 *	SHUFPD $0, X1, X5	X5 = { low half of X5, low half of X1 }
 *	SHUFPD $1, X2, X6	X6 = { high half of X6, low half of X2 }
 * X5 must be copied from X2 before the rk192 call that overwrites X2.
 *
 * NOTE(review): MOVOU 16(SI) reads bytes 16..31 of key although only
 * 24 bytes are key material; presumably the caller's buffer is at
 * least 32 bytes long -- confirm against the caller.
 */
TEXT setupEnc192<>(SB), 0, $0
	MOVQ	key+8(FP), SI
	MOVOU	(SI), X1
	MOVOU	16(SI), X2
	/* round key 0 is the first 16 key bytes */
	MOVO	X1, (RARG)
	/* round keys 1, 2 (spliced) and 3 */
	MOVO	X2, X5
	AESKEYGENASSIST(0x01, 2, 0)
	CALL	rk192<>(SB)
	SHUFPD	$0, X1, X5
	MOVO	X5, 16(RARG)
	MOVO	X1, X6
	SHUFPD	$1, X2, X6
	MOVO	X6, 32(RARG)
	AESKEYGENASSIST(0x02, 2, 0)
	CALL	rk192<>(SB)
	MOVO	X1, 48(RARG)
	/* round keys 4, 5 (spliced) and 6 */
	MOVO	X2, X5
	AESKEYGENASSIST(0x04, 2, 0)
	CALL	rk192<>(SB)
	SHUFPD	$0, X1, X5
	MOVO	X5, 64(RARG)
	MOVO	X1, X6
	SHUFPD	$1, X2, X6
	MOVO	X6, 80(RARG)
	AESKEYGENASSIST(0x08, 2, 0)
	CALL	rk192<>(SB)
	MOVO	X1, 96(RARG)
	/* round keys 7, 8 (spliced) and 9 */
	MOVO	X2, X5
	AESKEYGENASSIST(0x10, 2, 0)
	CALL	rk192<>(SB)
	SHUFPD	$0, X1, X5
	MOVO	X5, 112(RARG)
	MOVO	X1, X6
	SHUFPD	$1, X2, X6
	MOVO	X6, 128(RARG)
	AESKEYGENASSIST(0x20, 2, 0)
	CALL	rk192<>(SB)
	MOVO	X1, 144(RARG)
	/* round keys 10, 11 (spliced) and 12 */
	MOVO	X2, X5
	AESKEYGENASSIST(0x40, 2, 0)
	CALL	rk192<>(SB)
	SHUFPD	$0, X1, X5
	MOVO	X5, 160(RARG)
	MOVO	X1, X6
	SHUFPD	$1, X2, X6
	MOVO	X6, 176(RARG)
	AESKEYGENASSIST(0x80, 2, 0)
	CALL	rk192<>(SB)
	MOVO	X1, 192(RARG)
	MOVL	$12, AX
	RET
/*
 * rk192: advance the AES-192 key schedule by six words.
 * In:  X1 = four schedule words, X2 = the following two words in its
 *      low half, X0 = AESKEYGENASSIST result for X2.
 * Out: X1 = next four words, low half of X2 = the two words after
 *      those.  X0 and X4 are overwritten.
 */
TEXT rk192<>(SB), 0, $0
	/* X1 = k ^ k<<32 ^ k<<64 ^ k<<96, shifting by 4 bytes at a time */
	MOVO	X1, X4
	PSLLO	$4, X4
	PXOR	X4, X1
	PSLLO	$4, X4
	PXOR	X4, X1
	PSLLO	$4, X4
	PXOR	X4, X1

	/* broadcast dword 1 of the assist result and fold it in */
	PSHUFL	$0x55, X0, X0
	PXOR	X0, X1

	/* X2 ^= X2<<32, then fold in dword 3 of the new X1 */
	MOVO	X2, X4
	PSLLO	$4, X4
	PXOR	X4, X2
	PSHUFL	$0xff, X1, X0
	PXOR	X0, X2
	RET
	
/*
 * RK256EVEN(rc, off): derive the next even-numbered round key.  Runs
 * AESKEYGENASSIST with round constant rc on X2 (result in X0), lets
 * rk256_a update X1, and stores X1 at off(RARG).
 */
#define	RK256EVEN(rc,off) \
	AESKEYGENASSIST(rc, 2, 0); \
	CALL	rk256_a<>(SB); \
	MOVO	X1, off(RARG)

/*
 * RK256ODD(off): derive the next odd-numbered round key.  Runs
 * AESKEYGENASSIST with immediate 0 on X1 (result in X0), lets rk256_b
 * update X2, and stores X2 at off(RARG).
 */
#define	RK256ODD(off) \
	AESKEYGENASSIST(0x00, 1, 0); \
	CALL	rk256_b<>(SB); \
	MOVO	X2, off(RARG)

/*
 * setupEnc256(erk, key): expand the 32-byte key at key into the 15
 * round keys of AES-256, stored at 0..224(RARG).  key is read with
 * MOVOU; the schedule is written with MOVO, so RARG must be 16-byte
 * aligned.  Returns the round count 14 in AX.
 */
TEXT setupEnc256<>(SB), 0, $0
	MOVQ	key+8(FP), SI
	MOVOU	(SI), X1
	MOVOU	16(SI), X2
	/* the key itself supplies round keys 0 and 1 */
	MOVO	X1, (RARG)
	MOVO	X2, 16(RARG)
	RK256EVEN(0x01, 32)
	RK256ODD(48)
	RK256EVEN(0x02, 64)
	RK256ODD(80)
	RK256EVEN(0x04, 96)
	RK256ODD(112)
	RK256EVEN(0x08, 128)
	RK256ODD(144)
	RK256EVEN(0x10, 160)
	RK256ODD(176)
	RK256EVEN(0x20, 192)
	RK256ODD(208)
	RK256EVEN(0x40, 224)
	MOVL	$14, AX
	RET
/*
 * rk256_a: compute the next even-numbered AES-256 round key.
 * In:  X1 = round key from two steps back, X0 = AESKEYGENASSIST
 *      result for the previous round key.
 * Out: X1 = new round key.  X0 and X4 are overwritten.
 */
TEXT rk256_a<>(SB), 0, $0
	/* X1 = k ^ k<<32 ^ k<<64 ^ k<<96, shifting by 4 bytes at a time */
	MOVO	X1, X4
	PSLLO	$4, X4
	PXOR	X4, X1
	PSLLO	$4, X4
	PXOR	X4, X1
	PSLLO	$4, X4
	PXOR	X4, X1

	/* broadcast dword 3 of the assist result and fold it in */
	PSHUFL	$0xff, X0, X0
	PXOR	X0, X1
	RET
/*
 * rk256_b: compute the next odd-numbered AES-256 round key.
 * In:  X2 = round key from two steps back, X0 = AESKEYGENASSIST
 *      result for the previous round key.
 * Out: X2 = new round key.  X0 and X4 are overwritten.
 */
TEXT rk256_b<>(SB), 0, $0
	/* X2 = k ^ k<<32 ^ k<<64 ^ k<<96, shifting by 4 bytes at a time */
	MOVO	X2, X4
	PSLLO	$4, X4
	PXOR	X4, X2
	PSLLO	$4, X4
	PXOR	X4, X2
	PSLLO	$4, X4
	PXOR	X4, X2

	/* broadcast dword 2 of the assist result and fold it in */
	PSHUFL	$0xaa, X0, X0
	PXOR	X0, X2
	RET