code: 9ferno

Download patch

ref: 65e8434fe2b4b44e44cda2870c444e86a9407752
parent: 0424afdea0587f8ea18e68d53339432a96eb1974
author: 9ferno <gophone2015@gmail.com>
date: Wed Oct 20 13:16:56 EDT 2021

imported 9front libmp and libsec

--- a/include/libsec.h
+++ b/include/libsec.h
@@ -1,12 +1,14 @@
-#pragma	src	"/usr/inferno/libsec"
+#pragma	lib	"libsec.a"
+#pragma	src	"/libsec"
 
+
 #ifndef _MPINT
 typedef struct mpint mpint;
 #endif
 
-/*/////////////////////////////////////////////////////// */
-/* AES definitions */
-/*/////////////////////////////////////////////////////// */
+/*
+ * AES definitions
+ */
 
 enum
 {
@@ -19,26 +21,57 @@
 struct AESstate
 {
 	u32	setup;
+	u32	offset;
 	int	rounds;
 	int	keybytes;
-	uchar	key[AESmaxkey];		/* unexpanded key */
-	u32	ekey[4*(AESmaxrounds + 1)];	/* encryption key */
-	u32	dkey[4*(AESmaxrounds + 1)];	/* decryption key */
-	uchar	ivec[AESbsize];	/* initialization vector */
+	void	*ekey;				/* expanded encryption round key */
+	void	*dkey;				/* expanded decryption round key */
+	uchar	key[AESmaxkey];			/* unexpanded key */
+	uchar	ivec[AESbsize];			/* initialization vector */
+	uchar	storage[512];			/* storage for expanded keys */
 };
 
-void	setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec);
+/* block ciphers */
+extern void (*aes_encrypt)(u32 rk[], int Nr, uchar pt[16], uchar ct[16]);
+extern void (*aes_decrypt)(u32 rk[], int Nr, uchar ct[16], uchar pt[16]);
+
+void	setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec);
+
 void	aesCBCencrypt(uchar *p, int len, AESstate *s);
 void	aesCBCdecrypt(uchar *p, int len, AESstate *s);
+void	aesCFBencrypt(uchar *p, int len, AESstate *s);
+void	aesCFBdecrypt(uchar *p, int len, AESstate *s);
+void	aesOFBencrypt(uchar *p, int len, AESstate *s);
 
-/*/////////////////////////////////////////////////////// */
-/* Blowfish Definitions */
-/*/////////////////////////////////////////////////////// */
+void	aes_xts_encrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, u32 len);
+void	aes_xts_decrypt(AESstate *tweak, AESstate *ecb, uvlong sectorNumber, uchar *input, uchar *output, u32 len);
 
+#ifdef H	/* presumably so a host macro named H cannot clash with the AESGCMstate.H field below -- TODO confirm */
+#undef H
+#endif
+
+typedef struct AESGCMstate AESGCMstate;
+struct AESGCMstate
+{
+	AESstate;
+
+	u32	H[4];
+	u32	M[16][256][4];
+};
+
+void	setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen);
+void	aesgcm_setiv(AESGCMstate *s, uchar *iv, int ivlen);
+void	aesgcm_encrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], AESGCMstate *s);
+int	aesgcm_decrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], AESGCMstate *s);
+
+/*
+ * Blowfish Definitions
+ */
+
 enum
 {
 	BFbsize	= 8,
-	BFrounds	= 16
+	BFrounds= 16
 };
 
 /* 16-round Blowfish */
@@ -50,8 +83,8 @@
 	uchar	key[56];
 	uchar	ivec[8];
 
-	u32 	pbox[BFrounds+2];
-	u32	sbox[1024];
+	u32int 	pbox[BFrounds+2];
+	u32int	sbox[1024];
 };
 
 void	setupBFstate(BFstate *s, uchar key[], int keybytes, uchar *ivec);
@@ -60,12 +93,82 @@
 void	bfECBencrypt(uchar*, int, BFstate*);
 void	bfECBdecrypt(uchar*, int, BFstate*);
 
-/*/////////////////////////////////////////////////////// */
-/* DES definitions */
-/*/////////////////////////////////////////////////////// */
+/*
+ * Chacha definitions
+ */
 
 enum
 {
+	ChachaBsize=	64,
+	ChachaKeylen=	256/8,
+	ChachaIVlen=	96/8,
+	XChachaIVlen=	192/8,
+};
+
+typedef struct Chachastate Chachastate;
+struct Chachastate
+{
+	union{
+		u32int	input[16];
+		struct {
+			u32int	constant[4];
+			u32int	key[8];
+			u32int	counter;
+			u32int	iv[3];
+		};
+	};
+	u32int	xkey[8];
+	int	rounds;
+	int	ivwords;
+};
+
+void	setupChachastate(Chachastate*, uchar*, u32, uchar*, u32, int);
+void	chacha_setiv(Chachastate *, uchar*);
+void	chacha_setblock(Chachastate*, u64int);
+void	chacha_encrypt(uchar*, u32, Chachastate*);
+void	chacha_encrypt2(uchar*, uchar*, u32, Chachastate*);
+
+void	hchacha(uchar h[32], uchar *key, u32 keylen, uchar nonce[16], int rounds);
+
+void	ccpoly_encrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], Chachastate *cs);
+int	ccpoly_decrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], Chachastate *cs);
+
+/*
+ * Salsa definitions
+ */
+enum
+{
+	SalsaBsize=	64,
+	SalsaKeylen=	256/8,
+	SalsaIVlen=	64/8,
+	XSalsaIVlen=	192/8,
+};
+
+typedef struct Salsastate Salsastate;
+struct Salsastate
+{
+	u32int	input[16];
+	u32int	xkey[8];
+	int	rounds;
+	int	ivwords;
+};
+
+void	setupSalsastate(Salsastate*, uchar*, u32, uchar*, u32, int);
+void	salsa_setiv(Salsastate*, uchar*);
+void	salsa_setblock(Salsastate*, u64int);
+void	salsa_encrypt(uchar*, u32, Salsastate*);
+void	salsa_encrypt2(uchar*, uchar*, u32, Salsastate*);
+
+void	salsa_core(u32int in[16], u32int out[16], int rounds);
+
+void	hsalsa(uchar h[32], uchar *key, u32 keylen, uchar nonce[16], int rounds);
+
+/*
+ * DES definitions
+ */
+
+enum
+{
 	DESbsize=	8
 };
 
@@ -87,7 +190,7 @@
 void	desECBencrypt(uchar*, int, DESstate*);
 void	desECBdecrypt(uchar*, int, DESstate*);
 
-/* for backward compatibility with 7 byte DES key format */
+/* for backward compatibility with 7-byte DES key format */
 void	des56to64(uchar *k56, uchar *k64);
 void	des64to56(uchar *k64, uchar *k56);
 void	key_setup(uchar[7], u32[32]);
@@ -118,106 +221,109 @@
 void	des3ECBencrypt(uchar*, int, DES3state*);
 void	des3ECBdecrypt(uchar*, int, DES3state*);
 
+/* TODO obsolete stuff -- should get rid of the below */
 /* IDEA */
 typedef struct IDEAstate IDEAstate;
 struct IDEAstate
 {
-	uchar	key[16];
-	ushort	edkey[104];
-	uchar	ivec[8];
+	uchar   key[16];
+	ushort  edkey[104];
+	uchar   ivec[8];
 };
 
-void	setupIDEAstate(IDEAstate*, uchar*, uchar*);
-void	idea_key_setup(uchar*, ushort*);
-void	idea_cipher(ushort*, uchar*, int);
+void   setupIDEAstate(IDEAstate*, uchar*, uchar*);
+void   idea_key_setup(uchar*, ushort*);
+void   idea_cipher(ushort*, uchar*, int);
+/* TODO obsolete stuff -- should get rid of the above */
 
+/*
+ * digests
+ */
 
-/*/////////////////////////////////////////////////////// */
-/* digests */
-/*/////////////////////////////////////////////////////// */
-
 enum
 {
-	/* digest lengths */
-	SHA1dlen=	20,
-	MD4dlen=	16,
-	MD5dlen=	16,
+	SHA1dlen=	20,	/* SHA-1 digest length */
+	SHA2_224dlen=	28,	/* SHA-224 digest length */
+	SHA2_256dlen=	32,	/* SHA-256 digest length */
+	SHA2_384dlen=	48,	/* SHA-384 digest length */
+	SHA2_512dlen=	64,	/* SHA-512 digest length */
+	MD4dlen=	16,	/* MD4 digest length */
+	MD5dlen=	16,	/* MD5 digest length */
+	RIPEMD160dlen=	20,	/* RIPEMD-160 digest length */
+	Poly1305dlen=	16,	/* Poly1305 digest length */
 
-	SHA224dlen=	28,
-	SHA256dlen=	32,
-
-	SHA384dlen=	48,
-	SHA512dlen=	64,
-
-	/* block sizes */
-	SHA256bsize=	64,
-	SHA512bsize=	128,
-	Digestbsize=	128,		/* maximum */
+	Hmacblksz	= 64,	/* in bytes; from rfc2104 */
 };
 
 typedef struct DigestState DigestState;
 struct DigestState
 {
-	u64 len;
-	u32 state[5];
-	uchar buf[Digestbsize];
-	int blen;
-	u64 nb128[2];
-	u64 h64[8];
-	u32 h32[8];
-	char malloced;
-	char seeded;
+	uvlong	len;
+	union {
+		u32int	state[16];
+		u64int	bstate[8];
+	};
+	uchar	buf[256];
+	int	blen;
+	char	malloced;
+	char	seeded;
 };
 typedef struct DigestState SHAstate;	/* obsolete name */
 typedef struct DigestState SHA1state;
+typedef struct DigestState SHA2_224state;
+typedef struct DigestState SHA2_256state;
+typedef struct DigestState SHA2_384state;
+typedef struct DigestState SHA2_512state;
 typedef struct DigestState MD5state;
 typedef struct DigestState MD4state;
-typedef struct DigestState SHA256state;
-typedef struct DigestState SHA512state;
 
-DigestState* md4(uchar*, u32, uchar*, DigestState*);
-DigestState* md5(uchar*, u32, uchar*, DigestState*);
-DigestState* sha1(uchar*, u32, uchar*, DigestState*);
-DigestState* sha224(uchar*, u32, uchar*, DigestState*);
-DigestState* sha256(uchar*, u32, uchar*, DigestState*);
-DigestState* sha384(uchar*, u32, uchar*, DigestState*);
-DigestState* sha512(uchar*, u32, uchar*, DigestState*);
-DigestState* hmac_md5(uchar*, u32, uchar*, u32, uchar*, DigestState*);
-DigestState* hmac_sha1(uchar*, u32, uchar*, u32, uchar*, DigestState*);
-char* md5pickle(MD5state*);
-MD5state* md5unpickle(char*);
-char* sha1pickle(SHA1state*);
-SHA1state* sha1unpickle(char*);
+DigestState*	md4(uchar*, u32, uchar*, DigestState*);
+DigestState*	md5(uchar*, u32, uchar*, DigestState*);
+DigestState*	ripemd160(uchar *, u32, uchar *, DigestState *);
+DigestState*	sha1(uchar*, u32, uchar*, DigestState*);
+DigestState*	sha2_224(uchar*, u32, uchar*, DigestState*);
+DigestState*	sha2_256(uchar*, u32, uchar*, DigestState*);
+DigestState*	sha2_384(uchar*, u32, uchar*, DigestState*);
+DigestState*	sha2_512(uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_x(uchar *p, u32 len, uchar *key, u32 klen,
+			uchar *digest, DigestState *s,
+			DigestState*(*x)(uchar*, u32, uchar*, DigestState*),
+			int xlen);
+DigestState*	hmac_md5(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_sha1(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_sha2_224(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_sha2_256(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_sha2_384(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	hmac_sha2_512(uchar*, u32, uchar*, u32, uchar*, DigestState*);
+DigestState*	poly1305(uchar*, u32, uchar*, u32, uchar*, DigestState*);
 
-/*/////////////////////////////////////////////////////// */
-/* random number generation */
-/*/////////////////////////////////////////////////////// */
+/*
+ * random number generation
+ */
 void	genrandom(uchar *buf, int nbytes);
-void	_genrandomqlock(void);
-void	_genrandomqunlock(void);
 void	prng(uchar *buf, int nbytes);
 u32	fastrand(void);
 u32	nfastrand(u32);
 
-/*/////////////////////////////////////////////////////// */
-/* primes */
-/*/////////////////////////////////////////////////////// */
-void	genprime(mpint *p, int n, int accuracy); /* generate an n bit probable prime */
-void	gensafeprime(mpint *p, mpint *alpha, int n, int accuracy);	/* prime and generator */
-void	genstrongprime(mpint *p, int n, int accuracy);	/* generate an n bit strong prime */
+/*
+ * primes
+ */
+void	genprime(mpint *p, int n, int accuracy); /* generate n-bit probable prime */
+void	gensafeprime(mpint *p, mpint *alpha, int n, int accuracy); /* prime & generator */
+void	genstrongprime(mpint *p, int n, int accuracy); /* generate n-bit strong prime */
 void	DSAprimes(mpint *q, mpint *p, uchar seed[SHA1dlen]);
 int	probably_prime(mpint *n, int nrep);	/* miller-rabin test */
-int	smallprimetest(mpint *p);		/* returns -1 if not prime, 0 otherwise */
+int	smallprimetest(mpint *p);  /* returns -1 if not prime, 0 otherwise */
 
-/*/////////////////////////////////////////////////////// */
-/* rc4 */
-/*/////////////////////////////////////////////////////// */
+/*
+ * rc4
+ */
 typedef struct RC4state RC4state;
 struct RC4state
 {
-	 uchar state[256];
-	 uchar x;
-	 uchar y;
+	 uchar	state[256];
+	 uchar	x;
+	 uchar	y;
 };
 
 void	setupRC4state(RC4state*, uchar*, int);
@@ -225,9 +331,9 @@
 void	rc4skip(RC4state*, int);
 void	rc4back(RC4state*, int);
 
-/*/////////////////////////////////////////////////////// */
-/* rsa */
-/*/////////////////////////////////////////////////////// */
+/*
+ * rsa
+ */
 typedef struct RSApub RSApub;
 typedef struct RSApriv RSApriv;
 typedef struct PEMChain PEMChain;
@@ -255,9 +361,9 @@
 };
 
 struct PEMChain{
-	PEMChain *next;
-	uchar *pem;
-	int pemlen;
+	PEMChain*next;
+	uchar	*pem;
+	int	pemlen;
 };
 
 RSApriv*	rsagen(int nlen, int elen, int rounds);
@@ -270,18 +376,31 @@
 void		rsaprivfree(RSApriv*);
 RSApub*		rsaprivtopub(RSApriv*);
 RSApub*		X509toRSApub(uchar*, int, char*, int);
+RSApub*		X509reqtoRSApub(uchar*, int, char*, int);
+RSApub*		asn1toRSApub(uchar*, int);
 RSApriv*	asn1toRSApriv(uchar*, int);
 void		asn1dump(uchar *der, int len);
 uchar*		decodePEM(char *s, char *type, int *len, char **new_s);
 PEMChain*	decodepemchain(char *s, char *type);
-uchar*		X509gen(RSApriv *priv, char *subj, u32 valid[2], int *certlen);
-uchar*		X509req(RSApriv *priv, char *subj, int *certlen);
-char*		X509verify(uchar *cert, int ncert, RSApub *pk);
+uchar*		X509rsagen(RSApriv *priv, char *subj, u32 valid[2], int *certlen);
+uchar*		X509rsareq(RSApriv *priv, char *subj, int *certlen);
+char*		X509rsaverify(uchar *cert, int ncert, RSApub *pk);
+char*		X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk);
+
 void		X509dump(uchar *cert, int ncert);
 
-/*/////////////////////////////////////////////////////// */
-/* elgamal */
-/*/////////////////////////////////////////////////////// */
+mpint*		pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype);
+int		asn1encodeRSApub(RSApub *pk, uchar *buf, int len);
+int		asn1encodeRSApriv(RSApriv *k, uchar *buf, int len);
+int		asn1encodedigest(DigestState* (*fun)(uchar*, u32, uchar*, DigestState*),
+			uchar *digest, uchar *buf, int len);
+
+int		X509digestSPKI(uchar *, int, DigestState* (*)(uchar*, u32, uchar*, DigestState*), uchar *);
+
+/*
+ * elgamal
+ */
 typedef struct EGpub EGpub;
 typedef struct EGpriv EGpriv;
 typedef struct EGsig EGsig;
@@ -298,7 +417,7 @@
 struct EGpriv
 {
 	EGpub	pub;
-	mpint	*secret; /* (decryption key) */
+	mpint	*secret;	/* (decryption key) */
 };
 
 /* signature */
@@ -320,9 +439,9 @@
 void		egsigfree(EGsig*);
 EGpub*		egprivtopub(EGpriv*);
 
-/*/////////////////////////////////////////////////////// */
-/* dsa */
-/*/////////////////////////////////////////////////////// */
+/*
+ * dsa
+ */
 typedef struct DSApub DSApub;
 typedef struct DSApriv DSApriv;
 typedef struct DSAsig DSAsig;
@@ -340,7 +459,7 @@
 struct DSApriv
 {
 	DSApub	pub;
-	mpint	*secret; /* (decryption key) */
+	mpint	*secret;	/* (decryption key) */
 };
 
 /* signature */
@@ -360,21 +479,31 @@
 void		dsasigfree(DSAsig*);
 DSApub*		dsaprivtopub(DSApriv*);
 
-/*/////////////////////////////////////////////////////// */
-/* TLS */
-/*/////////////////////////////////////////////////////// */
+/*
+ * TLS
+ */
 typedef struct Thumbprint{
 	struct Thumbprint *next;
-	uchar sha1[SHA1dlen];
+	uchar	hash[SHA2_256dlen];
+	uchar	len;
 } Thumbprint;
 
 typedef struct TLSconn{
-	char dir[40];  /* connection directory */
-	uchar *cert;   /* certificate (local on input, remote on output) */
-	uchar *sessionID;
-	int certlen, sessionIDlen;
-	int (*trace)(char*fmt, ...);
-	PEMChain *chain; /* optional extra certificate evidence for servers to present */
+	char	dir[40];	/* connection directory */
+	uchar	*cert;	/* certificate (local on input, remote on output) */
+	uchar	*sessionID;
+	uchar	*psk;
+	int	certlen;
+	int	sessionIDlen;
+	int	psklen;
+	int	(*trace)(char*fmt, ...);
+	PEMChain*chain;	/* optional extra certificate evidence for servers to present */
+	char	*sessionType;
+	uchar	*sessionKey;
+	int	sessionKeylen;
+	char	*sessionConst;
+	char	*serverName;
+	char	*pskID;
 } TLSconn;
 
 /* tlshand.c */
@@ -382,10 +511,104 @@
 int tlsServer(int fd, TLSconn *c);
 
 /* thumb.c */
-Thumbprint* initThumbprints(char *ok, char *crl);
+Thumbprint* initThumbprints(char *ok, char *crl, char *tag);
 void	freeThumbprints(Thumbprint *ok);
-int		okThumbprint(uchar *sha1, Thumbprint *ok);
+int	okThumbprint(uchar *hash, int len, Thumbprint *ok);
+int	okCertificate(uchar *cert, int len, Thumbprint *ok);
 
 /* readcert.c */
 uchar	*readcert(char *filename, int *pcertlen);
-PEMChain *readcertchain(char *filename);
+PEMChain*readcertchain(char *filename);
+
+typedef struct ECpoint{
+	int inf;
+	mpint *x;
+	mpint *y;
+	mpint *z;	/* nil when using affine coordinates */
+} ECpoint;
+
+typedef ECpoint ECpub;
+typedef struct ECpriv{
+	ECpoint;
+	mpint *d;
+} ECpriv;
+
+typedef struct ECdomain{
+	mpint *p;
+	mpint *a;
+	mpint *b;
+	ECpoint G;
+	mpint *n;
+	mpint *h;
+} ECdomain;
+
+void	ecdominit(ECdomain *, void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h));
+void	ecdomfree(ECdomain *);
+
+void	ecassign(ECdomain *, ECpoint *old, ECpoint *new);
+void	ecadd(ECdomain *, ECpoint *a, ECpoint *b, ECpoint *s);
+void	ecmul(ECdomain *, ECpoint *a, mpint *k, ECpoint *s);
+ECpoint*	strtoec(ECdomain *, char *, char **, ECpoint *);
+ECpriv*	ecgen(ECdomain *, ECpriv*);
+int	ecverify(ECdomain *, ECpoint *);
+int	ecpubverify(ECdomain *, ECpub *);
+void	ecdsasign(ECdomain *, ECpriv *, uchar *, int, mpint *, mpint *);
+int	ecdsaverify(ECdomain *, ECpub *, uchar *, int, mpint *, mpint *);
+void	base58enc(uchar *, char *, int);
+int	base58dec(char *, uchar *, int);
+
+ECpub*	ecdecodepub(ECdomain *dom, uchar *, int);
+int	ecencodepub(ECdomain *dom, ECpub *, uchar *, int);
+void	ecpubfree(ECpub *);
+
+ECpub*	X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom);
+char*	X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pub);
+char*	X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub);
+
+/* curves */
+void	secp256r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp256k1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+void	secp384r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+
+/*
+ * Diffie-Hellman key exchange
+ */
+
+typedef struct DHstate DHstate;
+struct DHstate
+{
+	mpint	*g;	/* base g */
+	mpint	*p;	/* large prime */
+	mpint	*q;	/* subgroup prime */
+	mpint	*x;	/* random secret */
+	mpint	*y;	/* public key y = g**x % p */
+};
+
+/* generate new public key: y = g**x % p */
+mpint* dh_new(DHstate *dh, mpint *p, mpint *q, mpint *g);
+
+/* calculate shared key: k = y**x % p */
+mpint* dh_finish(DHstate *dh, mpint *y);
+
+/* Curve25519 elliptic curve, public key function */
+void curve25519(uchar mypublic[32], uchar secret[32], uchar basepoint[32]);
+
+/* Curve25519 diffie hellman */
+void curve25519_dh_new(uchar x[32], uchar y[32]);
+int curve25519_dh_finish(uchar x[32], uchar y[32], uchar z[32]);
+
+/* password-based key derivation function 2 (rfc2898) */
+void pbkdf2_x(uchar *p, u32 plen, uchar *s, u32 slen, u32 rounds, uchar *d, u32 dlen,
+	DigestState* (*x)(uchar*, u32, uchar*, u32, uchar*, DigestState*), int xlen);
+
+/* scrypt password-based key derivation function */
+char* scrypt(uchar *p, u32 plen, uchar *s, u32 slen,
+	u32 N, u32 R, u32 P,
+	uchar *d, u32 dlen);
+
+/* hmac-based key derivation function (rfc5869) */
+void hkdf_x(uchar *salt, u32 nsalt, uchar *info, u32 ninfo, uchar *key, u32 nkey, uchar *d, u32 dlen,
+	DigestState* (*x)(uchar*, u32, uchar*, u32, uchar*, DigestState*), int xlen);
+
+/* timing safe memcmp() */
+int tsmemcmp(void*, void*, u32);
--- a/include/mp.h
+++ b/include/mp.h
@@ -1,11 +1,13 @@
-#pragma	src	"/usr/inferno/src/libmp"
+#pragma	src	"/libmp"
+#pragma	lib	"libmp.a"
 
 #define _MPINT 1
 
-/* the code assumes mpdigit to be at least an int */
-/* mpdigit must be an atomic type.  mpdigit is defined */
-/* in the architecture specific u.h */
-
+/*
+ * the code assumes mpdigit to be at least an int
+ * mpdigit must be an atomic type.  mpdigit is defined
+ * in the architecture specific u.h
+ */
 typedef struct mpint mpint;
 
 struct mpint
@@ -19,7 +21,11 @@
 
 enum
 {
-	MPstatic=	0x01,
+	MPstatic=	0x01,	/* static constant */
+	MPnorm=		0x02,	/* normalization status */
+	MPtimesafe=	0x04,	/* request time invariant computation */
+	MPfield=	0x08,	/* this mpint is a field modulus */
+
 	Dbytes=		sizeof(mpdigit),	/* bytes per digit */
 	Dbits=		Dbytes*8		/* bits per digit */
 };
@@ -29,12 +35,14 @@
 mpint*	mpnew(int n);		/* create a new mpint with at least n bits */
 void	mpfree(mpint *b);
 void	mpbits(mpint *b, int n);	/* ensure that b has at least n bits */
-void	mpnorm(mpint *b);		/* dump leading zeros */
+mpint*	mpnorm(mpint *b);		/* dump leading zeros */
 mpint*	mpcopy(mpint *b);
 void	mpassign(mpint *old, mpint *new);
 
 /* random bits */
 mpint*	mprand(int bits, void (*gen)(uchar*, int), mpint *b);
+/* return uniform random [0..n-1] */
+mpint*	mpnrand(mpint *n, void (*gen)(uchar*, int), mpint *b);
 
 /* conversion */
 mpint*	strtomp(char*, char**, int, mpint*);	/* ascii */
@@ -42,8 +50,10 @@
 char*	mptoa(mpint*, int, char*, int);
 mpint*	letomp(uchar*, uint, mpint*);	/* byte array, little-endian */
 int	mptole(mpint*, uchar*, uint, uchar**);
-mpint*	betomp(uchar*, uint, mpint*);	/* byte array, little-endian */
+void	mptolel(mpint *b, uchar *p, int n);
+mpint*	betomp(uchar*, uint, mpint*);	/* byte array, big-endian */
 int	mptobe(mpint*, uchar*, uint, uchar**);
+void	mptober(mpint *b, uchar *p, int n);
 uint	mptoui(mpint*);			/* unsigned int */
 mpint*	uitomp(uint, mpint*);
 int	mptoi(mpint*);			/* int */
@@ -52,6 +62,8 @@
 mpint*	uvtomp(uvlong, mpint*);
 vlong	mptov(mpint*);			/* vlong */
 mpint*	vtomp(vlong, mpint*);
+double	mptod(mpint*);			/* double */
+mpint*	dtomp(double, mpint*);
 
 /* divide 2 digits by one */
 void	mpdigdiv(mpdigit *dividend, mpdigit divisor, mpdigit *quotient);
@@ -66,6 +78,21 @@
 void	mpexp(mpint *b, mpint *e, mpint *m, mpint *res);	/* res = b**e mod m */
 void	mpmod(mpint *b, mpint *m, mpint *remainder);	/* remainder = b mod m */
 
+/* logical operations */
+void	mpand(mpint *b1, mpint *b2, mpint *res);
+void	mpbic(mpint *b1, mpint *b2, mpint *res);
+void	mpor(mpint *b1, mpint *b2, mpint *res);
+void	mpnot(mpint *b, mpint *res);
+void	mpxor(mpint *b1, mpint *b2, mpint *res);
+void	mptrunc(mpint *b, int n, mpint *res);
+void	mpxtend(mpint *b, int n, mpint *res);
+void	mpasr(mpint *b, int shift, mpint *res);
+
+/* modular arithmetic, time invariant when 0≤b1≤m-1 and 0≤b2≤m-1 */
+void	mpmodadd(mpint *b1, mpint *b2, mpint *m, mpint *sum);	/* sum = (b1+b2) % m */
+void	mpmodsub(mpint *b1, mpint *b2, mpint *m, mpint *diff);	/* diff = (b1-b2) % m */
+void	mpmodmul(mpint *b1, mpint *b2, mpint *m, mpint *prod);	/* prod = (b1*b2) % m */
+
 /* quotient = dividend/divisor, remainder = dividend % divisor */
 void	mpdiv(mpint *dividend, mpint *divisor,  mpint *quotient, mpint *remainder);
 
@@ -72,6 +99,12 @@
 /* return neg, 0, pos as b1-b2 is neg, 0, pos */
 int	mpcmp(mpint *b1, mpint *b2);
 
+/* res = s != 0 ? b1 : b2 */
+void	mpsel(int s, mpint *b1, mpint *b2, mpint *res);
+
+/* return n! */
+mpint*	mpfactorial(ulong n);
+
 /* extended gcd return d, x, and y, s.t. d = gcd(a,b) and ax+by = d */
 void	mpextendedgcd(mpint *a, mpint *b, mpint *d, mpint *x, mpint *y);
 
@@ -101,12 +134,14 @@
 /* prereq: p has room for n+1 digits */
 int	mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p);
 
-/* p[0:alen*blen-1] = a[0:alen-1] * b[0:blen-1] */
+/* p[0:alen+blen-1] = a[0:alen-1] * b[0:blen-1] */
 /* prereq: alen >= blen, p has room for m*n digits */
 void	mpvecmul(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *p);
+void	mpvectsmul(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *p);
 
 /* sign of a - b or zero if the same */
 int	mpveccmp(mpdigit *a, int alen, mpdigit *b, int blen);
+int	mpvectscmp(mpdigit *a, int alen, mpdigit *b, int blen);
 
 /* divide the 2 digit dividend by the one digit divisor and stick in quotient */
 /* we assume that the result is one digit - overflow is all 1's */
@@ -122,6 +157,8 @@
 					/*  twixt residues and mpint */
 typedef struct CRTres	CRTres;		/* residue form of an mpint */
 
+#pragma incomplete CRTpre
+
 struct CRTres
 {
 	int	n;		/* number of residues */
@@ -134,6 +171,18 @@
 void	crtprefree(CRTpre*);
 void	crtresfree(CRTres*);
 
+/* fast field arithmetic */
+typedef struct Mfield	Mfield;
 
+struct Mfield
+{
+	mpint;
+	int	(*reduce)(Mfield*, mpint*, mpint*);
+};
+
+mpint *mpfield(mpint*);
+
+Mfield *gmfield(mpint*);
+Mfield *cnfield(mpint*);
+
 #pragma	varargck	type	"B"	mpint*
-#pragma varargck	type	"U" mpint*
--- a/libkeyring/dsaalg.c
+++ b/libkeyring/dsaalg.c
@@ -81,11 +81,11 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", dsa->pub.p);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->pub.q);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->pub.alpha);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->pub.key);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->secret);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->pub.p);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->pub.q);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->pub.alpha);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->pub.key);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->secret);
 	*cp = 0;
 
 	return cp - buf;
@@ -101,10 +101,10 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", dsa->p);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->q);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->alpha);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->key);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->p);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->q);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->alpha);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->key);
 	*cp = 0;
 
 	return cp - buf;
@@ -120,8 +120,8 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", dsa->r);
-	cp += snprint(cp, ep - cp, "%U\n", dsa->s);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->r);
+	cp += snprint(cp, ep - cp, "%B\n", dsa->s);
 	*cp = 0;
 
 	return cp - buf;
--- a/libkeyring/egalg.c
+++ b/libkeyring/egalg.c
@@ -78,10 +78,10 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", eg->pub.p);
-	cp += snprint(cp, ep - cp, "%U\n", eg->pub.alpha);
-	cp += snprint(cp, ep - cp, "%U\n", eg->pub.key);
-	cp += snprint(cp, ep - cp, "%U\n", eg->secret);
+	cp += snprint(cp, ep - cp, "%B\n", eg->pub.p);
+	cp += snprint(cp, ep - cp, "%B\n", eg->pub.alpha);
+	cp += snprint(cp, ep - cp, "%B\n", eg->pub.key);
+	cp += snprint(cp, ep - cp, "%B\n", eg->secret);
 	*cp = 0;
 
 	return cp - buf;
@@ -97,9 +97,9 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", eg->p);
-	cp += snprint(cp, ep - cp, "%U\n", eg->alpha);
-	cp += snprint(cp, ep - cp, "%U\n", eg->key);
+	cp += snprint(cp, ep - cp, "%B\n", eg->p);
+	cp += snprint(cp, ep - cp, "%B\n", eg->alpha);
+	cp += snprint(cp, ep - cp, "%B\n", eg->key);
 	*cp = 0;
 
 	return cp - buf;
@@ -115,8 +115,8 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", eg->r);
-	cp += snprint(cp, ep - cp, "%U\n", eg->s);
+	cp += snprint(cp, ep - cp, "%B\n", eg->r);
+	cp += snprint(cp, ep - cp, "%B\n", eg->s);
 	*cp = 0;
 
 	return cp - buf;
--- a/libkeyring/rsaalg.c
+++ b/libkeyring/rsaalg.c
@@ -81,14 +81,14 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", rsa->pub.n);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->pub.ek);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->dk);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->p);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->q);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->kp);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->kq);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->c2);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->pub.n);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->pub.ek);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->dk);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->p);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->q);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->kp);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->kq);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->c2);
 	*cp = 0;
 
 	return cp - buf;
@@ -103,8 +103,8 @@
 	rsa = vrsa;
 	ep = buf + len - 1;
 	cp = buf;
-	cp += snprint(cp, ep - cp, "%U\n", rsa->n);
-	cp += snprint(cp, ep - cp, "%U\n", rsa->ek);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->n);
+	cp += snprint(cp, ep - cp, "%B\n", rsa->ek);
 	*cp = 0;
 
 	return cp - buf;
@@ -120,7 +120,7 @@
 	ep = buf + len - 1;
 	cp = buf;
 
-	cp += snprint(cp, ep - cp, "%U\n", rsa);
+	cp += snprint(cp, ep - cp, "%B\n", rsa);
 	*cp = 0;
 
 	return cp - buf;
--- /dev/null
+++ b/libmp/port/cnfield.c
@@ -1,0 +1,114 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/*
+ * fast reduction for Crandall numbers of the form 2^n - c, with c small enough to fit in one mpdigit after normalization
+ */
+
+enum {
+	MAXDIG = 1024 / Dbits,
+};
+
+typedef struct CNfield CNfield;
+struct CNfield
+{
+	Mfield;	
+
+	mpint	m[1];
+
+	int	s;
+	mpdigit	c;
+};
+
+static int
+cnreduce(Mfield *m, mpint *a, mpint *r)	/* r = a reduced modulo the CNfield m; returns 0 on success, -1 if a is too large for the scratch arrays */
+{
+	mpdigit q[MAXDIG-1], t[MAXDIG], d;	/* q = high part still to fold, t = scratch product/difference, d = select mask */
+	CNfield *f = (CNfield*)m;
+	int qn, tn, k;
+
+	k = f->top;	/* number of digits in the modulus */
+	if((a->top - k) >= MAXDIG)
+		return -1;	/* high part would not fit the MAXDIG-sized scratch arrays */
+
+	mpleft(a, f->s, r);	/* r = a << s, to match the pre-shifted modulus kept in f->m */
+	if(r->top <= k)
+		mpbits(r, (k+1)*Dbits);	/* ensure r has at least k+1 digits */
+
+	/* q = hi(r) */
+	qn = r->top - k;
+	memmove(q, r->p+k, qn*Dbytes);
+
+	/* r = lo(r) */
+	r->top = k;
+	r->sign = 1;
+
+	do {
+		/* t = q*c */
+		tn = qn+1;
+		memset(t, 0, tn*Dbytes);
+		mpvecdigmuladd(q, qn, f->c, t);
+
+		/* q = hi(t) */
+		qn = tn - k;
+		if(qn <= 0) qn = 0;
+		else memmove(q, t+k, qn*Dbytes);
+
+		/* r += lo(t) */
+		if(tn > k)
+			tn = k;
+		mpvecadd(r->p, k, t, tn, r->p);
+
+		/* if(r >= m) r -= m */
+		mpvecsub(r->p, k+1, f->m->p, k, t);
+		d = t[k];	/* top digit of r-m, used as the select mask below */
+		for(tn = 0; tn < k; tn++)	/* branch-free select: bits of r where d is set, bits of t elsewhere */
+			r->p[tn] = (r->p[tn] & d) | (t[tn] & ~d);
+	} while(qn > 0);	/* repeat while the fold left a new high part in q */
+
+	if(f->s != 0)
+		mpright(r, f->s, r);	/* undo the initial left shift */
+	mpnorm(r);
+
+	return 0;
+}
+
+Mfield*
+cnfield(mpint *N)	/* build a CNfield for modulus N; returns nil if N has an unsupported size, is not 2^n - c with single-digit c, or allocation fails */
+{
+	mpint *M, *C;
+	CNfield *f;
+	mpdigit d;
+	int s;
+
+	if(N->top <= 2 || N->top >= MAXDIG)	/* only moduli of 3..MAXDIG-1 digits are accepted */
+		return nil;
+	f = nil;
+	d = N->p[N->top-1];
+	for(s = 0; (d & (mpdigit)1<<Dbits-1) == 0; s++)	/* s = number of leading zero bits in N's top digit */
+		d <<= 1;
+	C = mpnew(0);
+	M = mpcopy(N);
+	mpleft(N, s, M);	/* M = N << s, so the top bit of its top digit is set */
+	mpleft(mpone, M->top*Dbits, C);	/* C = 2^(M->top*Dbits) */
+	mpsub(C, M, C);	/* C = 2^n - M */
+	if(C->top != 1)	/* c must fit in exactly one digit */
+		goto out;
+	f = mallocz(sizeof(CNfield) + M->top*sizeof(mpdigit), 1);	/* digits of f->m are stored right after the struct */
+	if(f == nil)
+		goto out;
+	f->s = s;
+	f->c = C->p[0];
+	f->m->size = M->top;
+	f->m->p = (mpdigit*)&f[1];
+	mpassign(M, f->m);	/* f->m = shifted modulus, used by cnreduce for its final subtract */
+	mpassign(N, f);	/* the field object itself takes the value of N */
+	f->reduce = cnreduce;
+	f->flags |= MPfield;
+out:	/* also reached on success: M and C are temporaries */
+	mpfree(M);
+	mpfree(C);
+
+	return f;
+}
--- /dev/null
+++ b/libmp/port/gmfield.c
@@ -1,0 +1,173 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/*
+ * fast reduction for generalized Mersenne numbers (GM)
+ * using a series of additions and subtractions.
+ */
+
+enum {
+	MAXDIG = 1024/Dbits,
+};
+
+typedef struct GMfield GMfield;
+struct GMfield
+{
+	Mfield;	
+
+	mpint	m2[1];
+
+	int	nadd;
+	int	nsub;
+	int	indx[256];
+};
+
+static int
+gmreduce(Mfield *m, mpint *a, mpint *r)	/* r = a reduced modulo the GMfield m; returns 0 on success, -1 if a is not smaller in magnitude than g->m2 */
+{
+	GMfield *g = (GMfield*)m;
+	mpdigit d0, t[MAXDIG];
+	int i, j, d, *x;
+
+	if(mpmagcmp(a, g->m2) >= 0)	/* g->m2 is set to N*N by gmfield */
+		return -1;
+
+	if(a != r)
+		mpassign(a, r);
+
+	d = g->top;	/* number of digits in the modulus */
+	mpbits(r, (d+1)*Dbits*2);	/* room for 2*(d+1) digits: the value plus scratch at r->p+d+1 */
+	memmove(t+d, r->p+d, d*Dbytes);	/* t[d..2d-1] = high half of a */
+
+	r->sign = 1;
+	r->top = d;	/* r = low half of a */
+	r->p[d] = 0;	/* digit d collects carries and borrows */
+
+	if(g->nsub > 0)
+		mpvecdigmuladd(g->p, d, g->nsub, r->p);	/* r += nsub*modulus; presumably keeps r non-negative through the subtractions below -- TODO confirm */
+
+	x = g->indx;	/* d indices per term: nadd addition terms first, then nsub subtraction terms */
+	for(i=0; i<g->nadd; i++){
+		t[0] = 0;	/* index 0 in indx selects a zero digit */
+		d0 = t[*x++];	/* hold digit 0 aside so t[0] stays zero while gathering */
+		for(j=1; j<d; j++)
+			t[j] = t[*x++];	/* gather digit j of the term */
+		t[0] = d0;
+
+		mpvecadd(r->p, d+1, t, d, r->p);	/* r += term */
+	}
+
+	for(i=0; i<g->nsub; i++){
+		t[0] = 0;	/* index 0 in indx selects a zero digit */
+		d0 = t[*x++];	/* hold digit 0 aside so t[0] stays zero while gathering */
+		for(j=1; j<d; j++)
+			t[j] = t[*x++];	/* gather digit j of the term */
+		t[0] = d0;
+
+		mpvecsub(r->p, d+1, t, d, r->p);	/* r -= term */
+	}
+
+	mpvecdigmulsub(g->p, d, r->p[d], r->p);	/* r -= r->p[d] * modulus */
+	r->p[d] = 0;
+
+	mpvecsub(r->p, d+1, g->p, d, r->p+d+1);	/* trial subtract: r->p[d+1..2d+1] = r - modulus */
+	d0 = r->p[2*d+1];	/* top digit of the trial result, used as the select mask */
+	for(j=0; j<d; j++)	/* branch-free select between r and r - modulus */
+		r->p[j] = (r->p[j] & d0) | (r->p[j+d+1] & ~d0);
+
+	mpnorm(r);
+
+	return 0;
+}
+
+Mfield*
+gmfield(mpint *N)	/* build a GMfield for modulus N; returns nil if N does not qualify or an allocation fails */
+{
+	int i,j,d, s, *C, *X, *x, *e;
+	mpint *M, *T;
+	GMfield *g;
+
+	d = N->top;
+	if(d <= 2 || d > MAXDIG/2 || (mpsignif(N) % Dbits) != 0)	/* need 3..MAXDIG/2 digits and mpsignif(N) a multiple of Dbits */
+		return nil;
+	g = nil;
+	T = mpnew(0);
+	M = mpcopy(N);
+	C = malloc(sizeof(int)*(d+1));	/* d+1 signed digit coefficients of N */
+	X = malloc(sizeof(int)*(d*d));	/* d x d coefficient matrix, row-major */
+	if(C == nil || X == nil)
+		goto out;
+
+	for(i=0; i<=d; i++){	/* peel M apart into small signed digits */
+		if((M->p[i]>>8) != 0 && (~M->p[i]>>8) != 0)	/* reject unless the bits above the low 8 are all clear or all set */
+			goto out;
+		j = M->p[i];
+		C[d - i] = -j;	/* stored negated, most significant first */
+		itomp(j, T);
+		mpleft(T, i*Dbits, T);
+		mpsub(M, T, M);	/* remove this digit's contribution from M */
+	}
+	for(j=0; j<d; j++)	/* row 0 of X comes straight from C */
+		X[j] = C[d-j];
+	for(i=1; i<d; i++){	/* each further row is derived from the previous row and C */
+		X[d*i] = X[d*(i-1) + d-1]*C[d];
+		for(j=1; j<d; j++)
+			X[d*i + j] = X[d*(i-1) + j-1] + X[d*(i-1) + d-1]*C[d-j];
+	}
+	g = mallocz(sizeof(GMfield) + (d+1)*sizeof(mpdigit)*2, 1);	/* digits of g->m2 are stored right after the struct */
+	if(g == nil)
+		goto out;
+
+	g->m2->p = (mpdigit*)&g[1];
+	g->m2->size = d*2+1;
+	mpmul(N, N, g->m2);	/* m2 = N*N, the input bound checked by gmreduce */
+	mpassign(N, g);	/* the field object itself takes the value of N */
+	g->reduce = gmreduce;
+	g->flags |= MPfield;
+
+	s = 0;
+	x = g->indx;
+	e = x + nelem(g->indx) - d;	/* last position where a full d-index term still fits */
+	for(g->nadd=0; x <= e; x += d, g->nadd++){	/* pack positive coefficients into addition terms */
+		s = 0;	/* set when this pass places at least one index */
+		for(i=0; i<d; i++){
+			for(j=0; j<d; j++){
+				if(X[d*i+j] > 0 && x[j] == 0){
+					X[d*i+j]--;
+					x[j] = d+i;	/* index into gmreduce's t[]: high digit i */
+					s = 1;
+					break;
+				}
+			}
+		}
+		if(s == 0)	/* nothing placed: no positive coefficients remain */
+			break;
+	}
+	for(g->nsub=0; x <= e; x += d, g->nsub++){	/* same packing for negative coefficients: subtraction terms */
+		s = 0;	/* set when this pass places at least one index */
+		for(i=0; i<d; i++){
+			for(j=0; j<d; j++){
+				if(X[d*i+j] < 0 && x[j] == 0){
+					X[d*i+j]++;
+					x[j] = d+i;	/* index into gmreduce's t[]: high digit i */
+					s = 1;
+					break;
+				}
+			}
+		}
+		if(s == 0)	/* nothing placed: no negative coefficients remain */
+			break;
+	}
+	if(s != 0){	/* last pass still placed something: presumably indx ran out of room -- give up */
+		mpfree(g);
+		g = nil;
+	}
+out:	/* also reached on success: C, X, M and T are temporaries */
+	free(C);
+	free(X);
+	mpfree(M);
+	mpfree(T);
+	return g;
+}
+
--- a/libmp/port/mkfile
+++ b/libmp/port/mkfile
@@ -6,12 +6,15 @@
 	mpfmt\
 	strtomp\
 	mptobe\
+	mptober\
 	mptole\
+	mptolel\
 	betomp\
 	letomp\
 	mpadd\
 	mpsub\
 	mpcmp\
+	mpsel\
 	mpfactorial\
 	mpmul\
 	mpleft\
@@ -24,6 +27,7 @@
 	mpdiv\
 	mpexp\
 	mpmod\
+	mpmodop\
 	mpextendedgcd\
 	mpinvert\
 	mprand\
@@ -32,6 +36,11 @@
 	mptoui\
 	mptov\
 	mptouv\
+	mpfield\
+	cnfield\
+	gmfield\
+	mplogic\
+	mptod\
 
 ALLOFILES=${FILES:%=%.$O}
 # cull things in the per-machine directories from this list
--- a/libmp/port/mpaux.c
+++ b/libmp/port/mpaux.c
@@ -5,11 +5,9 @@
 static mpdigit _mptwodata[1] = { 2 };
 static mpint _mptwo =
 {
-	1,
-	1,
-	1,
+	1, 1, 1,
 	_mptwodata,
-	MPstatic
+	MPstatic|MPnorm
 };
 mpint *mptwo = &_mptwo;
 
@@ -16,11 +14,9 @@
 static mpdigit _mponedata[1] = { 1 };
 static mpint _mpone =
 {
-	1,
-	1,
-	1,
+	1, 1, 1,
 	_mponedata,
-	MPstatic
+	MPstatic|MPnorm
 };
 mpint *mpone = &_mpone;
 
@@ -27,11 +23,9 @@
 static mpdigit _mpzerodata[1] = { 0 };
 static mpint _mpzero =
 {
-	1,
-	1,
-	0,
+	1, 1, 0,
 	_mpzerodata,
-	MPstatic
+	MPstatic|MPnorm
 };
 mpint *mpzero = &_mpzero;
 
@@ -57,17 +51,17 @@
 	if(n < 0)
 		sysfatal("mpsetminbits: n < 0");
 
-	b = mallocz(sizeof(mpint), 1);
-	if(b == nil)
-		sysfatal("mpnew: %r");
 	n = DIGITS(n);
 	if(n < mpmindigits)
 		n = mpmindigits;
-	b->p = (mpdigit*)mallocz(n*Dbytes, 1);
-	if(b->p == nil)
+	b = mallocz(sizeof(mpint) + n*Dbytes, 1);
+	if(b == nil)
 		sysfatal("mpnew: %r");
+	setmalloctag(b, getcallerpc(&n));
+	b->p = (mpdigit*)&b[1];
 	b->size = n;
 	b->sign = 1;
+	b->flags = MPnorm;
 
 	return b;
 }
@@ -82,16 +76,23 @@
 	if(b->size >= n){
 		if(b->top >= n)
 			return;
-		memset(&b->p[b->top], 0, Dbytes*(n - b->top));
-		b->top = n;
-		return;
+	} else {
+		if(b->p == (mpdigit*)&b[1]){
+			b->p = (mpdigit*)mallocz(n*Dbytes, 0);
+			if(b->p == nil)
+				sysfatal("mpbits: %r");
+			memmove(b->p, &b[1], Dbytes*b->top);
+			memset(&b[1], 0, Dbytes*b->size);
+		} else {
+			b->p = (mpdigit*)realloc(b->p, n*Dbytes);
+			if(b->p == nil)
+				sysfatal("mpbits: %r");
+		}
+		b->size = n;
 	}
-	b->p = (mpdigit*)realloc(b->p, n*Dbytes);
-	if(b->p == nil)
-		sysfatal("mpbits: %r");
 	memset(&b->p[b->top], 0, Dbytes*(n - b->top));
-	b->size = n;
 	b->top = n;
+	b->flags &= ~MPnorm;
 }
 
 void
@@ -101,16 +102,22 @@
 		return;
 	if(b->flags & MPstatic)
 		sysfatal("freeing mp constant");
-	memset(b->p, 0, b->size*Dbytes);	// information hiding
-	free(b->p);
+	memset(b->p, 0, b->size*Dbytes);
+	if(b->p != (mpdigit*)&b[1])
+		free(b->p);
 	free(b);
 }
 
-void
+mpint*
 mpnorm(mpint *b)
 {
 	int i;
 
+	if(b->flags & MPtimesafe){
+		assert(b->sign == 1);
+		b->flags &= ~MPnorm;
+		return b;
+	}
 	for(i = b->top-1; i >= 0; i--)
 		if(b->p[i] != 0)
 			break;
@@ -117,6 +124,8 @@
 	b->top = i+1;
 	if(b->top == 0)
 		b->sign = 1;
+	b->flags |= MPnorm;
+	return b;
 }
 
 mpint*
@@ -125,8 +134,10 @@
 	mpint *new;
 
 	new = mpnew(Dbits*old->size);
-	new->top = old->top;
+	setmalloctag(new, getcallerpc(&old));
 	new->sign = old->sign;
+	new->top = old->top;
+	new->flags = old->flags & ~(MPstatic|MPfield);
 	memmove(new->p, old->p, Dbytes*old->top);
 	return new;
 }
@@ -134,9 +145,14 @@
 void
 mpassign(mpint *old, mpint *new)
 {
+	if(new == nil || old == new)
+		return;
+	new->top = 0;
 	mpbits(new, Dbits*old->top);
 	new->sign = old->sign;
 	new->top = old->top;
+	new->flags &= ~MPnorm;
+	new->flags |= old->flags & ~(MPstatic|MPfield);
 	memmove(new->p, old->p, Dbytes*old->top);
 }
 
@@ -166,6 +182,7 @@
 	int k, bit, digit;
 	mpdigit d;
 
+	assert(n->flags & MPnorm);
 	if(n->top==0)
 		return 0;
 	k = 0;
@@ -186,4 +203,3 @@
 	}
 	return k;
 }
-
--- /dev/null
+++ b/libmp/port/mpfield.c
@@ -1,0 +1,21 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+mpint*
+mpfield(mpint *N)	/* try to exchange modulus N for a specialised field object; returns N itself when that is not possible */
+{
+	Mfield *f;
+
+	if(N == nil || N->flags & (MPfield|MPstatic))	/* nothing to do for nil, an existing field, or a static constant */
+		return N;
+	if((f = cnfield(N)) != nil)	/* first choice: cnfield */
+		goto Exchange;
+	if((f = gmfield(N)) != nil)	/* second choice: gmfield */
+		goto Exchange;
+	return N;
+Exchange:
+	setmalloctag(f, getcallerpc(&N));	/* tag the new allocation with our caller's pc */
+	mpfree(N);	/* the original N is released; callers must use the returned pointer */
+	return f;
+}
--- /dev/null
+++ b/libmp/port/mplogic.c
@@ -1,0 +1,212 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+/*
+	mplogic calculates b1|b2 subject to the
+	following flag bits (fl)
+
+	bit 0: subtract 1 from b1
+	bit 1: invert b1
+	bit 2: subtract 1 from b2
+	bit 3: invert b2
+	bit 4: add 1 to output
+	bit 5: invert output
+	
+	it inverts appropriate bits automatically
+	depending on the signs of the inputs
+*/
+
+static void
+mplogic(mpint *b1, mpint *b2, mpint *sum, int fl)	/* sum = b1|b2, modified by the flag bits in fl */
+{
+	mpint *t;
+	mpdigit *dp1, *dp2, *dpo, d1, d2, d;
+	int c1, c2, co;
+	int i;
+
+	assert(((b1->flags | b2->flags | sum->flags) & MPtimesafe) == 0);
+	if(b1->sign < 0) fl ^= 0x03;	/* negative b1: toggle "subtract 1" and "invert" for b1 */
+	if(b2->sign < 0) fl ^= 0x0c;	/* negative b2: toggle "subtract 1" and "invert" for b2 */
+	sum->sign = (int)(((fl|fl>>2)^fl>>4)<<30)>>31|1;	/* -1 when (invert b1 | invert b2) differs from invert output, else 1 */
+	if(sum->sign < 0) fl ^= 0x30;	/* negative result: toggle "add 1" and "invert output" */
+	if(b2->top > b1->top){	/* make b1 the operand with more digits */
+		t = b1;
+		b1 = b2;
+		b2 = t;
+		fl = fl >> 2 & 0x03 | fl << 2 & 0x0c | fl & 0x30;	/* exchange the b1 and b2 flag pairs to follow the swap */
+	}
+	mpbits(sum, b1->top*Dbits+1);	/* one bit more than b1, so a final carry digit fits */
+	dp1 = b1->p;
+	dp2 = b2->p;
+	dpo = sum->p;
+	c1 = fl & 1;	/* pending "subtract 1" borrow for b1 */
+	c2 = fl >> 2 & 1;	/* pending "subtract 1" borrow for b2 */
+	co = fl >> 4 & 1;	/* pending "add 1" carry for the output */
+	for(i = 0; i < b1->top; i++){
+		d1 = dp1[i] - c1;
+		if(i < b2->top)
+			d2 = dp2[i] - c2;
+		else
+			d2 = 0;
+		if(d1 != (mpdigit)-1) c1 = 0;	/* the borrow only continues while the digit is all ones */
+		if(d2 != (mpdigit)-1) c2 = 0;
+		if((fl & 2) != 0) d1 ^= -1;
+		if((fl & 8) != 0) d2 ^= -1;
+		d = d1 | d2;
+		if((fl & 32) != 0) d ^= -1;
+		d += co;
+		if(d != 0) co = 0;	/* the carry only continues while the digit is 0 */
+		dpo[i] = d;
+	}
+	sum->top = i;
+	if(co)
+		dpo[sum->top++] = co;	/* carry out of the top digit becomes a new digit */
+	mpnorm(sum);
+}
+
+void
+mpor(mpint *b1, mpint *b2, mpint *sum)	/* sum = b1 | b2 */
+{
+	mplogic(b1, b2, sum, 0);	/* no flag bits: plain OR */
+}
+
+void
+mpand(mpint *b1, mpint *b2, mpint *sum)	/* sum = b1 & b2 */
+{
+	mplogic(b1, b2, sum, 0x2a);	/* invert b1, invert b2, invert output: ~(~b1 | ~b2) */
+}
+
+void
+mpbic(mpint *b1, mpint *b2, mpint *sum)	/* sum = b1 & ~b2 (bit clear) */
+{
+	mplogic(b1, b2, sum, 0x22);	/* invert b1, invert output: ~(~b1 | b2) */
+}
+
+void
+mpnot(mpint *b, mpint *r)	/* r = ~b, computed as -(b + 1) */
+{
+	mpadd(b, mpone, r);
+	if(r->top != 0)	/* leave the sign of a zero result alone */
+		r->sign ^= -2;	/* flips the sign between 1 and -1 */
+}
+
+void
+mpxor(mpint *b1, mpint *b2, mpint *sum)	/* sum = b1 ^ b2 */
+{
+	mpint *t;
+	mpdigit *dp1, *dp2, *dpo, d1, d2, d;
+	int c1, c2, co;
+	int i, fl;
+
+	assert(((b1->flags | b2->flags | sum->flags) & MPtimesafe) == 0);
+	if(b2->top > b1->top){	/* make b1 the operand with more digits */
+		t = b1;
+		b1 = b2;
+		b2 = t;
+	}
+	fl = (b1->sign & 10) ^ (b2->sign & 12);	/* bit 1: b1 negative, bit 2: b2 negative, bit 3: exactly one of them negative */
+	sum->sign = (int)(fl << 28) >> 31 | 1;	/* -1 when bit 3 is set, else 1 */
+	mpbits(sum, b1->top*Dbits+1);	/* one bit more than b1, so a final carry digit fits */
+	dp1 = b1->p;
+	dp2 = b2->p;
+	dpo = sum->p;
+	c1 = fl >> 1 & 1;	/* subtract 1 from b1 when it is negative */
+	c2 = fl >> 2 & 1;	/* subtract 1 from b2 when it is negative */
+	co = fl >> 3 & 1;	/* add 1 to the output when the result is negative */
+	for(i = 0; i < b1->top; i++){
+		d1 = dp1[i] - c1;
+		if(i < b2->top)
+			d2 = dp2[i] - c2;
+		else
+			d2 = 0;
+		if(d1 != (mpdigit)-1) c1 = 0;	/* the borrow only continues while the digit is all ones */
+		if(d2 != (mpdigit)-1) c2 = 0;
+		d = d1 ^ d2;
+		d += co;
+		if(d != 0) co = 0;	/* the carry only continues while the digit is 0 */
+		dpo[i] = d;
+	}
+	sum->top = i;
+	if(co)
+		dpo[sum->top++] = co;	/* carry out of the top digit becomes a new digit */
+	mpnorm(sum);
+}
+
+void
+mptrunc(mpint *b, int n, mpint *r)	/* r = low n bits of b in two's complement form, as a non-negative number */
+{
+	int d, m, i, c;
+
+	assert(((b->flags | r->flags) & MPtimesafe) == 0);
+	mpbits(r, n);
+	r->top = DIGITS(n);
+	d = n / Dbits;	/* number of whole digits kept */
+	m = n % Dbits;	/* number of bits kept in the partial top digit */
+	if(b->sign == -1){
+		c = 1;	/* pending borrow for the "- 1" in ~(|b| - 1) */
+		for(i = 0; i < r->top; i++){
+			if(i < b->top)
+				r->p[i] = ~(b->p[i] - c);
+			else
+				r->p[i] = -1;	/* beyond b's digits a negative number is all ones */
+			if(r->p[i] != 0)
+				c = 0;
+		}
+		if(m != 0)
+			r->p[d] &= ((mpdigit)1<<m) - 1;	/* shift as mpdigit: 1<<31 overflows a signed int */
+	}else if(b->sign == 1){
+		if(d >= b->top){	/* b has no digits at or above the cut: copy it unchanged */
+			mpassign(b, r);
+			mpnorm(r);
+			return;
+		}
+		if(b != r)
+			for(i = 0; i < d; i++)
+				r->p[i] = b->p[i];
+		if(m != 0)
+			r->p[d] = b->p[d] & ((mpdigit)1<<m)-1;	/* same mask, again built in mpdigit */
+	}
+	r->sign = 1;
+	mpnorm(r);
+}
+
+void
+mpxtend(mpint *b, int n, mpint *r)	/* r = low n bits of b read as an n-bit two's complement signed number; n must be >= 1 or the shift count m goes negative */
+{
+	int d, m, c, i;
+
+	d = (n - 1) / Dbits;	/* digit holding the sign bit */
+	m = (n - 1) % Dbits;	/* position of the sign bit inside that digit */
+	if(d >= b->top){	/* b has no digit at the sign position: copy it unchanged */
+		mpassign(b, r);
+		return;
+	}
+	mptrunc(b, n, r);
+	mpbits(r, n);
+	if((r->p[d] & (mpdigit)1<<m) == 0){	/* sign bit clear: the truncated value is the answer */
+		mpnorm(r);
+		return;
+	}
+	r->p[d] |= -((mpdigit)1<<m);	/* set every bit from the sign bit upward; done in mpdigit to avoid signed overflow at m == 31 */
+	r->sign = -1;
+	c = 1;	/* pending borrow for the "- 1" in ~(x - 1) */
+	for(i = 0; i < r->top; i++){	/* turn the two's complement pattern into a magnitude */
+		r->p[i] = ~(r->p[i] - c);
+		if(r->p[i] != 0)
+			c = 0;
+	}
+	mpnorm(r);
+}
+
+void
+mpasr(mpint *b, int n, mpint *r)	/* r = b shifted right by n bits, arithmetic style */
+{
+	if(b->sign > 0 || n <= 0){	/* non-negative value or no right shift: mpright alone is enough */
+		mpright(b, n, r);
+		return;
+	}
+	mpadd(b, mpone, r);	/* negative b: r = ((b + 1) >> n) - 1 */
+	mpright(r, n, r);	/* presumably shifts the magnitude, which makes the result round toward minus infinity - confirm against mpright */
+	mpsub(r, mpone, r);
+}
--- /dev/null
+++ b/libmp/port/mpmodop.c
@@ -1,0 +1,95 @@
+#include "os.h"
+#include <mp.h>
+
+/* operands need to have m->top+1 digits of space and satisfy 0 ≤ a ≤ m-1 */
+static mpint*
+modarg(mpint *a, mpint *m)	/* returns a, or a reduced copy that the caller must mpfree when it differs from a */
+{
+	if(a->size <= m->top || a->sign < 0 || mpmagcmp(a, m) >= 0){	/* too little room, negative, or magnitude >= m */
+		a = mpcopy(a);
+		mpmod(a, m, a);
+		mpbits(a, Dbits*(m->top+1));	/* guarantee m->top+1 digits of storage */
+		a->top = m->top;
+	} else if(a->top < m->top){
+		memset(&a->p[a->top], 0, (m->top - a->top)*Dbytes);	/* zero the caller's unused digits up to m->top so they can be read */
+	}
+	return a;
+}
+
+void
+mpmodadd(mpint *b1, mpint *b2, mpint *m, mpint *sum)	/* sum = (b1 + b2) mod m, chosen by masking instead of a compare-and-branch */
+{
+	mpint *a, *b;
+	mpdigit d;
+	int i, j;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	sum->flags |= (a->flags | b->flags) & MPtimesafe;
+	mpbits(sum, Dbits*2*(m->top+1));	/* room for two results of m->top+1 digits each */
+
+	mpvecadd(a->p, m->top, b->p, m->top, sum->p);	/* lower half: a + b */
+	mpvecsub(sum->p, m->top+1, m->p, m->top, sum->p+m->top+1);	/* upper half: (a + b) - m */
+
+	d = sum->p[2*m->top+1];	/* top digit of the difference; presumably all ones when the subtraction borrowed, else 0 - confirm against mpvecsub */
+	for(i = 0, j = m->top+1; i < m->top; i++, j++)
+		sum->p[i] = (sum->p[i] & d) | (sum->p[j] & ~d);	/* keep a+b where d has ones, the difference where it has zeros */
+
+	sum->top = m->top;
+	sum->sign = 1;
+	mpnorm(sum);
+
+	if(a != b1)	/* free only the copies modarg made */
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
+
+void
+mpmodsub(mpint *b1, mpint *b2, mpint *m, mpint *diff)	/* diff = (b1 - b2) mod m, chosen by masking instead of a compare-and-branch */
+{
+	mpint *a, *b;
+	mpdigit d;
+	int i, j;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	diff->flags |= (a->flags | b->flags) & MPtimesafe;
+	mpbits(diff, Dbits*2*(m->top+1));	/* room for two results of m->top+1 digits each */
+
+	a->p[m->top] = 0;	/* clear the extra digit so the borrow shows up there */
+	mpvecsub(a->p, m->top+1, b->p, m->top, diff->p);	/* lower half: a - b */
+	mpvecadd(diff->p, m->top, m->p, m->top, diff->p+m->top+1);	/* upper half: (a - b) + m */
+
+	d = ~diff->p[m->top];	/* complement of the borrow digit; presumably all ones when a >= b - confirm against mpvecsub */
+	for(i = 0, j = m->top+1; i < m->top; i++, j++)
+		diff->p[i] = (diff->p[i] & d) | (diff->p[j] & ~d);	/* keep a-b where d has ones, a-b+m where it has zeros */
+
+	diff->top = m->top;
+	diff->sign = 1;
+	mpnorm(diff);
+
+	if(a != b1)	/* free only the copies modarg made */
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
+
+void
+mpmodmul(mpint *b1, mpint *b2, mpint *m, mpint *prod)	/* prod = (b1 * b2) mod m */
+{
+	mpint *a, *b;
+
+	a = modarg(b1, m);
+	b = modarg(b2, m);
+
+	mpmul(a, b, prod);	/* full product first */
+	mpmod(prod, m, prod);	/* then reduce */
+
+	if(a != b1)	/* free only the copies modarg made */
+		mpfree(a);
+	if(b != b2)
+		mpfree(b);
+}
--- /dev/null
+++ b/libmp/port/mpsel.c
@@ -1,0 +1,42 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+// res = s != 0 ? b1 : b2
+void
+mpsel(int s, mpint *b1, mpint *b2, mpint *res)	/* masked select when any operand is MPtimesafe, plain mpassign otherwise */
+{
+	mpdigit d;
+	int n, m, i;
+
+	res->flags |= (b1->flags | b2->flags) & MPtimesafe;	/* the result inherits MPtimesafe from either input */
+	if((res->flags & MPtimesafe) == 0){
+		mpassign(s ? b1 : b2, res);
+		return;
+	}
+	res->flags &= ~MPnorm;
+
+	n = b1->top;
+	m = b2->top;
+	mpbits(res, Dbits*(n >= m ? n : m));	/* size res for the longer operand */
+	res->top = n >= m ? n : m;
+
+	s = (-s^s|s)>>(sizeof(s)*8-1);	/* s becomes -1 for any non-zero s and stays 0 for zero, without a branch */
+	res->sign = (b1->sign & s) | (b2->sign & ~s);
+
+	d = -((mpdigit)s & 1);	/* digit-wide mask: all ones selects b1, zero selects b2 */
+
+	i = 0;
+	while(i < n && i < m){	/* digits present in both operands */
+		res->p[i] = (b1->p[i] & d) | (b2->p[i] & ~d);
+		i++;
+	}
+	while(i < n){	/* b1 is longer: the other side counts as zero */
+		res->p[i] = b1->p[i] & d;
+		i++;
+	}
+	while(i < m){	/* b2 is longer: the other side counts as zero */
+		res->p[i] = b2->p[i] & ~d;
+		i++;
+	}
+}
--- /dev/null
+++ b/libmp/port/mptober.c
@@ -1,0 +1,34 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+void
+mptober(mpint *b, uchar *p, int n)	/* store the magnitude of b big-endian in p[0..n-1], right-aligned and zero-padded on the left */
+{
+	int i, j, m;
+	mpdigit x;
+
+	memset(p, 0, n);
+
+	p += n;	/* write backwards from the end of the buffer */
+	m = b->top*Dbytes;
+	if(m < n)	/* emit at most as many bytes as b has; the rest stays zero */
+		n = m;
+
+	i = 0;
+	while(n >= Dbytes){	/* whole digits, least significant first */
+		n -= Dbytes;
+		x = b->p[i++];
+		for(j = 0; j < Dbytes; j++){
+			*--p = x;
+			x >>= 8;
+		}
+	}
+	if(n > 0){	/* leftover low bytes of the next digit; higher bytes of b are dropped */
+		x = b->p[i];
+		for(j = 0; j < n; j++){
+			*--p = x;
+			x >>= 8;
+		}
+	}
+}
--- /dev/null
+++ b/libmp/port/mptod.c
@@ -1,0 +1,83 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+double
+mptod(mpint *a)	/* nearest double to a, ties to even; Inf(a->sign) when a needs more than 1024 bits, also after rounding */
+{
+	u64int v;
+	mpdigit w, r;
+	int sf, i, n, m, s;
+	FPdbleword x;
+	
+	if(a->top == 0) return 0.0;
+	sf = mpsignif(a);
+	if(sf > 1024) return Inf(a->sign);
+	i = a->top - 1;
+	v = a->p[i];	/* start with the top digit */
+	n = sf & Dbits - 1;	/* number of significant bits in the top digit, 0 meaning a full digit */
+	n |= n - 1 & Dbits;	/* turn that 0 into Dbits (assumes Dbits is a power of two) */
+	r = 0;	/* r collects the bits below the 54 kept in v */
+	if(n > 54){
+		s = n - 54;
+		r = v & (1<<s) - 1;
+		v >>= s;
+	}
+	while(n < 54){	/* pull in lower digits until v holds 54 bits: 53 mantissa bits plus one rounding bit */
+		if(--i < 0)
+			w = 0;
+		else
+			w = a->p[i];
+		m = 54 - n;
+		if(m > Dbits) m = Dbits;
+		s = Dbits - m & Dbits - 1;	/* bits of w that are not taken */
+		v = v << m | w >> s;
+		r = w & ((mpdigit)1<<s) - 1;	/* mask built in mpdigit: (1<<31) - 1 overflows a signed int when s == 31 */
+		n += m;
+	}
+	if((v & 3) == 1){	/* rounding bit set and mantissa even: a tie unless lower bits are set */
+		while(--i >= 0)
+			r |= a->p[i];
+		if(r != 0)
+			v++;
+	}else
+		v++;	/* rounds up exactly when the rounding bit is set (the mantissa is odd in that case) */
+	v >>= 1;	/* drop the rounding bit */
+	while((v >> 53) != 0){	/* rounding carried out of the mantissa: renormalise */
+		v >>= 1;
+		if(++sf > 1024)
+			return Inf(a->sign);
+	}
+	x.lo = v;
+	x.hi = (u32int)(v >> 32) & (1<<20) - 1 | sf + 1022 << 20 | a->sign & 1U<<31;	/* high mantissa bits, biased exponent, sign; 1U avoids shifting into the int sign bit */
+	return x.x;
+}
+
+mpint *
+dtomp(double d, mpint *a)	/* a = d rounded to the nearest integer, ties to even; a is allocated when nil; Inf and NaN trip the assert */
+{
+	FPdbleword x;
+	uvlong v;
+	int e;
+
+	if(a == nil)
+		a = mpnew(0);
+	x.x = d;
+	e = x.hi >> 20 & 2047;	/* biased exponent field */
+	assert(e != 2047);	/* exponent 2047 encodes Inf and NaN */
+	if(e < 1022){	/* |d| < 0.5 rounds to zero */
+		mpassign(mpzero, a);
+		return a;
+	}
+	v = x.lo | (uvlong)(x.hi & (1<<20) - 1) << 32 | 1ULL<<52;	/* 52 stored mantissa bits plus the implicit leading 1 */
+	if(e < 1075){	/* the low 1075-e bits of v are fractional */
+		v += (1ULL<<1074 - e) - (~v >> 1075 - e & 1);	/* add one half, minus one ulp when the integer part is even */
+		v >>= 1075 - e;
+	}
+	uvtomp(v, a);
+	if(e > 1075)
+		mpleft(a, e - 1075, a);
+	if((int)x.hi < 0 && a->top != 0)	/* never mark zero negative: d == -0.5 rounds to 0 and must keep sign 1 */
+		a->sign = -1;
+	return a;
+}
--- /dev/null
+++ b/libmp/port/mptolel.c
@@ -1,0 +1,33 @@
+#include "os.h"
+#include <mp.h>
+#include "dat.h"
+
+void
+mptolel(mpint *b, uchar *p, int n)	/* store the magnitude of b little-endian in p[0..n-1], zero-padded at the end */
+{
+	int i, j, m;
+	mpdigit x;
+
+	memset(p, 0, n);
+
+	m = b->top*Dbytes;
+	if(m < n)	/* emit at most as many bytes as b has; the rest stays zero */
+		n = m;
+
+	i = 0;
+	while(n >= Dbytes){	/* whole digits, least significant first */
+		n -= Dbytes;
+		x = b->p[i++];
+		for(j = 0; j < Dbytes; j++){
+			*p++ = x;
+			x >>= 8;
+		}
+	}
+	if(n > 0){	/* leftover low bytes of the next digit; higher bytes of b are dropped */
+		x = b->p[i];
+		for(j = 0; j < n; j++){
+			*p++ = x;
+			x >>= 8;
+		}
+	}
+}
--- /dev/null
+++ b/libsec/9front-amd64/aesni.s
@@ -1,0 +1,408 @@
+#define AESOP(o,r1,r2) \
+	BYTE	$0x66; \
+	BYTE	$0x0F; \
+	BYTE	$0x38; \
+	BYTE	$(o); \
+	BYTE	$(0xC0 | r2<<3 | r1)	/* ModRM byte, register-direct: reg field = r2 (destination), rm field = r1 (source) */
+
+#define AESIMC(r1,r2)		AESOP(0xDB,r1,r2)	/* opcode 66 0F 38 DB */
+#define	AESENC(r1,r2)		AESOP(0xDC,r1,r2)	/* opcode 66 0F 38 DC */
+#define	AESENCLAST(r1,r2)	AESOP(0xDD,r1,r2)	/* opcode 66 0F 38 DD */
+#define	AESDEC(r1,r2)		AESOP(0xDE,r1,r2)	/* opcode 66 0F 38 DE */
+#define	AESDECLAST(r1,r2)	AESOP(0xDF,r1,r2)	/* opcode 66 0F 38 DF */
+
+#define	AESKEYGENASSIST(i,r1,r2) \
+	BYTE	$0x66; \
+	BYTE	$0x0F; \
+	BYTE	$0x3A; \
+	BYTE	$0xDF; \
+	BYTE	$(0xC0 | r2<<3 | r1); \
+	BYTE	$(i)	/* trailing immediate byte: the round constant */
+
+TEXT aesni_init(SB), 0, $0	/* returns the address of AESsetup in AX, or 0 when CPUID does not report AES support */
+	MOVL	$1, AX
+	CPUID	/* leaf 1 */
+	XORL	AX, AX	/* default return value: 0 */
+	ANDL	$(1<<25), CX	/* CPUID.1:ECX bit 25 is the AES instruction feature flag */
+	JZ	_ret
+
+	/* override aes function pointers */
+	MOVQ	$AESencrypt<>(SB), AX
+	MOVQ	AX, aes_encrypt(SB)
+	MOVQ	$AESdecrypt<>(SB), AX
+	MOVQ	AX, aes_decrypt(SB)
+
+	/* return setup function pointer */
+	MOVQ	$AESsetup<>(SB), AX
+_ret:
+	RET
+
+TEXT AESencrypt<>(SB), 0, $0	/* arguments: round keys in RARG, then Nr, pt, ct; encrypts one 16-byte block */
+	MOVL	Nr+8(FP), CX
+	MOVQ	pt+16(FP), SI
+	MOVQ	ct+24(FP), DI
+	MOVO	(RARG), X0	/* round key 0; MOVO is presumably the aligned 128-bit move, so the schedule would need 16-byte alignment - confirm */
+	MOVOU	(SI), X7	/* the input block is loaded with the unaligned move */
+	ADDQ	$16, RARG
+	PXOR	X7, X0	/* initial whitening: state = pt ^ rk[0] */
+	CMPL	CX, $12
+	JLT	erounds10	/* Nr < 12: 10 rounds */
+	JEQ	erounds12	/* Nr == 12: 12 rounds */
+erounds14:	/* otherwise: two extra rounds, then fall through */
+	MOVO	0(RARG), X1
+	MOVO	16(RARG), X2
+	ADDQ	$32, RARG
+	AESENC(1, 0)
+	AESENC(2, 0)
+erounds12:	/* two extra rounds, then fall through */
+	MOVO	0(RARG), X3
+	MOVO	16(RARG), X4
+	ADDQ	$32, RARG
+	AESENC(3, 0)
+	AESENC(4, 0)
+erounds10:	/* the last ten rounds, shared by every key size */
+	MOVO	0(RARG), X1
+	MOVO	16(RARG), X2
+	MOVO	32(RARG), X3
+	MOVO	48(RARG), X4
+	MOVO	64(RARG), X5
+	MOVO	80(RARG), X6
+	MOVO	96(RARG), X7
+	AESENC(1, 0)
+	MOVO	112(RARG), X1	/* X1..X3 are reloaded with the last three keys once their first use has been issued */
+	AESENC(2, 0)
+	MOVO	128(RARG), X2
+	AESENC(3, 0)
+	MOVO	144(RARG), X3
+	AESENC(4, 0)
+	AESENC(5, 0)
+	AESENC(6, 0)
+	AESENC(7, 0)
+
+	AESENC(1, 0)
+	AESENC(2, 0)
+	AESENCLAST(3, 0)	/* final round */
+	MOVOU	X0, (DI)	/* store the result block to ct */
+	RET
+
+TEXT AESdecrypt<>(SB), 0, $0	/* arguments: decryption round keys in RARG, then Nr, ct, pt; decrypts one 16-byte block */
+	MOVL	Nr+8(FP), CX
+	MOVQ	ct+16(FP), SI
+	MOVQ	pt+24(FP), DI
+	MOVO	(RARG), X0	/* round key 0 of the decryption schedule */
+	MOVOU	(SI), X7	/* the input block is loaded with the unaligned move */
+	ADDQ	$16, RARG
+	PXOR	X7, X0	/* initial whitening: state = ct ^ rk[0] */
+	CMPL	CX, $12
+	JLT	drounds10	/* Nr < 12: 10 rounds */
+	JEQ	drounds12	/* Nr == 12: 12 rounds */
+drounds14:	/* otherwise: two extra rounds, then fall through */
+	MOVO	0(RARG), X1
+	MOVO	16(RARG), X2
+	ADDQ	$32, RARG
+	AESDEC(1, 0)
+	AESDEC(2, 0)
+drounds12:	/* two extra rounds, then fall through */
+	MOVO	0(RARG), X3
+	MOVO	16(RARG), X4
+	ADDQ	$32, RARG
+	AESDEC(3, 0)
+	AESDEC(4, 0)
+drounds10:	/* the last ten rounds, shared by every key size */
+	MOVO	0(RARG), X1
+	MOVO	16(RARG), X2
+	MOVO	32(RARG), X3
+	MOVO	48(RARG), X4
+	MOVO	64(RARG), X5
+	MOVO	80(RARG), X6
+	MOVO	96(RARG), X7
+	AESDEC(1, 0)
+	MOVO	112(RARG), X1	/* X1..X3 are reloaded with the last three keys once their first use has been issued */
+	AESDEC(2, 0)
+	MOVO	128(RARG), X2
+	AESDEC(3, 0)
+	MOVO	144(RARG), X3
+	AESDEC(4, 0)
+	AESDEC(5, 0)
+	AESDEC(6, 0)
+	AESDEC(7, 0)
+
+	AESDEC(1, 0)
+	AESDEC(2, 0)
+	AESDECLAST(3, 0)	/* final round */
+	MOVOU	X0, (DI)	/* store the result block to pt */
+	RET
+
+TEXT AESsetup<>(SB), 0, $16	/* arguments: erk in RARG, then drk, key, nkey; expands both schedules and returns the round count in AX (0 for an unsupported nkey) */
+	MOVQ	RARG, erk+0(FP)	/* save erk so it can be reloaded after the call */
+	MOVQ	key+16(FP), DX
+	MOVL	nkey+24(FP), BX
+	MOVQ	DX, 8(SP)	/* key becomes the stack argument that setupEnc* reads as key+8(FP) */
+	CMPL	BX, $32
+	JEQ	esetup256
+	CMPL	BX, $24
+	JEQ	esetup192
+	CMPL	BX, $16
+	JEQ	esetup128
+	XORL	AX, AX	/* any other key length: return 0 */
+	RET	
+esetup256:
+	CALL	setupEnc256<>(SB)
+	JMP	dsetup
+esetup192:
+	CALL	setupEnc192<>(SB)
+	JMP	dsetup
+esetup128:
+	CALL	setupEnc128<>(SB)
+dsetup:	/* AX = Nr here; derive the decryption schedule from the encryption one */
+	MOVQ	erk+0(FP), SI
+	MOVQ	drk+8(FP), DI
+
+	MOVL	AX, BX
+	SHLL	$4, BX	/* BX = 16*Nr, the byte offset of the last round key */
+	ADDQ	BX, SI	/* SI -> erk[Nr] */
+
+	MOVO	(SI), X0
+	MOVO	X0, (DI)	/* drk[0] = erk[Nr], copied unchanged */
+
+	MOVO	-16(SI), X1	/* drk[i] = AESIMC(erk[Nr-i]) for i = 1..4 */
+	MOVO	-32(SI), X2
+	MOVO	-48(SI), X3
+	MOVO	-64(SI), X4
+	AESIMC(1, 1)
+	AESIMC(2, 2)
+	AESIMC(3, 3)
+	AESIMC(4, 4)
+	MOVO	X1, 16(DI)
+	MOVO	X2, 32(DI)
+	MOVO	X3, 48(DI)
+	MOVO	X4, 64(DI)
+
+	MOVO	-80(SI), X1	/* i = 5..8 */
+	MOVO	-96(SI), X2
+	MOVO	-112(SI), X3
+	MOVO	-128(SI), X4
+	AESIMC(1, 1)
+	AESIMC(2, 2)
+	AESIMC(3, 3)
+	AESIMC(4, 4)
+	MOVO	X1, 80(DI)
+	MOVO	X2, 96(DI)
+	MOVO	X3, 112(DI)
+	MOVO	X4, 128(DI)
+
+	MOVO 	-144(SI), X1	/* i = 9 */
+	AESIMC(1, 1)
+	MOVO	X1, 144(DI)
+
+	CMPL	AX, $10
+	JEQ	dsetupend
+	
+	MOVO	-160(SI), X1	/* i = 10, 11: only for 12 or 14 rounds */
+	MOVO	-176(SI), X2
+	AESIMC(1, 1)
+	AESIMC(2, 2)
+	MOVO	X1, 160(DI)
+	MOVO	X2, 176(DI)
+
+	CMPL	AX, $12
+	JEQ	dsetupend
+
+	MOVO	-192(SI), X1	/* i = 12, 13: only for 14 rounds */
+	MOVO	-208(SI), X2
+	AESIMC(1, 1)
+	AESIMC(2, 2)
+	MOVO	X1, 192(DI)
+	MOVO	X2, 208(DI)
+dsetupend:
+	SUBQ	BX, SI	/* SI -> erk[0] */
+	ADDQ	BX, DI	/* DI -> drk[Nr] */
+	MOVO	(SI), X0
+	MOVO	X0, (DI)	/* drk[Nr] = erk[0], copied unchanged */
+	RET
+
+TEXT setupEnc128<>(SB), 0, $0	/* writes 11 round keys for a 16-byte key to the schedule in RARG; returns 10 in AX */
+	MOVQ	key+8(FP), SI
+	MOVOU	(SI), X1	/* X1 always holds the most recent round key */
+	MOVO	X1, (RARG)	/* round key 0 is the cipher key itself */
+	AESKEYGENASSIST(0x01, 1, 0)	/* X0 = key-generation assist of X1 with this round constant */
+	CALL	rk128<>(SB)	/* X1 = next round key */
+	MOVO	X1, 16(RARG)
+	AESKEYGENASSIST(0x02, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 32(RARG)
+	AESKEYGENASSIST(0x04, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 48(RARG)
+	AESKEYGENASSIST(0x08, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 64(RARG)
+	AESKEYGENASSIST(0x10, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 80(RARG)
+	AESKEYGENASSIST(0x20, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 96(RARG)
+	AESKEYGENASSIST(0x40, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 112(RARG)
+	AESKEYGENASSIST(0x80, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 128(RARG)
+	AESKEYGENASSIST(0x1b, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 144(RARG)
+	AESKEYGENASSIST(0x36, 1, 0)
+	CALL	rk128<>(SB)
+	MOVO	X1, 160(RARG)
+	MOVL	$10, AX	/* number of rounds */
+	RET
+TEXT rk128<>(SB), 0, $0	/* in: X1 = previous round key, X0 = assist result; out: X1 = next round key; clobbers X0 and X2 */
+	PSHUFL	$0xff, X0, X0	/* copy the top 32-bit word of X0 into all four words */
+	MOVO	X1, X2
+	PSLLO	$4, X2	/* three shift-by-4-bytes-and-XOR steps: each word of X1 becomes the XOR of itself and all lower words */
+	PXOR	X2, X1
+	PSLLO	$4, X2
+	PXOR	X2, X1
+	PSLLO	$4, X2
+	PXOR	X2, X1
+	PXOR	X0, X1	/* mix the assist word into every word */
+	RET
+	
+TEXT setupEnc192<>(SB), 0, $0	/* writes 13 round keys for a 24-byte key to the schedule in RARG; returns 12 in AX */
+	MOVQ	key+8(FP), SI
+	MOVOU	(SI), X1
+	MOVOU	16(SI), X2	/* NOTE(review): loads 16 bytes at key+16, i.e. 8 bytes past a 24-byte key - the caller must pass a buffer of at least 32 bytes */
+	MOVO	X1, (RARG)
+	MOVO	X2, X5	/* keep the previous key tail: it forms the low half of the next round key */
+	AESKEYGENASSIST(0x01, 2, 0)	/* X0 = key-generation assist of X2 with this round constant */
+	CALL	rk192<>(SB)	/* advances X1 (four words) and the low half of X2 (two words) */
+	SHUFPD	$0, X1, X5	/* X5 = low half of X5, then low half of X1 */
+	MOVO	X5, 16(RARG)
+	MOVO	X1, X6
+	SHUFPD	$1, X2, X6	/* X6 = high half of X1, then low half of X2 */
+	MOVO	X6, 32(RARG)
+	AESKEYGENASSIST(0x02, 2, 0)
+	CALL	rk192<>(SB)
+	MOVO	X1, 48(RARG)
+	MOVO	X2, X5
+	AESKEYGENASSIST(0x04, 2, 0)
+	CALL	rk192<>(SB)
+	SHUFPD	$0, X1, X5
+	MOVO	X5, 64(RARG)
+	MOVO	X1, X6
+	SHUFPD	$1, X2, X6
+	MOVO	X6, 80(RARG)
+	AESKEYGENASSIST(0x08, 2, 0)
+	CALL	rk192<>(SB)
+	MOVO	X1, 96(RARG)
+	MOVO	X2, X5
+	AESKEYGENASSIST(0x10, 2, 0)
+	CALL	rk192<>(SB)
+	SHUFPD	$0, X1, X5
+	MOVO	X5, 112(RARG)
+	MOVO	X1, X6
+	SHUFPD	$1, X2, X6
+	MOVO	X6, 128(RARG)
+	AESKEYGENASSIST(0x20, 2, 0)
+	CALL	rk192<>(SB)
+	MOVO	X1, 144(RARG)
+	MOVO	X2, X5
+	AESKEYGENASSIST(0x40, 2, 0)
+	CALL	rk192<>(SB)
+	SHUFPD	$0, X1, X5
+	MOVO	X5, 160(RARG)
+	MOVO	X1, X6
+	SHUFPD	$1, X2, X6
+	MOVO	X6, 176(RARG)
+	AESKEYGENASSIST(0x80, 2, 0)
+	CALL	rk192<>(SB)
+	MOVO	X1, 192(RARG)
+	MOVL	$12, AX	/* number of rounds */
+	RET
+TEXT rk192<>(SB), 0, $0	/* in: X1, X2 = previous key words, X0 = assist result; out: X1 and X2 advanced; clobbers X0 and X4 */
+	PSHUFL	$0x55, X0, X0	/* copy 32-bit word 1 of X0 into all four words */
+	MOVOU	X1, X4
+	PSLLO	$4, X4	/* three shift-by-4-bytes-and-XOR steps: each word of X1 becomes the XOR of itself and all lower words */
+	PXOR	X4, X1
+	PSLLO	$4, X4
+	PXOR	X4, X1
+	PSLLO	$4, X4
+	PXOR	X4, X1
+	PXOR	X0, X1	/* mix the assist word into every word */
+	PSHUFL	$0xff, X1, X0	/* copy the top word of the new X1 into all four words */
+	MOVOU	X2, X4
+	PSLLO	$4, X4	/* a single shift-and-XOR step for X2 */
+	PXOR	X4, X2
+	PXOR	X0, X2
+	RET
+	
+TEXT setupEnc256<>(SB), 0, $0	/* writes 15 round keys for a 32-byte key to the schedule in RARG; returns 14 in AX */
+	MOVQ	key+8(FP), SI
+	MOVOU	(SI), X1	/* X1 and X2 hold the two most recent round keys */
+	MOVOU	16(SI), X2
+	MOVO	X1, (RARG)	/* round keys 0 and 1 are the cipher key itself */
+	MOVO	X2, 16(RARG)
+	AESKEYGENASSIST(0x01, 2, 0)	/* X0 = key-generation assist of X2 with this round constant */
+	CALL	rk256_a<>(SB)	/* advances X1 */
+	MOVO	X1, 32(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)	/* X0 = assist of the new X1, round constant 0 */
+	CALL	rk256_b<>(SB)	/* advances X2 */
+	MOVO	X2, 48(RARG)
+	AESKEYGENASSIST(0x02, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 64(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)
+	CALL	rk256_b<>(SB)
+	MOVO	X2, 80(RARG)
+	AESKEYGENASSIST(0x04, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 96(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)
+	CALL	rk256_b<>(SB)
+	MOVO	X2, 112(RARG)
+	AESKEYGENASSIST(0x08, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 128(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)
+	CALL	rk256_b<>(SB)
+	MOVO	X2, 144(RARG)
+	AESKEYGENASSIST(0x10, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 160(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)
+	CALL	rk256_b<>(SB)
+	MOVO	X2, 176(RARG)
+	AESKEYGENASSIST(0x20, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 192(RARG)
+	AESKEYGENASSIST(0x00, 1, 0)
+	CALL	rk256_b<>(SB)
+	MOVO	X2, 208(RARG)
+	AESKEYGENASSIST(0x40, 2, 0)
+	CALL	rk256_a<>(SB)
+	MOVO	X1, 224(RARG)
+	MOVL	$14, AX	/* number of rounds */
+	RET
+TEXT rk256_a<>(SB), 0, $0	/* in: X1 = key two steps back, X0 = assist result; out: X1 = next round key; clobbers X0 and X4 */
+	PSHUFL	$0xff, X0, X0	/* copy the top 32-bit word of X0 into all four words */
+	MOVO	X1, X4
+	PSLLO	$4, X4	/* three shift-by-4-bytes-and-XOR steps: each word of X1 becomes the XOR of itself and all lower words */
+	PXOR	X4, X1
+	PSLLO	$4, X4
+	PXOR	X4, X1
+	PSLLO	$4, X4
+	PXOR	X4, X1
+	PXOR	X0, X1	/* mix the assist word into every word */
+	RET
+TEXT rk256_b<>(SB), 0, $0	/* in: X2 = key two steps back, X0 = assist result; out: X2 = next round key; clobbers X0 and X4 */
+	PSHUFL	$0xaa, X0, X0	/* copy 32-bit word 2 of X0 into all four words */
+	MOVO	X2, X4
+	PSLLO	$4, X4	/* three shift-by-4-bytes-and-XOR steps: each word of X2 becomes the XOR of itself and all lower words */
+	PXOR	X4, X2
+	PSLLO	$4, X4
+	PXOR	X4, X2
+	PSLLO	$4, X4
+	PXOR	X4, X2
+	PXOR	X0, X2	/* mix the assist word into every word */
+	RET
--- /dev/null
+++ b/libsec/9front-amd64/chachablock.s
@@ -1,0 +1,74 @@
+#define ROTATE(n, v1, v2) \
+	MOVO	v1, v2; \
+	PSLLL	$(n), v1; \
+	PSRLL	$(32-n), v2; \
+	POR	v1, v2	/* v2 = each 32-bit lane of v1 rotated left by n; v1 is left holding the shifted half */
+
+TEXT _chachablock(SB), 0, $0	/* arguments: 64-byte state in RARG, then rounds; the state is transformed in place */
+	MOVOU	 0(RARG), X0	/* the state is held as four rows of four 32-bit words */
+	MOVOU	16(RARG), X1
+	MOVOU	32(RARG), X2
+	MOVOU	48(RARG), X3
+
+	MOVL	rounds+8(FP), CX
+	SHRL	$1, CX	/* each pass of the loop performs two rounds; NOTE(review): rounds < 2 gives CX = 0, which DECL/JNE below would wrap */
+
+_loop:
+	PADDL	X1, X0	/* first half: X0 += X1; X3 ^= X0; X3 <<<= 16 */
+	PXOR	X0, X3
+	/* ROTATE(16, X3, X3) */
+	PSHUFLW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3	/* swapping the 16-bit halves of each word is a rotate by 16 */
+	PSHUFHW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3
+
+	PADDL	X3, X2	/* X2 += X3; X1 = (X1 ^ X2) <<< 12 */
+	MOVO	X1, X4
+	PXOR	X2, X4
+	ROTATE(12, X4, X1)
+	
+	PADDL	X1, X0	/* X0 += X1; X3 = (X3 ^ X0) <<< 8 */
+	MOVO	X0, X4
+	PXOR	X3, X4
+	ROTATE(8, X4, X3)
+
+	PADDL	X3, X2	/* X2 += X3; X1 = (X1 ^ X2) <<< 7 */
+	MOVO	X1, X4
+	PXOR	X2, X4
+	ROTATE(7, X4, X1)
+
+	PSHUFL $(1<<0 | 2<<2 | 3<<4 | 0<<6), X1, X1	/* rotate the words of rows 1, 2, 3 by one, two and three positions */
+	PSHUFL $(2<<0 | 3<<2 | 0<<4 | 1<<6), X2, X2
+	PSHUFL $(3<<0 | 0<<2 | 1<<4 | 2<<6), X3, X3
+
+	PADDL	X1, X0	/* second half: the same four steps on the rotated rows */
+	PXOR	X0, X3
+	/* ROTATE(16, X3, X3) */
+	PSHUFLW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3
+	PSHUFHW $(1<<0 | 0<<2 | 3<<4 | 2<<6), X3, X3
+
+	PADDL	X3, X2
+	MOVO	X1, X4
+	PXOR	X2, X4
+	ROTATE(12, X4, X1)
+	
+	PADDL	X1, X0
+	MOVO	X0, X4
+	PXOR	X3, X4
+	ROTATE(8, X4, X3)
+
+	PADDL	X3, X2
+	MOVO	X1, X4
+	PXOR	X2, X4
+	ROTATE(7, X4, X1)
+
+	PSHUFL $(3<<0 | 0<<2 | 1<<4 | 2<<6), X1, X1	/* undo the row rotation */
+	PSHUFL $(2<<0 | 3<<2 | 0<<4 | 1<<6), X2, X2
+	PSHUFL $(1<<0 | 2<<2 | 3<<4 | 0<<6), X3, X3
+
+	DECL CX
+	JNE _loop
+
+	MOVOU	X0, 0(RARG)	/* write the four rows back over the input */
+	MOVOU	X1, 16(RARG)
+	MOVOU	X2, 32(RARG)
+	MOVOU	X3, 48(RARG)
+	RET
--- a/libsec/9front-amd64/md5block.s
+++ b/libsec/9front-amd64/md5block.s
@@ -103,7 +103,7 @@
 #define	LEN	8
 #define	STATE	16
 
-TEXT	_md5block+0(SB),$0
+TEXT	_md5block+0(SB), $0
 
 	MOVQ	RARG,R8
 	MOVLQZX len+LEN(FP),BX
@@ -206,7 +206,7 @@
 	ADDL CX,8(DI)
 	ADDL DX,12(DI)
 
-	CMPL BP,R8
+	CMPQ BP,R8
 	JCS mainloop
 
 	RET
--- a/libsec/9front-amd64/mkfile
+++ b/libsec/9front-amd64/mkfile
@@ -4,8 +4,11 @@
 
 LIB=libsec.a
 FILES=\
-	md5block\
-	sha1block\
+        chachablock\
+        md5block\
+        sha1block\
+        aesni\
+
 
 HFILES=$ROOT/include/libsec.h
 
--- a/libsec/9front-amd64/sha1block.s
+++ b/libsec/9front-amd64/sha1block.s
@@ -1,5 +1,3 @@
-	TEXT	_sha1block+0(SB),$384
-
 /* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
  * wp[off] = x;
  * x += A <<< 5;
@@ -84,7 +82,7 @@
  * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
  * ulong tmp = edi
  */
-#define Rpdata	R8
+#define	Rpdata	R8
 #define WARRAY	(-8-(80*4))
 #define TMP1	(-16-(80*4))
 #define TMP2	(-24-(80*4))
@@ -93,6 +91,8 @@
 #define W60	(-48-(80*4))
 #define W80	(-56-(80*4))
 #define EDATA	(-64-(80*4))
+
+TEXT	_sha1block+0(SB),$384
 
 	MOVQ RARG, Rpdata
 	MOVLQZX len+LEN(FP),BX
--- a/libsec/port/aes.c
+++ b/libsec/port/aes.c
@@ -31,147 +31,14 @@
 #include "os.h"
 #include <libsec.h>
 
-typedef uchar	u8;
-typedef u32	u32;
+#define GETU32(pt) (((u32)(pt)[0]<<24) ^ ((u32)(pt)[1]<<16) ^ \
+		    ((u32)(pt)[2]<< 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st)>>24); (ct)[1] = (u8)((st)>>16); \
+			 (ct)[2] = (u8)((st)>> 8); (ct)[3] = (u8)(st); }
+
 #define FULL_UNROLL
 
-static const u32 Td0[256];
-static const u32 Td1[256];
-static const u32 Td2[256];
-static const u32 Td3[256];
-static const u8  Te4[256];
-
-static int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
-static int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
-static int rijndaelKeySetup(u32 erk[/*4*(Nr + 1)*/], u32 drk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits);
-static void	rijndaelEncrypt(const u32 rk[], int Nr, const uchar pt[16], uchar ct[16]);
-static void	rijndaelDecrypt(const u32 rk[], int Nr, const uchar ct[16], uchar pt[16]);
-
-void
-setupAESstate(AESstate *s, uchar key[], int keybytes, uchar *ivec)
-{
-	memset(s, 0, sizeof(*s));
-	if(keybytes > AESmaxkey)
-		keybytes = AESmaxkey;
-	memmove(s->key, key, keybytes);
-	s->keybytes = keybytes;
-	s->rounds = rijndaelKeySetup(s->ekey, s->dkey, s->key, keybytes * 8);
-	if(ivec != nil)
-		memmove(s->ivec, ivec, AESbsize);
-	if(keybytes==16 || keybytes==24 || keybytes==32)
-		s->setup = 0xcafebabe;
-	// else rijndaelKeySetup was invalid
-}
-
-// Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
-// Because of the way that non-multiple-of-16 buffers are handled,
-// the decryptor must be fed buffers of the same size as the encryptor.
-void
-aesCBCencrypt(uchar *p, int len, AESstate *s)
-{
-	uchar *p2, *ip, *eip;
-	uchar q[AESbsize];
-
-	for(; len >= AESbsize; len -= AESbsize){
-		p2 = p;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; )
-			*p2++ ^= *ip++;
-		rijndaelEncrypt(s->ekey, s->rounds, p, q);
-		memmove(s->ivec, q, AESbsize);
-		memmove(p, q, AESbsize);
-		p += AESbsize;
-	}
-
-	if(len > 0){
-		ip = s->ivec;
-		rijndaelEncrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
-		for(eip = ip+len; ip < eip; )
-			*p++ ^= *ip++;
-	}
-}
-
-void
-aesCBCdecrypt(uchar *p, int len, AESstate *s)
-{
-	uchar *ip, *eip, *tp;
-	uchar tmp[AESbsize], q[AESbsize];
-
-	for(; len >= AESbsize; len -= AESbsize){
-		memmove(tmp, p, AESbsize);
-		rijndaelDecrypt(s->dkey, s->rounds, p, q);
-		memmove(p, q, AESbsize);
-		tp = tmp;
-		ip = s->ivec;
-		for(eip = ip+AESbsize; ip < eip; ){
-			*p++ ^= *ip;
-			*ip++ = *tp++;
-		}
-	}
-
-	if(len > 0){
-		ip = s->ivec;
-		rijndaelEncrypt(s->ekey, s->rounds, ip, q);
-		memmove(s->ivec, q, AESbsize);
-		for(eip = ip+len; ip < eip; )
-			*p++ ^= *ip++;
-	}
-}
-
 /*
- * this function has been changed for plan 9.
- * Expand the cipher key into the encryption and decryption key schedules.
- *
- * @return	the number of rounds for the given cipher key size.
- */
-static int rijndaelKeySetup(u32 erk[/*4*(Nr + 1)*/], u32 drk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) {
-	int Nr, i;
-
-	/* expand the cipher key: */
-	Nr = rijndaelKeySetupEnc(erk, cipherKey, keyBits);
-
-	/*
-	 * invert the order of the round keys and
-	 * apply the inverse MixColumn transform to all round keys but the first and the last
-	 */
-	drk[0       ] = erk[4*Nr    ]; 
-	drk[1       ] = erk[4*Nr + 1];
-	drk[2       ] = erk[4*Nr + 2]; 
-	drk[3       ] = erk[4*Nr + 3];
-	drk[4*Nr    ] = erk[0       ]; 
-	drk[4*Nr + 1] = erk[1       ];
-	drk[4*Nr + 2] = erk[2       ]; 
-	drk[4*Nr + 3] = erk[3       ];
-	erk += 4 * Nr;
-	for (i = 1; i < Nr; i++) {
-		drk += 4;
-		erk -= 4;
-		drk[0] =
-		    Td0[Te4[(erk[0] >> 24)       ]] ^
-		    Td1[Te4[(erk[0] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[0] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[0]      ) & 0xff]];
-		drk[1] =
-		    Td0[Te4[(erk[1] >> 24)       ]] ^
-		    Td1[Te4[(erk[1] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[1] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[1]      ) & 0xff]];
-		drk[2] =
-		    Td0[Te4[(erk[2] >> 24)       ]] ^
-		    Td1[Te4[(erk[2] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[2] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[2]      ) & 0xff]];
-		drk[3] =
-		    Td0[Te4[(erk[3] >> 24)       ]] ^
-		    Td1[Te4[(erk[3] >> 16) & 0xff]] ^
-		    Td2[Te4[(erk[3] >>  8) & 0xff]] ^
-		    Td3[Te4[(erk[3]      ) & 0xff]];
-	}
-	return Nr;
-}
-
-/*
 Te0[x] = S [x].[02, 01, 01, 03];
 Te1[x] = S [x].[03, 02, 01, 01];
 Te2[x] = S [x].[01, 03, 02, 01];
@@ -185,698 +52,691 @@
 Td4[x] = Si[x]
 */
 
-static const u32 Te0[256] = {
-    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
-    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
-    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
-    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
-    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
-    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
-    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
-    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
-    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
-    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
-    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
-    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
-    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
-    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
-    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
-    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
-    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
-    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
-    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
-    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
-    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
-    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
-    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
-    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
-    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
-    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
-    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
-    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
-    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
-    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
-    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
-    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
-    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
-    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
-    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
-    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
-    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
-    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
-    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
-    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
-    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
-    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
-    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
-    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
-    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
-    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
-    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
-    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
-    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
-    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
-    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
-    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
-    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
-    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
-    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
-    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
-    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
-    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
-    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
-    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
-    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
-    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
-    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
-    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+static u32 Te0[256] = {
+	 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+	 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+	 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+	 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+	 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+	 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+	 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+	 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+	 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+	 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+	 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+	 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+	 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+	 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+	 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+	 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+	 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+	 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+	 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+	 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+	 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+	 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+	 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+	 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+	 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+	 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+	 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+	 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+	 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+	 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+	 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+	 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+	 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+	 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+	 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+	 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+	 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+	 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+	 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+	 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+	 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+	 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+	 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+	 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+	 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+	 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+	 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+	 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+	 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+	 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+	 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+	 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+	 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+	 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+	 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+	 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+	 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+	 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+	 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+	 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+	 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+	 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+	 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+	 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
 };
-static const u32 Te1[256] = {
-    0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
-    0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
-    0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
-    0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
-    0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
-    0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
-    0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
-    0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
-    0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
-    0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
-    0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
-    0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
-    0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
-    0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
-    0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
-    0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
-    0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
-    0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
-    0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
-    0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
-    0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
-    0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
-    0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
-    0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
-    0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
-    0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
-    0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
-    0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
-    0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
-    0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
-    0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
-    0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
-    0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
-    0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
-    0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
-    0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
-    0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
-    0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
-    0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
-    0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
-    0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
-    0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
-    0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
-    0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
-    0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
-    0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
-    0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
-    0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
-    0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
-    0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
-    0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
-    0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
-    0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
-    0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
-    0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
-    0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
-    0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
-    0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
-    0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
-    0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
-    0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
-    0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
-    0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
-    0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+static u32 Te1[256] = {
+	 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+	 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+	 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+	 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+	 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+	 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+	 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+	 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+	 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+	 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+	 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+	 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+	 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+	 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+	 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+	 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+	 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+	 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+	 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+	 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+	 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+	 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+	 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+	 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+	 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+	 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+	 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+	 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+	 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+	 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+	 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+	 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+	 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+	 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+	 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+	 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+	 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+	 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+	 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+	 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+	 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+	 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+	 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+	 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+	 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+	 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+	 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+	 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+	 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+	 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+	 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+	 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+	 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+	 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+	 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+	 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+	 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+	 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+	 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+	 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+	 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+	 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+	 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+	 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
 };
-static const u32 Te2[256] = {
-    0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
-    0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
-    0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
-    0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
-    0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
-    0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
-    0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
-    0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
-    0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
-    0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
-    0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
-    0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
-    0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
-    0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
-    0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
-    0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
-    0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
-    0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
-    0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
-    0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
-    0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
-    0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
-    0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
-    0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
-    0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
-    0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
-    0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
-    0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
-    0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
-    0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
-    0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
-    0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
-    0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
-    0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
-    0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
-    0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
-    0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
-    0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
-    0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
-    0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
-    0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
-    0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
-    0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
-    0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
-    0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
-    0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
-    0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
-    0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
-    0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
-    0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
-    0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
-    0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
-    0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
-    0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
-    0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
-    0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
-    0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
-    0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
-    0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
-    0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
-    0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
-    0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
-    0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
-    0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+static u32 Te2[256] = {
+	 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+	 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+	 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+	 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+	 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+	 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+	 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+	 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+	 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+	 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+	 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+	 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+	 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+	 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+	 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+	 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+	 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+	 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+	 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+	 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+	 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+	 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+	 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+	 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+	 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+	 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+	 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+	 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+	 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+	 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+	 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+	 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+	 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+	 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+	 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+	 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+	 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+	 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+	 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+	 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+	 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+	 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+	 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+	 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+	 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+	 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+	 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+	 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+	 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+	 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+	 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+	 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+	 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+	 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+	 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+	 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+	 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+	 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+	 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+	 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+	 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+	 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+	 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+	 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
 };
-static const u32 Te3[256] = {
+static u32 Te3[256] = {
 
-    0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
-    0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
-    0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
-    0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
-    0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
-    0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
-    0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
-    0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
-    0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
-    0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
-    0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
-    0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
-    0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
-    0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
-    0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
-    0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
-    0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
-    0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
-    0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
-    0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
-    0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
-    0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
-    0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
-    0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
-    0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
-    0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
-    0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
-    0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
-    0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
-    0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
-    0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
-    0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
-    0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
-    0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
-    0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
-    0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
-    0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
-    0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
-    0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
-    0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
-    0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
-    0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
-    0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
-    0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
-    0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
-    0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
-    0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
-    0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
-    0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
-    0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
-    0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
-    0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
-    0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
-    0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
-    0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
-    0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
-    0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
-    0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
-    0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
-    0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
-    0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
-    0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
-    0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
-    0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+	 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+	 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+	 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+	 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+	 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+	 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+	 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+	 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+	 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+	 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+	 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+	 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+	 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+	 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+	 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+	 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+	 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+	 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+	 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+	 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+	 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+	 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+	 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+	 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+	 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+	 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+	 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+	 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+	 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+	 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+	 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+	 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+	 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+	 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+	 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+	 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+	 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+	 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+	 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+	 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+	 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+	 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+	 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+	 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+	 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+	 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+	 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+	 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+	 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+	 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+	 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+	 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+	 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+	 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+	 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+	 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+	 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+	 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+	 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+	 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+	 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+	 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+	 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+	 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
 };
-static const u8 Te4[256] = {
-    0x63U, 0x7cU, 0x77U, 0x7bU,
-    0xf2U, 0x6bU, 0x6fU, 0xc5U,
-    0x30U, 0x01U, 0x67U, 0x2bU,
-    0xfeU, 0xd7U, 0xabU, 0x76U,
-    0xcaU, 0x82U, 0xc9U, 0x7dU,
-    0xfaU, 0x59U, 0x47U, 0xf0U,
-    0xadU, 0xd4U, 0xa2U, 0xafU,
-    0x9cU, 0xa4U, 0x72U, 0xc0U,
-    0xb7U, 0xfdU, 0x93U, 0x26U,
-    0x36U, 0x3fU, 0xf7U, 0xccU,
-    0x34U, 0xa5U, 0xe5U, 0xf1U,
-    0x71U, 0xd8U, 0x31U, 0x15U,
-    0x04U, 0xc7U, 0x23U, 0xc3U,
-    0x18U, 0x96U, 0x05U, 0x9aU,
-    0x07U, 0x12U, 0x80U, 0xe2U,
-    0xebU, 0x27U, 0xb2U, 0x75U,
-    0x09U, 0x83U, 0x2cU, 0x1aU,
-    0x1bU, 0x6eU, 0x5aU, 0xa0U,
-    0x52U, 0x3bU, 0xd6U, 0xb3U,
-    0x29U, 0xe3U, 0x2fU, 0x84U,
-    0x53U, 0xd1U, 0x00U, 0xedU,
-    0x20U, 0xfcU, 0xb1U, 0x5bU,
-    0x6aU, 0xcbU, 0xbeU, 0x39U,
-    0x4aU, 0x4cU, 0x58U, 0xcfU,
-    0xd0U, 0xefU, 0xaaU, 0xfbU,
-    0x43U, 0x4dU, 0x33U, 0x85U,
-    0x45U, 0xf9U, 0x02U, 0x7fU,
-    0x50U, 0x3cU, 0x9fU, 0xa8U,
-    0x51U, 0xa3U, 0x40U, 0x8fU,
-    0x92U, 0x9dU, 0x38U, 0xf5U,
-    0xbcU, 0xb6U, 0xdaU, 0x21U,
-    0x10U, 0xffU, 0xf3U, 0xd2U,
-    0xcdU, 0x0cU, 0x13U, 0xecU,
-    0x5fU, 0x97U, 0x44U, 0x17U,
-    0xc4U, 0xa7U, 0x7eU, 0x3dU,
-    0x64U, 0x5dU, 0x19U, 0x73U,
-    0x60U, 0x81U, 0x4fU, 0xdcU,
-    0x22U, 0x2aU, 0x90U, 0x88U,
-    0x46U, 0xeeU, 0xb8U, 0x14U,
-    0xdeU, 0x5eU, 0x0bU, 0xdbU,
-    0xe0U, 0x32U, 0x3aU, 0x0aU,
-    0x49U, 0x06U, 0x24U, 0x5cU,
-    0xc2U, 0xd3U, 0xacU, 0x62U,
-    0x91U, 0x95U, 0xe4U, 0x79U,
-    0xe7U, 0xc8U, 0x37U, 0x6dU,
-    0x8dU, 0xd5U, 0x4eU, 0xa9U,
-    0x6cU, 0x56U, 0xf4U, 0xeaU,
-    0x65U, 0x7aU, 0xaeU, 0x08U,
-    0xbaU, 0x78U, 0x25U, 0x2eU,
-    0x1cU, 0xa6U, 0xb4U, 0xc6U,
-    0xe8U, 0xddU, 0x74U, 0x1fU,
-    0x4bU, 0xbdU, 0x8bU, 0x8aU,
-    0x70U, 0x3eU, 0xb5U, 0x66U,
-    0x48U, 0x03U, 0xf6U, 0x0eU,
-    0x61U, 0x35U, 0x57U, 0xb9U,
-    0x86U, 0xc1U, 0x1dU, 0x9eU,
-    0xe1U, 0xf8U, 0x98U, 0x11U,
-    0x69U, 0xd9U, 0x8eU, 0x94U,
-    0x9bU, 0x1eU, 0x87U, 0xe9U,
-    0xceU, 0x55U, 0x28U, 0xdfU,
-    0x8cU, 0xa1U, 0x89U, 0x0dU,
-    0xbfU, 0xe6U, 0x42U, 0x68U,
-    0x41U, 0x99U, 0x2dU, 0x0fU,
-    0xb0U, 0x54U, 0xbbU, 0x16U,
+static u8 Te4[256] = {
+	 0x63U, 0x7cU, 0x77U, 0x7bU,
+	 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+	 0x30U, 0x01U, 0x67U, 0x2bU,
+	 0xfeU, 0xd7U, 0xabU, 0x76U,
+	 0xcaU, 0x82U, 0xc9U, 0x7dU,
+	 0xfaU, 0x59U, 0x47U, 0xf0U,
+	 0xadU, 0xd4U, 0xa2U, 0xafU,
+	 0x9cU, 0xa4U, 0x72U, 0xc0U,
+	 0xb7U, 0xfdU, 0x93U, 0x26U,
+	 0x36U, 0x3fU, 0xf7U, 0xccU,
+	 0x34U, 0xa5U, 0xe5U, 0xf1U,
+	 0x71U, 0xd8U, 0x31U, 0x15U,
+	 0x04U, 0xc7U, 0x23U, 0xc3U,
+	 0x18U, 0x96U, 0x05U, 0x9aU,
+	 0x07U, 0x12U, 0x80U, 0xe2U,
+	 0xebU, 0x27U, 0xb2U, 0x75U,
+	 0x09U, 0x83U, 0x2cU, 0x1aU,
+	 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+	 0x52U, 0x3bU, 0xd6U, 0xb3U,
+	 0x29U, 0xe3U, 0x2fU, 0x84U,
+	 0x53U, 0xd1U, 0x00U, 0xedU,
+	 0x20U, 0xfcU, 0xb1U, 0x5bU,
+	 0x6aU, 0xcbU, 0xbeU, 0x39U,
+	 0x4aU, 0x4cU, 0x58U, 0xcfU,
+	 0xd0U, 0xefU, 0xaaU, 0xfbU,
+	 0x43U, 0x4dU, 0x33U, 0x85U,
+	 0x45U, 0xf9U, 0x02U, 0x7fU,
+	 0x50U, 0x3cU, 0x9fU, 0xa8U,
+	 0x51U, 0xa3U, 0x40U, 0x8fU,
+	 0x92U, 0x9dU, 0x38U, 0xf5U,
+	 0xbcU, 0xb6U, 0xdaU, 0x21U,
+	 0x10U, 0xffU, 0xf3U, 0xd2U,
+	 0xcdU, 0x0cU, 0x13U, 0xecU,
+	 0x5fU, 0x97U, 0x44U, 0x17U,
+	 0xc4U, 0xa7U, 0x7eU, 0x3dU,
+	 0x64U, 0x5dU, 0x19U, 0x73U,
+	 0x60U, 0x81U, 0x4fU, 0xdcU,
+	 0x22U, 0x2aU, 0x90U, 0x88U,
+	 0x46U, 0xeeU, 0xb8U, 0x14U,
+	 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+	 0xe0U, 0x32U, 0x3aU, 0x0aU,
+	 0x49U, 0x06U, 0x24U, 0x5cU,
+	 0xc2U, 0xd3U, 0xacU, 0x62U,
+	 0x91U, 0x95U, 0xe4U, 0x79U,
+	 0xe7U, 0xc8U, 0x37U, 0x6dU,
+	 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+	 0x6cU, 0x56U, 0xf4U, 0xeaU,
+	 0x65U, 0x7aU, 0xaeU, 0x08U,
+	 0xbaU, 0x78U, 0x25U, 0x2eU,
+	 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+	 0xe8U, 0xddU, 0x74U, 0x1fU,
+	 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+	 0x70U, 0x3eU, 0xb5U, 0x66U,
+	 0x48U, 0x03U, 0xf6U, 0x0eU,
+	 0x61U, 0x35U, 0x57U, 0xb9U,
+	 0x86U, 0xc1U, 0x1dU, 0x9eU,
+	 0xe1U, 0xf8U, 0x98U, 0x11U,
+	 0x69U, 0xd9U, 0x8eU, 0x94U,
+	 0x9bU, 0x1eU, 0x87U, 0xe9U,
+	 0xceU, 0x55U, 0x28U, 0xdfU,
+	 0x8cU, 0xa1U, 0x89U, 0x0dU,
+	 0xbfU, 0xe6U, 0x42U, 0x68U,
+	 0x41U, 0x99U, 0x2dU, 0x0fU,
+	 0xb0U, 0x54U, 0xbbU, 0x16U,
 };
-static const u32 Td0[256] = {
-    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
-    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
-    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
-    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
-    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
-    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
-    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
-    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
-    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
-    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
-    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
-    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
-    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
-    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
-    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
-    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
-    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
-    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
-    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
-    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
-    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
-    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
-    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
-    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
-    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
-    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
-    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
-    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
-    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
-    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
-    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
-    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
-    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
-    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
-    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
-    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
-    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
-    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
-    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
-    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
-    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
-    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
-    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
-    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
-    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
-    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
-    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
-    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
-    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
-    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
-    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
-    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
-    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
-    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
-    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
-    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
-    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
-    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
-    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
-    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
-    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
-    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
-    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
-    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
-};
-static const u32 Td1[256] = {
-    0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
-    0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
-    0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
-    0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
-    0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
-    0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
-    0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
-    0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
-    0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
-    0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
-    0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
-    0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
-    0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
-    0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
-    0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
-    0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
-    0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
-    0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
-    0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
-    0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
-    0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
-    0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
-    0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
-    0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
-    0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
-    0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
-    0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
-    0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
-    0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
-    0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
-    0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
-    0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
-    0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
-    0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
-    0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
-    0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
-    0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
-    0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
-    0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
-    0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
-    0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
-    0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
-    0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
-    0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
-    0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
-    0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
-    0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
-    0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
-    0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
-    0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
-    0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
-    0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
-    0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
-    0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
-    0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
-    0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
-    0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
-    0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
-    0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
-    0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
-    0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
-    0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
-    0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
-    0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+static u32 Td0[256] = {
+	 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+	 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+	 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+	 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+	 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+	 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+	 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+	 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+	 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+	 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+	 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+	 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+	 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+	 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+	 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+	 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+	 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+	 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+	 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+	 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+	 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+	 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+	 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+	 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+	 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+	 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+	 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+	 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+	 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+	 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+	 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+	 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+	 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+	 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+	 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+	 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+	 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+	 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+	 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+	 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+	 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+	 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+	 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+	 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+	 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+	 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+	 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+	 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+	 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+	 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+	 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+	 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+	 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+	 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+	 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+	 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+	 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+	 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+	 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+	 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+	 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+	 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+	 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+	 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
 };
-static const u32 Td2[256] = {
-    0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
-    0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
-    0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
-    0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
-    0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
-    0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
-    0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
-    0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
-    0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
-    0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
-    0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
-    0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
-    0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
-    0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
-    0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
-    0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
-    0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
-    0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
-    0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
-    0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
-
-    0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
-    0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
-    0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
-    0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
-    0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
-    0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
-    0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
-    0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
-    0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
-    0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
-    0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
-    0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
-    0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
-    0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
-    0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
-    0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
-    0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
-    0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
-    0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
-    0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
-    0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
-    0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
-    0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
-    0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
-    0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
-    0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
-    0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
-    0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
-    0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
-    0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
-    0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
-    0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
-    0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
-    0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
-    0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
-    0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
-    0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
-    0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
-    0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
-    0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
-    0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
-    0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
-    0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
-    0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+static u32 Td1[256] = {
+	 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+	 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+	 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+	 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+	 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+	 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+	 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+	 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+	 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+	 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+	 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+	 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+	 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+	 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+	 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+	 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+	 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+	 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+	 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+	 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+	 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+	 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+	 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+	 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+	 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+	 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+	 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+	 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+	 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+	 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+	 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+	 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+	 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+	 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+	 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+	 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+	 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+	 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+	 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+	 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+	 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+	 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+	 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+	 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+	 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+	 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+	 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+	 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+	 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+	 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+	 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+	 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+	 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+	 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+	 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+	 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+	 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+	 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+	 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+	 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+	 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+	 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+	 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+	 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
 };
-static const u32 Td3[256] = {
-    0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
-    0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
-    0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
-    0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
-    0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
-    0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
-    0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
-    0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
-    0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
-    0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
-    0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
-    0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
-    0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
-    0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
-    0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
-    0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
-    0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
-    0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
-    0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
-    0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
-    0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
-    0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
-    0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
-    0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
-    0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
-    0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
-    0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
-    0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
-    0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
-    0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
-    0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
-    0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
-    0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
-    0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
-    0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
-    0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
-    0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
-    0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
-    0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
-    0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
-    0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
-    0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
-    0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
-    0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
-    0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
-    0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
-    0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
-    0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
-    0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
-    0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
-    0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
-    0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
-    0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
-    0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
-    0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
-    0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
-    0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
-    0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
-    0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
-    0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
-    0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
-    0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
-    0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
-    0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+static u32 Td2[256] = {
+	 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+	 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+	 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+	 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+	 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+	 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+	 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+	 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+	 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+	 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+	 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+	 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+	 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+	 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+	 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+	 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+	 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+	 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+	 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+	 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+
+	 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+	 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+	 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+	 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+	 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+	 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+	 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+	 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+	 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+	 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+	 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+	 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+	 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+	 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+	 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+	 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+	 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+	 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+	 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+	 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+	 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+	 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+	 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+	 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+	 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+	 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+	 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+	 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+	 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+	 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+	 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+	 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+	 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+	 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+	 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+	 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+	 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+	 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+	 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+	 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+	 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+	 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+	 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+	 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
 };
-static const u8 Td4[256] = {
-    0x52U, 0x09U, 0x6aU, 0xd5U,
-    0x30U, 0x36U, 0xa5U, 0x38U,
-    0xbfU, 0x40U, 0xa3U, 0x9eU,
-    0x81U, 0xf3U, 0xd7U, 0xfbU,
-    0x7cU, 0xe3U, 0x39U, 0x82U,
-    0x9bU, 0x2fU, 0xffU, 0x87U,
-    0x34U, 0x8eU, 0x43U, 0x44U,
-    0xc4U, 0xdeU, 0xe9U, 0xcbU,
-    0x54U, 0x7bU, 0x94U, 0x32U,
-    0xa6U, 0xc2U, 0x23U, 0x3dU,
-    0xeeU, 0x4cU, 0x95U, 0x0bU,
-    0x42U, 0xfaU, 0xc3U, 0x4eU,
-    0x08U, 0x2eU, 0xa1U, 0x66U,
-    0x28U, 0xd9U, 0x24U, 0xb2U,
-    0x76U, 0x5bU, 0xa2U, 0x49U,
-    0x6dU, 0x8bU, 0xd1U, 0x25U,
-    0x72U, 0xf8U, 0xf6U, 0x64U,
-    0x86U, 0x68U, 0x98U, 0x16U,
-    0xd4U, 0xa4U, 0x5cU, 0xccU,
-    0x5dU, 0x65U, 0xb6U, 0x92U,
-    0x6cU, 0x70U, 0x48U, 0x50U,
-    0xfdU, 0xedU, 0xb9U, 0xdaU,
-    0x5eU, 0x15U, 0x46U, 0x57U,
-    0xa7U, 0x8dU, 0x9dU, 0x84U,
-    0x90U, 0xd8U, 0xabU, 0x00U,
-    0x8cU, 0xbcU, 0xd3U, 0x0aU,
-    0xf7U, 0xe4U, 0x58U, 0x05U,
-    0xb8U, 0xb3U, 0x45U, 0x06U,
-    0xd0U, 0x2cU, 0x1eU, 0x8fU,
-    0xcaU, 0x3fU, 0x0fU, 0x02U,
-    0xc1U, 0xafU, 0xbdU, 0x03U,
-    0x01U, 0x13U, 0x8aU, 0x6bU,
-    0x3aU, 0x91U, 0x11U, 0x41U,
-    0x4fU, 0x67U, 0xdcU, 0xeaU,
-    0x97U, 0xf2U, 0xcfU, 0xceU,
-    0xf0U, 0xb4U, 0xe6U, 0x73U,
-    0x96U, 0xacU, 0x74U, 0x22U,
-    0xe7U, 0xadU, 0x35U, 0x85U,
-    0xe2U, 0xf9U, 0x37U, 0xe8U,
-    0x1cU, 0x75U, 0xdfU, 0x6eU,
-    0x47U, 0xf1U, 0x1aU, 0x71U,
-    0x1dU, 0x29U, 0xc5U, 0x89U,
-    0x6fU, 0xb7U, 0x62U, 0x0eU,
-    0xaaU, 0x18U, 0xbeU, 0x1bU,
-    0xfcU, 0x56U, 0x3eU, 0x4bU,
-    0xc6U, 0xd2U, 0x79U, 0x20U,
-    0x9aU, 0xdbU, 0xc0U, 0xfeU,
-    0x78U, 0xcdU, 0x5aU, 0xf4U,
-    0x1fU, 0xddU, 0xa8U, 0x33U,
-    0x88U, 0x07U, 0xc7U, 0x31U,
-    0xb1U, 0x12U, 0x10U, 0x59U,
-    0x27U, 0x80U, 0xecU, 0x5fU,
-    0x60U, 0x51U, 0x7fU, 0xa9U,
-    0x19U, 0xb5U, 0x4aU, 0x0dU,
-    0x2dU, 0xe5U, 0x7aU, 0x9fU,
-    0x93U, 0xc9U, 0x9cU, 0xefU,
-    0xa0U, 0xe0U, 0x3bU, 0x4dU,
-    0xaeU, 0x2aU, 0xf5U, 0xb0U,
-    0xc8U, 0xebU, 0xbbU, 0x3cU,
-    0x83U, 0x53U, 0x99U, 0x61U,
-    0x17U, 0x2bU, 0x04U, 0x7eU,
-    0xbaU, 0x77U, 0xd6U, 0x26U,
-    0xe1U, 0x69U, 0x14U, 0x63U,
-    0x55U, 0x21U, 0x0cU, 0x7dU,
+static u32 Td3[256] = {
+	 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+	 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+	 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+	 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+	 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+	 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+	 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+	 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+	 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+	 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+	 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+	 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+	 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+	 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+	 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+	 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+	 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+	 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+	 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+	 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+	 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+	 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+	 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+	 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+	 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+	 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+	 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+	 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+	 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+	 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+	 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+	 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+	 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+	 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+	 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+	 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+	 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+	 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+	 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+	 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+	 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+	 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+	 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+	 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+	 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+	 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+	 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+	 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+	 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+	 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+	 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+	 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+	 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+	 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+	 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+	 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+	 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+	 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+	 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+	 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+	 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+	 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+	 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+	 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+static u8 Td4[256] = {
+	 0x52U, 0x09U, 0x6aU, 0xd5U,
+	 0x30U, 0x36U, 0xa5U, 0x38U,
+	 0xbfU, 0x40U, 0xa3U, 0x9eU,
+	 0x81U, 0xf3U, 0xd7U, 0xfbU,
+	 0x7cU, 0xe3U, 0x39U, 0x82U,
+	 0x9bU, 0x2fU, 0xffU, 0x87U,
+	 0x34U, 0x8eU, 0x43U, 0x44U,
+	 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+	 0x54U, 0x7bU, 0x94U, 0x32U,
+	 0xa6U, 0xc2U, 0x23U, 0x3dU,
+	 0xeeU, 0x4cU, 0x95U, 0x0bU,
+	 0x42U, 0xfaU, 0xc3U, 0x4eU,
+	 0x08U, 0x2eU, 0xa1U, 0x66U,
+	 0x28U, 0xd9U, 0x24U, 0xb2U,
+	 0x76U, 0x5bU, 0xa2U, 0x49U,
+	 0x6dU, 0x8bU, 0xd1U, 0x25U,
+	 0x72U, 0xf8U, 0xf6U, 0x64U,
+	 0x86U, 0x68U, 0x98U, 0x16U,
+	 0xd4U, 0xa4U, 0x5cU, 0xccU,
+	 0x5dU, 0x65U, 0xb6U, 0x92U,
+	 0x6cU, 0x70U, 0x48U, 0x50U,
+	 0xfdU, 0xedU, 0xb9U, 0xdaU,
+	 0x5eU, 0x15U, 0x46U, 0x57U,
+	 0xa7U, 0x8dU, 0x9dU, 0x84U,
+	 0x90U, 0xd8U, 0xabU, 0x00U,
+	 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+	 0xf7U, 0xe4U, 0x58U, 0x05U,
+	 0xb8U, 0xb3U, 0x45U, 0x06U,
+	 0xd0U, 0x2cU, 0x1eU, 0x8fU,
+	 0xcaU, 0x3fU, 0x0fU, 0x02U,
+	 0xc1U, 0xafU, 0xbdU, 0x03U,
+	 0x01U, 0x13U, 0x8aU, 0x6bU,
+	 0x3aU, 0x91U, 0x11U, 0x41U,
+	 0x4fU, 0x67U, 0xdcU, 0xeaU,
+	 0x97U, 0xf2U, 0xcfU, 0xceU,
+	 0xf0U, 0xb4U, 0xe6U, 0x73U,
+	 0x96U, 0xacU, 0x74U, 0x22U,
+	 0xe7U, 0xadU, 0x35U, 0x85U,
+	 0xe2U, 0xf9U, 0x37U, 0xe8U,
+	 0x1cU, 0x75U, 0xdfU, 0x6eU,
+	 0x47U, 0xf1U, 0x1aU, 0x71U,
+	 0x1dU, 0x29U, 0xc5U, 0x89U,
+	 0x6fU, 0xb7U, 0x62U, 0x0eU,
+	 0xaaU, 0x18U, 0xbeU, 0x1bU,
+	 0xfcU, 0x56U, 0x3eU, 0x4bU,
+	 0xc6U, 0xd2U, 0x79U, 0x20U,
+	 0x9aU, 0xdbU, 0xc0U, 0xfeU,
+	 0x78U, 0xcdU, 0x5aU, 0xf4U,
+	 0x1fU, 0xddU, 0xa8U, 0x33U,
+	 0x88U, 0x07U, 0xc7U, 0x31U,
+	 0xb1U, 0x12U, 0x10U, 0x59U,
+	 0x27U, 0x80U, 0xecU, 0x5fU,
+	 0x60U, 0x51U, 0x7fU, 0xa9U,
+	 0x19U, 0xb5U, 0x4aU, 0x0dU,
+	 0x2dU, 0xe5U, 0x7aU, 0x9fU,
+	 0x93U, 0xc9U, 0x9cU, 0xefU,
+	 0xa0U, 0xe0U, 0x3bU, 0x4dU,
+	 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+	 0xc8U, 0xebU, 0xbbU, 0x3cU,
+	 0x83U, 0x53U, 0x99U, 0x61U,
+	 0x17U, 0x2bU, 0x04U, 0x7eU,
+	 0xbaU, 0x77U, 0xd6U, 0x26U,
+	 0xe1U, 0x69U, 0x14U, 0x63U,
+	 0x55U, 0x21U, 0x0cU, 0x7dU,
 };
-static const u32 rcon[] = {
+static u32 rcon[] = {
 	0x01000000, 0x02000000, 0x04000000, 0x08000000,
 	0x10000000, 0x20000000, 0x40000000, 0x80000000,
-	0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+	0x1B000000, 0x36000000,
+	/* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
 };
 
-#define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
-
-#ifdef _MSC_VER
-#define GETU32(p) SWAP(*((u32 *)(p)))
-#define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
-#else
-#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
-#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >>  8); (ct)[3] = (u8)(st); }
-#endif
-
-/**
+/*
  * Expand the cipher key into the encryption key schedule.
  *
  * @return	the number of rounds for the given cipher key size.
  */
-static int rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) {
-   	int i = 0;
+static int
+setupEnc(u32 rk[/*4*(Nr + 1)*/], uchar key[], int nkey)
+{
+	int i = 0;
 	u32 temp;
 
-	rk[0] = GETU32(cipherKey     );
-	rk[1] = GETU32(cipherKey +  4);
-	rk[2] = GETU32(cipherKey +  8);
-	rk[3] = GETU32(cipherKey + 12);
-	if (keyBits == 128) {
+	rk[0] = GETU32(key     );
+	rk[1] = GETU32(key +  4);
+	rk[2] = GETU32(key +  8);
+	rk[3] = GETU32(key + 12);
+	if (nkey == 16) {
 		for (;;) {
 			temp  = rk[3];
 			rk[4] = rk[0] ^
@@ -894,9 +754,9 @@
 			rk += 4;
 		}
 	}
-	rk[4] = GETU32(cipherKey + 16);
-	rk[5] = GETU32(cipherKey + 20);
-	if (keyBits == 192) {
+	rk[4] = GETU32(key + 16);
+	rk[5] = GETU32(key + 20);
+	if (nkey == 24) {
 		for (;;) {
 			temp = rk[ 5];
 			rk[ 6] = rk[ 0] ^
@@ -916,91 +776,101 @@
 			rk += 6;
 		}
 	}
-	rk[6] = GETU32(cipherKey + 24);
-	rk[7] = GETU32(cipherKey + 28);
-	if (keyBits == 256) {
-        for (;;) {
-        	temp = rk[ 7];
-        	rk[ 8] = rk[ 0] ^
-        		(Te4[(temp >> 16) & 0xff] << 24) ^
-        		(Te4[(temp >>  8) & 0xff] << 16) ^
-        		(Te4[(temp      ) & 0xff] <<  8) ^
-        		(Te4[(temp >> 24)       ]      ) ^
-        		rcon[i];
-        	rk[ 9] = rk[ 1] ^ rk[ 8];
-        	rk[10] = rk[ 2] ^ rk[ 9];
-        	rk[11] = rk[ 3] ^ rk[10];
+	rk[6] = GETU32(key + 24);
+	rk[7] = GETU32(key + 28);
+	if (nkey == 32) {
+		for (;;) {
+			temp = rk[ 7];
+			rk[ 8] = rk[ 0] ^
+				(Te4[(temp >> 16) & 0xff] << 24) ^
+				(Te4[(temp >>  8) & 0xff] << 16) ^
+				(Te4[(temp      ) & 0xff] <<  8) ^
+				(Te4[(temp >> 24)       ]      ) ^
+				rcon[i];
+			rk[ 9] = rk[ 1] ^ rk[ 8];
+			rk[10] = rk[ 2] ^ rk[ 9];
+			rk[11] = rk[ 3] ^ rk[10];
 			if (++i == 7) {
 				return 14;
 			}
-        	temp = rk[11];
-        	rk[12] = rk[ 4] ^
-        		(Te4[(temp >> 24)       ] << 24) ^
-        		(Te4[(temp >> 16) & 0xff] << 16) ^
-        		(Te4[(temp >>  8) & 0xff] <<  8) ^
-        		(Te4[(temp      ) & 0xff]      );
-        	rk[13] = rk[ 5] ^ rk[12];
-        	rk[14] = rk[ 6] ^ rk[13];
-        	rk[15] = rk[ 7] ^ rk[14];
-
+			temp = rk[11];
+			rk[12] = rk[ 4] ^
+				(Te4[(temp >> 24)       ] << 24) ^
+				(Te4[(temp >> 16) & 0xff] << 16) ^
+				(Te4[(temp >>  8) & 0xff] <<  8) ^
+				(Te4[(temp      ) & 0xff]      );
+			rk[13] = rk[ 5] ^ rk[12];
+			rk[14] = rk[ 6] ^ rk[13];
+			rk[15] = rk[ 7] ^ rk[14];
 			rk += 8;
-        }
+		}
 	}
 	return 0;
 }
 
-/**
- * Expand the cipher key into the decryption key schedule.
+/*
+ * Expand key (nkey bytes) into the encryption schedule erk and the decryption schedule drk.
  *
  * @return	the number of rounds for the given cipher key size.
  */
-static int rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits) {
-	int Nr, i, j;
-	u32 temp;
+static int
+AESsetup(u32 erk[/* 4*(Nr + 1) */], u32 drk[/* 4*(Nr + 1) */], uchar key[], int nkey)
+{
+	int Nr, i;
 
 	/* expand the cipher key: */
-	Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits);
-	/* invert the order of the round keys: */
-	for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
-		temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
-		temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
-		temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
-		temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
-	}
-	/* apply the inverse MixColumn transform to all round keys but the first and the last: */
+	Nr = setupEnc(erk, key, nkey);	/* 0 if nkey is not 16, 24 or 32 */
+
+	/*
+	 * drk is erk with the round keys in reverse order; the first and last
+	 * are copied as-is, the rest go through the inverse MixColumn transform
+	 */
+	drk[0       ] = erk[4*Nr    ];
+	drk[1       ] = erk[4*Nr + 1];
+	drk[2       ] = erk[4*Nr + 2];
+	drk[3       ] = erk[4*Nr + 3];
+	drk[4*Nr    ] = erk[0       ];
+	drk[4*Nr + 1] = erk[1       ];
+	drk[4*Nr + 2] = erk[2       ];
+	drk[4*Nr + 3] = erk[3       ];
+	erk += 4 * Nr;	/* start at the last round key; the loop walks erk backwards */
 	for (i = 1; i < Nr; i++) {
-		rk += 4;
-		rk[0] =
-			Td0[Te4[(rk[0] >> 24)       ]] ^
-			Td1[Te4[(rk[0] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[0] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[0]      ) & 0xff]];
-		rk[1] =
-			Td0[Te4[(rk[1] >> 24)       ]] ^
-			Td1[Te4[(rk[1] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[1] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[1]      ) & 0xff]];
-		rk[2] =
-			Td0[Te4[(rk[2] >> 24)       ]] ^
-			Td1[Te4[(rk[2] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[2] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[2]      ) & 0xff]];
-		rk[3] =
-			Td0[Te4[(rk[3] >> 24)       ]] ^
-			Td1[Te4[(rk[3] >> 16) & 0xff]] ^
-			Td2[Te4[(rk[3] >>  8) & 0xff]] ^
-			Td3[Te4[(rk[3]      ) & 0xff]];
+		drk += 4;
+		erk -= 4;
+		drk[0] =
+		 	Td0[Te4[(erk[0] >> 24)       ]] ^
+		 	Td1[Te4[(erk[0] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[0] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[0]      ) & 0xff]];
+		drk[1] =
+		 	Td0[Te4[(erk[1] >> 24)       ]] ^
+		 	Td1[Te4[(erk[1] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[1] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[1]      ) & 0xff]];
+		drk[2] =
+		 	Td0[Te4[(erk[2] >> 24)       ]] ^
+		 	Td1[Te4[(erk[2] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[2] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[2]      ) & 0xff]];
+		drk[3] =
+		 	Td0[Te4[(erk[3] >> 24)       ]] ^
+		 	Td1[Te4[(erk[3] >> 16) & 0xff]] ^
+		 	Td2[Te4[(erk[3] >>  8) & 0xff]] ^
+		 	Td3[Te4[(erk[3]      ) & 0xff]];
 	}
 	return Nr;
 }
 
-static void rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16], u8 ct[16]) {
+/* using round keys in rk, perform Nr rounds of encrypting pt into ct */
+static void
+AESencrypt(u32 rk[/* 4*(Nr + 1) */], int Nr, uchar pt[16], uchar ct[16])
+{
 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
 #ifndef FULL_UNROLL
-    int r;
+	int r;
 #endif /* ?FULL_UNROLL */
 
-    /*
+	/*
 	 * map byte array block to cipher state
 	 * and add initial round key:
 	 */
@@ -1009,139 +879,138 @@
 	s2 = GETU32(pt +  8) ^ rk[2];
 	s3 = GETU32(pt + 12) ^ rk[3];
 #ifdef FULL_UNROLL
-    /* round 1: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
-   	/* round 2: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
-    /* round 3: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
-   	/* round 4: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
-    /* round 5: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
-   	/* round 6: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
-    /* round 7: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
-   	/* round 8: */
-   	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
-   	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
-   	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
-   	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
-    /* round 9: */
-   	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
-   	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
-   	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
-   	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
-    if (Nr > 10) {
-        /* round 10: */
-        s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
-        s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
-        s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
-        s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
-        /* round 11: */
-        t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
-        t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
-        t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
-        t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
-        if (Nr > 12) {
-            /* round 12: */
-            s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
-            s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
-            s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
-            s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
-            /* round 13: */
-            t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
-            t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
-            t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
-            t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
-        }
-    }
-    rk += Nr << 2;
-#else  /* !FULL_UNROLL */
-    /*
+	/* round 1: */
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+	/* round 2: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+	/* round 3: */
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+	/* round 4: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+	/* round 5: */
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+	/* round 6: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+	/* round 7: */
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+	/* round 8: */
+	s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+	s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+	s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+	s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+	/* round 9: */
+	t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+	t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+	t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+	t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+	if (Nr > 10) {
+		/* round 10: */
+		s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+		s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+		s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+		s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+		/* round 11: */
+		t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+		t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+		t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+		t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+		if (Nr > 12) {
+			/* round 12: */
+			s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >>  8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+			s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >>  8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+			s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >>  8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+			s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >>  8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+			/* round 13: */
+			t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >>  8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+			t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >>  8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+			t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >>  8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+			t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >>  8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+		}
+	}
+	rk += Nr << 2;
+#else					/* !FULL_UNROLL */
+	/*
 	 * Nr - 1 full rounds:
 	 */
-    r = Nr >> 1;
-    for (;;) {
-        t0 =
-            Te0[(s0 >> 24)       ] ^
-            Te1[(s1 >> 16) & 0xff] ^
-            Te2[(s2 >>  8) & 0xff] ^
-            Te3[(s3      ) & 0xff] ^
-            rk[4];
-        t1 =
-            Te0[(s1 >> 24)       ] ^
-            Te1[(s2 >> 16) & 0xff] ^
-            Te2[(s3 >>  8) & 0xff] ^
-            Te3[(s0      ) & 0xff] ^
-            rk[5];
-        t2 =
-            Te0[(s2 >> 24)       ] ^
-            Te1[(s3 >> 16) & 0xff] ^
-            Te2[(s0 >>  8) & 0xff] ^
-            Te3[(s1      ) & 0xff] ^
-            rk[6];
-        t3 =
-            Te0[(s3 >> 24)       ] ^
-            Te1[(s0 >> 16) & 0xff] ^
-            Te2[(s1 >>  8) & 0xff] ^
-            Te3[(s2      ) & 0xff] ^
-            rk[7];
+	r = Nr >> 1;
+	for (;;) {
+		t0 =
+		 	Te0[(s0 >> 24)       ] ^
+		 	Te1[(s1 >> 16) & 0xff] ^
+		 	Te2[(s2 >>  8) & 0xff] ^
+		 	Te3[(s3      ) & 0xff] ^
+		 	rk[4];
+		t1 =
+		 	Te0[(s1 >> 24)       ] ^
+		 	Te1[(s2 >> 16) & 0xff] ^
+		 	Te2[(s3 >>  8) & 0xff] ^
+		 	Te3[(s0      ) & 0xff] ^
+		 	rk[5];
+		t2 =
+		 	Te0[(s2 >> 24)       ] ^
+		 	Te1[(s3 >> 16) & 0xff] ^
+		 	Te2[(s0 >>  8) & 0xff] ^
+		 	Te3[(s1      ) & 0xff] ^
+		 	rk[6];
+		t3 =
+		 	Te0[(s3 >> 24)       ] ^
+		 	Te1[(s0 >> 16) & 0xff] ^
+		 	Te2[(s1 >>  8) & 0xff] ^
+		 	Te3[(s2      ) & 0xff] ^
+		 	rk[7];
 
-        rk += 8;
-        if (--r == 0) {
-            break;
-        }
+		rk += 8;
+		if (--r == 0)
+			break;
 
-        s0 =
-            Te0[(t0 >> 24)       ] ^
-            Te1[(t1 >> 16) & 0xff] ^
-            Te2[(t2 >>  8) & 0xff] ^
-            Te3[(t3      ) & 0xff] ^
-            rk[0];
-        s1 =
-            Te0[(t1 >> 24)       ] ^
-            Te1[(t2 >> 16) & 0xff] ^
-            Te2[(t3 >>  8) & 0xff] ^
-            Te3[(t0      ) & 0xff] ^
-            rk[1];
-        s2 =
-            Te0[(t2 >> 24)       ] ^
-            Te1[(t3 >> 16) & 0xff] ^
-            Te2[(t0 >>  8) & 0xff] ^
-            Te3[(t1      ) & 0xff] ^
-            rk[2];
-        s3 =
-            Te0[(t3 >> 24)       ] ^
-            Te1[(t0 >> 16) & 0xff] ^
-            Te2[(t1 >>  8) & 0xff] ^
-            Te3[(t2      ) & 0xff] ^
-            rk[3];
-    }
-#endif /* ?FULL_UNROLL */
-    /*
+		s0 =
+		 	Te0[(t0 >> 24)       ] ^
+		 	Te1[(t1 >> 16) & 0xff] ^
+		 	Te2[(t2 >>  8) & 0xff] ^
+		 	Te3[(t3      ) & 0xff] ^
+		 	rk[0];
+		s1 =
+		 	Te0[(t1 >> 24)       ] ^
+		 	Te1[(t2 >> 16) & 0xff] ^
+		 	Te2[(t3 >>  8) & 0xff] ^
+		 	Te3[(t0      ) & 0xff] ^
+		 	rk[1];
+		s2 =
+		 	Te0[(t2 >> 24)       ] ^
+		 	Te1[(t3 >> 16) & 0xff] ^
+		 	Te2[(t0 >>  8) & 0xff] ^
+		 	Te3[(t1      ) & 0xff] ^
+		 	rk[2];
+		s3 =
+		 	Te0[(t3 >> 24)       ] ^
+		 	Te1[(t0 >> 16) & 0xff] ^
+		 	Te2[(t1 >>  8) & 0xff] ^
+		 	Te3[(t2      ) & 0xff] ^
+		 	rk[3];
+	}
+#endif					/* ?FULL_UNROLL */
+	/*
 	 * apply last round and
 	 * map cipher state to byte array block:
 	 */
@@ -1175,369 +1044,211 @@
 	PUTU32(ct + 12, s3);
 }
 
-static void rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16], u8 pt[16]) {
+static void
+AESdecrypt(u32 rk[/* 4*(Nr + 1) */], int Nr, uchar ct[16], uchar pt[16])
+{
 	u32 s0, s1, s2, s3, t0, t1, t2, t3;
 #ifndef FULL_UNROLL
-    int r;
-#endif /* ?FULL_UNROLL */
-
-    /*
-	 * map byte array block to cipher state
-	 * and add initial round key:
-	 */
-    s0 = GETU32(ct     ) ^ rk[0];
-    s1 = GETU32(ct +  4) ^ rk[1];
-    s2 = GETU32(ct +  8) ^ rk[2];
-    s3 = GETU32(ct + 12) ^ rk[3];
-#ifdef FULL_UNROLL
-    /* round 1: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
-    /* round 2: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
-    /* round 3: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
-    /* round 4: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
-    /* round 5: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
-    /* round 6: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
-    /* round 7: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
-    /* round 8: */
-    s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
-    s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
-    s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
-    s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
-    /* round 9: */
-    t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
-    t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
-    t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
-    t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
-    if (Nr > 10) {
-        /* round 10: */
-        s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
-        s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
-        s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
-        s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
-        /* round 11: */
-        t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
-        t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
-        t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
-        t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
-        if (Nr > 12) {
-            /* round 12: */
-            s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
-            s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
-            s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
-            s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
-            /* round 13: */
-            t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
-            t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
-            t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
-            t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
-        }
-    }
-	rk += Nr << 2;
-#else  /* !FULL_UNROLL */
-    /*
-     * Nr - 1 full rounds:
-     */
-    r = Nr >> 1;
-    for (;;) {
-        t0 =
-            Td0[(s0 >> 24)       ] ^
-            Td1[(s3 >> 16) & 0xff] ^
-            Td2[(s2 >>  8) & 0xff] ^
-            Td3[(s1      ) & 0xff] ^
-            rk[4];
-        t1 =
-            Td0[(s1 >> 24)       ] ^
-            Td1[(s0 >> 16) & 0xff] ^
-            Td2[(s3 >>  8) & 0xff] ^
-            Td3[(s2      ) & 0xff] ^
-            rk[5];
-        t2 =
-            Td0[(s2 >> 24)       ] ^
-            Td1[(s1 >> 16) & 0xff] ^
-            Td2[(s0 >>  8) & 0xff] ^
-            Td3[(s3      ) & 0xff] ^
-            rk[6];
-        t3 =
-            Td0[(s3 >> 24)       ] ^
-            Td1[(s2 >> 16) & 0xff] ^
-            Td2[(s1 >>  8) & 0xff] ^
-            Td3[(s0      ) & 0xff] ^
-            rk[7];
-
-        rk += 8;
-        if (--r == 0) {
-            break;
-        }
-
-        s0 =
-            Td0[(t0 >> 24)       ] ^
-            Td1[(t3 >> 16) & 0xff] ^
-            Td2[(t2 >>  8) & 0xff] ^
-            Td3[(t1      ) & 0xff] ^
-            rk[0];
-        s1 =
-            Td0[(t1 >> 24)       ] ^
-            Td1[(t0 >> 16) & 0xff] ^
-            Td2[(t3 >>  8) & 0xff] ^
-            Td3[(t2      ) & 0xff] ^
-            rk[1];
-        s2 =
-            Td0[(t2 >> 24)       ] ^
-            Td1[(t1 >> 16) & 0xff] ^
-            Td2[(t0 >>  8) & 0xff] ^
-            Td3[(t3      ) & 0xff] ^
-            rk[2];
-        s3 =
-            Td0[(t3 >> 24)       ] ^
-            Td1[(t2 >> 16) & 0xff] ^
-            Td2[(t1 >>  8) & 0xff] ^
-            Td3[(t0      ) & 0xff] ^
-            rk[3];
-    }
-#endif /* ?FULL_UNROLL */
-    /*
-	 * apply last round and
-	 * map cipher state to byte array block:
-	 */
-   	s0 =
-   		(Td4[(t0 >> 24)       ] << 24) ^
-   		(Td4[(t3 >> 16) & 0xff] << 16) ^
-   		(Td4[(t2 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t1      ) & 0xff]      ) ^
-   		rk[0];
-	PUTU32(pt     , s0);
-   	s1 =
-   		(Td4[(t1 >> 24)       ] << 24) ^
-   		(Td4[(t0 >> 16) & 0xff] << 16) ^
-   		(Td4[(t3 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t2      ) & 0xff]      ) ^
-   		rk[1];
-	PUTU32(pt +  4, s1);
-   	s2 =
-   		(Td4[(t2 >> 24)       ] << 24) ^
-   		(Td4[(t1 >> 16) & 0xff] << 16) ^
-   		(Td4[(t0 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t3      ) & 0xff]      ) ^
-   		rk[2];
-	PUTU32(pt +  8, s2);
-   	s3 =
-   		(Td4[(t3 >> 24)       ] << 24) ^
-   		(Td4[(t2 >> 16) & 0xff] << 16) ^
-   		(Td4[(t1 >>  8) & 0xff] <<  8) ^
-   		(Td4[(t0      ) & 0xff]      ) ^
-   		rk[3];
-	PUTU32(pt + 12, s3);
-}
-
-#ifdef INTERMEDIATE_VALUE_KAT
-
-static void rijndaelEncryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) {
 	int r;
-	u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#endif		/* ?FULL_UNROLL */
 
-    /*
+	/*
 	 * map byte array block to cipher state
 	 * and add initial round key:
 	 */
-	s0 = GETU32(block     ) ^ rk[0];
-	s1 = GETU32(block +  4) ^ rk[1];
-	s2 = GETU32(block +  8) ^ rk[2];
-	s3 = GETU32(block + 12) ^ rk[3];
-    rk += 4;
-
-    /*
-	 * Nr - 1 full rounds:
-	 */
-	for (r = (rounds < Nr ? rounds : Nr - 1); r > 0; r--) {
-		t0 =
-			Te0[(s0 >> 24)       ] ^
-			Te1[(s1 >> 16) & 0xff] ^
-			Te2[(s2 >>  8) & 0xff] ^
-			Te3[(s3      ) & 0xff] ^
-			rk[0];
-		t1 =
-			Te0[(s1 >> 24)       ] ^
-			Te1[(s2 >> 16) & 0xff] ^
-			Te2[(s3 >>  8) & 0xff] ^
-			Te3[(s0      ) & 0xff] ^
-			rk[1];
-		t2 =
-			Te0[(s2 >> 24)       ] ^
-			Te1[(s3 >> 16) & 0xff] ^
-			Te2[(s0 >>  8) & 0xff] ^
-			Te3[(s1      ) & 0xff] ^
-			rk[2];
-		t3 =
-			Te0[(s3 >> 24)       ] ^
-			Te1[(s0 >> 16) & 0xff] ^
-			Te2[(s1 >>  8) & 0xff] ^
-			Te3[(s2      ) & 0xff] ^
-			rk[3];
-
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
-		rk += 4;
-
-    }
-
-    /*
-	 * apply last round and
-	 * map cipher state to byte array block:
-	 */
-	if (rounds == Nr) {
-    	t0 =
-    		(Te4[(s0 >> 24)       ] << 24) ^
-    		(Te4[(s1 >> 16) & 0xff] << 16) ^
-    		(Te4[(s2 >>  8) & 0xff] <<  8) ^
-    		(Te4[(s3      ) & 0xff]      ) ^
-    		rk[0];
-    	t1 =
-    		(Te4[(s1 >> 24)       ] << 24) ^
-    		(Te4[(s2 >> 16) & 0xff] << 16) ^
-    		(Te4[(s3 >>  8) & 0xff] <<  8) ^
-    		(Te4[(s0      ) & 0xff]      ) ^
-    		rk[1];
-    	t2 =
-    		(Te4[(s2 >> 24)       ] << 24) ^
-    		(Te4[(s3 >> 16) & 0xff] << 16) ^
-    		(Te4[(s0 >>  8) & 0xff] <<  8) ^
-    		(Te4[(s1      ) & 0xff]      ) ^
-    		rk[2];
-    	t3 =
-    		(Te4[(s3 >> 24)       ] << 24) ^
-    		(Te4[(s0 >> 16) & 0xff] << 16) ^
-    		(Te4[(s1 >>  8) & 0xff] <<  8) ^
-    		(Te4[(s2      ) & 0xff]      ) ^
-    		rk[3];
-		
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
+	s0 = GETU32(ct     ) ^ rk[0];
+	s1 = GETU32(ct +  4) ^ rk[1];
+	s2 = GETU32(ct +  8) ^ rk[2];
+	s3 = GETU32(ct + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+	/* round 1: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+	/* round 2: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+	/* round 3: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+	/* round 4: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+	/* round 5: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+	/* round 6: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+	/* round 7: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+	/* round 8: */
+	s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+	s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+	s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+	s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+	/* round 9: */
+	t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+	t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+	t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+	t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+	if (Nr > 10) {
+		/* round 10: */
+		s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+		s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+		s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+		s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+		/* round 11: */
+		t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+		t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+		t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+		t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+		if (Nr > 12) {
+			/* round 12: */
+			s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >>  8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+			s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >>  8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+			s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >>  8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+			s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >>  8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+			/* round 13: */
+			t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >>  8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+			t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >>  8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+			t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >>  8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+			t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >>  8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+		}
 	}
-
-	PUTU32(block     , s0);
-	PUTU32(block +  4, s1);
-	PUTU32(block +  8, s2);
-	PUTU32(block + 12, s3);
-}
-
-static void rijndaelDecryptRound(const u32 rk[/*4*(Nr + 1)*/], int Nr, u8 block[16], int rounds) {
-	int r;
-	u32 s0, s1, s2, s3, t0, t1, t2, t3;
-
-    /*
-	 * map byte array block to cipher state
-	 * and add initial round key:
-	 */
-	s0 = GETU32(block     ) ^ rk[0];
-	s1 = GETU32(block +  4) ^ rk[1];
-	s2 = GETU32(block +  8) ^ rk[2];
-	s3 = GETU32(block + 12) ^ rk[3];
-    rk += 4;
-
-    /*
+	rk += Nr << 2;
+#else					/* !FULL_UNROLL */
+	/*
 	 * Nr - 1 full rounds:
 	 */
-	for (r = (rounds < Nr ? rounds : Nr) - 1; r > 0; r--) {
+	r = Nr >> 1;
+	for (;;) {
 		t0 =
 			Td0[(s0 >> 24)       ] ^
 			Td1[(s3 >> 16) & 0xff] ^
 			Td2[(s2 >>  8) & 0xff] ^
 			Td3[(s1      ) & 0xff] ^
-			rk[0];
+			rk[4];
 		t1 =
 			Td0[(s1 >> 24)       ] ^
 			Td1[(s0 >> 16) & 0xff] ^
 			Td2[(s3 >>  8) & 0xff] ^
 			Td3[(s2      ) & 0xff] ^
-			rk[1];
+			rk[5];
 		t2 =
 			Td0[(s2 >> 24)       ] ^
 			Td1[(s1 >> 16) & 0xff] ^
 			Td2[(s0 >>  8) & 0xff] ^
 			Td3[(s3      ) & 0xff] ^
-			rk[2];
+			rk[6];
 		t3 =
 			Td0[(s3 >> 24)       ] ^
 			Td1[(s2 >> 16) & 0xff] ^
 			Td2[(s1 >>  8) & 0xff] ^
 			Td3[(s0      ) & 0xff] ^
-			rk[3];
+			rk[7];
 
-		s0 = t0;
-		s1 = t1;
-		s2 = t2;
-		s3 = t3;
-		rk += 4;
+		rk += 8;
+		if (--r == 0)
+			break;
 
-    }
-
-    /*
-	 * complete the last round and
+		s0 =
+			Td0[(t0 >> 24)       ] ^
+			Td1[(t3 >> 16) & 0xff] ^
+			Td2[(t2 >>  8) & 0xff] ^
+			Td3[(t1      ) & 0xff] ^
+			rk[0];
+		s1 =
+			Td0[(t1 >> 24)       ] ^
+			Td1[(t0 >> 16) & 0xff] ^
+			Td2[(t3 >>  8) & 0xff] ^
+			Td3[(t2      ) & 0xff] ^
+			rk[1];
+		s2 =
+			Td0[(t2 >> 24)       ] ^
+			Td1[(t1 >> 16) & 0xff] ^
+			Td2[(t0 >>  8) & 0xff] ^
+			Td3[(t3      ) & 0xff] ^
+			rk[2];
+		s3 =
+			Td0[(t3 >> 24)       ] ^
+			Td1[(t2 >> 16) & 0xff] ^
+			Td2[(t1 >>  8) & 0xff] ^
+			Td3[(t0      ) & 0xff] ^
+			rk[3];
+	}
+#endif					/* ?FULL_UNROLL */
+	/*
+	 * apply last round and
 	 * map cipher state to byte array block:
 	 */
-	t0 =
-		(Td4[(s0 >> 24)       ] << 24) ^
-		(Td4[(s3 >> 16) & 0xff] << 16) ^
-		(Td4[(s2 >>  8) & 0xff] <<  8) ^
-		(Td4[(s1      ) & 0xff]      );
-	t1 =
-		(Td4[(s1 >> 24)       ] << 24) ^
-		(Td4[(s0 >> 16) & 0xff] << 16) ^
-		(Td4[(s3 >>  8) & 0xff] <<  8) ^
-		(Td4[(s2      ) & 0xff]      );
-	t2 =
-		(Td4[(s2 >> 24)       ] << 24) ^
-		(Td4[(s1 >> 16) & 0xff] << 16) ^
-		(Td4[(s0 >>  8) & 0xff] <<  8) ^
-		(Td4[(s3      ) & 0xff]      );
-	t3 =
-		(Td4[(s3 >> 24)       ] << 24) ^
-		(Td4[(s2 >> 16) & 0xff] << 16) ^
-		(Td4[(s1 >>  8) & 0xff] <<  8) ^
-		(Td4[(s0      ) & 0xff]      );
+	s0 =
+		(Td4[(t0 >> 24)       ] << 24) ^
+		(Td4[(t3 >> 16) & 0xff] << 16) ^
+		(Td4[(t2 >>  8) & 0xff] <<  8) ^
+		(Td4[(t1      ) & 0xff]      ) ^
+		rk[0];
+	PUTU32(pt     , s0);
+	s1 =
+		(Td4[(t1 >> 24)       ] << 24) ^
+		(Td4[(t0 >> 16) & 0xff] << 16) ^
+		(Td4[(t3 >>  8) & 0xff] <<  8) ^
+		(Td4[(t2      ) & 0xff]      ) ^
+		rk[1];
+	PUTU32(pt +  4, s1);
+	s2 =
+		(Td4[(t2 >> 24)       ] << 24) ^
+		(Td4[(t1 >> 16) & 0xff] << 16) ^
+		(Td4[(t0 >>  8) & 0xff] <<  8) ^
+		(Td4[(t3      ) & 0xff]      ) ^
+		rk[2];
+	PUTU32(pt +  8, s2);
+	s3 =
+		(Td4[(t3 >> 24)       ] << 24) ^
+		(Td4[(t2 >> 16) & 0xff] << 16) ^
+		(Td4[(t1 >>  8) & 0xff] <<  8) ^
+		(Td4[(t0      ) & 0xff]      ) ^
+		rk[3];
+	PUTU32(pt + 12, s3);
+}
 
-	if (rounds == Nr) {
-	    t0 ^= rk[0];
-	    t1 ^= rk[1];
-	    t2 ^= rk[2];
-	    t3 ^= rk[3];
-	}
+void (*aes_encrypt)(u32 rk[], int Nr, uchar pt[16], uchar ct[16]) = AESencrypt;
+void (*aes_decrypt)(u32 rk[], int Nr, uchar ct[16], uchar pt[16]) = AESdecrypt;
 
-	PUTU32(block     , t0);
-	PUTU32(block +  4, t1);
-	PUTU32(block +  8, t2);
-	PUTU32(block + 12, t3);
-}
+void
+setupAESstate(AESstate *s, uchar key[], int nkey, uchar *ivec)	/* reset s, then install key and optional ivec */
+{
+	static int (*aes_setup)(u32 erk[/* 4*(Nr + 1) */], u32 drk[/* 4*(Nr + 1) */], uchar key[], int nkey);
 
-#endif /* INTERMEDIATE_VALUE_KAT */
+	if(aes_setup == nil){	/* first call: choose the key-setup routine */
+		extern void *aesni_init(void);
+		if((aes_setup = aesni_init()) == nil)
+			aes_setup = AESsetup;	/* aesni_init returned nil: use AESsetup */
+	}
+	memset(s, 0, sizeof(*s));
+	if(nkey > AESmaxkey)	/* longer keys are cut to AESmaxkey bytes */
+		nkey = AESmaxkey;
+	memmove(s->key, key, nkey);
+	s->keybytes = nkey;
+	s->ekey = s->storage+16 - (s->storage - (uchar*)0 & 15);	/* first 16-byte boundary past the start of storage */
+	s->dkey = (uchar*)s->ekey + 16*(AESmaxrounds+1);	/* leaves room for AESmaxrounds+1 round keys before it */
+	s->rounds = (*aes_setup)(s->ekey, s->dkey, s->key, nkey);
+	if(ivec != nil)
+		memmove(s->ivec, ivec, AESbsize);
+	if(s->rounds != 0)	/* s->setup is stamped only when rounds is non-zero */
+		s->setup = 0xcafebabe;
+}
--- /dev/null
+++ b/libsec/port/aesCBC.c
@@ -1,0 +1,94 @@
+#include "os.h"
+#include <libsec.h>
+
+/*
+ * Define by analogy with desCBCencrypt;  AES modes are not standardized yet.
+ * Because of the way that non-multiple-of-16 buffers are handled,
+ * the decryptor must be fed buffers of the same size as the encryptor.
+ */
+void
+aesCBCencrypt(uchar *p, int len, AESstate *s)	/* CBC-encrypt p[0..len) in place; s->ivec carries the chain */
+{
+	uchar *ip, *eip;
+
+	if(((p-(uchar*)0) & 3) == 0){	/* p is 4-byte aligned: XOR and copy a word at a time */
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			((u32int*)ip)[0] ^= ((u32int*)p)[0];
+			((u32int*)ip)[1] ^= ((u32int*)p)[1];
+			((u32int*)ip)[2] ^= ((u32int*)p)[2];
+			((u32int*)ip)[3] ^= ((u32int*)p)[3];
+
+			aes_encrypt(s->ekey, s->rounds, ip, ip);
+
+			((u32int*)p)[0] = ((u32int*)ip)[0];
+			((u32int*)p)[1] = ((u32int*)ip)[1];
+			((u32int*)p)[2] = ((u32int*)ip)[2];
+			((u32int*)p)[3] = ((u32int*)ip)[3];
+			p += AESbsize;
+		}
+	} else {	/* unaligned p: same steps a byte at a time */
+		for(; len >= AESbsize; len -= AESbsize){
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; )
+				*ip++ ^= *p++;
+			aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);
+			memmove(p - AESbsize, s->ivec, AESbsize);	/* p already points past this block */
+		}
+	}
+
+	if(len > 0){	/* leftover shorter than AESbsize: XOR with the re-encrypted ivec */
+		ip = s->ivec;
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
+		for(eip = ip+len; ip < eip; )
+			*p++ ^= *ip++;
+	}
+}
+
+void
+aesCBCdecrypt(uchar *p, int len, AESstate *s)	/* CBC-decrypt p[0..len) in place; s->ivec carries the chain */
+{
+	uchar *ip, *eip, *tp;
+	u32int t[4];
+
+	if(((p-(uchar*)0) & 3) == 0){	/* p is 4-byte aligned: work a word at a time */
+		for(; len >= AESbsize; len -= AESbsize){
+			t[0] = ((u32int*)p)[0];	/* save the ciphertext: it becomes the next ivec */
+			t[1] = ((u32int*)p)[1];
+			t[2] = ((u32int*)p)[2];
+			t[3] = ((u32int*)p)[3];
+
+			aes_decrypt(s->dkey, s->rounds, p, p);
+
+			ip = s->ivec;
+			((u32int*)p)[0] ^= ((u32int*)ip)[0];
+			((u32int*)p)[1] ^= ((u32int*)ip)[1];
+			((u32int*)p)[2] ^= ((u32int*)ip)[2];
+			((u32int*)p)[3] ^= ((u32int*)ip)[3];
+			p += AESbsize;
+
+			((u32int*)ip)[0] = t[0];
+			((u32int*)ip)[1] = t[1];
+			((u32int*)ip)[2] = t[2];
+			((u32int*)ip)[3] = t[3];
+		}
+	} else {	/* unaligned p: same steps a byte at a time */
+		for(; len >= AESbsize; len -= AESbsize){
+			tp = (uchar*)t;
+			memmove(tp, p, AESbsize);
+			aes_decrypt(s->dkey, s->rounds, p, p);
+			ip = s->ivec;
+			for(eip = ip+AESbsize; ip < eip; ){
+				*p++ ^= *ip;	/* plaintext = decrypted block ^ previous ciphertext */
+				*ip++ = *tp++;	/* saved ciphertext replaces ivec */
+			}
+		}
+	}
+
+	if(len > 0){	/* leftover shorter than AESbsize: encrypt ivec and XOR, as aesCBCencrypt does */
+		ip = s->ivec;
+		aes_encrypt(s->ekey, s->rounds, ip, ip);
+		for(eip = ip+len; ip < eip; )
+			*p++ ^= *ip++;
+	}
+}
--- /dev/null
+++ b/libsec/port/aesCFB.c
@@ -1,0 +1,48 @@
+#include "os.h"
+#include <libsec.h>
+
+void
+aesCFBencrypt(uchar *p, int len, AESstate *s)	/* CFB-encrypt p[0..len) in place; s->offset lets a later call resume mid-block */
+{
+	u32 a, o = s->offset;
+
+	while(len > 0){
+		if(o % 16){	/* mid-block: ivec already holds keystream for this byte */
+		Odd:
+			a = (s->ivec[o++ % 16] ^= *p), *p++ = a, len--;	/* ciphertext byte goes to both ivec and *p */
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);	/* block boundary: encrypt ivec in place */
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;	/* short or unaligned: go a byte at a time */
+		((u32*)p)[0] = (((u32*)s->ivec)[0] ^= ((u32*)p)[0]);
+		((u32*)p)[1] = (((u32*)s->ivec)[1] ^= ((u32*)p)[1]);
+		((u32*)p)[2] = (((u32*)s->ivec)[2] ^= ((u32*)p)[2]);
+		((u32*)p)[3] = (((u32*)s->ivec)[3] ^= ((u32*)p)[3]);
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
+
+void
+aesCFBdecrypt(uchar *p, int len, AESstate *s)	/* CFB-decrypt p[0..len) in place; s->offset lets a later call resume mid-block */
+{
+	u32 a, o = s->offset;
+
+	while(len > 0){
+		if(o % 16){	/* mid-block: ivec already holds keystream for this byte */
+		Odd:
+			a = *p, *p++ ^= s->ivec[o % 16], s->ivec[o++ % 16] = a, len--;	/* a: ciphertext byte kept as feedback */
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);	/* decryption also uses the encrypt direction */
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;	/* short or unaligned: go a byte at a time */
+		a = ((u32*)p)[0], ((u32*)p)[0] ^= ((u32*)s->ivec)[0], ((u32*)s->ivec)[0] = a;
+		a = ((u32*)p)[1], ((u32*)p)[1] ^= ((u32*)s->ivec)[1], ((u32*)s->ivec)[1] = a;
+		a = ((u32*)p)[2], ((u32*)p)[2] ^= ((u32*)s->ivec)[2], ((u32*)s->ivec)[2] = a;
+		a = ((u32*)p)[3], ((u32*)p)[3] ^= ((u32*)s->ivec)[3], ((u32*)s->ivec)[3] = a;
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
--- /dev/null
+++ b/libsec/port/aesOFB.c
@@ -1,0 +1,26 @@
+#include "os.h"
+#include <libsec.h>
+
+void
+aesOFBencrypt(uchar *p, int len, AESstate *s)	/* XOR p[0..len) with the OFB keystream; s->offset lets a later call resume mid-block */
+{
+	u32 o = s->offset;
+
+	while(len > 0){
+		if(o % 16){	/* mid-block: use keystream bytes left in ivec */
+		Odd:
+			*p++ ^= s->ivec[o++ % 16], len--;
+			continue;
+		}
+		aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);	/* next keystream block = E(previous one) */
+		if(len < 16 || ((p-(uchar*)0) & 3) != 0)
+			goto Odd;	/* short or unaligned: go a byte at a time */
+		((u32*)p)[0] ^= ((u32*)s->ivec)[0];
+		((u32*)p)[1] ^= ((u32*)s->ivec)[1];
+		((u32*)p)[2] ^= ((u32*)s->ivec)[2];
+		((u32*)p)[3] ^= ((u32*)s->ivec)[3];
+		o += 16, p += 16, len -= 16;
+	}
+	s->offset = o;
+}
+
--- /dev/null
+++ b/libsec/port/aesXCBmac.c
@@ -1,0 +1,93 @@
+#include "os.h"
+#include <libsec.h>
+
+/*
+ * AES-XCBC-MAC-96 message authentication, per rfc3566.
+ */
+static uchar basekey[3][16] = {
+	{
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
+	},
+	{
+	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
+	},
+	{
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+	0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
+	},
+};
+
+void
+setupAESXCBCstate(AESstate *s)		/* was setupmac96 */
+{
+	int i, j;
+	uint q[16 / sizeof(uint)];
+	uchar *p;
+
+	assert(s->keybytes == 16);
+	for(i = 0; i < 3; i++)	/* s->mackey = E(basekey[0]), E(basekey[1]), E(basekey[2]) */
+		aes_encrypt(s->ekey, s->rounds, basekey[i],
+			s->mackey + AESbsize*i);
+
+	p = s->mackey;
+	memset(q, 0, AESbsize);
+
+	/*
+	 * put the first key in the right endianness.  once figured, probably
+	 * better to use some fcall macros.
+	 * keys for encryption in local endianness for the algorithm...
+	 * only key1 is used for encryption;
+	 * BUG!!: I think this is what I got wrong.
+	 */
+	for(i = 0; i < 16 / sizeof(uint); i ++){
+		for(j = 0; j < sizeof(uint); j++)
+			q[i] |= p[sizeof(uint)-j-1] << 8*j;	/* p[0] ends up in the top byte of q[i] */
+		p += sizeof(uint);
+	}
+	memmove(s->mackey, q, 16);
+}
+
+/*
+ * Not dealing with > 128-bit keys, not dealing with strange corner cases like
+ * empty message.  Should be fine for AES-XCBC-MAC-96.
+ */
+uchar*
+aesXCBCmac(uchar *p, int len, AESstate *s)	/* MAC p[0..len); the result is left in, and returned as, s->ivec */
+{
+	uchar *p2, *ip, *eip, *mackey;
+	uchar q[AESbsize];
+
+	assert(s->keybytes == 16);	/* more complicated for bigger */
+	memset(s->ivec, 0, AESbsize);	/* E[0] is 0+ */
+
+	for(; len > AESbsize; len -= AESbsize){	/* every block except the last */
+		memmove(q, p, AESbsize);
+		p2 = q;
+		ip = s->ivec;
+		for(eip = ip + AESbsize; ip < eip; )
+			*p2++ ^= *ip++;
+		aes_encrypt((u32 *)s->mackey, s->rounds, q, s->ivec);	/* cast matches the u32 rk[] prototype */
+		p += AESbsize;
+	}
+	/* the last one */
+
+	memmove(q, p, len);
+	p2 = q+len;
+	if(len == AESbsize)
+		mackey = s->mackey + AESbsize;	/* k2 */
+	else{
+		mackey = s->mackey+2*AESbsize;	/* k3 */
+		*p2++ = 1 << 7;			/* padding */
+		len = AESbsize - len - 1;
+		memset(p2, 0, len);
+	}
+
+	ip = s->ivec;
+	p2 = q;
+	for(eip = ip + AESbsize; ip < eip; )
+		*p2++ ^= *ip++ ^ *mackey++;
+	aes_encrypt((u32 *)s->mackey, s->rounds, q, s->ivec);
+	return s->ivec;			/* only the 12 bytes leftmost */
+}
--- /dev/null
+++ b/libsec/port/aes_gcm.c
@@ -1,0 +1,199 @@
+#include "os.h"
+#include <libsec.h>
+
+static void
+load128(uchar b[16], u32 W[4])	/* 16 big-endian bytes -> four words; W[0] comes from the last four bytes */
+{
+	W[0] = (u32)b[15] | (u32)b[14]<<8 | (u32)b[13]<<16 | (u32)b[12]<<24;
+	W[1] = (u32)b[11] | (u32)b[10]<<8 | (u32)b[ 9]<<16 | (u32)b[ 8]<<24;
+	W[2] = (u32)b[ 7] | (u32)b[ 6]<<8 | (u32)b[ 5]<<16 | (u32)b[ 4]<<24;
+	W[3] = (u32)b[ 3] | (u32)b[ 2]<<8 | (u32)b[ 1]<<16 | (u32)b[ 0]<<24;
+}
+
+static void
+store128(u32 W[4], uchar b[16])	/* inverse of load128: W[3] is written first, most significant byte at b[0] */
+{
+	b[15] = W[0], b[14] = W[0]>>8, b[13] = W[0]>>16, b[12] = W[0]>>24;
+	b[11] = W[1], b[10] = W[1]>>8, b[ 9] = W[1]>>16, b[ 8] = W[1]>>24;
+	b[ 7] = W[2], b[ 6] = W[2]>>8, b[ 5] = W[2]>>16, b[ 4] = W[2]>>24;
+	b[ 3] = W[3], b[ 2] = W[3]>>8, b[ 1] = W[3]>>16, b[ 0] = W[3]>>24;
+}
+
+static void
+gfmul(u32 X[4], u32 Y[4], u32 Z[4])	/* Z = product of X and Y for GHASH; X is overwritten while shifting */
+{
+	long m, i;
+
+	Z[0] = Z[1] = Z[2] = Z[3] = 0;
+	for(i=127; i>=0; i--){
+		m = -(long)((Y[i>>5] >> (i&31)) & 1);	/* all ones iff bit i of Y is set, whatever the width of long */
+		Z[0] ^= X[0] & m;
+		Z[1] ^= X[1] & m;
+		Z[2] ^= X[2] & m;
+		Z[3] ^= X[3] & m;
+		m = -(long)(X[0] & 1);	/* all ones iff a bit is about to be shifted out of X */
+		X[0] = X[0]>>1 | X[1]<<31;
+		X[1] = X[1]>>1 | X[2]<<31;
+		X[2] = X[2]>>1 | X[3]<<31;
+		X[3] = X[3]>>1 ^ (0xE1000000 & m);	/* fold the shifted-out bit back in */
+	}
+}
+
+static void
+prepareM(u32 H[4], u32 M[16][256][4])	/* M[i][j] = gfmul product of H and the value with byte j at byte position i */
+{
+	u32 X[4], i, j;
+
+	for(i=0; i<16; i++){
+		for(j=0; j<256; j++){
+			X[0] = X[1] = X[2] = X[3] = 0;
+			X[i>>2] = j<<((i&3)<<3);	/* word i/4, byte i%4 within it */
+			gfmul(X, H, M[i][j]);
+		}
+	}
+}
+
+static void
+ghash1(AESGCMstate *s, u32 X[4], u32 Y[4])	/* Y = (X ^ Y) times s->H using the tables in s->M; X is left holding X ^ Y */
+{
+	u32 *Xi, i;
+
+	X[0] ^= Y[0], X[1] ^= Y[1], X[2] ^= Y[2], X[3] ^= Y[3];
+	if(0){	/* disabled: direct multiply without the tables */
+		gfmul(X, s->H, Y);
+		return;
+	}
+
+	Y[0] = Y[1] = Y[2] = Y[3] = 0;
+	for(i=0; i<16; i++){
+		Xi = s->M[i][(X[i>>2]>>((i&3)<<3))&0xFF];	/* table entry for byte i of X */
+		Y[0] ^= Xi[0];
+		Y[1] ^= Xi[1];
+		Y[2] ^= Xi[2];
+		Y[3] ^= Xi[3];
+	}
+}
+
+static void
+ghashn(AESGCMstate *s, uchar *dat, u32 len, u32 Y[4])	/* fold dat[0..len) into Y, 16 bytes per ghash1 call */
+{
+	uchar tmp[16];
+	u32 X[4];
+
+	while(len >= 16){
+		load128(dat, X);
+		ghash1(s, X, Y);
+		dat += 16, len -= 16;
+	}
+	if(len > 0){	/* trailing partial block is zero-padded to 16 bytes */
+		memmove(tmp, dat, len);
+		memset(tmp+len, 0, 16-len);
+		load128(tmp, X);
+		ghash1(s, X, Y);
+	}
+}
+
+static u32
+aesxctr1(AESstate *s, uchar ctr[AESbsize], uchar *dat, u32 len)	/* XOR at most one block of dat with E(ctr); returns the bytes done */
+{
+	uchar tmp[AESbsize];
+	u32 i;
+
+	aes_encrypt(s->ekey, s->rounds, ctr, tmp);	/* ctr itself is left unchanged */
+	if(len > AESbsize)
+		len = AESbsize;
+	for(i=0; i<len; i++)
+		dat[i] ^= tmp[i];
+	return len;
+}
+
+static void
+aesxctrn(AESstate *s, uchar *dat, u32 len)	/* XOR dat[0..len) with keystream from successive counter blocks */
+{
+	uchar ctr[AESbsize];
+	u32 i;
+
+	memmove(ctr, s->ivec, AESbsize);	/* work on a copy; s->ivec is not advanced */
+	while(len > 0){
+		for(i=AESbsize-1; i>=AESbsize-4; i--)	/* bump the last 4 bytes as a big-endian counter before each block */
+			if(++ctr[i] != 0)
+				break;
+
+		if(aesxctr1(s, ctr, dat, len) < AESbsize)	/* short final block: done */
+			break;
+		dat += AESbsize;
+		len -= AESbsize;
+	}
+}
+
+void
+aesgcm_setiv(AESGCMstate *s, uchar *iv, int ivlen)	/* derive the initial counter block in s->ivec from iv */
+{
+	if(ivlen == 96/8){	/* 12-byte iv: iv followed by 00 00 00 01 */
+		memmove(s->ivec, iv, ivlen);
+		memset(s->ivec+ivlen, 0, AESbsize-ivlen);
+		s->ivec[AESbsize-1] = 1;
+	} else {	/* any other length: hash iv followed by its bit length */
+		u32 L[4], Y[4] = {0};
+
+		ghashn(s, iv, ivlen, Y);
+		L[0] = ivlen << 3;	/* bit length, low word */
+		L[1] = ivlen >> 29;	/* bit length, high word */
+		L[2] = L[3] = 0;
+		ghash1(s, L, Y);
+		store128(Y, s->ivec);
+	}
+}
+
+void
+setupAESGCMstate(AESGCMstate *s, uchar *key, int keylen, uchar *iv, int ivlen)	/* key the cipher, compute s->H and s->M, optionally set the iv */
+{
+	setupAESstate(s, key, keylen, nil);
+
+	memset(s->ivec, 0, AESbsize);
+	aes_encrypt(s->ekey, s->rounds, s->ivec, s->ivec);	/* encrypt the all-zero block */
+	load128(s->ivec, s->H);	/* and keep the result as H */
+	memset(s->ivec, 0, AESbsize);
+	prepareM(s->H, s->M);
+
+	if(iv != nil && ivlen > 0)	/* otherwise s->ivec stays zero until aesgcm_setiv is called */
+		aesgcm_setiv(s, iv, ivlen);
+}
+
+void
+aesgcm_encrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], AESGCMstate *s)	/* encrypt dat in place and write the 16-byte tag */
+{
+	u32 L[4], Y[4] = {0};
+
+	ghashn(s, aad, naad, Y);
+	aesxctrn(s, dat, ndat);	/* dat becomes ciphertext */
+	ghashn(s, dat, ndat, Y);	/* so the hash covers the ciphertext */
+	L[0] = ndat << 3;	/* last hashed block: bit lengths of dat and aad, low word then high word */
+	L[1] = ndat >> 29;
+	L[2] = naad << 3;
+	L[3] = naad >> 29;
+	ghash1(s, L, Y);
+	store128(Y, tag);
+	aesxctr1(s, s->ivec, tag, 16);	/* mask the hash with E(s->ivec) */
+}
+
+int
+aesgcm_decrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], AESGCMstate *s)	/* returns 0 and decrypts dat in place, or -1 on tag mismatch */
+{
+	u32 L[4], Y[4] = {0};
+	uchar tmp[16];
+
+	ghashn(s, aad, naad, Y);
+	ghashn(s, dat, ndat, Y);	/* hash the ciphertext before decrypting it */
+	L[0] = ndat << 3;
+	L[1] = ndat >> 29;
+	L[2] = naad << 3;
+	L[3] = naad >> 29;
+	ghash1(s, L, Y);
+	store128(Y, tmp);
+	aesxctr1(s, s->ivec, tmp, 16);	/* tmp is now the tag computed from the inputs */
+	if(tsmemcmp(tag, tmp, 16) != 0)	/* mismatch: return with dat still undecrypted */
+		return -1;
+	aesxctrn(s, dat, ndat);
+	return 0;
+}
--- /dev/null
+++ b/libsec/port/aes_xts.c
@@ -1,0 +1,83 @@
+#include "os.h"
+#include <libsec.h>
+
+/* little-endian data order; each macro is one expression, bytes widened to u32 before shifting */
+#define	GET4(p)		((u32)(p)[0]|((u32)(p)[1]<<8)|((u32)(p)[2]<<16)|((u32)(p)[3]<<24))
+#define	PUT4(p,v)	((p)[0]=(v),(p)[1]=(v)>>8,(p)[2]=(v)>>16,(p)[3]=(v)>>24)
+
+static void
+gf_mulx(uchar *x)	/* shift the 16-byte little-endian value at x left one bit, with feedback */
+{
+	u32 t0, t1, t2, t3, t4;
+
+	t0 = GET4(x);
+	t1 = GET4(x+4);
+	t2 = GET4(x+8);
+	t3 = GET4(x+12);
+
+	t4 =             (t3 >> 31);	/* bit shifted out of the top word */
+	t3 = (t3 << 1) | (t2 >> 31);
+	t2 = (t2 << 1) | (t1 >> 31);
+	t1 = (t1 << 1) | (t0 >> 31);
+	t0 = (t0 << 1) ^ (t4*135);	/* fold it back into the low word as 135 (0x87) */
+
+	PUT4(x, t0);
+	PUT4(x+4, t1);
+	PUT4(x+8, t2);
+	PUT4(x+12, t3);
+}
+
+static void
+xor128(uchar *o, uchar *i1, uchar *i2)	/* o[k] = i1[k] ^ i2[k] for 16 bytes */
+{
+	int i;
+
+	for(i=0; i<16; i++)
+		o[i] = i1[i] ^ i2[i];
+}
+
+static void
+setupT(AESstate *tweak, uvlong sectorNumber, uchar T[AESbsize])	/* T = sectorNumber encrypted with the tweak key */
+{
+	PUT4(T+0, (u32)sectorNumber), sectorNumber >>= 32;	/* low 32 bits first, little-endian */
+	PUT4(T+4, (u32)sectorNumber);
+	PUT4(T+8, 0);	/* upper 8 bytes are zero */
+	PUT4(T+12, 0);
+	aes_encrypt(tweak->ekey, tweak->rounds, T, T);
+}
+
+void
+aes_xts_encrypt(AESstate *tweak, AESstate *ecb,
+	uvlong sectorNumber, uchar *input, uchar *output, u32 len)	/* encrypt len bytes of input into output */
+{
+	uchar T[AESbsize], x[AESbsize];
+	
+	if(len % AESbsize)	/* only whole blocks are handled */
+		abort();
+
+	setupT(tweak, sectorNumber, T);
+	for (; len > 0; len -= AESbsize, input += AESbsize, output += AESbsize) {
+		xor128(x, input, T);
+		aes_encrypt(ecb->ekey, ecb->rounds, x, x);
+		xor128(output, x, T);	/* T is XORed in both before and after the cipher */
+		gf_mulx(T);	/* advance T for the next block */
+	}
+}
+
+void
+aes_xts_decrypt(AESstate *tweak, AESstate *ecb,
+	uvlong sectorNumber, uchar *input, uchar *output, u32 len)	/* decrypt len bytes of input into output */
+{
+	uchar T[AESbsize], x[AESbsize];
+	
+	if(len % AESbsize)	/* only whole blocks are handled */
+		abort();
+
+	setupT(tweak, sectorNumber, T);	/* the tweak is still produced with aes_encrypt */
+	for (; len > 0; len -= AESbsize, input += AESbsize, output += AESbsize) {
+		xor128(x, input, T);
+		aes_decrypt(ecb->dkey, ecb->rounds, x, x);
+		xor128(output, x, T);
+		gf_mulx(T);	/* advance T for the next block */
+	}
+}
--- /dev/null
+++ b/libsec/port/aesgcmtest.c
@@ -1,0 +1,314 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+typedef struct Test Test;
+struct Test	/* one AES-GCM vector; every field is a hex string */
+{
+	char *K;	/* key */
+	char *P;	/* plaintext */
+	char *A;	/* additional authenticated data */
+	char *IV;	/* initialization vector */
+	char *T;	/* expected 16-byte tag */
+};
+
+Test tests[] = {
+	{	/* Test Case 1 */
+		"00000000000000000000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"58E2FCCEFA7E3061367F1D57A4E7455A"
+	},
+	{	/* Test Case 2 */
+		"00000000000000000000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"AB6E47D42CEC13BDF53A67B21257BDDF",
+	},
+	{	/* Test Case 3 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"4D5C2AF327CD64A62CF35ABD2BA6FAB4"
+	},
+	{	/* Test Case 4 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"5BC94FBC3221A5DB94FAE95AE7121A47"
+	},
+	{	/* Test Case 5 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"3612D2E79E3B0785561BE14AACA2FCCB"
+	},
+	{	/* Test Case 6 */
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"619CC5AEFFFE0BFA462AF43C1699D050"
+	},
+	{	/* Test Case 7 */
+		"00000000000000000000000000000000"
+		"0000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"CD33B28AC773F74BA00ED1F312572435"
+	},
+	{	/* Test Case 8 */
+		"00000000000000000000000000000000"
+		"0000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"2FF58D80033927AB8EF4D4587514F0FB"
+	},
+	{	/* Test Case 9 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"9924A7C8587336BFB118024DB8674A14"
+	},
+	{	/* Test Case 10 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"2519498E80F1478F37BA55BD6D27618C"
+	},
+	{	/* Test Case 11 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"65DCC57FCF623A24094FCCA40D3533F8"
+	},
+	{	/* Test Case 12 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"DCF566FF291C25BBB8568FC3D376A6D9"
+	},
+	{	/* Test Case 13 */
+		"00000000000000000000000000000000"
+		"00000000000000000000000000000000",
+		"",
+		"",
+		"000000000000000000000000",
+
+		"530F8AFBC74536B9A963B4F1C4CB738B"
+	},
+	{	/* Test Case 14 */
+		"00000000000000000000000000000000"
+		"00000000000000000000000000000000",
+		"00000000000000000000000000000000",
+		"",
+		"000000000000000000000000",
+
+		"D0D1C8A799996BF0265B98B5D48AB919"
+	},
+	{	/* Test Case 15 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b391aafd255",
+		"",
+		"cafebabefacedbaddecaf888",
+
+		"B094DAC5D93471BDEC1A502270E3CC6C"
+	},
+	{	/* Test Case 16 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbaddecaf888",
+
+		"76FC6ECE0F4E1768CDDF8853BB2D551B"
+	},
+	{	/* Test Case 17 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"cafebabefacedbad",
+
+		"3A337DBF46A792C45E454913FE2EA8F2"
+	},
+	{	/* Test Case 18 */
+		"feffe9928665731c6d6a8f9467308308"
+		"feffe9928665731c6d6a8f9467308308",
+		"d9313225f88406e5a55909c5aff5269a"
+		"86a7a9531534f7da2e4c303d8a318a72"
+		"1c3c0c95956809532fcf0e2449a6b525"
+		"b16aedf5aa0de657ba637b39",
+		"feedfacedeadbeeffeedfacedeadbeef"
+		"abaddad2",
+		"9313225df88406e555909c5aff5269aa"
+		"6a7a9538534f7da1e4c303d2a318a728"
+		"c3c0c95156809539fcf0e2429a6b5254"
+		"16aedbf5a0de6a57a637b39b",
+
+		"A44A8266EE1C8EB0C8B5D4CF5AE9F19A"
+	},
+};
+
+int
+parsehex(char *s, uchar *h, char *l)	/* hex string s -> bytes in h; returns the byte count, prints under label l if non-nil */
+{
+	char *e = nil;
+	mpint *m;
+	int n;
+
+	n = strlen(s);
+	if(n == 0)
+		return 0;
+	assert((n & 1) == 0);
+	n >>= 1;
+	m = strtomp(s, &e, 16, nil);
+	if(m == nil || *e != '\0')
+		abort();
+	mptober(m, h, n);	/* exactly n bytes, so leading zero digits are kept */
+	mpfree(m);	/* strtomp allocated m; it was never released before */
+	if(l != nil)
+		print("%s = %.*H\n", l, n, h);
+	return n;
+}
+
+void
+runtest(Test *t)	/* encrypt one vector and assert that the tag equals t->T */
+{
+	AESGCMstate s;
+	uchar key[1024], plain[1024], aad[1024], iv[1024], tag[16], tmp[16];
+	int nkey, nplain, naad, niv;
+
+	nkey = parsehex(t->K, key, "K");
+	nplain = parsehex(t->P, plain, "P");
+	naad = parsehex(t->A, aad, "A");
+	niv = parsehex(t->IV, iv, "IV");
+
+	setupAESGCMstate(&s, key, nkey, iv, niv);
+	aesgcm_encrypt(plain, nplain, aad, naad, tag, &s);	/* plain now holds the ciphertext */
+	print("C = %.*H\n", nplain, plain);
+	print("T = %.*H\n", 16, tag);
+
+	parsehex(t->T, tmp, nil);	/* nil label: parse without printing */
+	assert(memcmp(tmp, tag, 16) == 0);
+}
+
+void
+perftest(void)	/* time 100 GCM encryptions of a 1MB buffer; print seconds and bytes/s on fd 2 */
+{
+	AESGCMstate s;
+	static uchar zeros[16], buf[1024*1024];	/* static: keeps the 1MB buffer off the stack */
+	uchar tag[16];
+	vlong now, delta;
+	int i;
+
+	now = nsec();
+	for(i=0; i<100; i++){
+		memset(buf, 0, sizeof(buf));
+		if(1){
+			setupAESGCMstate(&s, zeros, 16, zeros, 12);
+			aesgcm_encrypt(buf, sizeof(buf), nil, 0, tag, &s);
+		} else {
+			setupAESstate(&s, zeros, 16, zeros);
+			aesCBCencrypt(buf, sizeof(buf), &s);
+		}
+	}
+	delta = nsec() - now;	/* kept in nanoseconds so a run under one second cannot divide by zero */
+	fprint(2, "%llds = %lld/s\n", delta/1000000000LL, i*(vlong)sizeof(buf)*1000000000LL / (delta > 0 ? delta : 1));
+}
+
+void
+main(int argc, char **argv)	/* run every vector in tests[], or only the benchmark with -p */
+{
+	int i;
+
+	fmtinstall('H', encodefmt);
+	ARGBEGIN {
+	case 'p':
+		perftest();
+		exits(nil);
+	} ARGEND;
+
+	for(i=0; i<nelem(tests); i++){
+		print("Test Case %d\n", i+1);
+		runtest(&tests[i]);
+		print("\n");
+	}
+	exits(nil);	/* falling off the end of main exits with status "main" on Plan 9 */
+}
--- /dev/null
+++ b/libsec/port/aesni.c
@@ -1,0 +1,5 @@
+void*
+aesni_init(void)	/* stub: always returns 0, i.e. no setup routine to offer */
+{
+	return 0;
+}
--- /dev/null
+++ b/libsec/port/bftest.c
@@ -1,0 +1,279 @@
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+
+enum{
+	Bsz = 8,
+};
+
+typedef struct Testvector Testvector;
+
+struct Testvector{
+	uchar key[Bsz];
+	uchar plain[Bsz];
+	uchar cipher[Bsz];
+};
+
+/*
+ * Blowfish test vectors from https://www.schneier.com/code/vectors.txt
+ */
+Testvector vector [] = {
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
+
+	{{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x51, 0x86, 0x6F, 0xD5, 0xB8, 0x5E, 0xCB, 0x8A}},
+
+	{{0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01},
+	 {0x7D, 0x85, 0x6F, 0x9A, 0x61, 0x30, 0x63, 0xF2}},
+
+	{{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x24, 0x66, 0xDD, 0x87, 0x8B, 0x96, 0x3C, 0x9D}},
+
+	{{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x61, 0xF9, 0xC3, 0x80, 0x22, 0x81, 0xB0, 0x96}},
+
+	{{0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x7D, 0x0C, 0xC6, 0x30, 0xAF, 0xDA, 0x1E, 0xC7}},
+
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x4E, 0xF9, 0x97, 0x45, 0x61, 0x98, 0xDD, 0x78}},
+
+	{{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x0A, 0xCE, 0xAB, 0x0F, 0xC6, 0xA0, 0xA2, 0x8D}},
+
+	{{0x7C, 0xA1, 0x10, 0x45, 0x4A, 0x1A, 0x6E, 0x57},
+	 {0x01, 0xA1, 0xD6, 0xD0, 0x39, 0x77, 0x67, 0x42},
+	 {0x59, 0xC6, 0x82, 0x45, 0xEB, 0x05, 0x28, 0x2B}},
+
+	{{0x01, 0x31, 0xD9, 0x61, 0x9D, 0xC1, 0x37, 0x6E},
+	 {0x5C, 0xD5, 0x4C, 0xA8, 0x3D, 0xEF, 0x57, 0xDA},
+	 {0xB1, 0xB8, 0xCC, 0x0B, 0x25, 0x0F, 0x09, 0xA0}},
+
+	{{0x07, 0xA1, 0x13, 0x3E, 0x4A, 0x0B, 0x26, 0x86},
+	 {0x02, 0x48, 0xD4, 0x38, 0x06, 0xF6, 0x71, 0x72},
+	 {0x17, 0x30, 0xE5, 0x77, 0x8B, 0xEA, 0x1D, 0xA4}},
+
+	{{0x38, 0x49, 0x67, 0x4C, 0x26, 0x02, 0x31, 0x9E},
+	 {0x51, 0x45, 0x4B, 0x58, 0x2D, 0xDF, 0x44, 0x0A},
+	 {0xA2, 0x5E, 0x78, 0x56, 0xCF, 0x26, 0x51, 0xEB}},
+
+	{{0x04, 0xB9, 0x15, 0xBA, 0x43, 0xFE, 0xB5, 0xB6},
+	 {0x42, 0xFD, 0x44, 0x30, 0x59, 0x57, 0x7F, 0xA2},
+	 {0x35, 0x38, 0x82, 0xB1, 0x09, 0xCE, 0x8F, 0x1A}},
+
+	{{0x01, 0x13, 0xB9, 0x70, 0xFD, 0x34, 0xF2, 0xCE},
+	 {0x05, 0x9B, 0x5E, 0x08, 0x51, 0xCF, 0x14, 0x3A},
+	 {0x48, 0xF4, 0xD0, 0x88, 0x4C, 0x37, 0x99, 0x18}},
+
+	{{0x01, 0x70, 0xF1, 0x75, 0x46, 0x8F, 0xB5, 0xE6},
+	 {0x07, 0x56, 0xD8, 0xE0, 0x77, 0x47, 0x61, 0xD2},
+	 {0x43, 0x21, 0x93, 0xB7, 0x89, 0x51, 0xFC, 0x98}},
+
+	{{0x43, 0x29, 0x7F, 0xAD, 0x38, 0xE3, 0x73, 0xFE},
+	 {0x76, 0x25, 0x14, 0xB8, 0x29, 0xBF, 0x48, 0x6A},
+	 {0x13, 0xF0, 0x41, 0x54, 0xD6, 0x9D, 0x1A, 0xE5}},
+
+	{{0x07, 0xA7, 0x13, 0x70, 0x45, 0xDA, 0x2A, 0x16},
+	 {0x3B, 0xDD, 0x11, 0x90, 0x49, 0x37, 0x28, 0x02},
+	 {0x2E, 0xED, 0xDA, 0x93, 0xFF, 0xD3, 0x9C, 0x79}},
+
+	{{0x04, 0x68, 0x91, 0x04, 0xC2, 0xFD, 0x3B, 0x2F},
+	 {0x26, 0x95, 0x5F, 0x68, 0x35, 0xAF, 0x60, 0x9A},
+	 {0xD8, 0x87, 0xE0, 0x39, 0x3C, 0x2D, 0xA6, 0xE3}},
+
+	{{0x37, 0xD0, 0x6B, 0xB5, 0x16, 0xCB, 0x75, 0x46},
+	 {0x16, 0x4D, 0x5E, 0x40, 0x4F, 0x27, 0x52, 0x32},
+	 {0x5F, 0x99, 0xD0, 0x4F, 0x5B, 0x16, 0x39, 0x69}},
+
+	{{0x1F, 0x08, 0x26, 0x0D, 0x1A, 0xC2, 0x46, 0x5E},
+	 {0x6B, 0x05, 0x6E, 0x18, 0x75, 0x9F, 0x5C, 0xCA},
+	 {0x4A, 0x05, 0x7A, 0x3B, 0x24, 0xD3, 0x97, 0x7B}},
+
+	{{0x58, 0x40, 0x23, 0x64, 0x1A, 0xBA, 0x61, 0x76},
+	 {0x00, 0x4B, 0xD6, 0xEF, 0x09, 0x17, 0x60, 0x62},
+	 {0x45, 0x20, 0x31, 0xC1, 0xE4, 0xFA, 0xDA, 0x8E}},
+
+	{{0x02, 0x58, 0x16, 0x16, 0x46, 0x29, 0xB0, 0x07},
+	 {0x48, 0x0D, 0x39, 0x00, 0x6E, 0xE7, 0x62, 0xF2},
+	 {0x75, 0x55, 0xAE, 0x39, 0xF5, 0x9B, 0x87, 0xBD}},
+
+	{{0x49, 0x79, 0x3E, 0xBC, 0x79, 0xB3, 0x25, 0x8F},
+	 {0x43, 0x75, 0x40, 0xC8, 0x69, 0x8F, 0x3C, 0xFA},
+	 {0x53, 0xC5, 0x5F, 0x9C, 0xB4, 0x9F, 0xC0, 0x19}},
+
+	{{0x4F, 0xB0, 0x5E, 0x15, 0x15, 0xAB, 0x73, 0xA7},
+	 {0x07, 0x2D, 0x43, 0xA0, 0x77, 0x07, 0x52, 0x92},
+	 {0x7A, 0x8E, 0x7B, 0xFA, 0x93, 0x7E, 0x89, 0xA3}},
+
+	{{0x49, 0xE9, 0x5D, 0x6D, 0x4C, 0xA2, 0x29, 0xBF},
+	 {0x02, 0xFE, 0x55, 0x77, 0x81, 0x17, 0xF1, 0x2A},
+	 {0xCF, 0x9C, 0x5D, 0x7A, 0x49, 0x86, 0xAD, 0xB5}},
+
+	{{0x01, 0x83, 0x10, 0xDC, 0x40, 0x9B, 0x26, 0xD6},
+	 {0x1D, 0x9D, 0x5C, 0x50, 0x18, 0xF7, 0x28, 0xC2},
+	 {0xD1, 0xAB, 0xB2, 0x90, 0x65, 0x8B, 0xC7, 0x78}},
+
+	{{0x1C, 0x58, 0x7F, 0x1C, 0x13, 0x92, 0x4F, 0xEF},
+	 {0x30, 0x55, 0x32, 0x28, 0x6D, 0x6F, 0x29, 0x5A},
+	 {0x55, 0xCB, 0x37, 0x74, 0xD1, 0x3E, 0xF2, 0x01}},
+
+	{{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xFA, 0x34, 0xEC, 0x48, 0x47, 0xB2, 0x68, 0xB2}},
+
+	{{0x1F, 0x1F, 0x1F, 0x1F, 0x0E, 0x0E, 0x0E, 0x0E},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xA7, 0x90, 0x79, 0x51, 0x08, 0xEA, 0x3C, 0xAE}},
+
+	{{0xE0, 0xFE, 0xE0, 0xFE, 0xF1, 0xFE, 0xF1, 0xFE},
+	 {0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0xC3, 0x9E, 0x07, 0x2D, 0x9F, 0xAC, 0x63, 0x1D}},
+
+	{{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x01, 0x49, 0x33, 0xE0, 0xCD, 0xAF, 0xF6, 0xE4}},
+
+	{{0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0xF2, 0x1E, 0x9A, 0x77, 0xB7, 0x1C, 0x49, 0xBC}},
+
+	{{0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF},
+	 {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
+	 {0x24, 0x59, 0x46, 0x88, 0x57, 0x54, 0x36, 0x9A}},
+
+	{{0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10},
+	 {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+	 {0x6B, 0x5C, 0x5A, 0x9C, 0x5D, 0x9E, 0x0A, 0x5A}}
+};
+
+uchar CBCkey[16] = { 
+	 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF, 
+	 0xF0, 0xE1, 0xD2, 0xC3, 0xB4, 0xA5, 0x96, 0x87
+};
+
+uchar CBCiv[8] = {
+	 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10
+};
+
+uchar CBCdata[29] = {
+	 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x20, 
+	 0x4E, 0x6F, 0x77, 0x20, 0x69, 0x73, 0x20, 0x74, 
+	 0x68, 0x65, 0x20, 0x74, 0x69, 0x6D, 0x65, 0x20, 
+	 0x66, 0x6F, 0x72, 0x20, 0x00
+};
+
+uchar CBCcipher[32] = {
+	 0x6B, 0x77, 0xB4, 0xD6, 0x30, 0x06, 0xDE, 0xE6,
+	 0x05, 0xB1, 0x56, 0xE2, 0x74, 0x03, 0x97, 0x93,
+	 0x58, 0xDE, 0xB9, 0xE7, 0x15, 0x46, 0x16, 0xD9,
+	 0x59, 0xF1, 0x65, 0x2B, 0xD5, 0xFF, 0x92, 0xCC
+};
+
+int
+testECB(Testvector *t)	/* check one vector in ECB mode in both directions; returns 0 on success, -1 on mismatch */
+{
+	BFstate s;
+	int i;
+	uchar aux[Bsz];
+
+	memcpy(aux, t->plain, Bsz);	/* work on a copy: the cipher routines transform the buffer in place */
+
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, t->key, Bsz, nil);
+	bfECBencrypt(aux, Bsz, &s);
+
+	if(memcmp(aux, t->cipher, Bsz) != 0){
+		fprint(2, "ECB encrypt failed, ciphertext is:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", t->cipher[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	memset(&s, 0, sizeof(BFstate));	/* rebuild the state from scratch before decrypting */
+	setupBFstate(&s, t->key, Bsz, nil);
+	bfECBdecrypt(aux, Bsz, &s);	/* aux holds the ciphertext that was just verified */
+
+	if(memcmp(aux, t->plain, Bsz) != 0){
+		fprint(2, "ECB decrypt failed, plaintext is:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < Bsz; i++)
+			fprint(2, "%02X", t->plain[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+	return 0;
+}
+
+int
+testCBC(void)	/* CBC round trip on CBCdata with CBCkey/CBCiv; returns 0 on success, -1 on mismatch */
+{
+	BFstate s;
+	uchar aux[32];
+	int i;
+
+	memset(aux, 0 , sizeof(aux));	/* zero-pad: CBCdata is 29 bytes, the CBC routines assert a multiple of 8 */
+	memcpy(aux, CBCdata, sizeof(CBCdata));
+	memset(&s, 0, sizeof(BFstate));
+	setupBFstate(&s, CBCkey, sizeof(CBCkey), CBCiv);
+	bfCBCencrypt(aux, 32, &s);
+	
+	if(memcmp(aux, CBCcipher, sizeof(CBCcipher)) != 0){
+		fprint(2, "CBC encrypt failed, ciphertext is:\n");
+		for(i = 0; i < sizeof(aux); i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < sizeof(CBCcipher); i++)
+			fprint(2, "%02X", CBCcipher[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	memset(&s, 0, sizeof(BFstate));	/* fresh state so decryption starts again from CBCiv */
+	setupBFstate(&s, CBCkey, sizeof(CBCkey), CBCiv);
+	bfCBCdecrypt(aux, 32, &s);
+
+	if(memcmp(aux, CBCdata, sizeof(CBCdata)) != 0){	/* compares the 29 data bytes only, not the padding */
+		fprint(2, "CBC decrypt failed, plaintext is:\n");
+		for(i = 0; i < sizeof(aux); i++)
+			fprint(2, "%02X", aux[i]);
+		fprint(2, "\nand should be:\n");
+		for(i = 0; i < sizeof(CBCdata); i++)
+			fprint(2, "%02X", CBCdata[i]);
+		fprint(2, "\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+main(int argc, char **argv)	/* run every ECB vector, then the CBC test; sysfatal on the first failure */
+{
+	int i;
+
+	if(argc != 1)
+		sysfatal("usage: %s", argv[0]);	/* the program takes no arguments */
+
+	for(i=0; i < nelem(vector); i++)
+		if(testECB(&vector[i]) < 0)
+			sysfatal("TestECB %d failed", i);	/* i is the 0-based index into vector[] */
+
+	if(testCBC() < 0)
+		sysfatal("TestCBC failed");
+	exits(nil);
+}
--- a/libsec/port/blowfish.c
+++ b/libsec/port/blowfish.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 // Blowfish block cipher.  See:
@@ -7,17 +6,48 @@
 // 	Fast Software Encryption
 // 	Cambridge Security Workshop, Cambridge, England (1993)
 
-static u32 sbox[1024];
-static u32 pbox[BFrounds+2];
+static u32int sbox[1024];
+static u32int pbox[BFrounds+2];
 
-static void bfencrypt(u32 *, BFstate *);
-static void bfdecrypt(u32 *, BFstate *);
+static void bfencrypt(u32int *, BFstate *);
+static void bfdecrypt(u32int *, BFstate *);
 
+/*
+ * Endianness-agnostic functions to convert a
+ * block (8-byte buffer) to a u32int array and
+ * vice versa.
+ */
+
+static void
+buf2ints(uchar *p, u32int *b)	/* load 8 bytes from p into b[0], b[1] as two big-endian 32-bit words */
+{
+	b[0] =  (u32int)p[0]<<24 | p[1]<<16  | p[2]<<8 | p[3];	/* cast first: p[0]<<24 evaluated in int overflows when p[0] >= 0x80 */
+	b[1] =  (u32int)p[4]<<24 | p[5]<<16  | p[6]<<8 | p[7];
+}
+
+static void
+ints2buf(u32int *b, uchar *p)	/* store b[0], b[1] into p[0..7], most significant byte first (inverse of buf2ints) */
+{
+	u32int u;
+
+	u = b[0];
+	p[0] = u>>24;	/* each assignment keeps only the low 8 bits of the shifted word */
+	p[1] = u>>16;
+	p[2] = u>>8;
+	p[3] = u;
+
+	u = b[1];
+	p[4] = u>>24;
+	p[5] = u>>16;
+	p[6] = u>>8;
+	p[7] = u;
+}
+
 void
 setupBFstate(BFstate *s, uchar key[], int keybytes, uchar *ivec)
 {
 	int i, j;
-	u32 n, buf[2];
+	u32int n, buf[2];
 
 	memset(s, 0, sizeof(*s));
 	memset(buf, 0, sizeof buf);
@@ -31,7 +61,7 @@
 		memmove(s->ivec, ivec, sizeof(s->ivec));
 	else
 		memset(s->ivec, 0, sizeof(s->ivec));
-		
+
 	memmove(s->pbox, pbox, sizeof(pbox));
 	memmove(s->sbox, sbox, sizeof(sbox));
 
@@ -76,17 +106,13 @@
 bfCBCencrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	uchar *p;
-	u32 bo[2], bi[2], b;
+	u32int bo[2], bi[2];
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32) s->ivec[1]<<8) | ((u32)s->ivec[2]<<16) | ((u32)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32) s->ivec[5]<<8) | ((u32)s->ivec[6]<<16) | ((u32)s->ivec[7]<<24);
-
+	buf2ints(s->ivec, bo);
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
+		buf2ints(buf, bi);
 
 		bi[0] ^= bo[0];
 		bi[1] ^= bo[1];
@@ -96,36 +122,9 @@
 		bo[0] = bi[0];
 		bo[1] = bi[1];
 
-		p = buf;
-		b = bo[0];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-
-		b = bo[1];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p = b;
+		ints2buf(bi, buf);
 	}
-
-	s->ivec[7] = bo[1] >> 24;
-	s->ivec[6] = bo[1] >> 16;
-	s->ivec[5] = bo[1] >> 8;
-	s->ivec[4] = bo[1];
-
-	s->ivec[3] = bo[0] >> 24;
-	s->ivec[2] = bo[0] >> 16;
-	s->ivec[1] = bo[0] >> 8;
-	s->ivec[0] = bo[0];
-
+	ints2buf(bo, s->ivec);
 	return;
 }
 
@@ -133,17 +132,13 @@
 bfCBCdecrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	uchar *p;
-	u32 b, bo[2], bi[2], xr[2];
+	u32int  bo[2], bi[2], xr[2];
 
 	assert((n & 7) == 0);
 
-	bo[0] =  s->ivec[0] | ((u32) s->ivec[1]<<8) | ((u32)s->ivec[2]<<16) | ((u32)s->ivec[3]<<24);
-	bo[1] =  s->ivec[4] | ((u32) s->ivec[5]<<8) | ((u32)s->ivec[6]<<16) | ((u32)s->ivec[7]<<24);
-
+	buf2ints(s->ivec, bo);
 	for(i=0; i < n; i += 8, buf += 8) {
-		bi[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
-		bi[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
+		buf2ints(buf, bi);
 
 		xr[0] = bi[0];
 		xr[1] = bi[1];
@@ -153,39 +148,12 @@
 		bo[0] ^= bi[0];
 		bo[1] ^= bi[1];
 
-		p = buf;
-		b = bo[0];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
+		ints2buf(bo, buf);
 
-		b = bo[1];
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p++ = b;
-		b >>= 8;
-		*p = b;
-
 		bo[0] = xr[0];
 		bo[1] = xr[1];
 	}
-
-	s->ivec[7] = bo[1] >> 24;
-	s->ivec[6] = bo[1] >> 16;
-	s->ivec[5] = bo[1] >> 8;
-	s->ivec[4] = bo[1];
-
-	s->ivec[3] = bo[0] >> 24;
-	s->ivec[2] = bo[0] >> 16;
-	s->ivec[1] = bo[0] >> 8;
-	s->ivec[0] = bo[0];
-
+	ints2buf(bo, s->ivec);
 	return;
 }
 
@@ -193,23 +161,12 @@
 bfECBencrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	u32 b[2];
+	u32int b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
-		b[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
-
+		buf2ints(buf, b);
 		bfencrypt(b, s);
-
-		buf[7] = b[1] >> 24;
-		buf[6] = b[1] >> 16;
-		buf[5] = b[1] >> 8;
-		buf[4] = b[1];
-
-		buf[3] = b[0] >> 24;
-		buf[2] = b[0] >> 16;
-		buf[1] = b[0] >> 8;
-		buf[0] = b[0];
+		ints2buf(b, buf);
 	}
 
 	return;
@@ -219,23 +176,12 @@
 bfECBdecrypt(uchar *buf, int n, BFstate *s)
 {
 	int i;
-	u32 b[2];
+	u32int b[2];
 
 	for(i=0; i < n; i += 8, buf += 8) {
-		b[0] =  buf[0] | ((u32) buf[1]<<8) | ((u32)buf[2]<<16) | ((u32)buf[3]<<24);
-		b[1] =  buf[4] | ((u32) buf[5]<<8) | ((u32)buf[6]<<16) | ((u32)buf[7]<<24);
-
+		buf2ints(buf, b);
 		bfdecrypt(b, s);
-
-		buf[7] = b[1] >> 24;
-		buf[6] = b[1] >> 16;
-		buf[5] = b[1] >> 8;
-		buf[4] = b[1];
-
-		buf[3] = b[0] >> 24;
-		buf[2] = b[0] >> 16;
-		buf[1] = b[0] >> 8;
-		buf[0] = b[0];
+		ints2buf(b, buf);
 	}
 
 	return;		
@@ -242,11 +188,11 @@
 }
 
 static void
-bfencrypt(u32 *b, BFstate *s)
+bfencrypt(u32int *b, BFstate *s)
 {
 	int i;
-	u32 l, r;
-	u32 *pb, *sb;
+	u32int l, r;
+	u32int *pb, *sb;
 
 	l = b[0];
 	r = b[1];
@@ -276,11 +222,11 @@
 }
 
 static void
-bfdecrypt(u32 *b, BFstate *s)
+bfdecrypt(u32int *b, BFstate *s)
 {
 	int i;
-	u32 l, r;
-	u32 *pb, *sb;
+	u32int l, r;
+	u32int *pb, *sb;
 
 	l = b[0];
 	r = b[1];
@@ -309,7 +255,7 @@
 	return;
 }
 
-static u32 pbox[BFrounds+2] = {
+static u32int pbox[BFrounds+2] = {
 	0x243f6a88, 0x85a308d3, 0x13198a2e, 0x03707344, 
 	0xa4093822, 0x299f31d0, 0x082efa98, 0xec4e6c89, 
 	0x452821e6, 0x38d01377, 0xbe5466cf, 0x34e90c6c, 
@@ -317,7 +263,7 @@
 	0x9216d5d9, 0x8979fb1b
 };
 
-static u32 sbox[1024] = {
+static u32int sbox[1024] = {
 	0xd1310ba6L, 0x98dfb5acL, 0x2ffd72dbL, 0xd01adfb7L, 
 	0xb8e1afedL, 0x6a267e96L, 0xba7c9045L, 0xf12c7f99L, 
 	0x24a19947L, 0xb3916cf7L, 0x0801f2e2L, 0x858efc16L, 
@@ -575,5 +521,4 @@
 	0x90d4f869L, 0xa65cdea0L, 0x3f09252dL, 0xc208e69fL, 
 	0xb74e6132L, 0xce77e25bL, 0x578fdfe3L, 0x3ac372e6L, 
 };
-
 
--- /dev/null
+++ b/libsec/port/ccpoly.c
@@ -1,0 +1,90 @@
+#include "os.h"
+#include <libsec.h>
+
+static void
+ccpolyotk(Chachastate *cs, DigestState *ds)	/* derive the poly1305 one-time key from chacha block 0 and start ds with it */
+{
+	uchar otk[ChachaBsize];
+
+	memset(ds, 0, sizeof(*ds));
+	memset(otk, 0, sizeof(otk));	/* clear the whole block: chacha_encrypt below reads all ChachaBsize bytes */
+	chacha_setblock(cs, 0);
+	chacha_encrypt(otk, ChachaBsize, cs);	/* encrypting zeros leaves raw keystream; uses up one full block */
+	poly1305(nil, 0, otk, 32, nil, ds);	/* only the first 32 keystream bytes are passed on as the key */
+}
+
+static void
+ccpolypad(uchar *buf, u32 nbuf, DigestState *ds)	/* feed buf to poly1305, then zero bytes up to the next multiple of 16 */
+{
+	static uchar zeros[16] = {0};
+	u32 npad;
+
+	if(nbuf == 0)
+		return;	/* empty input contributes nothing, not even padding */
+	poly1305(buf, nbuf, nil, 0, nil, ds);
+	npad = nbuf % 16;	/* number of bytes in the trailing partial 16-byte chunk */
+	if(npad == 0)
+		return;
+	poly1305(zeros, 16 - npad, nil, 0, nil, ds);
+}
+
+static void
+ccpolylen(u32 n, uchar tag[16], DigestState *ds)	/* feed n to poly1305 as an 8-byte little-endian length; tag is passed through as the digest argument */
+{
+	uchar info[8];
+
+	info[0] = n;	/* low byte first */
+	info[1] = n>>8;
+	info[2] = n>>16;
+	info[3] = n>>24;
+	info[4] = 0;	/* n is only 32 bits wide, so the upper half of the 64-bit field is zero */
+	info[5] = 0;
+	info[6] = 0;
+	info[7] = 0;
+	poly1305(info, 8, nil, 0, tag, ds);	/* callers pass nil for tag except on the last call, which receives the result */
+}
+
+void
+ccpoly_encrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], Chachastate *cs)	/* encrypt dat in place and write the 16-byte authenticator to tag */
+{
+	DigestState ds;
+
+	ccpolyotk(cs, &ds);
+	if(cs->ivwords == 2){	/* 8-byte iv: mac input is aad, len(aad), ciphertext, len(ciphertext), unpadded */
+		poly1305(aad, naad, nil, 0, nil, &ds);
+		ccpolylen(naad, nil, &ds);
+		chacha_encrypt(dat, ndat, cs);
+		poly1305(dat, ndat, nil, 0, nil, &ds);
+		ccpolylen(ndat, tag, &ds);
+	} else {	/* otherwise: padded aad, padded ciphertext, then both lengths */
+		ccpolypad(aad, naad, &ds);
+		chacha_encrypt(dat, ndat, cs);	/* encrypt before hashing: the mac covers the ciphertext */
+		ccpolypad(dat, ndat, &ds);
+		ccpolylen(naad, nil, &ds);
+		ccpolylen(ndat, tag, &ds);
+	}
+}
+
+int
+ccpoly_decrypt(uchar *dat, u32 ndat, uchar *aad, u32 naad, uchar tag[16], Chachastate *cs)	/* verify tag, then decrypt dat in place; 0 on success, -1 on tag mismatch */
+{
+	DigestState ds;
+	uchar tmp[16];
+
+	ccpolyotk(cs, &ds);
+	if(cs->ivwords == 2){	/* same mac layouts as ccpoly_encrypt, computed over the received ciphertext */
+		poly1305(aad, naad, nil, 0, nil, &ds);
+		ccpolylen(naad, nil, &ds);
+		poly1305(dat, ndat, nil, 0, nil, &ds);
+		ccpolylen(ndat, tmp, &ds);
+	} else {
+		ccpolypad(aad, naad, &ds);
+		ccpolypad(dat, ndat, &ds);
+		ccpolylen(naad, nil, &ds);
+		ccpolylen(ndat, tmp, &ds);
+	}
+	if(tsmemcmp(tag, tmp, 16) != 0)	/* NOTE(review): tsmemcmp is presumably a timing-safe compare -- confirm */
+		return -1;	/* on mismatch dat is returned untouched, still ciphertext */
+	chacha_encrypt(dat, ndat, cs);	/* decryption uses the same keystream xor as encryption */
+	return 0;
+}
--- /dev/null
+++ b/libsec/port/chacha.c
@@ -1,0 +1,222 @@
+/*
+Adapted from chacha-merged.c version 20080118
+D. J. Bernstein
+Public domain.
+
+modified for use in Plan 9 and Inferno (no algorithmic changes),
+and including the changes to block number and nonce defined in RFC7539
+*/
+
+#include "os.h"
+#include <libsec.h>
+
+/* from chachablock.$O */
+extern void _chachablock(u32int x[16], int rounds);
+
+/* little-endian data order; GET4 widens the top byte to u32int before <<24 so it never shifts into int's sign bit */
+#define	GET4(p)		((p)[0]|((p)[1]<<8)|((p)[2]<<16)|((u32int)(p)[3]<<24))
+#define	PUT4(p,v)	(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24
+
+#define ENCRYPT(s, x, y, d) {\
+	u32int v; \
+	v = GET4(s); \
+	v ^= (x)+(y); \
+	PUT4(d, v); \
+}
+
+static uchar sigma[16] = "expand 32-byte k";
+static uchar tau[16] = "expand 16-byte k";
+
+static void
+load(u32int *d, uchar *s, int nw)	/* unpack nw little-endian 32-bit words from the bytes at s into d[0..nw-1] */
+{
+	int i;
+
+	for(i = 0; i < nw; i++, s+=4)	/* s advances 4 bytes per word stored */
+		d[i] = GET4(s);
+}
+
+void
+setupChachastate(Chachastate *s, uchar *key, u32 keylen, uchar *iv, u32 ivlen, int rounds)	/* initialise s from key, optional iv and round count */
+{
+	if(keylen != 256/8 && keylen != 128/8)	/* only 32- and 16-byte keys are accepted */
+		sysfatal("invalid chacha key length");
+	if(ivlen != 64/8 && ivlen != 96/8
+	&& ivlen != 128/8 && ivlen != 192/8)	/* hchacha, xchacha */
+		sysfatal("invalid chacha iv length");
+	if(rounds == 0)
+		rounds = 20;	/* 0 selects the default of 20 rounds */
+	s->rounds = rounds;
+	if(keylen == 256/8) { /* recommended */
+		load(&s->input[0], sigma, 4);
+		load(&s->input[4], key, 8);
+	}else{
+		load(&s->input[0], tau, 4);
+		load(&s->input[4], key, 4);	/* a 16-byte key fills both key halves of the state */
+		load(&s->input[8], key, 4);
+	}
+	s->xkey[0] = s->input[4];	/* keep a copy of the key words; chacha_setiv restores them for 24-byte ivs */
+	s->xkey[1] = s->input[5];
+	s->xkey[2] = s->input[6];
+	s->xkey[3] = s->input[7];
+	s->xkey[4] = s->input[8];
+	s->xkey[5] = s->input[9];
+	s->xkey[6] = s->input[10];
+	s->xkey[7] = s->input[11];
+
+	s->ivwords = ivlen/4;	/* iv length in 32-bit words: 2, 3, 4 or 6 */
+	s->input[12] = 0;	/* words 12 and 13 are the ones chacha_setblock writes; start at 0 */
+	s->input[13] = 0;
+	if(iv == nil){
+		s->input[14] = 0;
+		s->input[15] = 0;
+	}else
+		chacha_setiv(s, iv);
+}
+
+static void
+hchachablock(uchar h[32], Chachastate *s)	/* permute a copy of s->input and write words 0-3 and 12-15 to h, little-endian */
+{
+	u32int x[16];
+
+	x[0] = s->input[0];	/* work on a copy; s->input itself is left unchanged */
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	_chachablock(x, s->rounds);
+
+	PUT4(h+0*4, x[0]);	/* unlike encryptblock, the input words are not added back in */
+	PUT4(h+1*4, x[1]);
+	PUT4(h+2*4, x[2]);
+	PUT4(h+3*4, x[3]);
+	PUT4(h+4*4, x[12]);	/* second half of h comes from the last four words */
+	PUT4(h+5*4, x[13]);
+	PUT4(h+6*4, x[14]);
+	PUT4(h+7*4, x[15]);
+}
+
+void
+chacha_setiv(Chachastate *s, uchar *iv)	/* install iv, which must be s->ivwords*4 bytes long; 24-byte ivs also re-derive the key words */
+{
+	if(s->ivwords == 192/32){
+		/* xchacha with 192-bit iv */
+		u32int counter[2];
+		uchar h[32];
+
+		s->input[4] = s->xkey[0];	/* start from the original key, not a previously derived one */
+		s->input[5] = s->xkey[1];
+		s->input[6] = s->xkey[2];
+		s->input[7] = s->xkey[3];
+		s->input[8] = s->xkey[4];
+		s->input[9] = s->xkey[5];
+		s->input[10] = s->xkey[6];
+		s->input[11] = s->xkey[7];
+
+		counter[0] = s->input[12];	/* save words 12-13: they are overwritten by the iv just below */
+		counter[1] = s->input[13];
+
+		load(&s->input[12], iv, 4);	/* first 16 iv bytes fill words 12-15 for the hchacha step */
+
+		hchachablock(h, s);
+		load(&s->input[4], h, 8);	/* the 32-byte result replaces the key words */
+		memset(h, 0, 32);
+
+		s->input[12] = counter[0];
+		s->input[13] = counter[1];
+
+		load(&s->input[14], iv+16, 2);	/* remaining 8 iv bytes go into words 14-15 */
+		return;
+	}
+	load(&s->input[16 - s->ivwords], iv, s->ivwords);	/* otherwise the iv occupies the last ivwords words */
+}
+
+void
+chacha_setblock(Chachastate *s, u64int blockno)	/* set the block number used for the next block */
+{
+	s->input[12] = blockno;	/* low 32 bits */
+	if(s->ivwords != 3)	/* with a 3-word iv, word 13 holds iv data (see chacha_setiv) and is left alone */
+		s->input[13] = blockno>>32;
+}
+
+static void
+encryptblock(Chachastate *s, uchar *src, uchar *dst)	/* xor one 64-byte block of src with keystream into dst, then bump the block number */
+{
+	u32int x[16];
+	int i;
+
+	x[0] = s->input[0];	/* permute a copy so s->input can be added back afterwards */
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+	_chachablock(x, s->rounds);
+
+	for(i=0; i<nelem(x); i+=4){	/* four words (16 bytes) per iteration */
+		ENCRYPT(src, x[i], s->input[i], dst);	/* keystream word is x[i] + s->input[i] */
+		ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
+		ENCRYPT(src+8, x[i+2], s->input[i+2], dst+8);
+		ENCRYPT(src+12, x[i+3], s->input[i+3], dst+12);
+		src += 16;
+		dst += 16;
+	}
+
+	if(++s->input[12] == 0 && s->ivwords != 3)	/* carry into word 13 on wrap, except when it holds iv data */
+		s->input[13]++;
+}
+
+void
+chacha_encrypt2(uchar *src, uchar *dst, u32 bytes, Chachastate *s)	/* xor bytes bytes of src with the keystream of s into dst */
+{
+	uchar tmp[ChachaBsize];
+
+	for(; bytes >= ChachaBsize; bytes -= ChachaBsize){	/* whole blocks go straight from src to dst */
+		encryptblock(s, src, dst);
+		src += ChachaBsize;
+		dst += ChachaBsize;
+	}
+	if(bytes > 0){	/* trailing partial block: bounce through tmp so src/dst are not overrun */
+		memmove(tmp, src, bytes);
+		encryptblock(s, tmp, tmp);	/* still consumes a whole block of keystream */
+		memmove(dst, tmp, bytes);
+	}
+}
+
+void
+chacha_encrypt(uchar *buf, u32 bytes, Chachastate *s)	/* in-place variant: chacha_encrypt2 with src == dst */
+{
+	chacha_encrypt2(buf, buf, bytes, s);
+}
+
+void
+hchacha(uchar h[32], uchar *key, u32 keylen, uchar nonce[16], int rounds)	/* one-shot: 32-byte hchachablock output for key and a 16-byte nonce */
+{
+	Chachastate s[1];
+
+	setupChachastate(s, key, keylen, nonce, 16, rounds);	/* a 16-byte iv fills state words 12-15 */
+	hchachablock(h, s);
+	memset(s, 0, sizeof(s));	/* clear the key material held in the temporary state */
+}
--- /dev/null
+++ b/libsec/port/chachablock.c
@@ -1,0 +1,29 @@
+#include "os.h"
+
+#define ROTATE(v,c) ((u32int)((v) << (c)) | ((v) >> (32 - (c))))
+
+#define QUARTERROUND(ia,ib,ic,id) { \
+	u32int a, b, c, d, t; \
+	a = x[ia]; b = x[ib]; c = x[ic]; d = x[id]; \
+	a += b; t = d^a; d = ROTATE(t,16); \
+	c += d; t = b^c; b = ROTATE(t,12); \
+	a += b; t = d^a; d = ROTATE(t, 8); \
+	c += d; t = b^c; b = ROTATE(t, 7); \
+	x[ia] = a; x[ib] = b; x[ic] = c; x[id] = d; \
+}
+
+void
+_chachablock(u32int x[16], int rounds)	/* apply the chacha permutation to x in place; callers do any final addition */
+{
+	for(; rounds > 0; rounds -= 2) {	/* each iteration performs two rounds */
+		QUARTERROUND(0, 4, 8,12)	/* columns of the 4x4 word matrix */
+		QUARTERROUND(1, 5, 9,13)
+		QUARTERROUND(2, 6,10,14)
+		QUARTERROUND(3, 7,11,15)
+
+		QUARTERROUND(0, 5,10,15)	/* diagonals */
+		QUARTERROUND(1, 6,11,12)
+		QUARTERROUND(2, 7, 8,13)
+		QUARTERROUND(3, 4, 9,14)
+	}
+}
--- /dev/null
+++ b/libsec/port/chachatest.c
@@ -1,0 +1,217 @@
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+
+static void
+printblock(uchar *b, usize n)	/* hex-dump n bytes of b, eight per line */
+{
+	int i;
+
+	for(i=0; i+8<=n; i+=8){	/* full rows of eight bytes */
+		print("%#.2ux %#.2ux %#.2ux %#.2ux %#.2ux %#.2ux %#.2ux %#.2ux\n",
+			b[0], b[1], b[2], b[3], b[4], b[5], b[6], b[7]);
+		b += 8;
+	}
+	if(i < n){	/* final short row, space-separated with no trailing space */
+		print("%#.2ux", *b++);
+		while(++i < n)
+			print(" %#.2ux", *b++);
+		print("\n");
+	}
+}
+
+/* test vector from RFC7539 */
+uchar	rfckey[] = {
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f};
+uchar	rfcnonce[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x00};
+u32int	rfccount = 1;
+char	rfctext[] = "Ladies and Gentlemen of the class of '99: If I could offer you only one tip for the future, "
+	"sunscreen would be it.";
+uchar	rfcout[3*ChachaBsize];
+uchar	rfcref[] = {
+	0x6e, 0x2e, 0x35, 0x9a, 0x25, 0x68, 0xf9, 0x80, 0x41, 0xba, 0x07, 0x28, 0xdd, 0x0d, 0x69, 0x81,
+	0xe9, 0x7e, 0x7a, 0xec, 0x1d, 0x43, 0x60, 0xc2, 0x0a, 0x27, 0xaf, 0xcc, 0xfd, 0x9f, 0xae, 0x0b,
+	0xf9, 0x1b, 0x65, 0xc5, 0x52, 0x47, 0x33, 0xab, 0x8f, 0x59, 0x3d, 0xab, 0xcd, 0x62, 0xb3, 0x57,
+	0x16, 0x39, 0xd6, 0x24, 0xe6, 0x51, 0x52, 0xab, 0x8f, 0x53, 0x0c, 0x35, 0x9f, 0x08, 0x61, 0xd8,
+	0x07, 0xca, 0x0d, 0xbf, 0x50, 0x0d, 0x6a, 0x61, 0x56, 0xa3, 0x8e, 0x08, 0x8a, 0x22, 0xb6, 0x5e,
+	0x52, 0xbc, 0x51, 0x4d, 0x16, 0xcc, 0xf8, 0x06, 0x81, 0x8c, 0xe9, 0x1a, 0xb7, 0x79, 0x37, 0x36,
+	0x5a, 0xf9, 0x0b, 0xbf, 0x74, 0xa3, 0x5b, 0xe6, 0xb4, 0x0b, 0x8e, 0xed, 0xf2, 0x78, 0x5e, 0x42,
+	0x87, 0x4d
+};
+
+uchar	xcckey[] = {
+	0x1b, 0x27, 0x55, 0x64, 0x73, 0xe9, 0x85, 0xd4, 0x62, 0xcd, 0x51, 0x19, 0x7a, 0x9a, 0x46, 0xc7,
+	0x60, 0x09, 0x54, 0x9e, 0xac, 0x64, 0x74, 0xf2, 0x06, 0xc4, 0xee, 0x08, 0x44, 0xf6, 0x83, 0x89,
+};
+uchar	xcciv[] = {
+	0x69, 0x69, 0x6e, 0xe9, 0x55, 0xb6, 0x2b, 0x73, 0xcd, 0x62, 0xbd, 0xa8, 0x75, 0xfc, 0x73, 0xd6,
+	0x82, 0x19, 0xe0, 0x03, 0x6b, 0x7a, 0x0b, 0x37,
+};
+uchar	xccref[] = {
+	0x4f, 0xeb, 0xf2, 0xfe, 0x4b, 0x35, 0x9c, 0x50, 0x8d, 0xc5, 0xe8, 0xb5, 0x98, 0x0c, 0x88, 0xe3,
+	0x89, 0x46, 0xd8, 0xf1, 0x8f, 0x31, 0x34, 0x65, 0xc8, 0x62, 0xa0, 0x87, 0x82, 0x64, 0x82, 0x48,
+	0x01, 0x8d, 0xac, 0xdc, 0xb9, 0x04, 0x17, 0x88, 0x53, 0xa4, 0x6d, 0xca, 0x3a, 0x0e, 0xaa, 0xee,
+	0x74, 0x7c, 0xba, 0x97, 0x43, 0x4e, 0xaf, 0xfa, 0xd5, 0x8f, 0xea, 0x82, 0x22, 0x04, 0x7e, 0x0d,
+	0xe6, 0xc3, 0xa6, 0x77, 0x51, 0x06, 0xe0, 0x33, 0x1a, 0xd7, 0x14, 0xd2, 0xf2, 0x7a, 0x55, 0x64,
+	0x13, 0x40, 0xa1, 0xf1, 0xdd, 0x9f, 0x94, 0x53, 0x2e, 0x68, 0xcb, 0x24, 0x1c, 0xbd, 0xd1, 0x50,
+	0x97, 0x0d, 0x14, 0xe0, 0x5c, 0x5b, 0x17, 0x31, 0x93, 0xfb, 0x14, 0xf5, 0x1c, 0x41, 0xf3, 0x93,
+	0x83, 0x5b, 0xf7, 0xf4, 0x16, 0xa7, 0xe0, 0xbb, 0xa8, 0x1f, 0xfb, 0x8b, 0x13, 0xaf, 0x0e, 0x21,
+	0x69, 0x1d, 0x7e, 0xce, 0xc9, 0x3b, 0x75, 0xe6, 0xe4, 0x18, 0x3a,
+};
+
+uchar	ccpaad[] = {
+	0x50, 0x51, 0x52, 0x53, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+};
+uchar	ccpkey[] = {
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+};
+uchar	ccpiv[] = {
+	0x07, 0x00, 0x00, 0x00,  
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+};
+uchar	ccptag[] = {
+	0x1a, 0xe1, 0x0b, 0x59, 0x4f, 0x09, 0xe2, 0x6a, 0x7e, 0x90, 0x2e, 0xcb, 0xd0, 0x60, 0x06, 0x91,
+};
+
+uchar	ccp64aad[] = {
+	0x87, 0xe2, 0x29, 0xd4, 0x50, 0x08, 0x45, 0xa0, 0x79, 0xc0,
+};
+uchar	ccp64key[] = {
+	0x42, 0x90, 0xbc, 0xb1, 0x54, 0x17, 0x35, 0x31, 0xf3, 0x14, 0xaf, 0x57, 0xf3, 0xbe, 0x3b, 0x50,
+	0x06, 0xda, 0x37, 0x1e, 0xce, 0x27, 0x2a, 0xfa, 0x1b, 0x5d, 0xbd, 0xd1, 0x10, 0x0a, 0x10, 0x07,
+};
+uchar	ccp64iv[] = {
+	0xcd, 0x7c, 0xf6, 0x7b, 0xe3, 0x9c, 0x79, 0x4a,
+};
+uchar	ccp64inp[] = {
+	0x86, 0xd0, 0x99, 0x74, 0x84, 0x0b, 0xde, 0xd2, 0xa5, 0xca, 
+};
+uchar	ccp64out[] = {
+	0xe3, 0xe4, 0x46, 0xf7, 0xed, 0xe9, 0xa1, 0x9b, 0x62, 0xa4,
+};
+uchar	ccp64tag[] = {
+	0x67, 0x7d, 0xab, 0xf4, 0xe3, 0xd2, 0x4b, 0x87, 0x6b, 0xb2, 0x84, 0x75, 0x38, 0x96, 0xe1, 0xd6,
+};
+
+void
+main(int argc, char **argv)	/* run the chacha, xchacha and ccpoly vectors in turn; exits "wrong" on the first mismatch */
+{
+	Chachastate s;
+	uchar tag[16];
+	int n;
+
+	ARGBEGIN{
+	}ARGEND
+	print("rfc7539:\n");
+	print("key:\n");
+	printblock(rfckey, sizeof(rfckey));
+	n = strlen(rfctext);
+	setupChachastate(&s, rfckey, sizeof(rfckey), rfcnonce, sizeof(rfcnonce), 0);	/* rounds 0 selects the default */
+	chacha_setblock(&s, rfccount);	/* this vector starts at block rfccount (1), not 0 */
+	print("rfc in:\n");
+	printblock((uchar*)rfctext, n);
+	chacha_encrypt2((uchar*)rfctext, rfcout, n, &s);
+	print("rfc out:\n");
+	printblock(rfcout, n);
+	if(memcmp(rfcout, rfcref, sizeof(rfcref)) != 0){
+		print("failure of vision\n");
+		exits("wrong");
+	}
+	print("\n");
+
+
+	print("xchacha key:\n");
+	printblock(xcckey, sizeof(xcckey));
+
+	print("xchacha iv:\n");
+	printblock(xcciv, sizeof(xcciv));
+
+	setupChachastate(&s, xcckey, sizeof(xcckey), xcciv, sizeof(xcciv), 20);
+	memset(rfcout, 0, sizeof(xccref));	/* encrypting zeros makes the output the raw keystream */
+	chacha_encrypt(rfcout, sizeof(xccref), &s);
+
+	print("xchacha out:\n");
+	printblock(rfcout, sizeof(xccref));
+	if(memcmp(rfcout, xccref, sizeof(xccref)) != 0){
+		print("failure of vision\n");
+		exits("wrong");
+	}
+	print("\n");
+
+
+	print("ccpoly key:\n");
+	printblock(ccpkey, sizeof(ccpkey));
+
+	print("ccpoly iv:\n");
+	printblock(ccpiv, sizeof(ccpiv));
+
+	setupChachastate(&s, ccpkey, sizeof(ccpkey), ccpiv, sizeof(ccpiv), 20);
+
+	memmove(rfcout, rfctext, sizeof(rfctext)-1);	/* -1 drops the string's terminating NUL */
+	ccpoly_encrypt(rfcout, sizeof(rfctext)-1, ccpaad, sizeof(ccpaad), tag, &s);
+
+	print("ccpoly cipher:\n");
+	printblock(rfcout, sizeof(rfctext)-1);
+
+	print("ccpoly tag:\n");
+	printblock(tag, sizeof(tag));
+
+	if(memcmp(tag, ccptag, sizeof(tag)) != 0){	/* only the tag is compared against a reference for this vector */
+		print("bad ccpoly tag\n");
+		exits("wrong");
+	}
+
+	if(ccpoly_decrypt(rfcout, sizeof(rfctext)-1, ccpaad, sizeof(ccpaad), tag, &s) != 0){	/* reuses s: ccpoly_decrypt rewinds to block 0 itself */
+		print("ccpoly decryption failed\n");
+		exits("wrong");
+	}
+
+	if(memcmp(rfcout, rfctext, sizeof(rfctext)-1) != 0){
+		print("ccpoly bad decryption\n");
+		exits("wrong");
+	}
+	print("\n");
+
+
+	print("ccpoly64 key:\n");
+	printblock(ccp64key, sizeof(ccp64key));
+
+	print("ccpoly64 iv:\n");
+	printblock(ccp64iv, sizeof(ccp64iv));
+
+	setupChachastate(&s, ccp64key, sizeof(ccp64key), ccp64iv, sizeof(ccp64iv), 20);	/* 8-byte iv: exercises the ivwords == 2 path */
+
+	memmove(rfcout, ccp64inp, sizeof(ccp64inp));
+	ccpoly_encrypt(rfcout, sizeof(ccp64inp), ccp64aad, sizeof(ccp64aad), tag, &s);
+
+	print("ccpoly64 cipher:\n");
+	printblock(rfcout, sizeof(ccp64inp));
+
+	print("ccpoly64 tag:\n");
+	printblock(tag, sizeof(tag));
+
+	if(memcmp(rfcout, ccp64out, sizeof(ccp64out)) != 0){
+		print("ccpoly64 bad ciphertext\n");
+		exits("wrong");
+	}
+	if(memcmp(tag, ccp64tag, sizeof(ccp64tag)) != 0){
+		print("ccpoly64 bad encryption tag\n");
+		exits("wrong");
+	}
+
+	if(ccpoly_decrypt(rfcout, sizeof(ccp64inp), ccp64aad, sizeof(ccp64aad), tag, &s) != 0){
+		print("ccpoly64 decryption failed\n");
+		exits("wrong");
+	}
+	if(memcmp(rfcout, ccp64inp, sizeof(ccp64inp)) != 0){
+		print("ccpoly64 bad decryption\n");
+		exits("wrong");
+	}
+
+	print("passed\n");
+	exits(nil);
+}
--- /dev/null
+++ b/libsec/port/curve25519.c
@@ -1,0 +1,570 @@
+/* Copyright 2008, Google Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * curve25519: Curve25519 elliptic curve, public key function
+ *
+ * http://code.google.com/p/curve25519-donna/
+ *
+ * Adam Langley <agl@imperialviolet.org>
+ *
+ * Derived from public domain C code by Daniel J. Bernstein <djb@cr.yp.to>
+ *
+ * More information about curve25519 can be found here
+ *   http://cr.yp.to/ecdh.html
+ *
+ * djb's sample implementation of curve25519 is written in a special assembly
+ * language called qhasm and uses the floating point registers.
+ *
+ * This is, almost, a clean room reimplementation from the curve25519 paper. It
+ * uses many of the tricks described therein. Only the crecip function is taken
+ * from the sample implementation.
+ */
+#include "os.h"
+#include <libsec.h>
+
+typedef vlong felem;
+
+/* Limb-wise addition in place: output[k] += in[k] for all 10 limbs */
+static void
+fsum(felem *output, felem *in) {
+  unsigned k;
+
+  for (k = 0; k < 10; k++)
+    output[k] += in[k];
+}
+
+/* In-place reversed subtraction: output = in - output
+ * (the destination is the subtrahend, so mind the argument order!)
+ */
+static void
+fdifference(felem *output, felem *in) {
+  unsigned k;
+  for (k = 0; k < 10; k++)
+    output[k] = in[k] - output[k];
+}
+
+/* Scale every limb by a constant: output[k] = in[k] * scalar, k = 0..9 */
+static void
+fscalar_product(felem *output, felem *in, felem scalar) {
+  unsigned k;
+  for (k = 0; k < 10; k++)
+    output[k] = scalar * in[k];
+}
+
+/* Multiply two numbers: output = in2 * in, as 19 limbs output[0..18].
+ *
+ * output must be distinct to both inputs. The inputs are reduced coefficient
+ * form, the output is not. Products of two odd-index limbs are doubled.
+ */
+static void fproduct(felem *output, felem *in2, felem *in) {
+  output[0] =      in2[0] * in[0];
+  output[1] =      in2[0] * in[1] +
+                   in2[1] * in[0];
+  output[2] =  2 * in2[1] * in[1] +
+                   in2[0] * in[2] +
+                   in2[2] * in[0];
+  output[3] =      in2[1] * in[2] +
+                   in2[2] * in[1] +
+                   in2[0] * in[3] +
+                   in2[3] * in[0];
+  output[4] =      in2[2] * in[2] +
+               2 * (in2[1] * in[3] +
+                    in2[3] * in[1]) +
+                   in2[0] * in[4] +
+                   in2[4] * in[0];
+  output[5] =      in2[2] * in[3] +
+                   in2[3] * in[2] +
+                   in2[1] * in[4] +
+                   in2[4] * in[1] +
+                   in2[0] * in[5] +
+                   in2[5] * in[0];
+  output[6] =  2 * (in2[3] * in[3] +
+                    in2[1] * in[5] +
+                    in2[5] * in[1]) +
+                   in2[2] * in[4] +
+                   in2[4] * in[2] +
+                   in2[0] * in[6] +
+                   in2[6] * in[0];
+  output[7] =      in2[3] * in[4] +
+                   in2[4] * in[3] +
+                   in2[2] * in[5] +
+                   in2[5] * in[2] +
+                   in2[1] * in[6] +
+                   in2[6] * in[1] +
+                   in2[0] * in[7] +
+                   in2[7] * in[0];
+  output[8] =      in2[4] * in[4] +
+               2 * (in2[3] * in[5] +
+                    in2[5] * in[3] +
+                    in2[1] * in[7] +
+                    in2[7] * in[1]) +
+                   in2[2] * in[6] +
+                   in2[6] * in[2] +
+                   in2[0] * in[8] +
+                   in2[8] * in[0];
+  output[9] =      in2[4] * in[5] +
+                   in2[5] * in[4] +
+                   in2[3] * in[6] +
+                   in2[6] * in[3] +
+                   in2[2] * in[7] +
+                   in2[7] * in[2] +
+                   in2[1] * in[8] +
+                   in2[8] * in[1] +
+                   in2[0] * in[9] +
+                   in2[9] * in[0];
+  output[10] = 2 * (in2[5] * in[5] +
+                    in2[3] * in[7] +
+                    in2[7] * in[3] +
+                    in2[1] * in[9] +
+                    in2[9] * in[1]) +
+                   in2[4] * in[6] +
+                   in2[6] * in[4] +
+                   in2[2] * in[8] +
+                   in2[8] * in[2];
+  output[11] =     in2[5] * in[6] +
+                   in2[6] * in[5] +
+                   in2[4] * in[7] +
+                   in2[7] * in[4] +
+                   in2[3] * in[8] +
+                   in2[8] * in[3] +
+                   in2[2] * in[9] +
+                   in2[9] * in[2];
+  output[12] =     in2[6] * in[6] +
+               2 * (in2[5] * in[7] +
+                    in2[7] * in[5] +
+                    in2[3] * in[9] +
+                    in2[9] * in[3]) +
+                   in2[4] * in[8] +
+                   in2[8] * in[4];
+  output[13] =     in2[6] * in[7] +
+                   in2[7] * in[6] +
+                   in2[5] * in[8] +
+                   in2[8] * in[5] +
+                   in2[4] * in[9] +
+                   in2[9] * in[4];
+  output[14] = 2 * (in2[7] * in[7] +
+                    in2[5] * in[9] +
+                    in2[9] * in[5]) +
+                   in2[6] * in[8] +
+                   in2[8] * in[6];
+  output[15] =     in2[7] * in[8] +
+                   in2[8] * in[7] +
+                   in2[6] * in[9] +
+                   in2[9] * in[6];
+  output[16] =     in2[8] * in[8] +
+               2 * (in2[7] * in[9] +
+                    in2[9] * in[7]);
+  output[17] =     in2[8] * in[9] +
+                   in2[9] * in[8];
+  output[18] = 2 * in2[9] * in[9];
+}
+
+/* Reduce a long form to a short form by taking the input mod 2^255 - 19:
+ * each upper limb output[k+10] is folded onto output[k] with a factor of
+ * 19, for k = 0..8.  The upper limbs themselves are read but not cleared.
+ * The nine folds touch disjoint limbs, so their order does not matter.
+ */
+static void
+freduce_degree(felem *output) {
+  int k;
+
+  for (k = 8; k >= 0; k--)
+    output[k] += 19 * output[k + 10];
+}
+
+/* Reduce all coefficients of the short form input to be -2**25 <= x <= 2**25
+ * by carrying between limbs; output[10] is used as scratch (11 limbs needed). */
+static void freduce_coefficients(felem *output) {
+  unsigned i;
+  do {
+    output[10] = 0;  /* collects whatever is carried out of limb 9 */
+
+    for (i = 0; i < 10; i += 2) {
+      felem over = output[i] / 0x2000000l;
+      felem over2 = (over + ((over >> 63) * 2) + 1) / 2;  /* over/2, halves rounded away from zero (assumes >> on a negative felem is arithmetic) */
+      output[i+1] += over2;
+      output[i] -= over2 * 0x4000000l;
+
+      over = output[i+1] / 0x2000000;
+      output[i+2] += over;
+      output[i+1] -= over * 0x2000000;
+    }
+    output[0] += 19 * output[10];  /* fold the top carry back into limb 0 */
+  } while (output[10]);  /* a non-zero top carry may have pushed limb 0 out of range again */
+}
+
+/* Field multiplication: output = in * in2, reduced in degree and coefficient.
+ *
+ * The 19-limb product is built and reduced in a local buffer; only its low
+ * 10 limbs are copied to output afterwards.
+ */
+static void
+fmul(felem *output, felem *in, felem *in2) {
+  felem wide[19];
+  fproduct(wide, in, in2);
+  freduce_degree(wide);
+  freduce_coefficients(wide);
+  memcpy(output, wide, 10 * sizeof wide[0]);
+}
+
+static void fsquare_inner(felem *output, felem *in) {  /* output[0..18] = unreduced square of the 10-limb in; presumably fproduct(output, in, in) with like terms merged */
+  felem tmp;  /* holds the cross terms that get an extra factor of 2 */
+  output[0] =      in[0] * in[0];
+  output[1] =  2 * in[0] * in[1];
+  output[2] =  2 * (in[1] * in[1] +
+                    in[0] * in[2]);
+  output[3] =  2 * (in[1] * in[2] +
+                    in[0] * in[3]);
+  output[4] =      in[2] * in[2] +
+               4 * in[1] * in[3] +
+               2 * in[0] * in[4];
+  output[5] =  2 * (in[2] * in[3] +
+                    in[1] * in[4] +
+                    in[0] * in[5]);
+  output[6] =  2 * (in[3] * in[3] +
+                    in[2] * in[4] +
+                    in[0] * in[6] +
+                2 * in[1] * in[5]);
+  output[7] =  2 * (in[3] * in[4] +
+                    in[2] * in[5] +
+                    in[1] * in[6] +
+                    in[0] * in[7]);
+  tmp = in[1] * in[7] + in[3] * in[5];
+  output[8] =      in[4] * in[4] +
+               2 * (in[2] * in[6] +
+                    in[0] * in[8] +
+                        2 * tmp);
+  output[9] =  2 * (in[4] * in[5] +
+                    in[3] * in[6] +
+                    in[2] * in[7] +
+                    in[1] * in[8] +
+                    in[0] * in[9]);
+  tmp = in[3] * in[7] + in[1] * in[9];
+  output[10] = 2 * (in[5] * in[5] +
+                   in[4] * in[6] +
+                   in[2] * in[8] +
+                       2 * tmp);
+  output[11] = 2 * (in[5] * in[6] +
+                    in[4] * in[7] +
+                    in[3] * in[8] +
+                    in[2] * in[9]);
+  output[12] =     in[6] * in[6] +
+               2 * (in[4] * in[8] +
+                2 * (in[5] * in[7] +
+                     in[3] * in[9]));
+  output[13] = 2 * (in[6] * in[7] +
+                    in[5] * in[8] +
+                    in[4] * in[9]);
+  output[14] = 2 * (in[7] * in[7] +
+                    in[6] * in[8] +
+                2 * in[5] * in[9]);
+  output[15] = 2 * (in[7] * in[8] +
+                    in[6] * in[9]);
+  output[16] =     in[8] * in[8] +
+               4 * in[7] * in[9];
+  output[17] = 2 * in[8] * in[9];
+  output[18] = 2 * in[9] * in[9];
+}
+
+/* Field squaring: output = in * in, reduced in degree and coefficient. */
+static void fsquare(felem *output, felem *in) {
+  felem wide[19];
+  fsquare_inner(wide, in);
+  freduce_degree(wide);
+  freduce_coefficients(wide);
+  memcpy(output, wide, 10 * sizeof wide[0]);
+}
+
+/* Take a little-endian, 32-byte number and expand it into polynomial form:
+ * ten limbs masked alternately to 26 bits (even index) and 25 bits (odd).
+ * Limb k is cut from the 32-bit little-endian word at byte start[k],
+ * shifted right by shift[k] bits.
+ */
+static void
+fexpand(felem *output, uchar *input) {
+  static uchar start[10] = {0, 3, 6, 9, 12, 16, 19, 22, 25, 28};
+  static uchar shift[10] = {0, 2, 3, 5, 6, 0, 1, 3, 4, 6};
+  uchar *p;
+  felem word;
+  int k;
+
+  for (k = 0; k < 10; k++) {
+    p = input + start[k];
+    word = (felem)p[0] | (felem)p[1] << 8 |
+           (felem)p[2] << 16 | (felem)p[3] << 24;
+    output[k] = (word >> shift[k]) & ((k & 1) ? 0x1ffffff : 0x3ffffff);
+  }
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * little-endian, 32-byte array. The limbs of input are modified in place.
+ */
+static void
+fcontract(uchar *output, felem *input) {
+  int i;
+
+  do {
+    for (i = 0; i < 9; ++i) {
+      if ((i & 1) == 1) {
+        while (input[i] < 0) {
+          input[i] += 0x2000000;
+          input[i + 1]--;
+        }
+      } else {
+        while (input[i] < 0) {
+          input[i] += 0x4000000;
+          input[i + 1]--;
+        }
+      }
+    }
+    while (input[9] < 0) {
+      input[9] += 0x2000000;
+      input[0] -= 19;  /* a borrow out of the top limb wraps around to limb 0 as -19 */
+    }
+  } while (input[0] < 0);  /* the wrap-around may have made limb 0 negative again */
+
+  input[1] <<= 2;  /* pre-shift each limb to its bit offset inside the 4-byte window that F writes */
+  input[2] <<= 3;
+  input[3] <<= 5;
+  input[4] <<= 6;
+  input[6] <<= 1;
+  input[7] <<= 3;
+  input[8] <<= 4;
+  input[9] <<= 6;
+#define F(i, s) \
+  output[s+0] |=  input[i] & 0xff; \
+  output[s+1]  = (input[i] >> 8) & 0xff; \
+  output[s+2]  = (input[i] >> 16) & 0xff; \
+  output[s+3]  = (input[i] >> 24) & 0xff;
+  output[0] = 0;  /* F ORs into its first byte; bytes 0 and 16 are not written by an earlier F, so clear them */
+  output[16] = 0;
+  F(0,0);
+  F(1,3);
+  F(2,6);
+  F(3,9);
+  F(4,12);
+  F(5,16);
+  F(6,19);
+  F(7,22);
+  F(8,25);
+  F(9,28);
+#undef F
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ *   x2 z2: long form
+ *   x3 z3: long form
+ *   x z: short form, destroyed
+ *   xprime zprime: short form, destroyed
+ *   qmqp: short form, preserved
+ */
+static void fmonty(felem *x2, felem *z2,  /* output 2Q */
+                   felem *x3, felem *z3,  /* output Q + Q' */
+                   felem *x, felem *z,    /* input Q */
+                   felem *xprime, felem *zprime,  /* input Q' */
+                   felem *qmqp /* input Q - Q' */) {
+  felem origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19],
+        zzprime[19], zzzprime[19], xxxprime[19];
+
+  memcpy(origx, x, 10 * sizeof(felem));
+  fsum(x, z);
+  fdifference(z, origx);  // does x - z
+
+  memcpy(origxprime, xprime, sizeof(felem) * 10);
+  fsum(xprime, zprime);
+  fdifference(zprime, origxprime);
+  fproduct(xxprime, xprime, z);
+  fproduct(zzprime, x, zprime);
+  freduce_degree(xxprime);
+  freduce_coefficients(xxprime);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(origxprime, xxprime, sizeof(felem) * 10);
+  fsum(xxprime, zzprime);
+  fdifference(zzprime, origxprime);
+  fsquare(xxxprime, xxprime);
+  fsquare(zzzprime, zzprime);
+  fproduct(zzprime, zzzprime, qmqp);
+  freduce_degree(zzprime);
+  freduce_coefficients(zzprime);
+  memcpy(x3, xxxprime, sizeof(felem) * 10);  /* only the low 10 limbs of x3 and z3 are written */
+  memcpy(z3, zzprime, sizeof(felem) * 10);
+
+  fsquare(xx, x);
+  fsquare(zz, z);
+  fproduct(x2, xx, zz);
+  freduce_degree(x2);
+  freduce_coefficients(x2);
+  fdifference(zz, xx);  // does zz = xx - zz
+  memset(zzz + 10, 0, sizeof(felem) * 9);  /* fscalar_product fills only limbs 0..9; clear the rest before reducing */
+  fscalar_product(zzz, zz, 121665);
+  freduce_degree(zzz);
+  freduce_coefficients(zzz);
+  fsum(zzz, xx);
+  fproduct(z2, zz, zzz);
+  freduce_degree(z2);
+  freduce_coefficients(z2);
+}
+
+/* Exchange the first 10 limbs of a and b when iswap is 1, leave them alone
+ * when it is 0.  Uses a mask instead of a branch so the choice does not
+ * show up in control flow.
+ */
+static void
+swap_conditional(felem *a, felem *b, felem iswap) {
+  felem mask = -iswap, x;
+  unsigned i;
+  for (i = 0; i < 10; ++i) {
+    x = mask & (a[i] ^ b[i]);
+    a[i] ^= x;
+    b[i] ^= x;
+  }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ *   resultx/resultz: the x coordinate of the resulting curve point (short form)
+ *   n: a little endian, 32-byte number
+ *   q: a point of the curve (short form)
+ *
+ * Each bit of n picks the ladder operands by conditional swap, not by branch.
+ */
+static void
+cmult(felem *resultx, felem *resultz, uchar *n, felem *q) {
+  felem a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0};
+  felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+  felem e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1};
+  felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+  felem bit;
+  unsigned i, j;
+
+  memcpy(nqpqx, q, sizeof(felem) * 10);
+
+  for (i = 0; i < 32; ++i) {
+    uchar byte = n[31 - i];
+    for (j = 0; j < 8; ++j) {
+      bit = byte >> 7;  /* current scalar bit, most significant first */
+      swap_conditional(nqx, nqpqx, bit);
+      swap_conditional(nqz, nqpqz, bit);
+      fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
+      swap_conditional(nqx2, nqpqx2, bit);
+      swap_conditional(nqz2, nqpqz2, bit);
+      t = nqx; nqx = nqx2; nqx2 = t;  /* fmonty outputs become the next inputs */
+      t = nqz; nqz = nqz2; nqz2 = t;
+      t = nqpqx; nqpqx = nqpqx2; nqpqx2 = t;
+      t = nqpqz; nqpqz = nqpqz2; nqpqz2 = t;
+      byte <<= 1;
+    }
+  }
+
+  memcpy(resultx, nqx, sizeof(felem) * 10);
+  memcpy(resultz, nqz, sizeof(felem) * 10);
+}
+
+// crecip: out = z^(2^255 - 21), i.e. z^(p-2) for p = 2^255 - 19; the comment on
+// each step gives the exponent reached. Shamelessly copied from djb's code
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, felem *z) {
+  felem z2[10];
+  felem z9[10];
+  felem z11[10];
+  felem z2_5_0[10];
+  felem z2_10_0[10];
+  felem z2_20_0[10];
+  felem z2_50_0[10];
+  felem z2_100_0[10];
+  felem t0[10];
+  felem t1[10];
+  int i;
+
+  /* 2 */ fsquare(z2,z);
+  /* 4 */ fsquare(t1,z2);
+  /* 8 */ fsquare(t0,t1);
+  /* 9 */ fmul(z9,t0,z);
+  /* 11 */ fmul(z11,z9,z2);
+  /* 22 */ fsquare(t0,z11);
+  /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9);
+
+  /* 2^6 - 2^1 */ fsquare(t0,z2_5_0);
+  /* 2^7 - 2^2 */ fsquare(t1,t0);
+  /* 2^8 - 2^3 */ fsquare(t0,t1);
+  /* 2^9 - 2^4 */ fsquare(t1,t0);
+  /* 2^10 - 2^5 */ fsquare(t0,t1);
+  /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0);
+
+  /* 2^11 - 2^1 */ fsquare(t0,z2_10_0);
+  /* 2^12 - 2^2 */ fsquare(t1,t0);
+  /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0);
+
+  /* 2^21 - 2^1 */ fsquare(t0,z2_20_0);
+  /* 2^22 - 2^2 */ fsquare(t1,t0);
+  /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0);
+
+  /* 2^41 - 2^1 */ fsquare(t1,t0);
+  /* 2^42 - 2^2 */ fsquare(t0,t1);
+  /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0);
+
+  /* 2^51 - 2^1 */ fsquare(t0,z2_50_0);
+  /* 2^52 - 2^2 */ fsquare(t1,t0);
+  /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0);
+
+  /* 2^101 - 2^1 */ fsquare(t1,z2_100_0);
+  /* 2^102 - 2^2 */ fsquare(t0,t1);
+  /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); }
+  /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0);
+
+  /* 2^201 - 2^1 */ fsquare(t0,t1);
+  /* 2^202 - 2^2 */ fsquare(t1,t0);
+  /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); }
+  /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0);
+
+  /* 2^251 - 2^1 */ fsquare(t1,t0);
+  /* 2^252 - 2^2 */ fsquare(t0,t1);
+  /* 2^253 - 2^3 */ fsquare(t1,t0);
+  /* 2^254 - 2^4 */ fsquare(t0,t1);
+  /* 2^255 - 2^5 */ fsquare(t1,t0);
+  /* 2^255 - 21 */ fmul(out,t1,z11);
+}
+
+/* mypublic = secret * basepoint on curve25519; all three are 32-byte little-endian strings */
+void curve25519(uchar mypublic[32], uchar secret[32], uchar basepoint[32]) {
+  felem base[10], rx[10], rz[10], rzinv[10];
+  fexpand(base, basepoint);       /* bytes -> limbs */
+  cmult(rx, rz, secret, base);    /* result as the fraction rx/rz */
+  crecip(rzinv, rz);
+  fmul(rz, rx, rzinv);            /* rx * (1/rz), stored over rz */
+  fcontract(mypublic, rz);        /* limbs -> bytes */
+}
--- /dev/null
+++ b/libsec/port/curve25519_dh.c
@@ -1,0 +1,37 @@
+#include "os.h"
+#include <mp.h>
+#include <libsec.h>
+
+static uchar nine[32] = {9};
+static uchar zero[32] = {0};
+
+/* Fill x with a fresh clamped secret and y with curve25519(x, nine). */
+void
+curve25519_dh_new(uchar x[32], uchar y[32])
+{
+	uchar top;	/* last random byte, before clamping */
+
+	genrandom(x, 32);
+	top = x[31];
+	x[0] &= 0xf8;			/* bits 0..2 off */
+	x[31] = (top & 0x7f) | 0x40;	/* bit 255 off, bit 254 on */
+	curve25519(y, x, nine);
+
+	/* the spare random bit of top becomes bit 255 of y */
+	y[31] |= top & 0x80;
+}
+
+/* Derive z = curve25519(x, y) from our secret x and the peer value y, then
+ * zero x and y.  Returns whether tsmemcmp reports z as differing from zero[]. */
+int
+curve25519_dh_finish(uchar x[32], uchar y[32], uchar z[32])
+{
+	int nonzero;
+
+	y[31] &= ~0x80;		/* drop the random top bit */
+	curve25519(z, x, y);
+	memset(x, 0, 32);
+	memset(y, 0, 32);
+	nonzero = tsmemcmp(z, zero, 32) != 0;
+	return nonzero;
+}
--- a/libsec/port/decodepem.c
+++ b/libsec/port/decodepem.c
@@ -1,6 +1,4 @@
-#include <u.h>
-#include <libc.h>
-#include <mp.h>
+#include "os.h"
 #include <libsec.h>
 
 #define STRLEN(s)	(sizeof(s)-1)
@@ -27,13 +25,15 @@
 			t++;
 		if(strncmp(tt, "-----BEGIN ", STRLEN("-----BEGIN ")) == 0
 		&& strncmp(&tt[STRLEN("-----BEGIN ")], type, n) == 0
-		&& strncmp(&tt[STRLEN("-----BEGIN ")+n], "-----\n", STRLEN("-----\n")) == 0)
+		&& strncmp(&tt[STRLEN("-----BEGIN ")+n], "-----", STRLEN("-----")) == 0
+		&& strchr("\r\n", tt[STRLEN("-----BEGIN ")+n+STRLEN("-----")]) != nil)
 			break;
 	}
 	for(tt = t; tt != nil && tt < e; tt++){
 		if(strncmp(tt, "-----END ", STRLEN("-----END ")) == 0
 		&& strncmp(&tt[STRLEN("-----END ")], type, n) == 0
-		&& strncmp(&tt[STRLEN("-----END ")+n], "-----\n", STRLEN("-----\n")) == 0)
+		&& strncmp(&tt[STRLEN("-----END ")+n], "-----", STRLEN("-----")) == 0
+		&& strchr("\r\n", tt[STRLEN("-----END ")+n+STRLEN("-----")]) != nil)
 			break;
 		tt = strchr(tt, '\n');
 		if(tt == nil)
--- a/libsec/port/des.c
+++ b/libsec/port/des.c
@@ -4,7 +4,7 @@
 /*
  * integrated sbox & p perm
  */
-static u32 spbox[] = {
+static u32int spbox[] = {
 
 0x00808200,0x00000000,0x00008000,0x00808202,0x00808002,0x00008202,0x00000002,0x00008000,
 0x00000200,0x00808200,0x00808202,0x00000200,0x00800202,0x00808002,0x00800000,0x00000002,
@@ -81,7 +81,7 @@
 
 /*
  * for manual index calculation
- * #define fetch(box, i, sh) (*((u32*)((uchar*)spbox + (box << 8) + ((i >> (sh)) & 0xfc))))
+ * #define fetch(box, i, sh) (*((u32int*)((uchar*)spbox + (box << 8) + ((i >> (sh)) & 0xfc))))
  */
 #define fetch(box, i, sh) ((spbox+(box << 6))[((i >> (sh + 2)) & 0x3f)])
 
@@ -91,14 +91,14 @@
 void
 block_cipher(u32 key[32], uchar text[8], int decrypting)
 {
-	u32 right, left, v0, v1;
+	u32int right, left, v0, v1;
 	int i, keystep;
 
 	/*
 	 * initial permutation
 	 */
-	v0 = text[0] | ((u32)text[2]<<8) | ((u32)text[4]<<16) | ((u32)text[6]<<24);
-	left = text[1] | ((u32)text[3]<<8) | ((u32)text[5]<<16) | ((u32)text[7]<<24);
+	v0 = text[0] | ((u32int)text[2]<<8) | ((u32int)text[4]<<16) | ((u32int)text[6]<<24);
+	left = text[1] | ((u32int)text[3]<<8) | ((u32int)text[5]<<16) | ((u32int)text[7]<<24);
 	right = (left & 0xaaaaaaaa) | ((v0 >> 1) & 0x55555555);
 	left = ((left << 1) & 0xaaaaaaaa) | (v0 & 0x55555555);
 	left = ((left << 6) & 0x33003300)
@@ -183,14 +183,14 @@
 triple_block_cipher(u32 expanded_key[3][32], uchar text[8], int ende)
 {
 	u32 *key;
-	u32 right, left, v0, v1;
+	u32int right, left, v0, v1;
 	int i, j, keystep;
 
 	/*
 	 * initial permutation
 	 */
-	v0 = text[0] | ((u32)text[2]<<8) | ((u32)text[4]<<16) | ((u32)text[6]<<24);
-	left = text[1] | ((u32)text[3]<<8) | ((u32)text[5]<<16) | ((u32)text[7]<<24);
+	v0 = text[0] | ((u32int)text[2]<<8) | ((u32int)text[4]<<16) | ((u32int)text[6]<<24);
+	left = text[1] | ((u32int)text[3]<<8) | ((u32int)text[5]<<16) | ((u32int)text[7]<<24);
 	right = (left & 0xaaaaaaaa) | ((v0 >> 1) & 0x55555555);
 	left = ((left << 1) & 0xaaaaaaaa) | (v0 & 0x55555555);
 	left = ((left << 6) & 0x33003300)
@@ -281,7 +281,7 @@
 /*
  * key compression permutation, 4 bits at a time
  */
-static u32 comptab[] = {
+static u32int comptab[] = {
 
 0x000000,0x010000,0x000008,0x010008,0x000080,0x010080,0x000088,0x010088,
 0x000000,0x010000,0x000008,0x010008,0x000080,0x010080,0x000088,0x010088,
@@ -332,9 +332,9 @@
 };
 
 static void
-keycompperm(u32 left, u32 right, u32 *ek)
+keycompperm(u32int left, u32int right, u32 *ek)
 {
-	u32 v0, v1;
+	u32int v0, v1;
 	int i;
 
 	for(i = 0; i < 16; i++){
@@ -371,10 +371,10 @@
 void
 des_key_setup(uchar key[8], u32 *ek)
 {
-	u32 left, right, v0, v1;
+	u32int left, right, v0, v1;
 
-	v0 = key[0] | ((u32)key[2] << 8) | ((u32)key[4] << 16) | ((u32)key[6] << 24);
-	v1 = key[1] | ((u32)key[3] << 8) | ((u32)key[5] << 16) | ((u32)key[7] << 24);
+	v0 = key[0] | ((u32int)key[2] << 8) | ((u32int)key[4] << 16) | ((u32int)key[6] << 24);
+	v1 = key[1] | ((u32int)key[3] << 8) | ((u32int)key[5] << 16) | ((u32int)key[7] << 24);
 	left = ((v0 >> 1) & 0x40404040)
 		| ((v0 >> 2) & 0x10101010)
 		| ((v0 >> 3) & 0x04040404)
@@ -433,10 +433,10 @@
 void
 des56to64(uchar *k56, uchar *k64)
 {
-	u32 hi, lo;
+	u32int hi, lo;
 
-	hi = ((u32)k56[0]<<24)|((u32)k56[1]<<16)|((u32)k56[2]<<8)|k56[3];
-	lo = ((u32)k56[4]<<24)|((u32)k56[5]<<16)|((u32)k56[6]<<8);
+	hi = ((u32int)k56[0]<<24)|((u32int)k56[1]<<16)|((u32int)k56[2]<<8)|k56[3];
+	lo = ((u32int)k56[4]<<24)|((u32int)k56[5]<<16)|((u32int)k56[6]<<8);
 
 	k64[0] = parity[(hi>>25)&0x7f];
 	k64[1] = parity[(hi>>18)&0x7f];
@@ -454,12 +454,12 @@
 void
 des64to56(uchar *k64, uchar *k56)
 {
-	u32 hi, lo;
+	u32int hi, lo;
 
-	hi = (((u32)k64[0]&0xfe)<<24)|(((u32)k64[1]&0xfe)<<17)|(((u32)k64[2]&0xfe)<<10)
+	hi = (((u32int)k64[0]&0xfe)<<24)|(((u32int)k64[1]&0xfe)<<17)|(((u32int)k64[2]&0xfe)<<10)
 		|((k64[3]&0xfe)<<3)|(k64[4]>>4);
-	lo = (((u32)k64[4]&0xfe)<<28)|(((u32)k64[5]&0xfe)<<21)|(((u32)k64[6]&0xfe)<<14)
-		|(((u32)k64[7]&0xfe)<<7);
+	lo = (((u32int)k64[4]&0xfe)<<28)|(((u32int)k64[5]&0xfe)<<21)|(((u32int)k64[6]&0xfe)<<14)
+		|(((u32int)k64[7]&0xfe)<<7);
 
 	k56[0] = hi>>24;
 	k56[1] = hi>>16;
@@ -471,7 +471,7 @@
 }
 
 void
-key_setup(uchar key[7], u32 *ek)
+key_setup(uchar key[7], uint *ek)
 {
 	uchar k64[8];
 
--- a/libsec/port/des3CBC.c
+++ b/libsec/port/des3CBC.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 // Because of the way that non multiple of 8
--- a/libsec/port/des3ECB.c
+++ b/libsec/port/des3ECB.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 // I wasn't sure what to do when the buffer was not
--- a/libsec/port/desCBC.c
+++ b/libsec/port/desCBC.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 // Because of the way that non multiple of 8
--- a/libsec/port/desECB.c
+++ b/libsec/port/desECB.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 // I wasn't sure what to do when the buffer was not
--- /dev/null
+++ b/libsec/port/dh.c
@@ -1,0 +1,74 @@
+#include "os.h"
+#include <mp.h>
+#include <libsec.h>
+
+/* Start a DH exchange in dh: draw x with mpnrand bounded by q (p-1 if q is nil), return y = g^x mod p; nil if g <= 1. */
+mpint*
+dh_new(DHstate *dh, mpint *p, mpint *q, mpint *g)
+{
+	mpint *top;	/* p - 1 */
+	int bits;
+
+	memset(dh, 0, sizeof(*dh));
+	if(mpcmp(g, mpone) <= 0)
+		return nil;
+
+	bits = mpsignif(p);
+	top = mpnew(bits);
+	mpsub(p, mpone, top);
+	dh->p = mpcopy(p);
+	dh->g = mpcopy(g);
+	dh->q = mpcopy(q == nil ? top : q);
+	dh->x = mpnew(mpsignif(dh->q));
+	dh->y = mpnew(bits);
+	do{
+		/* redraw while y <= 1 or y >= p-1 */
+		mpnrand(dh->q, genrandom, dh->x);
+		mpexp(dh->g, dh->x, dh->p, dh->y);
+	}while(mpcmp(dh->y, mpone) <= 0 || mpcmp(dh->y, top) >= 0);
+	mpfree(top);
+
+	return dh->y;
+}
+
+mpint*
+dh_finish(DHstate *dh, mpint *y)	/* returns y^x mod p, or nil if y is rejected; frees and zeroes dh either way */
+{
+	mpint *k = nil;
+
+	if(y == nil || dh->x == nil || dh->p == nil || dh->q == nil)
+		goto Out;
+
+	/* y > 1 */
+	if(mpcmp(y, mpone) <= 0)
+		goto Out;
+
+	k = mpnew(mpsignif(dh->p));
+
+	/* y < p-1 */
+	mpsub(dh->p, mpone, k);	/* k temporarily holds p-1 */
+	if(mpcmp(y, k) >= 0){
+Bad:
+		mpfree(k);
+		k = nil;
+		goto Out;
+	}
+
+	/* y**q % p == 1 if q < p-1 */
+	if(mpcmp(dh->q, k) < 0){	/* k is still p-1 here */
+		mpexp(y, dh->q, dh->p, k);
+		if(mpcmp(k, mpone) != 0)
+			goto Bad;
+	}
+
+	mpexp(y, dh->x, dh->p, k);	/* the result handed back to the caller */
+
+Out:
+	mpfree(dh->p);
+	mpfree(dh->q);
+	mpfree(dh->g);
+	mpfree(dh->x);
+	mpfree(dh->y);
+	memset(dh, 0, sizeof(*dh));
+	return k;
+}
--- a/libsec/port/dsaprimes.c
+++ b/libsec/port/dsaprimes.c
@@ -12,7 +12,7 @@
 static void
 Hrand(uchar *s)
 {
-	u32 *u = (u32*)s;
+	u32int *u = (u32int*)s;
 	*u++ = fastrand();
 	*u++ = fastrand();
 	*u++ = fastrand();
@@ -48,7 +48,7 @@
 forever:
 	do{
 		Hrand(s);
-		memmove(sj, s, 20);
+		memcpy(sj, s, 20);
 		sha1(s, 20, Hs, 0);
 		Hincr(sj);
 		sha1(sj, 20, Hs1, 0);
@@ -65,7 +65,7 @@
 	Hincr(sj);
 	mpleft(q, 1, q2);
 	while(i<4096){
-		memmove(sjk, sj, 20);
+		memcpy(sjk, sj, 20);
 		for(k=0; k <= n; k++){
 			sha1(sjk, 20, Hs, 0);
 			letomp(Hs, 20, Vk);
--- /dev/null
+++ b/libsec/port/ecc.c
@@ -1,0 +1,612 @@
+#include "os.h"
+#include <mp.h>
+#include <libsec.h>
+#include <ctype.h>
+
+extern void jacobian_affine(mpint *p,
+	mpint *X, mpint *Y, mpint *Z);
+extern void jacobian_dbl(mpint *p, mpint *a,
+	mpint *X1, mpint *Y1, mpint *Z1,
+	mpint *X3, mpint *Y3, mpint *Z3);
+extern void jacobian_add(mpint *p, mpint *a,
+	mpint *X1, mpint *Y1, mpint *Z1,
+	mpint *X2, mpint *Y2, mpint *Z2,
+	mpint *X3, mpint *Y3, mpint *Z3);
+
+/* Copy a into b; when b has no z but a does, b is converted via jacobian_affine. */
+void
+ecassign(ECdomain *dom, ECpoint *a, ECpoint *b)
+{
+	b->inf = a->inf;
+	if(b->inf)
+		return;
+	mpassign(a->x, b->x);
+	mpassign(a->y, b->y);
+	if(b->z != nil)
+		mpassign(a->z == nil ? mpone : a->z, b->z);
+	else if(a->z != nil){
+		b->z = mpcopy(a->z);	/* temporary z, only for the conversion */
+		jacobian_affine(dom->p, b->x, b->y, b->z);
+		mpfree(b->z);
+		b->z = nil;
+	}
+}
+
+void
+ecadd(ECdomain *dom, ECpoint *a, ECpoint *b, ECpoint *s)	/* s = a + b; a nil z is treated as z = 1 */
+{
+	if(a->inf && b->inf){
+		s->inf = 1;
+		return;
+	}
+	if(a->inf){	/* one operand at infinity: the result is a copy of the other */
+		ecassign(dom, b, s);
+		return;
+	}
+	if(b->inf){
+		ecassign(dom, a, s);
+		return;
+	}
+
+	if(s->z == nil){	/* s has no z: add with a temporary z, convert back, then drop it */
+		s->z = mpcopy(mpone);
+		ecadd(dom, a, b, s);
+		if(!s->inf)
+			jacobian_affine(dom->p, s->x, s->y, s->z);
+		mpfree(s->z);
+		s->z = nil;
+		return;
+	}
+
+	if(a == b)	/* same object: doubling. NOTE(review): equal points held in distinct objects take the jacobian_add path - confirm it copes */
+		jacobian_dbl(dom->p, dom->a,
+			a->x, a->y, a->z != nil ? a->z : mpone,
+			s->x, s->y, s->z);
+	else
+		jacobian_add(dom->p, dom->a,
+			a->x, a->y, a->z != nil ? a->z : mpone,
+			b->x, b->y, b->z != nil ? b->z : mpone,
+			s->x, s->y, s->z);
+	s->inf = mpcmp(s->z, mpzero) == 0;	/* z == 0 marks the point at infinity */
+}
+
+void
+ecmul(ECdomain *dom, ECpoint *a, mpint *k, ECpoint *s)	/* s = k*a by binary double-and-add, low bit first */
+{
+	ECpoint ns, na;	/* ns: running sum; na: current doubling of a */
+	mpint *l;
+
+	if(a->inf || mpcmp(k, mpzero) == 0){
+		s->inf = 1;
+		return;
+	}
+	ns.inf = 1;	/* the sum starts at infinity */
+	ns.x = mpnew(0);
+	ns.y = mpnew(0);
+	ns.z = mpnew(0);
+	na.x = mpnew(0);
+	na.y = mpnew(0);
+	na.z = mpnew(0);
+	ecassign(dom, a, &na);
+	l = mpcopy(k);
+	l->sign = 1;	/* loop over |k|; the sign is applied after the loop */
+	while(mpcmp(l, mpzero) != 0){
+		if(l->p[0] & 1)	/* NOTE(review): this branch depends on the bits of k */
+			ecadd(dom, &na, &ns, &ns);
+		ecadd(dom, &na, &na, &na);
+		mpright(l, 1, l);
+	}
+	if(k->sign < 0 && !ns.inf){	/* negative k: negate y modulo p */
+		ns.y->sign = -1;
+		mpmod(ns.y, dom->p, ns.y);
+	}
+	ecassign(dom, &ns, s);
+	mpfree(ns.x);
+	mpfree(ns.y);
+	mpfree(ns.z);
+	mpfree(na.x);
+	mpfree(na.y);
+	mpfree(na.z);
+	mpfree(l);
+}
+
+/* Return 1 if a is at infinity or satisfies y^2 = x^3 + a*x + b (mod p), else 0; a must have no z. */
+int
+ecverify(ECdomain *dom, ECpoint *a)
+{
+	mpint *lhs, *rhs;
+	int same;
+
+	if(a->inf)
+		return 1;
+	assert(a->z == nil);	/* need affine coordinates */
+	lhs = mpnew(0);
+	rhs = mpnew(0);
+	mpmodmul(a->y, a->y, dom->p, lhs);	/* y^2 */
+	mpmodmul(a->x, a->x, dom->p, rhs);	/* x^2 */
+	mpmodadd(rhs, dom->a, dom->p, rhs);	/* x^2 + a */
+	mpmodmul(rhs, a->x, dom->p, rhs);	/* x^3 + a*x */
+	mpmodadd(rhs, dom->b, dom->p, rhs);	/* x^3 + a*x + b */
+	same = mpcmp(lhs, rhs) == 0;
+	mpfree(lhs);
+	mpfree(rhs);
+	return same;
+}
+
+/* Public-key check: a is not infinity, passes ecverify, and dom->n times a is infinity. */
+int
+ecpubverify(ECdomain *dom, ECpub *a)
+{
+	ECpoint na;	/* receives n*a */
+	int atinf;
+
+	if(a->inf || !ecverify(dom, a))
+		return 0;
+
+	na.x = mpnew(0);
+	na.y = mpnew(0);
+	na.z = mpnew(0);
+	ecmul(dom, a, dom->n, &na);
+	atinf = na.inf;
+	mpfree(na.x);
+	mpfree(na.y);
+	mpfree(na.z);
+	return atinf;
+}
+
+/* Replace the hex digit character at *a by its value (octet checks isxdigit first). */
+static void
+fixnibble(uchar *a)
+{
+	uchar c = *a;
+	if(c < 'A')
+		*a = c - '0';
+	else
+		*a = c - (c < 'a' ? 'A' : 'a') + 10;
+}
+
+/* Parse two hex digits from *s (advancing it) into a byte; -1 if a non-hex character is met. */
+static int
+octet(char **s)
+{
+	uchar nib[2];
+	int i;
+
+	for(i = 0; i < 2; i++){
+		nib[i] = *(*s)++;
+		if(!isxdigit(nib[i]))
+			return -1;
+		fixnibble(&nib[i]);
+	}
+	return nib[0] << 4 | nib[1];
+}
+
+/* Parse one fixed-width hex coordinate from s into out, setting *rptr past it; nil on failure. */
+static mpint*
+halfpt(ECdomain *dom, char *s, char **rptr, mpint *out)
+{
+	char *buf, *r;
+	mpint *ret;
+	int n;
+
+	n = ((mpsignif(dom->p)+7)/8)*2;	/* two hex digits per byte of p */
+	if(strlen(s) < n || (buf = mallocz(n+1, 1)) == nil)
+		return nil;
+	memcpy(buf, s, n);	/* mallocz already zeroed the terminator at buf[n] */
+	r = buf;	/* defined value even if strtomp fails without storing the end pointer */
+	ret = strtomp(buf, &r, 16, out);
+	*rptr = s + (r - buf);
+	free(buf);
+	return ret;
+}
+
+/* Legendre/Jacobi symbol of a over b: 1, -1, or 0 when a and b share a
+ * factor.  b is expected to be odd (callers here pass the field prime). */
+static int
+mpleg(mpint *a, mpint *b)
+{
+	int r, k;
+	mpint *m, *n, *t;
+
+	r = 1;
+	m = mpcopy(a);
+	n = mpcopy(b);
+	for(;;){
+		if(mpcmp(m, n) >= 0 && mpcmp(m, mpone) != 0)	/* also reduce m == n > 1, which would otherwise loop forever */
+			mpmod(m, n, m);
+		if(mpcmp(m, mpzero) == 0){
+			r = 0;
+			break;
+		}
+		if(mpcmp(m, mpone) == 0)
+			break;
+		k = mplowbits0(m);
+		if(k > 0){
+			if(k & 1)	/* odd power of 2 removed: sign of (2/n) depends on n mod 8 */
+				switch(n->p[0] & 15){
+				case 3: case 5: case 11: case 13:
+					r = -r;
+				}
+			mpright(m, k, m);
+		}
+		if((n->p[0] & 3) == 3 && (m->p[0] & 3) == 3)	/* quadratic reciprocity */
+			r = -r;
+		t = m; m = n; n = t;
+	}
+	mpfree(m);
+	mpfree(n);
+	return r;
+}
+
+/* Square root modulo the prime p: store a root of n in r and return 1, or return 0 if n is a non-residue. r may be n. */
+static int
+mpsqrt(mpint *n, mpint *p, mpint *r)
+{
+	mpint *a, *t, *s, *xp, *xq, *yp, *yq, *zp, *zq, *N;
+	int leg;
+
+	if((leg = mpleg(n, p)) == 0)
+		uitomp(0, r);	/* n = 0 mod p: the root is 0, and the search below could never find its non-residue */
+	if(leg != 1)
+		return leg == 0;
+	a = mpnew(0);
+	t = mpnew(0);
+	s = mpnew(0);
+	N = mpnew(0);
+	xp = mpnew(0);
+	xq = mpnew(0);
+	yp = mpnew(0);
+	yq = mpnew(0);
+	zp = mpnew(0);
+	zq = mpnew(0);
+	do{	/* Cipolla: find a != 0 such that t = a*a - n is a non-residue */
+		do
+			mpnrand(p, genrandom, a);
+		while(mpcmp(a, mpzero) <= 0);
+		mpmul(a, a, t);
+		mpsub(t, n, t);
+		mpmod(t, p, t);
+	}while(mpleg(t, p) != -1);
+	mpadd(p, mpone, N);
+	mpright(N, 1, N);	/* N = (p+1)/2 */
+	mpmul(a, a, t);
+	mpsub(t, n, t);
+	mpassign(a, xp);	/* x = a + sqrt(t), kept as the pair (xp, xq) */
+	uitomp(1, xq);
+	uitomp(1, yp);	/* y = 1 accumulates x^N by square-and-multiply */
+	uitomp(0, yq);
+	while(mpcmp(N, mpzero) != 0){
+		if(N->p[0] & 1){
+			mpmul(xp, yp, zp);
+			mpmul(xq, yq, zq);
+			mpmul(zq, t, zq);
+			mpadd(zp, zq, zp);
+			mpmod(zp, p, zp);
+			mpmul(xp, yq, zq);
+			mpmul(xq, yp, s);
+			mpadd(zq, s, zq);
+			mpmod(zq, p, yq);
+			mpassign(zp, yp);
+		}
+		mpmul(xp, xp, zp);
+		mpmul(xq, xq, zq);
+		mpmul(zq, t, zq);
+		mpadd(zp, zq, zp);
+		mpmod(zp, p, zp);
+		mpmul(xp, xq, zq);
+		mpadd(zq, zq, zq);
+		mpmod(zq, p, xq);
+		mpassign(zp, xp);
+		mpright(N, 1, N);
+	}
+	if(mpcmp(yq, mpzero) != 0)
+		abort();
+	mpassign(yp, r);
+	mpfree(a);
+	mpfree(t);
+	mpfree(s);
+	mpfree(N);
+	mpfree(xp);
+	mpfree(xq);
+	mpfree(yp);
+	mpfree(yq);
+	mpfree(zp);
+	mpfree(zq);
+	return 1;
+}
+
+ECpoint*
+strtoec(ECdomain *dom, char *s, char **rptr, ECpoint *ret)
+{
+	int allocd, o;
+	mpint *r;
+	/* parse a hex-encoded point into ret (allocated here when nil); nil on error. NOTE(review): *rptr is written only on the error path */
+	allocd = ret == nil;
+	if(allocd){
+		ret = mallocz(sizeof(*ret), 1);
+		if(ret == nil)
+			return nil;
+		ret->x = mpnew(0);
+		ret->y = mpnew(0);
+	}
+	o = ret->inf = 0;
+	switch(octet(&s)){
+	case 0:	/* point at infinity */
+		ret->inf = 1;
+		break;
+	case 3:	/* compressed, odd y */
+		o = 1;	/* fall through */
+	case 2:	/* compressed: only x is given, y is a square root of x^3 + a*x + b */
+		if(halfpt(dom, s, &s, ret->x) == nil)
+			goto err;
+		r = mpnew(0);
+		mpmul(ret->x, ret->x, r);
+		mpadd(r, dom->a, r);
+		mpmul(r, ret->x, r);
+		mpadd(r, dom->b, r);
+		if(!mpsqrt(r, dom->p, r)){
+			mpfree(r);
+			goto err;
+		}
+		if((r->p[0] & 1) != o)	/* pick the root whose low bit matches the format octet */
+			mpsub(dom->p, r, r);
+		mpassign(r, ret->y);
+		mpfree(r);
+		if(!ecverify(dom, ret))
+			goto err;
+		break;
+	case 4:	/* uncompressed: x followed by y */
+		if(halfpt(dom, s, &s, ret->x) == nil)
+			goto err;
+		if(halfpt(dom, s, &s, ret->y) == nil)
+			goto err;
+		if(!ecverify(dom, ret))
+			goto err;
+		break;
+	default:	/* bad hex or unsupported format octet: fail instead of returning an unset point */
+		goto err;
+	}
+	if(ret->z != nil && !ret->inf)
+		mpassign(mpone, ret->z);
+	return ret;
+
+err:
+	if(rptr)
+		*rptr = s;
+	if(allocd){	/* only release what this call allocated */
+		mpfree(ret->x);
+		mpfree(ret->y);
+		free(ret);
+	}
+	return nil;
+}
+
+ECpriv*
+ecgen(ECdomain *dom, ECpriv *p)
+{
+	/* with no caller-supplied key, build a zeroed one holding fresh x, y and d */
+	if(p == nil){
+		if((p = mallocz(sizeof(*p), 1)) == nil)
+			return nil;
+		p->x = mpnew(0);
+		p->y = mpnew(0);
+		p->d = mpnew(0);
+	}
+	/* redraw the secret d until it is strictly positive */
+	do
+		mpnrand(dom->n, genrandom, p->d);
+	while(mpcmp(p->d, mpzero) <= 0);
+	/* multiply the base point G by d, with p itself as the destination */
+	ecmul(dom, &dom->G, p->d, p);
+	return p;
+}
+
+void
+ecdsasign(ECdomain *dom, ECpriv *priv, uchar *dig, int len, mpint *r, mpint *s)
+{
+	ECpriv tmp;
+	mpint *E, *t;
+	/* signs dig[0..len) with priv, leaving the signature in r and s; tmp is the per-signature key pair */
+	tmp.z = nil;
+	tmp.x = mpnew(0);
+	tmp.y = mpnew(0);
+	tmp.d = mpnew(0);
+	t = mpnew(0);
+	E = betomp(dig, len, nil);
+	if(8*len > mpsignif(dom->n))	/* digest wider than n: drop the excess low bits */
+		mpright(E, 8*len - mpsignif(dom->n), E);
+	for(;;){
+		ecgen(dom, &tmp);
+		mpmod(tmp.x, dom->n, r);	/* r = tmp.x mod n */
+		if(mpcmp(r, mpzero) != 0){
+			mpmul(r, priv->d, s);
+			mpadd(E, s, s);
+			mpinvert(tmp.d, dom->n, t);
+			mpmodmul(s, t, dom->n, s);	/* s = (E + r*d) / tmp.d mod n */
+			if(mpcmp(s, mpzero) != 0)
+				break;
+		}	/* a zero r or s means: try again with a fresh tmp */
+	}
+	mpfree(tmp.d);
+	mpfree(tmp.y);
+	mpfree(tmp.x);
+	mpfree(E);
+	mpfree(t);
+}
+
+int
+ecdsaverify(ECdomain *dom, ECpub *pub, uchar *dig, int len, mpint *r, mpint *s)
+{
+	mpint *E, *t, *u1, *u2;
+	ECpoint R, S;
+	int ret;
+	/* returns 1 if (r, s) verifies for dig[0..len) under pub, else 0; both r and s must lie in [1, n-1] */
+	if(mpcmp(r, mpone) < 0 || mpcmp(s, mpone) < 0 || mpcmp(r, dom->n) >= 0 || mpcmp(s, dom->n) >= 0)
+		return 0;
+	E = betomp(dig, len, nil);
+	if(mpsignif(dom->n) < 8*len)	/* digest wider than n: drop the excess low bits, as ecdsasign does */
+		mpright(E, 8*len - mpsignif(dom->n), E);
+	t = mpnew(0);
+	u1 = mpnew(0);
+	u2 = mpnew(0);
+	R.x = mpnew(0);
+	R.y = mpnew(0);
+	R.z = mpnew(0);
+	S.x = mpnew(0);
+	S.y = mpnew(0);
+	S.z = mpnew(0);
+	mpinvert(s, dom->n, t);	/* t = 1/s mod n */
+	mpmodmul(E, t, dom->n, u1);	/* u1 = E/s mod n */
+	mpmodmul(r, t, dom->n, u2);	/* u2 = r/s mod n */
+	ecmul(dom, &dom->G, u1, &R);
+	ecmul(dom, pub, u2, &S);
+	ecadd(dom, &R, &S, &R);	/* R = u1*G + u2*pub */
+	ret = 0;
+	if(!R.inf){
+		jacobian_affine(dom->p, R.x, R.y, R.z);
+		mpmod(R.x, dom->n, t);
+		ret = mpcmp(r, t) == 0;	/* valid iff r equals R.x mod n */
+	}
+	mpfree(E);
+	mpfree(t);
+	mpfree(u1);
+	mpfree(u2);
+	mpfree(R.x);
+	mpfree(R.y);
+	mpfree(R.z);
+	mpfree(S.x);
+	mpfree(S.y);
+	mpfree(S.z);
+	return ret;
+}
+
+static char *code = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";
+
+void
+base58enc(uchar *src, char *dst, int len)
+{
+	mpint *n, *r, *b;
+	char *sdst, t;	/* sdst marks the start of dst for the final in-place reversal */
+	/* writes the NUL-terminated base58 text of src[0..len) to dst */
+	sdst = dst;
+	n = betomp(src, len, nil);
+	b = uitomp(58, nil);
+	r = mpnew(0);
+	while(mpcmp(n, mpzero) != 0){	/* digits come out least significant first */
+		mpdiv(n, b, n, r);
+		*dst++ = code[mptoui(r)];
+	}
+	for(; len > 0 && *src == 0; len--, src++)	/* one code[0] per leading zero byte, never reading past src+len */
+		*dst++ = code[0];
+	mpfree(n); mpfree(b); mpfree(r);	/* release the temporaries */
+	*dst-- = 0;
+	while(dst > sdst){
+		t = *sdst;
+		*sdst++ = *dst;
+		*dst-- = t;
+	}
+}
+
+int
+base58dec(char *src, uchar *dst, int len)
+{
+	mpint *acc, *radix, *digit;
+	char *pos;
+	/* acc accumulates the value: acc = acc*58 + digit, first character most significant */
+	acc = mpnew(0);
+	digit = mpnew(0);
+	radix = uitomp(58, nil);
+	while(*src != 0){
+		pos = strchr(code, *src++);
+		if(pos == nil){	/* not in the alphabet: fail without writing dst */
+			mpfree(acc);
+			mpfree(digit);
+			mpfree(radix);
+			werrstr("invalid base58 char");
+			return -1;
+		}
+		uitomp(pos - code, digit);
+		mpmul(acc, radix, acc);
+		mpadd(acc, digit, acc);
+	}
+	mptober(acc, dst, len);	/* hand the value to mptober together with dst and len */
+	mpfree(acc);
+	mpfree(digit);
+	mpfree(radix);
+	return 0;
+}
+
+void
+ecdominit(ECdomain *dom, void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h))
+{
+	memset(dom, 0, sizeof(*dom));	/* start from an all-zero domain, then give every parameter a fresh mpint */
+	dom->p = mpnew(0);
+	dom->a = mpnew(0);
+	dom->b = mpnew(0);
+	dom->n = mpnew(0);
+	dom->h = mpnew(0);
+	dom->G.x = mpnew(0);
+	dom->G.y = mpnew(0);
+	if(init == nil)	/* no initializer: leave the parameters at zero */
+		return;
+	(*init)(dom->p, dom->a, dom->b, dom->G.x, dom->G.y, dom->n, dom->h);
+	dom->p = mpfield(dom->p);	/* p is replaced by whatever mpfield returns for it */
+}
+
+void
+ecdomfree(ECdomain *dom)
+{
+	mpfree(dom->G.x);	/* base point first, then the remaining parameters */
+	mpfree(dom->G.y);
+	mpfree(dom->h);
+	mpfree(dom->n);
+	mpfree(dom->b);
+	mpfree(dom->a);
+	mpfree(dom->p);
+	memset(dom, 0, sizeof(*dom));	/* clear the struct so no stale pointers remain in it */
+}
+
+int
+ecencodepub(ECdomain *dom, ECpub *pub, uchar *data, int len)
+{
+	int nb;
+	/* writes 0x04, then x and y through mptober with width nb; returns bytes used, or 0 if data is too small */
+	nb = (mpsignif(dom->p)+7)/8;
+	if(1 + 2*nb > len)
+		return 0;
+	*data = 0x04;
+	data++;
+	mptober(pub->x, data, nb);
+	mptober(pub->y, data+nb, nb);
+	return 1 + 2*nb;
+}
+
+ECpub*
+ecdecodepub(ECdomain *dom, uchar *data, int len)
+{
+	ECpub *pub;
+	int nb;
+	/* accepts only the 0x04 form: one marker byte, then x and y of nb bytes each; nil on any mismatch */
+	nb = (mpsignif(dom->p)+7)/8;
+	if(len != 1 + 2*nb)
+		return nil;
+	if(data[0] != 0x04)
+		return nil;
+	if((pub = mallocz(sizeof(*pub), 1)) == nil)
+		return nil;
+	pub->x = betomp(data+1, nb, nil);
+	pub->y = betomp(data+1+nb, nb, nil);
+	if(ecpubverify(dom, pub))
+		return pub;
+	ecpubfree(pub);	/* rejected by ecpubverify */
+	return nil;
+}
+
+void
+ecpubfree(ECpub *p)
+{
+	if(p != nil){	/* nil is accepted and ignored */
+		mpfree(p->x);
+		mpfree(p->y);
+		free(p);
+	}
+}
--- a/libsec/port/egtest.c
+++ b/libsec/port/egtest.c
@@ -9,7 +9,7 @@
 	mpint *m, *gamma, *delta, *in, *out;
 	int plen, shift;
 
-	fmtinstall('B', mpconv);
+	fmtinstall('B', mpfmt);
 
 	sk = egprivalloc();
 	sk->pub.p = uitomp(2357, nil);
--- a/libsec/port/fastrand.c
+++ b/libsec/port/fastrand.c
@@ -1,5 +1,5 @@
-#include	"os.h"
-#include	<libsec.h>
+#include "os.h"
+#include <libsec.h>
 
 /* 
  *  use the X917 random number generator to create random
--- a/libsec/port/genprime.c
+++ b/libsec/port/genprime.c
@@ -17,6 +17,7 @@
 	p->p[p->top-1] &= (x-1);
 	p->p[p->top-1] |= x;
 	p->p[0] |= 1;
+	mpnorm(p);
 
 	// keep icrementing till it looks prime
 	for(;;){
--- a/libsec/port/genrandom.c
+++ b/libsec/port/genrandom.c
@@ -1,77 +1,44 @@
 #include "os.h"
-#include "kernel.h"
-#include <mp.h>
 #include <libsec.h>
 
-typedef struct State{
-	int		seeded;
-	u64		seed;
-	DES3state	des3;
-} State;
-static State x917state;
-
 static void
-X917(uchar *rand, int nrand)
+init(Chachastate *cs)
 {
-	int i, m, n8;
-	u64 I, x;
+	ulong seed[11];
+	int i;
 
-	/* 1. Compute intermediate value I = Ek(time). */
-	I = nsec();
-	triple_block_cipher(x917state.des3.expanded, (uchar*)&I, 0); /* two-key EDE */
+	for(i=0; i<nelem(seed); i++)
+		seed[i] = truerand();
 
-	/* 2. x[i] = Ek(I^seed);  seed = Ek(x[i]^I); */
-	m = (nrand+7)/8;
-	for(i=0; i<m; i++){
-		x = I ^ x917state.seed;
-		triple_block_cipher(x917state.des3.expanded, (uchar*)&x, 0);
-		n8 = (nrand>8) ? 8 : nrand;
-		memmove(rand, (uchar*)&x, n8);
-		rand += 8;
-		nrand -= 8;
-		x ^= I;
-		triple_block_cipher(x917state.des3.expanded, (uchar*)&x, 0);
-		x917state.seed = x;
-	}
+	setupChachastate(cs, (uchar*)&seed[0], 32, (uchar*)&seed[8], 12, 20);
+	memset(seed, 0, sizeof(seed));
 }
 
 static void
-X917init(void)
+fill(Chachastate *cs, uchar *p, int n)
 {
-	int n;
-	uchar mix[128];
-	uchar key3[3][8];
-	u32 *ulp;
+	Chachastate c;
 
-	ulp = (u32*)key3;
-	for(n = 0; n < sizeof(key3)/sizeof(u32); n++)
-		ulp[n] = truerand();
-	setupDES3state(&x917state.des3, key3, nil);
-	X917(mix, sizeof mix);
-	x917state.seeded = 1;
+	c = *cs;
+	chacha_encrypt((uchar*)&cs->input[4], 32, &c);
+	if(++cs->input[13] == 0)
+		if(++cs->input[14] == 0)
+			++cs->input[15];
+
+	chacha_encrypt(p, n, &c);
+	memset(&c, 0, sizeof(c));
 }
 
 void
 genrandom(uchar *p, int n)
 {
-	_genrandomqlock();
-	if(x917state.seeded == 0)
-		X917init();
-	X917(p, n);
-	_genrandomqunlock();
-}
+	static QLock lk;
+	static Chachastate cs;
 
-QLock grandomlk;
-
-void
-_genrandomqlock(void)
-{
-	qlock(&grandomlk);
-}
-
-
-void
-_genrandomqunlock(void)
-{
-	qunlock(&grandomlk);
+	qlock(&lk);
+	if(cs.rounds == 0)
+		init(&cs);
+	cs.input[4] ^= getpid();	/* fork protection */
+	fill(&cs, p, n);
+	qunlock(&lk);
 }
--- /dev/null
+++ b/libsec/port/hkdf.c
@@ -1,0 +1,38 @@
+#include "os.h"
+#include <libsec.h>
+
+/* rfc5869 */
+void
+hkdf_x(salt, nsalt, info, ninfo, key, nkey, d, dlen, x, xlen)
+	uchar *salt, *info, *key, *d;
+	u32 nsalt, ninfo, nkey, dlen;
+	DigestState* (*x)(uchar*, u32, uchar*, u32, uchar*, DigestState*);	/* called as x(data, ndata, key, nkey, digest, state) */
+	int xlen;	/* number of bytes x produces per call */
+{
+	uchar prk[256], tmp[256], cnt;
+	DigestState *ds;
+	/* fills d[0..dlen) with output derived from key, salt and info, xlen bytes per round */
+	assert(xlen <= sizeof(tmp));
+
+	memset(tmp, 0, xlen);
+	if(nsalt == 0){	/* no salt given: use xlen zero bytes instead */
+		salt = tmp;
+		nsalt = xlen;
+	}
+	/* note that salt and key are swapped in this case */
+	(*x)(key, nkey, salt, nsalt, prk, nil);	/* extract: prk = x(key as data, salt as key) */
+	ds = nil;
+	for(cnt=1;; cnt++) {	/* NOTE(review): cnt is a uchar and is never checked for wrap-around */
+		if(ninfo > 0)
+			ds = (*x)(info, ninfo, prk, xlen, nil, ds);
+		(*x)(&cnt, 1, prk, xlen, tmp, ds);	/* tmp = x(previous block, if any, then info, then cnt), keyed by prk */
+		if(dlen <= xlen){	/* last, possibly partial, block */
+			memmove(d, tmp, dlen);
+			break;
+		}
+		memmove(d, tmp, xlen);
+		dlen -= xlen;
+		d += xlen;
+		ds = (*x)(tmp, xlen, prk, xlen, nil, nil);	/* seed the next round with this block */
+	}
+}
--- a/libsec/port/hmac.c
+++ b/libsec/port/hmac.c
@@ -2,27 +2,30 @@
 #include <libsec.h>
 
 /* rfc2104 */
-static DigestState*
+DigestState*
 hmac_x(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s,
 	DigestState*(*x)(uchar*, u32, uchar*, DigestState*), int xlen)
 {
 	int i;
-	uchar pad[65], innerdigest[256];
+	uchar pad[Hmacblksz+1], innerdigest[256];
 
 	if(xlen > sizeof(innerdigest))
 		return nil;
+	if(klen > Hmacblksz){
+		if(xlen > Hmacblksz)
+			return nil;
+		(*x)(key, klen, innerdigest, nil);
+		key = innerdigest;
+		klen = xlen;
+	}
 
-	if(klen>64)
-		return nil;
-
 	/* first time through */
 	if(s == nil || s->seeded == 0){
-		for(i=0; i<64; i++)
-			pad[i] = 0x36;
-		pad[64] = 0;
-		for(i=0; i<klen; i++)
+		memset(pad, 0x36, Hmacblksz);
+		pad[Hmacblksz] = 0;
+		for(i = 0; i < klen; i++)
 			pad[i] ^= key[i];
-		s = (*x)(pad, 64, nil, s);
+		s = (*x)(pad, Hmacblksz, nil, s);
 		if(s == nil)
 			return nil;
 	}
@@ -32,25 +35,12 @@
 		return s;
 
 	/* last time through */
-	for(i=0; i<64; i++)
-		pad[i] = 0x5c;
-	pad[64] = 0;
-	for(i=0; i<klen; i++)
+	memset(pad, 0x5c, Hmacblksz);
+	pad[Hmacblksz] = 0;
+	for(i = 0; i < klen; i++)
 		pad[i] ^= key[i];
 	(*x)(nil, 0, innerdigest, s);
-	s = (*x)(pad, 64, nil, nil);
+	s = (*x)(pad, Hmacblksz, nil, nil);
 	(*x)(innerdigest, xlen, digest, s);
 	return nil;
-}
-
-DigestState*
-hmac_sha1(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s)
-{
-	return hmac_x(p, len, key, klen, digest, s, sha1, SHA1dlen);
-}
-
-DigestState*
-hmac_md5(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s)
-{
-	return hmac_x(p, len, key, klen, digest, s, md5, MD5dlen);
 }
--- /dev/null
+++ b/libsec/port/jacobian.c
@@ -1,0 +1,166 @@
+#include "os.h"
+#include <mp.h>
+void jacobian_new(mpint *x, mpint *y, mpint *z, mpint *X, mpint *Y, mpint *Z){
+	mpassign(x, X);
+	mpassign(y, Y);
+	mpassign(z, Z);
+	}
+void jacobian_inf(mpint *X, mpint *Y, mpint *Z){
+	jacobian_new(mpzero, mpone, mpzero, X, Y, Z);
+	}
+void jacobian_affine(mpint *p, mpint *X, mpint *Y, mpint *Z){
+	mpint *ZZZ = mpnew(0);
+	mpint *ZZ = mpnew(0);
+	if(mpcmp(Z, mpzero) != 0){
+		mpmodmul(Z, Z, p, ZZ);
+		mpmodmul(ZZ, Z, p, ZZZ);
+		mpint *tmp1 = mpnew(0);
+		mpinvert(ZZ, p, tmp1);
+		mpmodmul(X, tmp1, p, X);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		mpinvert(ZZZ, p, tmp1);
+		mpmodmul(Y, tmp1, p, Y);
+		mpfree(tmp1);
+		mpassign(mpone, Z);
+		}
+	mpfree(ZZZ);
+	mpfree(ZZ);
+	}
+void jacobian_dbl(mpint *p, mpint *a, mpint *X1, mpint *Y1, mpint *Z1, mpint *X3, mpint *Y3, mpint *Z3){
+	mpint *M = mpnew(0);
+	mpint *S = mpnew(0);
+	mpint *ZZ = mpnew(0);
+	mpint *YYYY = mpnew(0);
+	mpint *YY = mpnew(0);
+	mpint *XX = mpnew(0);
+	if(mpcmp(Y1, mpzero) == 0){
+		jacobian_inf(X3, Y3, Z3);
+		}else{
+		mpmodmul(X1, X1, p, XX);
+		mpmodmul(Y1, Y1, p, YY);
+		mpmodmul(YY, YY, p, YYYY);
+		mpmodmul(Z1, Z1, p, ZZ);
+		mpint *tmp1 = mpnew(0);
+		mpmodadd(X1, YY, p, tmp1);
+		mpmodmul(tmp1, tmp1, p, tmp1);
+		mpmodsub(tmp1, XX, p, tmp1);
+		mpmodsub(tmp1, YYYY, p, tmp1);
+		mpmodadd(tmp1, tmp1, p, S); // 2*tmp1
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		uitomp(3UL, tmp1);
+		mpmodmul(tmp1, XX, p, M);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		mpint *tmp2 = mpnew(0);
+		mpmodmul(ZZ, ZZ, p, tmp2);
+		mpmodmul(a, tmp2, p, tmp1);
+		mpfree(tmp2);
+		mpmodadd(M, tmp1, p, M);
+		mpfree(tmp1);
+		mpmodadd(Y1, Z1, p, Z3);
+		mpmodmul(Z3, Z3, p, Z3);
+		mpmodsub(Z3, YY, p, Z3);
+		mpmodsub(Z3, ZZ, p, Z3);
+		mpmodmul(M, M, p, X3);
+		tmp1 = mpnew(0);
+		mpmodadd(S, S, p, tmp1); // 2*S
+		mpmodsub(X3, tmp1, p, X3);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		mpmodsub(S, X3, p, tmp1);
+		mpmodmul(M, tmp1, p, Y3);
+		mpfree(tmp1);
+		tmp1 = mpnew(0);
+		tmp2 = mpnew(0);
+		uitomp(8UL, tmp2);
+		mpmodmul(tmp2, YYYY, p, tmp1);
+		mpfree(tmp2);
+		mpmodsub(Y3, tmp1, p, Y3);
+		mpfree(tmp1);
+		}
+	mpfree(M);
+	mpfree(S);
+	mpfree(ZZ);
+	mpfree(YYYY);
+	mpfree(YY);
+	mpfree(XX);
+	}
+void jacobian_add(mpint *p, mpint *a, mpint *X1, mpint *Y1, mpint *Z1, mpint *X2, mpint *Y2, mpint *Z2, mpint *X3, mpint *Y3, mpint *Z3){
+	mpint *V = mpnew(0);
+	mpint *r = mpnew(0);
+	mpint *J = mpnew(0);
+	mpint *I = mpnew(0);
+	mpint *H = mpnew(0);
+	mpint *S2 = mpnew(0);
+	mpint *S1 = mpnew(0);
+	mpint *U2 = mpnew(0);
+	mpint *U1 = mpnew(0);
+	mpint *Z2Z2 = mpnew(0);
+	mpint *Z1Z1 = mpnew(0);
+	mpmodmul(Z1, Z1, p, Z1Z1);
+	mpmodmul(Z2, Z2, p, Z2Z2);
+	mpmodmul(X1, Z2Z2, p, U1);
+	mpmodmul(X2, Z1Z1, p, U2);
+	mpint *tmp1 = mpnew(0);
+	mpmodmul(Y1, Z2, p, tmp1);
+	mpmodmul(tmp1, Z2Z2, p, S1);
+	mpfree(tmp1);
+	tmp1 = mpnew(0);
+	mpmodmul(Y2, Z1, p, tmp1);
+	mpmodmul(tmp1, Z1Z1, p, S2);
+	mpfree(tmp1);
+	if(mpcmp(U1, U2) == 0){
+		if(mpcmp(S1, S2) != 0){
+			jacobian_inf(X3, Y3, Z3);
+			}else{
+			jacobian_dbl(p, a, X1, Y1, Z1, X3, Y3, Z3);
+			}
+		}else{
+		mpmodsub(U2, U1, p, H);
+		mpmodadd(H, H, p, I); // 2*H
+		mpmodmul(I, I, p, I);
+		mpmodmul(H, I, p, J);
+		mpint *tmp2 = mpnew(0);
+		mpmodsub(S2, S1, p, tmp2);
+		mpmodadd(tmp2, tmp2, p, r); // 2*tmp2
+		mpfree(tmp2);
+		mpmodmul(U1, I, p, V);
+		mpmodmul(r, r, p, X3);
+		mpmodsub(X3, J, p, X3);
+		tmp2 = mpnew(0);
+		mpmodadd(V, V, p, tmp2); // 2*V
+		mpmodsub(X3, tmp2, p, X3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpmodsub(V, X3, p, tmp2);
+		mpmodmul(r, tmp2, p, Y3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpint *tmp3 = mpnew(0);
+		mpmodadd(S1, S1, p, tmp3); // 2*S1
+		mpmodmul(tmp3, J, p, tmp2);
+		mpfree(tmp3);
+		mpmodsub(Y3, tmp2, p, Y3);
+		mpfree(tmp2);
+		tmp2 = mpnew(0);
+		mpmodadd(Z1, Z2, p, tmp2);
+		mpmodmul(tmp2, tmp2, p, tmp2);
+		mpmodsub(tmp2, Z1Z1, p, tmp2);
+		mpmodsub(tmp2, Z2Z2, p, tmp2);
+		mpmodmul(tmp2, H, p, Z3);
+		mpfree(tmp2);
+		}
+	mpfree(V);
+	mpfree(r);
+	mpfree(J);
+	mpfree(I);
+	mpfree(H);
+	mpfree(S2);
+	mpfree(S1);
+	mpfree(U2);
+	mpfree(U1);
+	mpfree(Z2Z2);
+	mpfree(Z1Z1);
+	}
--- /dev/null
+++ b/libsec/port/jacobian.mp
@@ -1,0 +1,60 @@
+# Elliptic curve group operations in jacobian coordinates:
+#	x=X/Z^2
+#	y=Y/Z^3
+
+jacobian_new(x,y,z, X,Y,Z) {
+	X = x;
+	Y = y;
+	Z = z;
+}
+jacobian_inf(X,Y,Z) {
+	X,Y,Z = jacobian_new(0,1,0);
+}
+jacobian_affine(p, X,Y,Z) mod(p) {
+	if(Z != 0) {
+		ZZ = Z^2;
+		ZZZ = ZZ*Z;
+		X = X / ZZ;
+		Y = Y / ZZZ;
+		Z = 1;
+	}
+}
+jacobian_dbl(p,a, X1,Y1,Z1, X3,Y3,Z3) mod(p) {
+	if(Y1 == 0) {
+		X3,Y3,Z3 = jacobian_inf();
+	} else {
+		XX = X1^2;
+		YY = Y1^2;
+		YYYY = YY^2;
+		ZZ = Z1^2;
+		S = 2*((X1+YY)^2-XX-YYYY);
+		M = 3*XX+a*ZZ^2;
+		Z3 = (Y1+Z1)^2-YY-ZZ;	
+		X3 = M^2-2*S;
+		Y3 = M*(S-X3)-8*YYYY;
+	}
+}
+jacobian_add(p,a, X1,Y1,Z1, X2,Y2,Z2, X3,Y3,Z3) mod(p) {
+	Z1Z1 = Z1^2;
+	Z2Z2 = Z2^2;
+	U1 = X1*Z2Z2;
+	U2 = X2*Z1Z1;
+	S1 = Y1*Z2*Z2Z2;
+	S2 = Y2*Z1*Z1Z1;
+	if(U1 == U2) {
+		if(S1 != S2) {
+			X3,Y3,Z3 = jacobian_inf();
+		} else {
+			X3,Y3,Z3 = jacobian_dbl(p,a, X1,Y1,Z1);
+		}
+	} else {
+		H = U2-U1;
+		I = (2*H)^2;
+		J = H*I;
+		r = 2*(S2-S1);
+		V = U1*I;
+		X3 = r^2-J-2*V;
+		Y3 = r*(V-X3)-2*S1*J;
+		Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H;
+	}
+}
--- a/libsec/port/md4.c
+++ b/libsec/port/md4.c
@@ -91,17 +91,17 @@
 	{ 15,	S34},	
 };
 
-static void encode(uchar*, u32*, u32);
-static void decode(u32*, uchar*, u32);
+static void encode(uchar*, u32int*, u32);
+static void decode(u32int*, uchar*, u32);
 
 static void
 md4block(uchar *p, u32 len, MD4state *s)
 {
 	int i;
-	u32 a, b, c, d, tmp;
+	u32int a, b, c, d, tmp;
 	MD4Table *t;
 	uchar *end;
-	u32 x[16];
+	u32int x[16];
 
 	for(end = p+len; p < end; p += 64){
 		a = s->state[0];
@@ -147,7 +147,7 @@
 MD4state*
 md4(uchar *p, u32 len, uchar *digest, MD4state *s)
 {
-	u32 x[16];
+	u32int x[16];
 	uchar buf[128];
 	int i;
 	uchar *e;
@@ -238,13 +238,13 @@
 }
 
 /*
- *	encodes input (u32) into output (uchar). Assumes len is
+ *	encodes input (u32int) into output (uchar). Assumes len is
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32 *input, u32 len)
+encode(uchar *output, u32int *input, u32 len)
 {
-	u32 x;
+	u32int x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
@@ -257,11 +257,11 @@
 }
 
 /*
- *	decodes input (uchar) into output (u32). Assumes len is
+ *	decodes input (uchar) into output (u32int). Assumes len is
  *	a multiple of 4.
  */
 static void
-decode(u32 *output, uchar *input, u32 len)
+decode(u32int *output, uchar *input, u32 len)
 {
 	uchar *e;
 
--- a/libsec/port/md5.c
+++ b/libsec/port/md5.c
@@ -27,14 +27,14 @@
  *	documentation and/or software.
  */
 
-static void encode(uchar*, u32*, u32);
+static void encode(uchar*, u32int*, u32);
 
-extern void _md5block(uchar*, u32, u32*);
+extern void _md5block(uchar*, u32, u32int*);
 
 MD5state*
 md5(uchar *p, u32 len, uchar *digest, MD5state *s)
 {
-	u32 x[16];
+	u32int x[16];
 	uchar buf[128];
 	int i;
 	uchar *e;
@@ -128,13 +128,13 @@
 }
 
 /*
- *	encodes input (u32) into output (uchar). Assumes len is
+ *	encodes input (u32int) into output (uchar). Assumes len is
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32 *input, u32 len)
+encode(uchar *output, u32int *input, u32 len)
 {
-	u32 x;
+	u32int x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
@@ -144,4 +144,11 @@
 		*output++ = x >> 16;
 		*output++ = x >> 24;
 	}
+}
+
+DigestState*
+hmac_md5(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, md5, MD5dlen);
 }
--- a/libsec/port/md5block.c
+++ b/libsec/port/md5block.c
@@ -50,10 +50,10 @@
 	S41=	6,
 	S42=	10,
 	S43=	15,
-	S44=	21
+	S44=	21,
 };
 
-static u32 md5tab[] =
+static u32int md5tab[] =
 {
 	/* round 1 */
 /*[0]*/	0xd76aa478,	
@@ -128,16 +128,16 @@
 	0xeb86d391,	
 };
 
-static void decode(u32*, uchar*, u32);
-extern void _md5block(uchar *p, u32 len, u32 *s);
+static void decode(u32int*, uchar*, ulong);
+extern void _md5block(uchar *p, ulong len, u32int *s);
 
 void
-_md5block(uchar *p, u32 len, u32 *s)
+_md5block(uchar *p, ulong len, u32int *s)
 {
-	u32 a, b, c, d, sh;
-	u32 *t;
+	u32int a, b, c, d, sh;
+	u32int *t;
 	uchar *end;
-	u32 x[16];
+	u32int x[16];
 
 	for(end = p+len; p < end; p += 64){
 		a = s[0];
@@ -253,11 +253,11 @@
 }
 
 /*
- *	decodes input (uchar) into output (u32). Assumes len is
+ *	decodes input (uchar) into output (u32int). Assumes len is
  *	a multiple of 4.
  */
 static void
-decode(u32 *output, uchar *input, u32 len)
+decode(u32int *output, uchar *input, ulong len)
 {
 	uchar *e;
 
--- a/libsec/port/mkfile
+++ b/libsec/port/mkfile
@@ -3,18 +3,40 @@
 LIB=libsec.a
 
 CFILES = des.c desmodes.c desECB.c desCBC.c des3ECB.c des3CBC.c\
-	aes.c blowfish.c \
-	idea.c \
-	hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
-	sha2.c sha256block.c sha512block.c\
-	sha1pickle.c md5pickle.c\
-	rc4.c\
-	genrandom.c prng.c fastrand.c nfastrand.c\
-	probably_prime.c smallprimetest.c genprime.c dsaprimes.c gensafeprime.c genstrongprime.c\
-	rsagen.c rsafill.c rsaencrypt.c rsadecrypt.c rsaalloc.c rsaprivtopub.c \
-	eggen.c egencrypt.c egdecrypt.c egalloc.c egprivtopub.c egsign.c egverify.c \
-	dsagen.c dsaalloc.c dsaprivtopub.c dsasign.c dsaverify.c \
+        aes.c aesni.c aesCBC.c aesCFB.c aesOFB.c aes_gcm.c \
+        blowfish.c \
+        hmac.c md5.c md5block.c md4.c sha1.c sha1block.c\
+        sha2_64.c sha2_128.c sha2block64.c sha2block128.c\
+        poly1305.c\
+        rc4.c\
+        chacha.c chachablock.c\
+        salsa.c\
+        genrandom.c prng.c fastrand.c nfastrand.c\
+        probably_prime.c smallprimetest.c genprime.c dsaprimes.c\
+        gensafeprime.c genstrongprime.c\
+        rsagen.c rsafill.c rsaencrypt.c rsadecrypt.c rsaalloc.c \
+        rsaprivtopub.c x509.c decodepem.c \
+        eggen.c egencrypt.c egdecrypt.c egalloc.c egprivtopub.c \
+        egsign.c egverify.c \
+        dsagen.c dsaalloc.c dsaprivtopub.c dsasign.c dsaverify.c \
+        tlshand.c thumb.c readcert.c \
+        aes_xts.c  \
+        ecc.c\
+        jacobian.c\
+        ripemd.c\
+        dh.c\
+        curve25519.c\
+        curve25519_dh.c\
+        pbkdf2.c\
+        scrypt.c\
+        hkdf.c\
+        ccpoly.c\
+        tsmemcmp.c\
+        secp256r1.c\
+        secp384r1.c\
+        secp256k1.c\
 
+
 ALLOFILES=${CFILES:%.c=%.$O}
 
 # cull things in the per-machine directories from this list
@@ -29,5 +51,20 @@
 UPDATE=mkfile\
 	$HFILES\
 	$CFILES\
+
+%.c:D:  %.mp
+	echo '#include "os.h"' > $target
+	echo '#include <mp.h>' >> $target
+	mpc $prereq >> $target
+
+# TODO fix the compile errors
+#$O.rsatest: rsatest.$O
+#	$LD -o $target $prereq
+
+$O.chachatest: chachatest.$O
+	$LD -o $target $prereq
+
+$O.aesgcmtest: aesgcmtest.$O
+	$LD -o $target $prereq
 
 <$ROOT/mkfiles/mksyslib-$SHELLTYPE
--- /dev/null
+++ b/libsec/port/pbkdf2.c
@@ -1,0 +1,34 @@
+#include "os.h"
+#include <libsec.h>
+
+/* rfc2898 */
+void
+pbkdf2_x(p, plen, s, slen, rounds, d, dlen, x, xlen)
+	uchar *p, *s, *d;
+	u32 plen, slen, dlen, rounds;
+	DigestState* (*x)(uchar*, u32, uchar*, u32, uchar*, DigestState*);	/* called as x(data, ndata, key, nkey, digest, state) */
+	int xlen;	/* number of bytes x produces per call */
+{
+	uchar block[256], tmp[256];
+	u32 i, j, k, n;
+	DigestState *ds;
+	/* fills d[0..dlen) from password p and salt s, one xlen-byte block per value of i */
+	assert(xlen <= sizeof(tmp));
+
+	for(i = 1; dlen > 0; i++, d += n, dlen -= n){	/* n is set at the bottom of the body */
+		tmp[3] = i;	/* block index i, big-endian, in tmp[0..3] */
+		tmp[2] = i >> 8;
+		tmp[1] = i >> 16;
+		tmp[0] = i >> 24;
+		ds = (*x)(s, slen, p, plen, nil, nil);
+		(*x)(tmp, 4, p, plen, block, ds);	/* block = x(salt then index), keyed by p */
+		memmove(tmp, block, xlen);
+		for(j = 1; j < rounds; j++){	/* each further round: tmp = x(tmp), folded into block by xor */
+			(*x)(tmp, xlen, p, plen, tmp, nil);
+			for(k=0; k<xlen; k++)
+				block[k] ^= tmp[k];
+		}
+		n = dlen > xlen ? xlen : dlen;	/* the final block may be cut short */
+		memmove(d, block, n); 
+	}
+}
--- /dev/null
+++ b/libsec/port/poly1305.c
@@ -1,0 +1,195 @@
+#include "os.h"
+#include <libsec.h>
+
+/*
+	poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
+
+	derived from http://github.com/floodberry/poly1305-donna
+*/
+
+#define U8TO32(p)	((u32int)(p)[0] | (u32int)(p)[1]<<8 | (u32int)(p)[2]<<16 | (u32int)(p)[3]<<24)
+#define U32TO8(p, v)	(p)[0]=(v), (p)[1]=(v)>>8, (p)[2]=(v)>>16, (p)[3]=(v)>>24
+
+/* (r,s) = (key[0:15],key[16:31]), the one time key */
+DigestState*
+poly1305(uchar *m, u32 len, uchar *key, u32 klen, uchar *digest, DigestState *s)
+{
+	u32int r0,r1,r2,r3,r4, s1,s2,s3,s4, h0,h1,h2,h3,h4, g0,g1,g2,g3,g4;
+	u64int d0,d1,d2,d3,d4, f;
+	u32int hibit, mask, c;
+
+	if(s == nil){
+		s = malloc(sizeof(*s));
+		if(s == nil)
+			return nil;
+		memset(s, 0, sizeof(*s));
+		s->malloced = 1;
+	}
+
+	if(s->seeded == 0){
+		assert(klen == 32);
+
+		/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
+		s->state[0] = (U8TO32(&key[ 0])     ) & 0x3ffffff;
+		s->state[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
+		s->state[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
+		s->state[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
+		s->state[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
+
+		/* h = 0 */
+		s->state[5] = 0;
+		s->state[6] = 0;
+		s->state[7] = 0;
+		s->state[8] = 0;
+		s->state[9] = 0;
+
+		/* save pad for later */
+		s->state[10] = U8TO32(&key[16]);
+		s->state[11] = U8TO32(&key[20]);
+		s->state[12] = U8TO32(&key[24]);
+		s->state[13] = U8TO32(&key[28]);
+
+		s->seeded = 1;
+	}
+
+	if(s->blen){
+		c = 16 - s->blen;
+		if(c > len)
+			c = len;
+		memmove(s->buf + s->blen, m, c);
+		len -= c, m += c;
+		s->blen += c;
+		if(s->blen == 16){
+			s->blen = 0;
+			poly1305(s->buf, 16, key, klen, nil, s);
+		} else if(len == 0){
+			m = s->buf;
+			len = s->blen;
+			s->blen = 0;
+		}
+	}
+
+	r0 = s->state[0];
+	r1 = s->state[1];
+	r2 = s->state[2];
+	r3 = s->state[3];
+	r4 = s->state[4];
+
+	h0 = s->state[5];
+	h1 = s->state[6];
+	h2 = s->state[7];
+	h3 = s->state[8];
+	h4 = s->state[9];
+
+	s1 = r1 * 5;
+	s2 = r2 * 5;
+	s3 = r3 * 5;
+	s4 = r4 * 5;
+
+	hibit = 1<<24;	/* 1<<128 */
+
+	while(len >= 16){
+Block:
+		/* h += m[i] */
+		h0 += (U8TO32(&m[0])     ) & 0x3ffffff;
+		h1 += (U8TO32(&m[3]) >> 2) & 0x3ffffff;
+		h2 += (U8TO32(&m[6]) >> 4) & 0x3ffffff;
+		h3 += (U8TO32(&m[9]) >> 6) & 0x3ffffff;
+		h4 += (U8TO32(&m[12])>> 8) | hibit;
+
+		/* h *= r */
+		d0 = ((u64int)h0 * r0) + ((u64int)h1 * s4) + ((u64int)h2 * s3) + ((u64int)h3 * s2) + ((u64int)h4 * s1);
+		d1 = ((u64int)h0 * r1) + ((u64int)h1 * r0) + ((u64int)h2 * s4) + ((u64int)h3 * s3) + ((u64int)h4 * s2);
+		d2 = ((u64int)h0 * r2) + ((u64int)h1 * r1) + ((u64int)h2 * r0) + ((u64int)h3 * s4) + ((u64int)h4 * s3);
+		d3 = ((u64int)h0 * r3) + ((u64int)h1 * r2) + ((u64int)h2 * r1) + ((u64int)h3 * r0) + ((u64int)h4 * s4);
+		d4 = ((u64int)h0 * r4) + ((u64int)h1 * r3) + ((u64int)h2 * r2) + ((u64int)h3 * r1) + ((u64int)h4 * r0);
+
+		/* (partial) h %= p */
+		              c = (u32int)(d0 >> 26); h0 = (u32int)d0 & 0x3ffffff;
+		d1 += c;      c = (u32int)(d1 >> 26); h1 = (u32int)d1 & 0x3ffffff;
+		d2 += c;      c = (u32int)(d2 >> 26); h2 = (u32int)d2 & 0x3ffffff;
+		d3 += c;      c = (u32int)(d3 >> 26); h3 = (u32int)d3 & 0x3ffffff;
+		d4 += c;      c = (u32int)(d4 >> 26); h4 = (u32int)d4 & 0x3ffffff;
+		h0 += c * 5;  c = (h0 >> 26); h0 = h0 & 0x3ffffff;
+		h1 += c;
+
+		len -= 16, m += 16;
+	}
+
+	if(len){
+		s->blen = len;
+		memmove(s->buf, m, len);
+	}
+
+	if(digest == nil){
+		s->state[5] = h0;
+		s->state[6] = h1;
+		s->state[7] = h2;
+		s->state[8] = h3;
+		s->state[9] = h4;
+		return s;
+	}
+
+	if(len){
+		m = s->buf;
+		m[len++] = 1;
+		while(len < 16)
+			m[len++] = 0;
+		hibit = 0;
+		goto Block;
+	}
+
+	             c = h1 >> 26; h1 = h1 & 0x3ffffff;
+	h2 +=     c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
+	h3 +=     c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
+	h4 +=     c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
+	h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
+	h1 +=     c;
+
+	/* compute h + -p */
+	g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
+	g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
+	g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
+	g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
+	g4 = h4 + c - (1 << 26);
+
+	/* select h if h < p, or h + -p if h >= p */
+	mask = (g4 >> 31) - 1;
+	g0 &= mask;
+	g1 &= mask;
+	g2 &= mask;
+	g3 &= mask;
+	g4 &= mask;
+	mask = ~mask;
+	h0 = (h0 & mask) | g0;
+	h1 = (h1 & mask) | g1;
+	h2 = (h2 & mask) | g2;
+	h3 = (h3 & mask) | g3;
+	h4 = (h4 & mask) | g4;
+
+	/* h = h % (2^128) */
+	h0 = (h0      ) | (h1 << 26);
+	h1 = (h1 >>  6) | (h2 << 20);
+	h2 = (h2 >> 12) | (h3 << 14);
+	h3 = (h3 >> 18) | (h4 <<  8);
+	
+	/* digest = (h + pad) % (2^128) */
+	f = (u64int)h0 + s->state[10]            ; h0 = (u32int)f;
+	f = (u64int)h1 + s->state[11] + (f >> 32); h1 = (u32int)f;
+	f = (u64int)h2 + s->state[12] + (f >> 32); h2 = (u32int)f;
+	f = (u64int)h3 + s->state[13] + (f >> 32); h3 = (u32int)f;
+
+	U32TO8(&digest[0], h0);
+	U32TO8(&digest[4], h1);
+	U32TO8(&digest[8], h2);
+	U32TO8(&digest[12], h3);
+
+	if(s->malloced){
+		memset(s, 0, sizeof(*s));
+		free(s);
+		return nil;
+	}
+
+	memset(s, 0, sizeof(*s));
+	return nil;
+}
--- a/libsec/port/primetest.c
+++ b/libsec/port/primetest.c
@@ -10,7 +10,7 @@
 	mpint *q = mpnew(0);
 	mpint *nine = mpnew(0);
 
-	fmtinstall('B', mpconv);
+	fmtinstall('B', mpfmt);
 	strtomp("2492491", nil, 16, z);	// 38347921 = x*y = (2**28-9)/7, 
 				//    an example of 3**(n-1)=1 mod n
 	strtomp("15662C00E811", nil, 16, p);// 23528569104401, a prime
--- a/libsec/port/prng.c
+++ b/libsec/port/prng.c
@@ -1,5 +1,4 @@
 #include "os.h"
-#include <mp.h>
 #include <libsec.h>
 
 //
--- a/libsec/port/probably_prime.c
+++ b/libsec/port/probably_prime.c
@@ -2,10 +2,12 @@
 #include <mp.h>
 #include <libsec.h>
 
-/* Miller-Rabin probabilistic primality testing */
-/*	Knuth (1981) Seminumerical Algorithms, p.379 */
-/*	Menezes et al () Handbook, p.39 */
-/* 0 if composite; 1 if almost surely prime, Pr(err)<1/4**nrep */
+/*
+ * Miller-Rabin probabilistic primality testing
+ *	Knuth (1981) Seminumerical Algorithms, p.379
+ *	Menezes et al () Handbook, p.39
+ * 0 if composite; 1 if almost surely prime, Pr(err)<1/4**nrep
+ */
 int
 probably_prime(mpint *n, int nrep)
 {
@@ -19,10 +21,10 @@
 		nrep = 18;
 
 	k = mptoi(n);
-	if(k == 2)		/* 2 is prime */
-		return 1;
 	if(k < 2)		/* 1 is not prime */
 		return 0;
+	if(k == 2 || k == 3)	/* 2, 3 is prime */
+		return 1;
 	if((n->p[0] & 1) == 0)	/* even is not prime */
 		return 0;
 
@@ -51,11 +53,11 @@
 	for(rep = 0; rep < nrep; rep++){
 		for(;;){
 			/* find x = random in [2, n-2] */
-			r = mprand(nbits, prng, nil);
-			mpmod(r, nm1, x);
-			mpfree(r);
-			if(mpcmp(x, mpone) > 0)
-				break;
+		 	r = mprand(nbits, prng, nil);
+		 	mpmod(r, nm1, x);
+		 	mpfree(r);
+		 	if(mpcmp(x, mpone) > 0)
+		 		break;
 		}
 
 		/* y = x**q mod n */
@@ -62,21 +64,21 @@
 		mpexp(x, q, n, y);
 
 		if(mpcmp(y, mpone) == 0 || mpcmp(y, nm1) == 0)
-			continue;
+		 	continue;
 
 		for(j = 1;; j++){
-			if(j >= k) {
-				isprime = 0;
-				goto done;
-			}
-			mpmul(y, y, x);
-			mpmod(x, n, y);	/* y = y*y mod n */
-			if(mpcmp(y, nm1) == 0)
-				break;
-			if(mpcmp(y, mpone) == 0){
-				isprime = 0;
-				goto done;
-			}
+		 	if(j >= k) {
+		 		isprime = 0;
+		 		goto done;
+		 	}
+		 	mpmul(y, y, x);
+		 	mpmod(x, n, y);	/* y = y*y mod n */
+		 	if(mpcmp(y, nm1) == 0)
+		 		break;
+		 	if(mpcmp(y, mpone) == 0){
+		 		isprime = 0;
+		 		goto done;
+		 	}
 		}
 	}
 	isprime = 1;
--- /dev/null
+++ b/libsec/port/readcert.c
@@ -1,0 +1,63 @@
+#include "os.h"
+#include <libsec.h>
+
+static char*
+readfile(char *name)
+{
+	int fd;
+	char *s;
+	Dir *d;
+
+	/*
+	 * returns the contents of name as a malloc'd, NUL-terminated string, or nil on any failure
+	 */
+	if((fd = open(name, OREAD|OCEXEC)) < 0)
+		return nil;
+	s = nil;
+	d = dirfstat(fd);	/* d->length is the number of bytes to read */
+	if(d != nil){
+		s = malloc(d->length + 1);
+		if(s != nil && readn(fd, s, d->length) == d->length)
+			s[d->length] = '\0';
+		else{	/* allocation failed or the read came up short */
+			free(s);
+			s = nil;
+		}
+		free(d);
+	}
+	close(fd);
+	return s;
+}
+
+uchar*
+readcert(char *filename, int *pcertlen)
+{
+	uchar *cert;
+	char *text;
+
+	/* read the whole file as text */
+	if((text = readfile(filename)) == nil){
+		werrstr("can't read %s: %r", filename);
+		return nil;
+	}
+	/* decode its "CERTIFICATE" section; pcertlen is passed through to decodePEM */
+	cert = decodePEM(text, "CERTIFICATE", pcertlen, nil);
+	free(text);
+	if(cert != nil)
+		return cert;
+	werrstr("can't parse %s", filename);
+	return nil;
+}
+
+PEMChain *
+readcertchain(char *filename)
+{
+	PEMChain *chp;	/* decoded chain, or nil when decodepemchain returns nil */
+	char *chfile;
+	if((chfile = readfile(filename)) == nil){
+		werrstr("can't read %s: %r", filename);
+		return nil;
+	}
+	chp = decodepemchain(chfile, "CERTIFICATE");
+	free(chfile);	/* readcert frees its text the same way; NOTE(review): assumes decodepemchain keeps no pointers into chfile */
+	return chp;
+}
--- /dev/null
+++ b/libsec/port/ripemd.c
@@ -1,0 +1,383 @@
+#include "os.h"
+
+#include <libsec.h>
+
+#define BYTES_TO_DWORD(strptr)                    \
+            (((u32int) *((strptr)+3) << 24) | \
+             ((u32int) *((strptr)+2) << 16) | \
+             ((u32int) *((strptr)+1) <<  8) | \
+             ((u32int) *(strptr)))
+
+#define ROL(x, n)        (((x) << (n)) | ((x) >> (32-(n))))
+
+/* the five basic functions F(), G(), H(), I() and J() */
+#define F(x, y, z)        ((x) ^ (y) ^ (z)) 
+#define G(x, y, z)        (((x) & (y)) | (~(x) & (z))) 
+#define H(x, y, z)        (((x) | ~(y)) ^ (z))
+#define I(x, y, z)        (((x) & (z)) | ((y) & ~(z))) 
+#define J(x, y, z)        ((x) ^ ((y) | ~(z)))
+  
+/* the ten basic operations FF() through JJ() and FFF() through JJJ() */
+#define FF(a, b, c, d, e, x, s)        {\
+      (a) += F((b), (c), (d)) + (x);\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define GG(a, b, c, d, e, x, s)        {\
+      (a) += G((b), (c), (d)) + (x) + 0x5a827999UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define HH(a, b, c, d, e, x, s)        {\
+      (a) += H((b), (c), (d)) + (x) + 0x6ed9eba1UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define II(a, b, c, d, e, x, s)        {\
+      (a) += I((b), (c), (d)) + (x) + 0x8f1bbcdcUL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define JJ(a, b, c, d, e, x, s)        {\
+      (a) += J((b), (c), (d)) + (x) + 0xa953fd4eUL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define FFF(a, b, c, d, e, x, s)        {\
+      (a) += F((b), (c), (d)) + (x);\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define GGG(a, b, c, d, e, x, s)        {\
+      (a) += G((b), (c), (d)) + (x) + 0x7a6d76e9UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define HHH(a, b, c, d, e, x, s)        {\
+      (a) += H((b), (c), (d)) + (x) + 0x6d703ef3UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define III(a, b, c, d, e, x, s)        {\
+      (a) += I((b), (c), (d)) + (x) + 0x5c4dd124UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+#define JJJ(a, b, c, d, e, x, s)        {\
+      (a) += J((b), (c), (d)) + (x) + 0x50a28be6UL;\
+      (a) = ROL((a), (s)) + (e);\
+      (c) = ROL((c), 10);\
+   }
+
+/* MDinit: load the five initial chaining words into MDbuf before hashing */
+static void MDinit(u32int *MDbuf)
+{
+   static u32int init[5] = {
+      0x67452301UL, 0xefcdab89UL, 0x98badcfeUL, 0x10325476UL, 0xc3d2e1f0UL
+   };
+   int i;
+
+   for (i = 0; i < 5; i++)
+      MDbuf[i] = init[i];
+}
+/* compress: fold one block X (16 words) into MDbuf (5 words): five rounds on aa..ee, five on aaa..eee, then combine */
+static void compress(u32int *MDbuf, u32int *X)
+{
+   u32int aa = MDbuf[0],  bb = MDbuf[1],  cc = MDbuf[2],
+         dd = MDbuf[3],  ee = MDbuf[4];
+   u32int aaa = MDbuf[0], bbb = MDbuf[1], ccc = MDbuf[2],
+         ddd = MDbuf[3], eee = MDbuf[4];
+
+   /* round 1 */
+   FF(aa, bb, cc, dd, ee, X[ 0], 11);
+   FF(ee, aa, bb, cc, dd, X[ 1], 14);
+   FF(dd, ee, aa, bb, cc, X[ 2], 15);
+   FF(cc, dd, ee, aa, bb, X[ 3], 12);
+   FF(bb, cc, dd, ee, aa, X[ 4],  5);
+   FF(aa, bb, cc, dd, ee, X[ 5],  8);
+   FF(ee, aa, bb, cc, dd, X[ 6],  7);
+   FF(dd, ee, aa, bb, cc, X[ 7],  9);
+   FF(cc, dd, ee, aa, bb, X[ 8], 11);
+   FF(bb, cc, dd, ee, aa, X[ 9], 13);
+   FF(aa, bb, cc, dd, ee, X[10], 14);
+   FF(ee, aa, bb, cc, dd, X[11], 15);
+   FF(dd, ee, aa, bb, cc, X[12],  6);
+   FF(cc, dd, ee, aa, bb, X[13],  7);
+   FF(bb, cc, dd, ee, aa, X[14],  9);
+   FF(aa, bb, cc, dd, ee, X[15],  8);
+                             
+   /* round 2 */
+   GG(ee, aa, bb, cc, dd, X[ 7],  7);
+   GG(dd, ee, aa, bb, cc, X[ 4],  6);
+   GG(cc, dd, ee, aa, bb, X[13],  8);
+   GG(bb, cc, dd, ee, aa, X[ 1], 13);
+   GG(aa, bb, cc, dd, ee, X[10], 11);
+   GG(ee, aa, bb, cc, dd, X[ 6],  9);
+   GG(dd, ee, aa, bb, cc, X[15],  7);
+   GG(cc, dd, ee, aa, bb, X[ 3], 15);
+   GG(bb, cc, dd, ee, aa, X[12],  7);
+   GG(aa, bb, cc, dd, ee, X[ 0], 12);
+   GG(ee, aa, bb, cc, dd, X[ 9], 15);
+   GG(dd, ee, aa, bb, cc, X[ 5],  9);
+   GG(cc, dd, ee, aa, bb, X[ 2], 11);
+   GG(bb, cc, dd, ee, aa, X[14],  7);
+   GG(aa, bb, cc, dd, ee, X[11], 13);
+   GG(ee, aa, bb, cc, dd, X[ 8], 12);
+
+   /* round 3 */
+   HH(dd, ee, aa, bb, cc, X[ 3], 11);
+   HH(cc, dd, ee, aa, bb, X[10], 13);
+   HH(bb, cc, dd, ee, aa, X[14],  6);
+   HH(aa, bb, cc, dd, ee, X[ 4],  7);
+   HH(ee, aa, bb, cc, dd, X[ 9], 14);
+   HH(dd, ee, aa, bb, cc, X[15],  9);
+   HH(cc, dd, ee, aa, bb, X[ 8], 13);
+   HH(bb, cc, dd, ee, aa, X[ 1], 15);
+   HH(aa, bb, cc, dd, ee, X[ 2], 14);
+   HH(ee, aa, bb, cc, dd, X[ 7],  8);
+   HH(dd, ee, aa, bb, cc, X[ 0], 13);
+   HH(cc, dd, ee, aa, bb, X[ 6],  6);
+   HH(bb, cc, dd, ee, aa, X[13],  5);
+   HH(aa, bb, cc, dd, ee, X[11], 12);
+   HH(ee, aa, bb, cc, dd, X[ 5],  7);
+   HH(dd, ee, aa, bb, cc, X[12],  5);
+
+   /* round 4 */
+   II(cc, dd, ee, aa, bb, X[ 1], 11);
+   II(bb, cc, dd, ee, aa, X[ 9], 12);
+   II(aa, bb, cc, dd, ee, X[11], 14);
+   II(ee, aa, bb, cc, dd, X[10], 15);
+   II(dd, ee, aa, bb, cc, X[ 0], 14);
+   II(cc, dd, ee, aa, bb, X[ 8], 15);
+   II(bb, cc, dd, ee, aa, X[12],  9);
+   II(aa, bb, cc, dd, ee, X[ 4],  8);
+   II(ee, aa, bb, cc, dd, X[13],  9);
+   II(dd, ee, aa, bb, cc, X[ 3], 14);
+   II(cc, dd, ee, aa, bb, X[ 7],  5);
+   II(bb, cc, dd, ee, aa, X[15],  6);
+   II(aa, bb, cc, dd, ee, X[14],  8);
+   II(ee, aa, bb, cc, dd, X[ 5],  6);
+   II(dd, ee, aa, bb, cc, X[ 6],  5);
+   II(cc, dd, ee, aa, bb, X[ 2], 12);
+
+   /* round 5 */
+   JJ(bb, cc, dd, ee, aa, X[ 4],  9);
+   JJ(aa, bb, cc, dd, ee, X[ 0], 15);
+   JJ(ee, aa, bb, cc, dd, X[ 5],  5);
+   JJ(dd, ee, aa, bb, cc, X[ 9], 11);
+   JJ(cc, dd, ee, aa, bb, X[ 7],  6);
+   JJ(bb, cc, dd, ee, aa, X[12],  8);
+   JJ(aa, bb, cc, dd, ee, X[ 2], 13);
+   JJ(ee, aa, bb, cc, dd, X[10], 12);
+   JJ(dd, ee, aa, bb, cc, X[14],  5);
+   JJ(cc, dd, ee, aa, bb, X[ 1], 12);
+   JJ(bb, cc, dd, ee, aa, X[ 3], 13);
+   JJ(aa, bb, cc, dd, ee, X[ 8], 14);
+   JJ(ee, aa, bb, cc, dd, X[11], 11);
+   JJ(dd, ee, aa, bb, cc, X[ 6],  8);
+   JJ(cc, dd, ee, aa, bb, X[15],  5);
+   JJ(bb, cc, dd, ee, aa, X[13],  6);
+
+   /* parallel round 1 */
+   JJJ(aaa, bbb, ccc, ddd, eee, X[ 5],  8);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[14],  9);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 7],  9);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[ 0], 11);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 9], 13);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[ 2], 15);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[11], 15);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 4],  5);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[13],  7);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 6],  7);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[15],  8);
+   JJJ(eee, aaa, bbb, ccc, ddd, X[ 8], 11);
+   JJJ(ddd, eee, aaa, bbb, ccc, X[ 1], 14);
+   JJJ(ccc, ddd, eee, aaa, bbb, X[10], 14);
+   JJJ(bbb, ccc, ddd, eee, aaa, X[ 3], 12);
+   JJJ(aaa, bbb, ccc, ddd, eee, X[12],  6);
+
+   /* parallel round 2 */
+   III(eee, aaa, bbb, ccc, ddd, X[ 6],  9); 
+   III(ddd, eee, aaa, bbb, ccc, X[11], 13);
+   III(ccc, ddd, eee, aaa, bbb, X[ 3], 15);
+   III(bbb, ccc, ddd, eee, aaa, X[ 7],  7);
+   III(aaa, bbb, ccc, ddd, eee, X[ 0], 12);
+   III(eee, aaa, bbb, ccc, ddd, X[13],  8);
+   III(ddd, eee, aaa, bbb, ccc, X[ 5],  9);
+   III(ccc, ddd, eee, aaa, bbb, X[10], 11);
+   III(bbb, ccc, ddd, eee, aaa, X[14],  7);
+   III(aaa, bbb, ccc, ddd, eee, X[15],  7);
+   III(eee, aaa, bbb, ccc, ddd, X[ 8], 12);
+   III(ddd, eee, aaa, bbb, ccc, X[12],  7);
+   III(ccc, ddd, eee, aaa, bbb, X[ 4],  6);
+   III(bbb, ccc, ddd, eee, aaa, X[ 9], 15);
+   III(aaa, bbb, ccc, ddd, eee, X[ 1], 13);
+   III(eee, aaa, bbb, ccc, ddd, X[ 2], 11);
+
+   /* parallel round 3 */
+   HHH(ddd, eee, aaa, bbb, ccc, X[15],  9);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 5],  7);
+   HHH(bbb, ccc, ddd, eee, aaa, X[ 1], 15);
+   HHH(aaa, bbb, ccc, ddd, eee, X[ 3], 11);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 7],  8);
+   HHH(ddd, eee, aaa, bbb, ccc, X[14],  6);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 6],  6);
+   HHH(bbb, ccc, ddd, eee, aaa, X[ 9], 14);
+   HHH(aaa, bbb, ccc, ddd, eee, X[11], 12);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 8], 13);
+   HHH(ddd, eee, aaa, bbb, ccc, X[12],  5);
+   HHH(ccc, ddd, eee, aaa, bbb, X[ 2], 14);
+   HHH(bbb, ccc, ddd, eee, aaa, X[10], 13);
+   HHH(aaa, bbb, ccc, ddd, eee, X[ 0], 13);
+   HHH(eee, aaa, bbb, ccc, ddd, X[ 4],  7);
+   HHH(ddd, eee, aaa, bbb, ccc, X[13],  5);
+
+   /* parallel round 4 */   
+   GGG(ccc, ddd, eee, aaa, bbb, X[ 8], 15);
+   GGG(bbb, ccc, ddd, eee, aaa, X[ 6],  5);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 4],  8);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 1], 11);
+   GGG(ddd, eee, aaa, bbb, ccc, X[ 3], 14);
+   GGG(ccc, ddd, eee, aaa, bbb, X[11], 14);
+   GGG(bbb, ccc, ddd, eee, aaa, X[15],  6);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 0], 14);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 5],  6);
+   GGG(ddd, eee, aaa, bbb, ccc, X[12],  9);
+   GGG(ccc, ddd, eee, aaa, bbb, X[ 2], 12);
+   GGG(bbb, ccc, ddd, eee, aaa, X[13],  9);
+   GGG(aaa, bbb, ccc, ddd, eee, X[ 9], 12);
+   GGG(eee, aaa, bbb, ccc, ddd, X[ 7],  5);
+   GGG(ddd, eee, aaa, bbb, ccc, X[10], 15);
+   GGG(ccc, ddd, eee, aaa, bbb, X[14],  8);
+
+   /* parallel round 5 */
+   FFF(bbb, ccc, ddd, eee, aaa, X[12] ,  8);
+   FFF(aaa, bbb, ccc, ddd, eee, X[15] ,  5);
+   FFF(eee, aaa, bbb, ccc, ddd, X[10] , 12);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 4] ,  9);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 1] , 12);
+   FFF(bbb, ccc, ddd, eee, aaa, X[ 5] ,  5);
+   FFF(aaa, bbb, ccc, ddd, eee, X[ 8] , 14);
+   FFF(eee, aaa, bbb, ccc, ddd, X[ 7] ,  6);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 6] ,  8);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 2] , 13);
+   FFF(bbb, ccc, ddd, eee, aaa, X[13] ,  6);
+   FFF(aaa, bbb, ccc, ddd, eee, X[14] ,  5);
+   FFF(eee, aaa, bbb, ccc, ddd, X[ 0] , 15);
+   FFF(ddd, eee, aaa, bbb, ccc, X[ 3] , 13);
+   FFF(ccc, ddd, eee, aaa, bbb, X[ 9] , 11);
+   FFF(bbb, ccc, ddd, eee, aaa, X[11] , 11);
+
+   /* combine results: each state word takes the next word plus one value from each line */
+   ddd += cc + MDbuf[1];               /* final result for MDbuf[0] */
+   MDbuf[1] = MDbuf[2] + dd + eee;
+   MDbuf[2] = MDbuf[3] + ee + aaa;
+   MDbuf[3] = MDbuf[4] + aa + bbb;
+   MDbuf[4] = MDbuf[0] + bb + ccc;
+   MDbuf[0] = ddd;
+
+   return;
+}
+/*
+ * MDfinish: pad the trailing (lswlen mod 64) bytes found at strptr, append
+ * the message length in bits (lswlen low word, mswlen high word), compress.
+ */
+static void MDfinish(u32int *MDbuf, uchar *strptr, u32int lswlen, u32int mswlen)
+{
+   u32int block[16];          /* final message words */
+   unsigned int tail, n;
+
+   tail = lswlen & 63;        /* bytes left over after the last full block */
+   memset(block, 0, sizeof(block));
+
+   /* pack the tail bytes, least significant byte of each word first */
+   for (n = 0; n < tail; n++)
+      block[n >> 2] ^= (u32int) strptr[n] << (8 * (n & 3));
+
+   /* the 1 bit that starts the padding: top bit of the byte after the tail */
+   block[(lswlen >> 2) & 15] ^= (u32int)1 << (8 * (lswlen & 3) + 7);
+
+   if (tail > 55) {
+      /* no room left for the length words: they go in a block of their own */
+      compress(MDbuf, block);
+      memset(block, 0, sizeof(block));
+   }
+
+   /* message length in bits, low word first */
+   block[14] = lswlen << 3;
+   block[15] = (lswlen >> 29) | (mswlen << 3);
+   compress(MDbuf, block);
+}
+
+/* ripemd160: hash len bytes at p into s (malloc'd when nil); nil digest returns s, else digest is filled and nil returned */
+DigestState*
+ripemd160(uchar *p, u32 len, uchar *digest, DigestState *s)
+{
+	u32int x[16];
+	int i, j, k;
+
+	if(s == nil){
+		s = malloc(sizeof(*s));
+		if(s == nil)
+			return nil;
+		memset(s, 0, sizeof(*s));
+		s->malloced = 1;
+	}
+
+	if(s->seeded == 0){
+		MDinit(s->state);
+		s->seeded = 1;
+	}
+
+	/* fill out the partial 64 byte block from previous calls */
+	if(s->blen){
+		i = 64 - s->blen;
+		if(len < i)
+			i = len;
+		memmove(s->buf + s->blen, p, i);
+		len -= i;
+		s->blen += i;
+		p += i;
+		if(s->blen == 64){
+			for(i = 0; i < 16; i++)
+				x[i] = BYTES_TO_DWORD(s->buf + i * 4);
+			compress(s->state, x);
+			s->len += s->blen;
+			s->blen = 0;
+		}
+	}
+
+	/* do 64 byte blocks */
+	i = len & ~0x3f;
+	if(i){
+		for(j = 0; j < i; j += 64){
+			for(k = 0; k < 16; k++)
+				x[k] = BYTES_TO_DWORD(p + j + k * 4);
+			compress(s->state, x);
+		}
+		s->len += i;
+		len -= i;
+		p += i;
+	}
+
+	/* save the left overs if not last call */
+	if(digest == 0){
+		if(len){
+			memmove(s->buf, p, len);
+			s->blen += len;
+		}
+		return s;
+	}
+
+	/* last call: the tail is in s->buf if earlier calls left one (len is then 0), else at p */
+	MDfinish(s->state, s->blen ? s->buf : p, s->len + s->blen + len, 0);
+	for(i = 0; i < 5; i++){
+		digest[4 * i] = s->state[i];
+		digest[4 * i + 1] = s->state[i] >> 8;
+		digest[4 * i + 2] = s->state[i] >> 16;
+		digest[4 * i + 3] = s->state[i] >> 24;
+	}
+	if(s->malloced == 1)
+		free(s);
+	return nil;
+}
--- a/libsec/port/rsagen.c
+++ b/libsec/port/rsagen.c
@@ -26,9 +26,13 @@
 	// find an e relatively prime to phi
 	t1 = mpnew(0);
 	t2 = mpnew(0);
-	mprand(elen, genrandom, e);
-	if(mpcmp(e,mptwo) <= 0)
-		itomp(3, e);
+	if(elen == 0)
+		itomp(65537, e);
+	else {
+		mprand(elen, genrandom, e);
+		if(mpcmp(e,mptwo) <= 0)
+			itomp(3, e);
+	}
 	// See Menezes et al. p.291 "8.8 Note (selecting primes)" for discussion
 	// of the merits of various choices of primes and exponents.  e=3 is a
 	// common and recommended exponent, but doesn't necessarily work here
@@ -39,6 +43,8 @@
 			break;
 		mpadd(mpone, e, e);
 	}
+	if(d->sign < 0)
+		mpadd(phi, d, d);
 	mpfree(t1);
 	mpfree(t2);
 
--- a/libsec/port/rsatest.c
+++ b/libsec/port/rsatest.c
@@ -6,14 +6,13 @@
 void
 main(void)
 {
-	RSApriv *rsa;
-	Biobuf b;
-	char *p;
 	int n;
-	mpint *clr, *enc, *clr2;
-	uchar buf[4096];
-	uchar *e;
 	vlong start;
+	char *p;
+	uchar buf[4096];
+	Biobuf b;
+	RSApriv *rsa;
+	mpint *clr, *enc, *clr2;
 
 	fmtinstall('B', mpfmt);
 
--- /dev/null
+++ b/libsec/port/salsa.c
@@ -1,0 +1,308 @@
+#include "os.h"
+#include <libsec.h>
+
+/* little-endian data order; bytes are widened first so (p)[3]<<24 cannot overflow a signed int */
+#define	GET4(p)		((u32int)(p)[0]|((u32int)(p)[1]<<8)|((u32int)(p)[2]<<16)|((u32int)(p)[3]<<24))
+#define	PUT4(p,v)	(p)[0]=(v);(p)[1]=(v)>>8;(p)[2]=(v)>>16;(p)[3]=(v)>>24
+
+#define ROTATE(v,c) (t = v, (u32int)(t << (c)) | (t >> (32 - (c))))
+
+#define ENCRYPT(s, x, y, d) {\
+	u32int v; \
+	v = GET4(s); \
+	v ^= (x)+(y); \
+	PUT4(d, v); \
+}
+
+static uchar sigma[16] = "expand 32-byte k";
+static uchar tau[16] = "expand 16-byte k";
+/*
+ * load: read nw little-endian 32-bit words from s into d.
+ */
+static void
+load(u32int *d, uchar *s, int nw)
+{
+	for(; nw > 0; nw--, s += 4, d++)
+		*d = GET4(s);
+}
+
+/*
+ * setupSalsastate: fill s from a 16- or 32-byte key and an optional iv of
+ * ivlen bytes (8, 16 or 24); rounds == 0 means 20; bad lengths are fatal.
+ */
+void
+setupSalsastate(Salsastate *s, uchar *key, u32 keylen, uchar *iv, u32 ivlen, int rounds)
+{
+	uchar *konst, *khi;
+	int i;
+
+	if(keylen != 256/8 && keylen != 128/8)
+		sysfatal("invalid salsa key length");
+	if(ivlen != 64/8
+	&& ivlen != 128/8 && ivlen != 192/8)	/* hsalsa, xsalsa */
+		sysfatal("invalid salsa iv length");
+	s->rounds = rounds == 0 ? 20 : rounds;
+	if(keylen == 256/8){	/* recommended */
+		konst = sigma;
+		khi = key + 16;
+	}else{
+		konst = tau;
+		khi = key;	/* a 128-bit key is used for both halves */
+	}
+	load(&s->input[0], konst + 4*0, 1);
+	load(&s->input[1], key, 4);
+	load(&s->input[5], konst + 4*1, 1);
+	load(&s->input[10], konst + 4*2, 1);
+	load(&s->input[11], khi, 4);
+	load(&s->input[15], konst + 4*3, 1);
+	/* remember the key words; salsa_setiv puts them back for a 192-bit iv */
+	for(i = 0; i < 4; i++){
+		s->xkey[i] = s->input[1 + i];
+		s->xkey[4 + i] = s->input[11 + i];
+	}
+
+	s->ivwords = ivlen/4;
+	s->input[8] = 0;
+	s->input[9] = 0;
+	if(iv == nil){
+		s->input[6] = 0;
+		s->input[7] = 0;
+	}else
+		salsa_setiv(s, iv);
+}
+/* dorounds: transform x in place, two rounds per loop pass (so an odd count is rounded up) */
+static void
+dorounds(u32int x[16], int rounds)
+{
+	u32int t;	/* temporary used inside ROTATE */
+
+	for(; rounds > 0; rounds -= 2) {
+	     x[4] ^= ROTATE( x[0]+x[12], 7);	/* first half: words 4 apart (columns of a 4x4 layout) */
+	     x[8] ^= ROTATE( x[4]+ x[0], 9);
+	    x[12] ^= ROTATE( x[8]+ x[4],13);
+	     x[0] ^= ROTATE(x[12]+ x[8],18);
+	     x[9] ^= ROTATE( x[5]+ x[1], 7);
+	    x[13] ^= ROTATE( x[9]+ x[5], 9);
+	     x[1] ^= ROTATE(x[13]+ x[9],13);
+	     x[5] ^= ROTATE( x[1]+x[13],18);
+	    x[14] ^= ROTATE(x[10]+ x[6], 7);
+	     x[2] ^= ROTATE(x[14]+x[10], 9);
+	     x[6] ^= ROTATE( x[2]+x[14],13);
+	    x[10] ^= ROTATE( x[6]+ x[2],18);
+	     x[3] ^= ROTATE(x[15]+x[11], 7);
+	     x[7] ^= ROTATE( x[3]+x[15], 9);
+	    x[11] ^= ROTATE( x[7]+ x[3],13);
+	    x[15] ^= ROTATE(x[11]+ x[7],18);
+	     x[1] ^= ROTATE( x[0]+ x[3], 7);	/* second half: adjacent words (rows of the layout) */
+	     x[2] ^= ROTATE( x[1]+ x[0], 9);
+	     x[3] ^= ROTATE( x[2]+ x[1],13);
+	     x[0] ^= ROTATE( x[3]+ x[2],18);
+	     x[6] ^= ROTATE( x[5]+ x[4], 7);
+	     x[7] ^= ROTATE( x[6]+ x[5], 9);
+	     x[4] ^= ROTATE( x[7]+ x[6],13);
+	     x[5] ^= ROTATE( x[4]+ x[7],18);
+	    x[11] ^= ROTATE(x[10]+ x[9], 7);
+	     x[8] ^= ROTATE(x[11]+x[10], 9);
+	     x[9] ^= ROTATE( x[8]+x[11],13);
+	    x[10] ^= ROTATE( x[9]+ x[8],18);
+	    x[12] ^= ROTATE(x[15]+x[14], 7);
+	    x[13] ^= ROTATE(x[12]+x[15], 9);
+	    x[14] ^= ROTATE(x[13]+x[12],13);
+	    x[15] ^= ROTATE(x[14]+x[13],18);
+	}
+}
+/* hsalsablock: run the rounds on a copy of s->input and store words 0,5,10,15,6,7,8,9 in h; the input is not added back */
+static void
+hsalsablock(uchar h[32], Salsastate *s)
+{
+	u32int x[16];
+
+	x[0] = s->input[0];
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	dorounds(x, s->rounds);
+
+	PUT4(h+0*4, x[0]);	/* the four words that were loaded from sigma/tau */
+	PUT4(h+1*4, x[5]);
+	PUT4(h+2*4, x[10]);
+	PUT4(h+3*4, x[15]);
+	PUT4(h+4*4, x[6]);	/* the four words salsa_setiv fills from a 16-byte iv */
+	PUT4(h+5*4, x[7]);
+	PUT4(h+6*4, x[8]);
+	PUT4(h+7*4, x[9]);
+}
+
+/*
+ * salsa_setiv: install a new iv of the size given to setupSalsastate.
+ * 16 bytes fill words 6..9; 24 bytes derive a subkey with hsalsablock
+ * from the saved key and the first 16 bytes, keeping the block counter.
+ */
+void
+salsa_setiv(Salsastate *s, uchar *iv)
+{
+	u32int blk0, blk1;
+	uchar sub[32];
+	int i;
+
+	switch(s->ivwords){
+	case 128/32:	/* hsalsa with 128-bit iv */
+		load(&s->input[6], iv, 4);
+		return;
+	case 192/32:	/* xsalsa with 192-bit iv */
+		blk0 = s->input[8];	/* the 4-word load below overwrites the counter */
+		blk1 = s->input[9];
+
+		/* put back the key words saved in xkey; an earlier call replaced them */
+		for(i = 0; i < 4; i++){
+			s->input[1 + i] = s->xkey[i];
+			s->input[11 + i] = s->xkey[4 + i];
+		}
+		load(&s->input[6], iv, 4);
+
+		hsalsablock(sub, s);
+		load(&s->input[1], sub + 16*0, 4);
+		load(&s->input[11], sub + 16*1, 4);
+		memset(sub, 0, 32);
+
+		s->input[8] = blk0;
+		s->input[9] = blk1;
+		iv += 16;
+		break;
+	}
+	/* 64-bit iv, or the last 8 bytes of a 192-bit one */
+	load(&s->input[6], iv, 2);
+}
+/* salsa_setblock: set the 64-bit block counter kept in words 8 (low half) and 9 (high half) */
+void
+salsa_setblock(Salsastate *s, u64int blockno)
+{
+	s->input[9] = blockno>>32;	/* high half */
+	s->input[8] = blockno;		/* low half */
+}
+/* encryptblock: xor 64 bytes of src with (rounds(input) + input) into dst, then step the counter in words 8 and 9 */
+static void
+encryptblock(Salsastate *s, uchar *src, uchar *dst)
+{
+	u32int x[16];
+	int i;
+
+	x[0] = s->input[0];
+	x[1] = s->input[1];
+	x[2] = s->input[2];
+	x[3] = s->input[3];
+	x[4] = s->input[4];
+	x[5] = s->input[5];
+	x[6] = s->input[6];
+	x[7] = s->input[7];
+	x[8] = s->input[8];
+	x[9] = s->input[9];
+	x[10] = s->input[10];
+	x[11] = s->input[11];
+	x[12] = s->input[12];
+	x[13] = s->input[13];
+	x[14] = s->input[14];
+	x[15] = s->input[15];
+
+	dorounds(x, s->rounds);
+
+	for(i=0; i<nelem(x); i+=4){	/* four words (16 bytes) per pass */
+		ENCRYPT(src, x[i], s->input[i], dst);
+		ENCRYPT(src+4, x[i+1], s->input[i+1], dst+4);
+		ENCRYPT(src+8, x[i+2], s->input[i+2], dst+8);
+		ENCRYPT(src+12, x[i+3], s->input[i+3], dst+12);
+		src += 16;
+		dst += 16;
+	}
+
+	if(++s->input[8] == 0)	/* low counter word wrapped: carry into the high word */
+		s->input[9]++;
+}
+/*
+ * salsa_encrypt2: encrypt bytes from src into dst, one SalsaBsize block at
+ * a time; a final partial block goes through a stack buffer.
+ */
+void
+salsa_encrypt2(uchar *src, uchar *dst, u32 bytes, Salsastate *s)
+{
+	uchar tail[SalsaBsize];
+
+	for(; bytes >= SalsaBsize; bytes -= SalsaBsize, src += SalsaBsize, dst += SalsaBsize)
+		encryptblock(s, src, dst);
+	if(bytes == 0)
+		return;
+	memmove(tail, src, bytes);	/* encryptblock always processes a whole block */
+	encryptblock(s, tail, tail);
+	memmove(dst, tail, bytes);
+}
+/* salsa_encrypt: encrypt bytes of buf in place (salsa_encrypt2 with src == dst) */
+void
+salsa_encrypt(uchar *buf, u32 bytes, Salsastate *s)
+{
+	salsa_encrypt2(buf, buf, bytes, s);
+}
+/* salsa_core: out[i] = in[i] + rounds(in)[i] for all 16 words; scryptBlockMix calls it with out == in */
+void
+salsa_core(u32int in[16], u32int out[16], int rounds)
+{
+	u32int x[16];
+
+	x[0] = in[0];
+	x[1] = in[1];
+	x[2] = in[2];
+	x[3] = in[3];
+	x[4] = in[4];
+	x[5] = in[5];
+	x[6] = in[6];
+	x[7] = in[7];
+	x[8] = in[8];
+	x[9] = in[9];
+	x[10] = in[10];
+	x[11] = in[11];
+	x[12] = in[12];
+	x[13] = in[13];
+	x[14] = in[14];
+	x[15] = in[15];
+
+	dorounds(x, rounds);
+
+	out[0] = x[0] + in[0];	/* in[i] is read before out[i] is written, so aliasing is safe */
+	out[1] = x[1] + in[1];
+	out[2] = x[2] + in[2];
+	out[3] = x[3] + in[3];
+	out[4] = x[4] + in[4];
+	out[5] = x[5] + in[5];
+	out[6] = x[6] + in[6];
+	out[7] = x[7] + in[7];
+	out[8] = x[8] + in[8];
+	out[9] = x[9] + in[9];
+	out[10] = x[10] + in[10];
+	out[11] = x[11] + in[11];
+	out[12] = x[12] + in[12];
+	out[13] = x[13] + in[13];
+	out[14] = x[14] + in[14];
+	out[15] = x[15] + in[15];
+}
+/* hsalsa: write the 32-byte hsalsablock output for key and a 16-byte nonce to h, then wipe the state */
+void
+hsalsa(uchar h[32], uchar *key, u32 keylen, uchar nonce[16], int rounds)
+{
+	Salsastate st;
+
+	setupSalsastate(&st, key, keylen, nonce, 16, rounds);
+	hsalsablock(h, &st);
+	memset(&st, 0, sizeof(st));
+}
--- /dev/null
+++ b/libsec/port/scrypt.c
@@ -1,0 +1,119 @@
+#include "os.h"
+#include <libsec.h>
+
+#define movw(w, S, D)	memmove(D, S, (w)*4)
+
+/*
+ * xorw: xor w words of S into D, eight words per pass; a remainder of
+ * w modulo 8 would be ignored (the callers here pass multiples of 8).
+ */
+static void
+xorw(u32 w, u32int *S, u32int *D)
+{
+	u32 k;
+
+	for(w /= 8; w != 0; w--, S += 8, D += 8){
+		for(k = 0; k < 8; k++)
+			D[k] ^= S[k];
+	}
+}
+/*
+ * scryptBlockMix: mix the 2*R 64-byte blocks of B into Y with salsa_core
+ * (8 rounds), chaining X from block to block; even-numbered results go
+ * to the first half of Y and odd-numbered ones to the second half.
+ */
+static void
+scryptBlockMix(u32 R, u32int *B, u32int *Y)
+{
+	u32int X[16];
+	u32 blk, nblk;
+
+	nblk = 2*R;
+	movw(16, &B[(nblk-1)*16], X);	/* start from the last block */
+	for(blk = 0; blk < nblk; blk++){
+		xorw(16, &B[blk*16], X);
+		salsa_core(X, X, 8);
+		movw(16, X, &Y[(blk/2)*16 + (blk&1)*nblk*8]);
+	}
+}
+/*
+ * scryptROMix: expand block B (R*128 bytes) through the N-entry table V,
+ * using X (2*R*128 bytes) as scratch, and write the result back over B.
+ */
+static void
+scryptROMix(u32 R, u32 N, u32int *V, u32int *X, uchar *B)
+{
+	u32 w, i, d;
+	u32int *Y;
+
+	w = R*32;
+	/* widen before shifting: a promoted uchar shifted into bit 31 overflows int */
+	for(i=0; i<w; i++, B+=4)
+		X[i] = (u32int)B[0] | (u32int)B[1]<<8 | (u32int)B[2]<<16 | (u32int)B[3]<<24;
+	Y = &X[w];
+	for(i=0; i<N; i += 2){	/* fill V, two entries per pass */
+		movw(w, X, &V[i*w]);
+		scryptBlockMix(R, X, Y);
+		movw(w, Y, &V[(i+1)*w]);
+		scryptBlockMix(R, Y, X);
+	}
+	for(i=0; i<N; i += 2){	/* mix in the entries selected by the running state */
+		xorw(w, &V[(X[w-16] & (N-1))*w], X);
+		scryptBlockMix(R, X, Y);
+		xorw(w, &V[(Y[w-16] & (N-1))*w], Y);
+		scryptBlockMix(R, Y, X);
+	}
+	B -= w*4;	/* rewind to the start of the block */
+	for(i=0; i<w; i++, B+=4)
+		d = X[i], B[0]=d, B[1]=d>>8, B[2]=d>>16, B[3]=d>>24;
+}
+/*
+ * scrypt: derive dlen bytes into d from password p and salt s with cost N (a power
+ * of two), block size R and parallelization P; returns nil, or an error string.
+ */
+char*
+scrypt(uchar *p, u32 plen, uchar *s, u32 slen, u32 N, u32 R, u32 P, uchar *d, u32 dlen)
+{
+	static char oom[] = "out of memory";
+	u32 rb, i;
+	u32int *V, *X;
+	uchar *B;
+
+	if(P < 1)
+		return "invalid parallelization parameter P";
+	if(R < 1 || R >= (1UL<<(31-7))/P)
+		return "invalid block size parameter R";
+	if(N < 2 || (N & (N-1)) != 0 || N >= (1UL<<(31-7))/R)
+		return "invalid cpu/memory cost parameter N";
+
+	rb = R<<7;
+	if((B = malloc(P*rb)) == nil)
+		return oom;
+	if((V = malloc(N*rb)) == nil){
+		free(B);
+		return oom;
+	}
+	if((X = malloc(2*rb)) == nil){
+		free(V);
+		free(B);
+		return oom;
+	}
+
+	pbkdf2_x(p, plen, s, slen, 1, B, P*rb, hmac_sha2_256, SHA2_256dlen);
+
+	for(i=0; i<P; i++)
+		scryptROMix(R, N, V, X, &B[i*rb]);
+
+	memset(X, 0, 2*rb);
+	free(X);
+
+	memset(V, 0, N*rb);
+	free(V);
+
+	pbkdf2_x(p, plen, B, P*rb, 1, d, dlen, hmac_sha2_256, SHA2_256dlen);
+
+	memset(B, 0, P*rb);
+	free(B);
+
+	return nil;
+}
--- /dev/null
+++ b/libsec/port/secp256k1.c
@@ -1,0 +1,11 @@
+#include "os.h"
+#include <mp.h>
+void secp256k1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h){	/* fills in the parameters of E: y^2 = x^3 + ax + b; same values as secp256k1.mp */
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", nil, 16, p);
+	mpassign(mpzero, a);	/* a = 0 */
+	uitomp(7UL, b);	/* b = 7 */
+	strtomp("79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", nil, 16, x);
+	strtomp("483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", nil, 16, y);
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", nil, 16, n);
+	mpassign(mpone, h);	/* h = 1 */
+	}
--- /dev/null
+++ b/libsec/port/secp256k1.mp
@@ -1,0 +1,10 @@
+# E: y² = x³ + ax + b 
+secp256k1(p,a,b,x,y,n,h) {
+	p = 2^256 - 2^32 - 2^9 - 2^8 - 2^7 - 2^6 - 2^4 - 1;
+	a = 0;
+	b = 7;
+	x = 0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798;
+	y = 0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8;
+	n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141;
+	h = 1;
+}
--- /dev/null
+++ b/libsec/port/secp256r1.c
@@ -1,0 +1,12 @@
+#include "os.h"
+#include <mp.h>
+void secp256r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h){	/* fills in the parameters of E: y^2 = x^3 + ax + b; same values as secp256r1.mp */
+	strtomp("FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", nil, 16, p);
+	uitomp(3UL, a);
+	mpsub(p, a, a);	/* a = p - 3 */
+	strtomp("5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", nil, 16, b);
+	strtomp("6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", nil, 16, x);
+	strtomp("4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", nil, 16, y);
+	strtomp("FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", nil, 16, n);
+	mpassign(mpone, h);	/* h = 1 */
+	}
--- /dev/null
+++ b/libsec/port/secp256r1.mp
@@ -1,0 +1,10 @@
+# E: y² = x³ + ax + b 
+secp256r1(p,a,b,x,y,n,h) {
+	p = 2^256 - 2^224 + 2^192 + 2^96 - 1;
+	a = p - 3;
+	b = 0x5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B;
+	x = 0x6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296;
+	y = 0x4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5;
+	n = 0xFFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551;
+	h = 1;
+}
--- /dev/null
+++ b/libsec/port/secp384r1.c
@@ -1,0 +1,12 @@
+#include "os.h"
+#include <mp.h>
+void secp384r1(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h){	/* fills in the parameters of E: y^2 = x^3 + ax + b; same values as secp384r1.mp */
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", nil, 16, p);
+	uitomp(3UL, a);
+	mpsub(p, a, a);	/* a = p - 3 */
+	strtomp("B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", nil, 16, b);
+	strtomp("AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", nil, 16, x);
+	strtomp("3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", nil, 16, y);
+	strtomp("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", nil, 16, n);
+	mpassign(mpone, h);	/* h = 1 */
+	}
--- /dev/null
+++ b/libsec/port/secp384r1.mp
@@ -1,0 +1,10 @@
+# E: y² = x³ + ax + b 
+secp384r1(p,a,b,x,y,n,h) {
+	p = 2^384 - 2^128 - 2^96 + 2^32 - 1;
+	a = p - 3;
+	b = 0xB3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF;
+	x = 0xAA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7;
+	y = 0x3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F;
+	n = 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973;
+	h = 1;
+}
--- a/libsec/port/sha1.c
+++ b/libsec/port/sha1.c
@@ -1,9 +1,9 @@
 #include "os.h"
 #include <libsec.h>
 
-static void encode(uchar*, u32*, u32);
+static void encode(uchar*, u32int*, u32);
 
-extern void _sha1block(uchar*, u32, u32*);
+extern void _sha1block(uchar*, u32, u32int*);
 
 /*
  *  we require len to be a multiple of 64 for all but
@@ -14,7 +14,7 @@
 sha1(uchar *p, u32 len, uchar *digest, SHA1state *s)
 {
 	uchar buf[128];
-	u32 x[16];
+	u32int x[16];
 	int i;
 	uchar *e;
 
@@ -112,9 +112,9 @@
  *	a multiple of 4.
  */
 static void
-encode(uchar *output, u32 *input, u32 len)
+encode(uchar *output, u32int *input, u32 len)
 {
-	u32 x;
+	u32int x;
 	uchar *e;
 
 	for(e = output + len; output < e;) {
@@ -124,4 +124,11 @@
 		*output++ = x >> 8;
 		*output++ = x;
 	}
+}
+
+DigestState*
+hmac_sha1(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, sha1, SHA1dlen);
 }
--- a/libsec/port/sha1block.c
+++ b/libsec/port/sha1block.c
@@ -1,12 +1,17 @@
 #include "os.h"
 
+#define ROTL(x,n)	(((x)<<n)|((x)>>32-n))
+
+#define F0(x,y,z)	(0x5a827999 + ((z) ^ ((x) & ((y) ^ (z)))))
+#define F1(x,y,z)	(0x6ed9eba1 + ((x) ^ (y) ^ (z)))
+#define F2(x,y,z)	(0x8f1bbcdc + (((x) & (y)) | (((x) | (y)) & (z))))
+#define F3(x,y,z)	(0xca62c1d6 + ((x) ^ (y) ^ (z)))
+
 void
-_sha1block(uchar *p, u32 len, u32 *s)
+_sha1block(uchar *p, ulong len, u32int *s)
 {
-	u32 a, b, c, d, e, x;
+	u32int w[16], a, b, c, d, e;
 	uchar *end;
-	u32 *wp, *wend;
-	u32 w[80];
 
 	/* at this point, we have a multiple of 64 bytes */
 	for(end = p+len; p < end;){
@@ -16,168 +21,113 @@
 		d = s[3];
 		e = s[4];
 
-		wend = w + 15;
-		for(wp = w; wp < wend; wp += 5){
-			wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x5a827999 + (((c^d)&b)^d);
-			b = (b<<30)|(b>>2);
+#define STEP(a,b,c,d,e,f,i) \
+	if(i < 16) {\
+		w[i] = p[0]<<24 | p[1]<<16 | p[2]<<8 | p[3]; \
+		p += 4; \
+	} else { \
+		u32int x = w[i-3&15] ^ w[i-8&15] ^ w[i-14&15] ^ w[i-16&15]; \
+		w[i&15] = ROTL(x, 1); \
+	} \
+	e += ROTL(a, 5) + w[i&15] + f(b,c,d); \
+	b = ROTL(b, 30);
 
-			wp[1] = (p[4]<<24) | (p[5]<<16) | (p[6]<<8) | p[7];
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x5a827999 + (((b^c)&a)^c);
-			a = (a<<30)|(a>>2);
-
-			wp[2] = (p[8]<<24) | (p[9]<<16) | (p[10]<<8) | p[11];
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x5a827999 + (((a^b)&e)^b);
-			e = (e<<30)|(e>>2);
-
-			wp[3] = (p[12]<<24) | (p[13]<<16) | (p[14]<<8) | p[15];
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x5a827999 + (((e^a)&d)^a);
-			d = (d<<30)|(d>>2);
-
-			wp[4] = (p[16]<<24) | (p[17]<<16) | (p[18]<<8) | p[19];
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x5a827999 + (((d^e)&c)^e);
-			c = (c<<30)|(c>>2);
-			
-			p += 20;
-		}
-
-		wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
-		e += ((a<<5) | (a>>27)) + wp[0];
-		e += 0x5a827999 + (((c^d)&b)^d);
-		b = (b<<30)|(b>>2);
-
-		x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-		wp[1] = (x<<1) | (x>>31);
-		d += ((e<<5) | (e>>27)) + wp[1];
-		d += 0x5a827999 + (((b^c)&a)^c);
-		a = (a<<30)|(a>>2);
-
-		x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-		wp[2] = (x<<1) | (x>>31);
-		c += ((d<<5) | (d>>27)) + wp[2];
-		c += 0x5a827999 + (((a^b)&e)^b);
-		e = (e<<30)|(e>>2);
-
-		x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-		wp[3] = (x<<1) | (x>>31);
-		b += ((c<<5) | (c>>27)) + wp[3];
-		b += 0x5a827999 + (((e^a)&d)^a);
-		d = (d<<30)|(d>>2);
-
-		x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-		wp[4] = (x<<1) | (x>>31);
-		a += ((b<<5) | (b>>27)) + wp[4];
-		a += 0x5a827999 + (((d^e)&c)^e);
-		c = (c<<30)|(c>>2);
-
-		wp += 5;
-		p += 4;
-
-		wend = w + 40;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x6ed9eba1 + (b^c^d);
-			b = (b<<30)|(b>>2);
-
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x6ed9eba1 + (a^b^c);
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x6ed9eba1 + (e^a^b);
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x6ed9eba1 + (d^e^a);
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x6ed9eba1 + (c^d^e);
-			c = (c<<30)|(c>>2);
-		}
-
-		wend = w + 60;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0x8f1bbcdc + ((b&c)|((b|c)&d));
-			b = (b<<30)|(b>>2);
-
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0x8f1bbcdc + ((a&b)|((a|b)&c));
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0x8f1bbcdc + ((e&a)|((e|a)&b));
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0x8f1bbcdc + ((d&e)|((d|e)&a));
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0x8f1bbcdc + ((c&d)|((c|d)&e));
-			c = (c<<30)|(c>>2);
-		}
-
-		wend = w + 80;
-		for(; wp < wend; wp += 5){
-			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
-			wp[0] = (x<<1) | (x>>31);
-			e += ((a<<5) | (a>>27)) + wp[0];
-			e += 0xca62c1d6 + (b^c^d);
-			b = (b<<30)|(b>>2);
+		STEP(a,b,c,d,e,F0,0);
+		STEP(e,a,b,c,d,F0,1);
+		STEP(d,e,a,b,c,F0,2);
+		STEP(c,d,e,a,b,F0,3);
+		STEP(b,c,d,e,a,F0,4);
+	
+		STEP(a,b,c,d,e,F0,5);
+		STEP(e,a,b,c,d,F0,6);
+		STEP(d,e,a,b,c,F0,7);
+		STEP(c,d,e,a,b,F0,8);
+		STEP(b,c,d,e,a,F0,9);
+	
+		STEP(a,b,c,d,e,F0,10);
+		STEP(e,a,b,c,d,F0,11);
+		STEP(d,e,a,b,c,F0,12);
+		STEP(c,d,e,a,b,F0,13);
+		STEP(b,c,d,e,a,F0,14);
+	
+		STEP(a,b,c,d,e,F0,15);
+		STEP(e,a,b,c,d,F0,16);
+		STEP(d,e,a,b,c,F0,17);
+		STEP(c,d,e,a,b,F0,18);
+		STEP(b,c,d,e,a,F0,19);
+	
+		STEP(a,b,c,d,e,F1,20);
+		STEP(e,a,b,c,d,F1,21);
+		STEP(d,e,a,b,c,F1,22);
+		STEP(c,d,e,a,b,F1,23);
+		STEP(b,c,d,e,a,F1,24);
+	
+		STEP(a,b,c,d,e,F1,25);
+		STEP(e,a,b,c,d,F1,26);
+		STEP(d,e,a,b,c,F1,27);
+		STEP(c,d,e,a,b,F1,28);
+		STEP(b,c,d,e,a,F1,29);
+	
+		STEP(a,b,c,d,e,F1,30);
+		STEP(e,a,b,c,d,F1,31);
+		STEP(d,e,a,b,c,F1,32);
+		STEP(c,d,e,a,b,F1,33);
+		STEP(b,c,d,e,a,F1,34);
+	
+		STEP(a,b,c,d,e,F1,35);
+		STEP(e,a,b,c,d,F1,36);
+		STEP(d,e,a,b,c,F1,37);
+		STEP(c,d,e,a,b,F1,38);
+		STEP(b,c,d,e,a,F1,39);
+	
+		STEP(a,b,c,d,e,F2,40);
+		STEP(e,a,b,c,d,F2,41);
+		STEP(d,e,a,b,c,F2,42);
+		STEP(c,d,e,a,b,F2,43);
+		STEP(b,c,d,e,a,F2,44);
+	
+		STEP(a,b,c,d,e,F2,45);
+		STEP(e,a,b,c,d,F2,46);
+		STEP(d,e,a,b,c,F2,47);
+		STEP(c,d,e,a,b,F2,48);
+		STEP(b,c,d,e,a,F2,49);
+	
+		STEP(a,b,c,d,e,F2,50);
+		STEP(e,a,b,c,d,F2,51);
+		STEP(d,e,a,b,c,F2,52);
+		STEP(c,d,e,a,b,F2,53);
+		STEP(b,c,d,e,a,F2,54);
+	
+		STEP(a,b,c,d,e,F2,55);
+		STEP(e,a,b,c,d,F2,56);
+		STEP(d,e,a,b,c,F2,57);
+		STEP(c,d,e,a,b,F2,58);
+		STEP(b,c,d,e,a,F2,59);
+	
+		STEP(a,b,c,d,e,F3,60);
+		STEP(e,a,b,c,d,F3,61);
+		STEP(d,e,a,b,c,F3,62);
+		STEP(c,d,e,a,b,F3,63);
+		STEP(b,c,d,e,a,F3,64);
+	
+		STEP(a,b,c,d,e,F3,65);
+		STEP(e,a,b,c,d,F3,66);
+		STEP(d,e,a,b,c,F3,67);
+		STEP(c,d,e,a,b,F3,68);
+		STEP(b,c,d,e,a,F3,69);
+	
+		STEP(a,b,c,d,e,F3,70);
+		STEP(e,a,b,c,d,F3,71);
+		STEP(d,e,a,b,c,F3,72);
+		STEP(c,d,e,a,b,F3,73);
+		STEP(b,c,d,e,a,F3,74);
+	
+		STEP(a,b,c,d,e,F3,75);
+		STEP(e,a,b,c,d,F3,76);
+		STEP(d,e,a,b,c,F3,77);
+		STEP(c,d,e,a,b,F3,78);
+		STEP(b,c,d,e,a,F3,79);
 
-			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
-			wp[1] = (x<<1) | (x>>31);
-			d += ((e<<5) | (e>>27)) + wp[1];
-			d += 0xca62c1d6 + (a^b^c);
-			a = (a<<30)|(a>>2);
-
-			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
-			wp[2] = (x<<1) | (x>>31);
-			c += ((d<<5) | (d>>27)) + wp[2];
-			c += 0xca62c1d6 + (e^a^b);
-			e = (e<<30)|(e>>2);
-
-			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
-			wp[3] = (x<<1) | (x>>31);
-			b += ((c<<5) | (c>>27)) + wp[3];
-			b += 0xca62c1d6 + (d^e^a);
-			d = (d<<30)|(d>>2);
-
-			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
-			wp[4] = (x<<1) | (x>>31);
-			a += ((b<<5) | (b>>27)) + wp[4];
-			a += 0xca62c1d6 + (c^d^e);
-			c = (c<<30)|(c>>2);
-		}
-
-		/* save state */
 		s[0] += a;
 		s[1] += b;
 		s[2] += c;
--- /dev/null
+++ b/libsec/port/sha2_128.c
@@ -1,0 +1,191 @@
+/*
+ * sha2 with 128-byte blocks: sha2_384 and sha2_512
+ */
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+
+static void encode64(uchar*, u64int*, u32);
+static DigestState* sha2_128(uchar *, u32, uchar *, SHA2_256state *, int);
+
+extern void _sha2block128(uchar*, u32, u64int*);
+
+/*
+ *  for sha2_384 and sha2_512, input may be passed in pieces of any
+ *  length; a partial 128 byte block is kept in the state between calls
+ *  and the final padding is done in a private buffer.
+ *
+ *  Note: sha2_384 shares the block function with sha2_512; it just uses
+ *  a different initial seed to produce a truncated 384b hash result.
+ */
+/*
+ * SHA2-384 entry point.  Allocates a zeroed state when s is nil (and marks
+ * it malloced), seeds an unseeded state, then hands the data to sha2_128
+ * with a 384-bit digest length.  Returns nil if the allocation fails,
+ * otherwise whatever sha2_128 returns.
+ */
+SHA2_384state*
+sha2_384(uchar *p, u32 len, uchar *digest, SHA2_384state *s)
+{
+	/*
+	 * first 64 bits of the fractional parts of the square roots
+	 * of the 9th thru 16th primes.
+	 */
+	static u64int seed[8] = {
+		0xcbbb9d5dc1059ed8LL, 0x629a292a367cd507LL,
+		0x9159015a3070dd17LL, 0x152fecd8f70e5939LL,
+		0x67332667ffc00b31LL, 0x8eb44a8768581511LL,
+		0xdb0c2e0d64f98fa7LL, 0x47b5481dbefa4fa4LL,
+	};
+	int k;
+
+	if(s == nil){
+		if((s = mallocz(sizeof(*s), 1)) == nil)
+			return nil;
+		s->malloced = 1;
+	}
+	if(!s->seeded){
+		for(k = 0; k < 8; k++)
+			s->bstate[k] = seed[k];
+		s->seeded = 1;
+	}
+	return sha2_128(p, len, digest, s, SHA2_384dlen);
+}
+
+/*
+ * SHA2-512 entry point.  Allocates a zeroed state when s is nil (and marks
+ * it malloced), seeds an unseeded state, then hands the data to sha2_128
+ * with a 512-bit digest length.  Returns nil if the allocation fails,
+ * otherwise whatever sha2_128 returns.
+ */
+SHA2_512state*
+sha2_512(uchar *p, u32 len, uchar *digest, SHA2_512state *s)
+{
+	/*
+	 * first 64 bits of the fractional parts of the square roots
+	 * of the first 8 primes (2..19).
+	 */
+	static u64int seed[8] = {
+		0x6a09e667f3bcc908LL, 0xbb67ae8584caa73bLL,
+		0x3c6ef372fe94f82bLL, 0xa54ff53a5f1d36f1LL,
+		0x510e527fade682d1LL, 0x9b05688c2b3e6c1fLL,
+		0x1f83d9abfb41bd6bLL, 0x5be0cd19137e2179LL,
+	};
+	int k;
+
+	if(s == nil){
+		if((s = mallocz(sizeof(*s), 1)) == nil)
+			return nil;
+		s->malloced = 1;
+	}
+	if(!s->seeded){
+		for(k = 0; k < 8; k++)
+			s->bstate[k] = seed[k];
+		s->seeded = 1;
+	}
+	return sha2_128(p, len, digest, s, SHA2_512dlen);
+}
+
+/*
+ * common 128 byte block padding and count code for SHA2_384 and SHA2_512.
+ *
+ * Absorbs len bytes at p into state s.  While digest is nil the call only
+ * accumulates: whole blocks go to _sha2block128, any tail is kept in
+ * s->buf, and s is returned.  With a non-nil digest the message is padded
+ * and finished, dlen bytes of the result are written to digest, s is freed
+ * if s->malloced is set, and nil is returned.
+ */
+static DigestState*
+sha2_128(uchar *p, u32 len, uchar *digest, SHA2_512state *s, int dlen)
+{
+	u32 i;		/* unsigned: a chunk of 2GB or more must not turn negative */
+	u64int x[2];
+	uchar buf[256];
+	uchar *e;
+
+	/* fill out the partial 128 byte block from previous calls */
+	if(s->blen){
+		i = 128 - s->blen;
+		if(len < i)
+			i = len;
+		memmove(s->buf + s->blen, p, i);
+		len -= i;
+		s->blen += i;
+		p += i;
+		if(s->blen == 128){
+			_sha2block128(s->buf, s->blen, s->bstate);
+			s->len += s->blen;
+			s->blen = 0;
+		}
+	}
+
+	/* do 128 byte blocks */
+	i = len & ~(128-1);
+	if(i){
+		_sha2block128(p, i, s->bstate);
+		s->len += i;
+		len -= i;
+		p += i;
+	}
+
+	/* save the left overs if not last call */
+	if(digest == 0){
+		/* len is non-zero here only when s->buf is empty */
+		if(len){
+			memmove(s->buf, p, len);
+			s->blen += len;
+		}
+		return s;
+	}
+
+	/*
+	 *  this is the last time through, pad what's left with 0x80,
+	 *  0's, and the input count to create a multiple of 128 bytes.
+	 */
+	if(s->blen){
+		p = s->buf;
+		len = s->blen;
+	} else {
+		memmove(buf, p, len);
+		p = buf;
+	}
+	s->len += len;
+	e = p + len;
+	/* leave 16 bytes for the count; spill into a second block if needed */
+	if(len < 112)
+		i = 112 - len;
+	else
+		i = 240 - len;
+	memset(e, 0, i);
+	*e = 0x80;
+	len += i;
+
+	/* append the 128-bit big-endian bit count */
+	x[0] = (u64int)s->len>>61;
+	x[1] = (u64int)s->len<<3;
+	encode64(p+len, x, 16);
+
+	/* digest the last part */
+	_sha2block128(p, len+16, s->bstate);
+	s->len += len+16;
+
+	/* return result and free state */
+	encode64(digest, s->bstate, dlen);
+	if(s->malloced == 1)
+		free(s);
+	return nil;
+}
+
+/*
+ * Serialize 64-bit words from input into output, most significant
+ * byte first.  len is the output size in bytes and is assumed to be
+ * a multiple of 8; whole words are always written.
+ */
+static void
+encode64(uchar *output, u64int *input, u32 len)
+{
+	uchar *end;
+	u64int v;
+	int shift;
+
+	end = output + len;
+	while(output < end){
+		v = *input++;
+		for(shift = 56; shift >= 0; shift -= 8)
+			*output++ = v >> shift;
+	}
+}
+
+/*
+ * HMAC over SHA2-384: forwards all arguments to hmac_x together with
+ * sha2_384 as the hash function and SHA2_384dlen as its digest length.
+ * Return value and state handling are those of hmac_x (defined elsewhere).
+ */
+DigestState*
+hmac_sha2_384(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, sha2_384, SHA2_384dlen);
+}
+
+/*
+ * HMAC over SHA2-512: forwards all arguments to hmac_x together with
+ * sha2_512 as the hash function and SHA2_512dlen as its digest length.
+ * Return value and state handling are those of hmac_x (defined elsewhere).
+ */
+DigestState*
+hmac_sha2_512(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, sha2_512, SHA2_512dlen);
+}
--- /dev/null
+++ b/libsec/port/sha2_64.c
@@ -1,0 +1,187 @@
+/*
+ * sha2 with 64-byte blocks: sha2_224 and sha2_256
+ */
+#include <u.h>
+#include <libc.h>
+#include <libsec.h>
+
+static void encode32(uchar*, u32int*, u32);
+static DigestState* sha2_64(uchar *, u32, uchar *, SHA2_256state *, int);
+
+extern void _sha2block64(uchar*, u32, u32int*);
+
+/*
+ *  for sha2_224 and sha2_256, input may be passed in pieces of any
+ *  length; a partial 64 byte block is kept in the state between calls
+ *  and the final padding is done in a private buffer.
+ *
+ *  Note: sha2_224 shares the block function with sha2_256, just uses a
+ *  different initial seed and produces a 224b hash result.
+ */
+
+/*
+ * SHA2-224 entry point.  Allocates a zeroed state when s is nil (and marks
+ * it malloced), seeds an unseeded state, then hands the data to sha2_64
+ * with a 224-bit digest length.  Returns nil if the allocation fails,
+ * otherwise whatever sha2_64 returns.
+ */
+SHA2_224state*
+sha2_224(uchar *p, u32 len, uchar *digest, SHA2_224state *s)
+{
+	/*
+	 * second 32 bits of the fractional parts of the square roots
+	 * of the 9th thru 16th primes (the low halves of the sha2_384 seed).
+	 */
+	static u32int seed[8] = {
+		0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+		0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
+	};
+	int k;
+
+	if(s == nil){
+		if((s = mallocz(sizeof(*s), 1)) == nil)
+			return nil;
+		s->malloced = 1;
+	}
+	if(!s->seeded){
+		for(k = 0; k < 8; k++)
+			s->state[k] = seed[k];
+		s->seeded = 1;
+	}
+	return sha2_64(p, len, digest, s, SHA2_224dlen);
+}
+
+/*
+ * SHA2-256 entry point.  Allocates a zeroed state when s is nil (and marks
+ * it malloced), seeds an unseeded state, then hands the data to sha2_64
+ * with a 256-bit digest length.  Returns nil if the allocation fails,
+ * otherwise whatever sha2_64 returns.
+ */
+SHA2_256state*
+sha2_256(uchar *p, u32 len, uchar *digest, SHA2_256state *s)
+{
+	/*
+	 * first 32 bits of the fractional parts of the square roots
+	 * of the first 8 primes (2..19).
+	 */
+	static u32int seed[8] = {
+		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
+	};
+	int k;
+
+	if(s == nil){
+		if((s = mallocz(sizeof(*s), 1)) == nil)
+			return nil;
+		s->malloced = 1;
+	}
+	if(!s->seeded){
+		for(k = 0; k < 8; k++)
+			s->state[k] = seed[k];
+		s->seeded = 1;
+	}
+	return sha2_64(p, len, digest, s, SHA2_256dlen);
+}
+
+/*
+ * common 64 byte block padding and count code for SHA2_224 and SHA2_256.
+ *
+ * Absorbs len bytes at p into state s.  While digest is nil the call only
+ * accumulates: whole blocks go to _sha2block64, any tail is kept in
+ * s->buf, and s is returned.  With a non-nil digest the message is padded
+ * and finished, dlen bytes of the result are written to digest, s is freed
+ * if s->malloced is set, and nil is returned.
+ */
+static DigestState*
+sha2_64(uchar *p, u32 len, uchar *digest, SHA2_256state *s, int dlen)
+{
+	u32 i;		/* unsigned: a chunk of 2GB or more must not turn negative */
+	u32int x[2];
+	uchar buf[128];
+	uchar *e;
+
+	/* fill out the partial 64 byte block from previous calls */
+	if(s->blen){
+		i = 64 - s->blen;
+		if(len < i)
+			i = len;
+		memmove(s->buf + s->blen, p, i);
+		len -= i;
+		s->blen += i;
+		p += i;
+		if(s->blen == 64){
+			_sha2block64(s->buf, s->blen, s->state);
+			s->len += s->blen;
+			s->blen = 0;
+		}
+	}
+
+	/* do 64 byte blocks */
+	i = len & ~(64-1);
+	if(i){
+		_sha2block64(p, i, s->state);
+		s->len += i;
+		len -= i;
+		p += i;
+	}
+
+	/* save the left overs if not last call */
+	if(digest == 0){
+		/* len is non-zero here only when s->buf is empty */
+		if(len){
+			memmove(s->buf, p, len);
+			s->blen += len;
+		}
+		return s;
+	}
+
+	/*
+	 *  this is the last time through, pad what's left with 0x80,
+	 *  0's, and the input count to create a multiple of 64 bytes.
+	 */
+	if(s->blen){
+		p = s->buf;
+		len = s->blen;
+	} else {
+		memmove(buf, p, len);
+		p = buf;
+	}
+	s->len += len;
+	e = p + len;
+	/* leave 8 bytes for the count; spill into a second block if needed */
+	if(len < 56)
+		i = 56 - len;
+	else
+		i = 120 - len;
+	memset(e, 0, i);
+	*e = 0x80;
+	len += i;
+
+	/* append the 64-bit big-endian bit count */
+	x[0] = s->len>>29;
+	x[1] = s->len<<3;
+	encode32(p+len, x, 8);
+
+	/* digest the last part */
+	_sha2block64(p, len+8, s->state);
+	s->len += len+8;
+
+	/* return result and free state */
+	encode32(digest, s->state, dlen);
+	if(s->malloced == 1)
+		free(s);
+	return nil;
+}
+
+/*
+ * Serialize 32-bit words from input into output, most significant
+ * byte first.  len is the output size in bytes and is assumed to be
+ * a multiple of 4; whole words are always written.
+ */
+static void
+encode32(uchar *output, u32int *input, u32 len)
+{
+	uchar *end;
+	u32int v;
+	int shift;
+
+	end = output + len;
+	while(output < end){
+		v = *input++;
+		for(shift = 24; shift >= 0; shift -= 8)
+			*output++ = v >> shift;
+	}
+}
+
+/*
+ * HMAC over SHA2-224: forwards all arguments to hmac_x together with
+ * sha2_224 as the hash function and SHA2_224dlen as its digest length.
+ * Return value and state handling are those of hmac_x (defined elsewhere).
+ */
+DigestState*
+hmac_sha2_224(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, sha2_224, SHA2_224dlen);
+}
+
+/*
+ * HMAC over SHA2-256: forwards all arguments to hmac_x together with
+ * sha2_256 as the hash function and SHA2_256dlen as its digest length.
+ * Return value and state handling are those of hmac_x (defined elsewhere).
+ */
+DigestState*
+hmac_sha2_256(uchar *p, u32 len, uchar *key, u32 klen, uchar *digest,
+	DigestState *s)
+{
+	return hmac_x(p, len, key, klen, digest, s, sha2_256, SHA2_256dlen);
+}
--- /dev/null
+++ b/libsec/port/sha2block128.c
@@ -1,0 +1,176 @@
+/*
+ * sha2_512 block compression function - unrolled version
+ *
+ *   note: the following upper and lower case macro names are distinct
+ *	   and reflect the functions defined in FIPS pub. 180-2.
+ */
+
+#include "os.h"
+
+#define ROTR(x,n)	(((x) >> (n)) | ((x) << (64-(n))))
+#define sigma0(x)	(ROTR((x),1) ^ ROTR((x),8) ^ ((x) >> 7))
+#define sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x) >> 6))
+#define SIGMA0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
+#define SIGMA1(x)	(ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
+#define Ch(x,y,z)	((z) ^ ((x) & ((y) ^ (z))))
+#define Maj(x,y,z)	(((x) | (y)) & ((z) | ((x) & (y))))
+
+/*
+ * first 64 bits of the fractional parts of cube roots of
+ * first 80 primes (2..409).
+ */
+static u64int K512[80] = {
+	0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, 0xb5c0fbcfec4d3b2fLL, 0xe9b5dba58189dbbcLL,
+	0x3956c25bf348b538LL, 0x59f111f1b605d019LL, 0x923f82a4af194f9bLL, 0xab1c5ed5da6d8118LL,
+	0xd807aa98a3030242LL, 0x12835b0145706fbeLL, 0x243185be4ee4b28cLL, 0x550c7dc3d5ffb4e2LL,
+	0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, 0x9bdc06a725c71235LL, 0xc19bf174cf692694LL,
+	0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, 0x0fc19dc68b8cd5b5LL, 0x240ca1cc77ac9c65LL,
+	0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, 0x5cb0a9dcbd41fbd4LL, 0x76f988da831153b5LL,
+	0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, 0xb00327c898fb213fLL, 0xbf597fc7beef0ee4LL,
+	0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, 0x06ca6351e003826fLL, 0x142929670a0e6e70LL,
+	0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, 0x4d2c6dfc5ac42aedLL, 0x53380d139d95b3dfLL,
+	0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, 0x81c2c92e47edaee6LL, 0x92722c851482353bLL,
+	0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, 0xc24b8b70d0f89791LL, 0xc76c51a30654be30LL,
+	0xd192e819d6ef5218LL, 0xd69906245565a910LL, 0xf40e35855771202aLL, 0x106aa07032bbd1b8LL,
+	0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, 0x2748774cdf8eeb99LL, 0x34b0bcb5e19b48a8LL,
+	0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, 0x5b9cca4f7763e373LL, 0x682e6ff3d6b2b8a3LL,
+	0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, 0x84c87814a1f0ab72LL, 0x8cc702081a6439ecLL,
+	0x90befffa23631e28LL, 0xa4506cebde82bde9LL, 0xbef9a3f7b2c67915LL, 0xc67178f2e372532bLL,
+	0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL, 0xf57d4f7fee6ed178LL,
+	0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL, 0x1b710b35131c471bLL,
+	0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL, 0x431d67c49c100d4cLL,
+	0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL, 0x6c44198c4a475817LL
+};
+
+/*
+ * Run the SHA-512 compression function over len bytes at p, updating the
+ * eight 64-bit chaining words in s.  Each pass of the loop consumes one
+ * 128 byte block, so len must be a multiple of 128.
+ *
+ * STEP performs round i.  Rounds 0..15 load the next big-endian message
+ * word from p; later rounds extend the schedule in place in the 16-word
+ * ring w.  The register roles rotate through the macro arguments instead
+ * of moving values between variables.
+ *
+ * Every input byte is widened to u64int before it is shifted, so that no
+ * intermediate value is a signed int that could sign-extend into the
+ * upper half of the word.
+ */
+void
+_sha2block128(uchar *p, ulong len, u64int *s)
+{
+	u64int w[16], a, b, c, d, e, f, g, h;
+	uchar *end;
+
+	/* at this point, we have a multiple of 128 bytes */
+	for(end = p+len; p < end;){
+		a = s[0];
+		b = s[1];
+		c = s[2];
+		d = s[3];
+		e = s[4];
+		f = s[5];
+		g = s[6];
+		h = s[7];
+
+#define STEP(a,b,c,d,e,f,g,h,i) \
+	if(i < 16) { \
+		w[i] = 	(u64int)p[0]<<56 | (u64int)p[1]<<48 | (u64int)p[2]<<40 | (u64int)p[3]<<32 | \
+			(u64int)p[4]<<24 | (u64int)p[5]<<16 | (u64int)p[6]<<8 | (u64int)p[7]; \
+		p += 8; \
+	} else { \
+		u64int s0, s1; \
+		s1 = sigma1(w[i-2&15]); \
+		s0 = sigma0(w[i-15&15]); \
+		w[i&15] += s1 + w[i-7&15] + s0; \
+	} \
+	h += SIGMA1(e) + Ch(e,f,g) + K512[i] + w[i&15]; \
+	d += h; \
+	h += SIGMA0(a) + Maj(a,b,c);
+
+		STEP(a,b,c,d,e,f,g,h,0);
+		STEP(h,a,b,c,d,e,f,g,1);
+		STEP(g,h,a,b,c,d,e,f,2);
+		STEP(f,g,h,a,b,c,d,e,3);
+		STEP(e,f,g,h,a,b,c,d,4);
+		STEP(d,e,f,g,h,a,b,c,5);
+		STEP(c,d,e,f,g,h,a,b,6);
+		STEP(b,c,d,e,f,g,h,a,7);
+
+		STEP(a,b,c,d,e,f,g,h,8);
+		STEP(h,a,b,c,d,e,f,g,9);
+		STEP(g,h,a,b,c,d,e,f,10);
+		STEP(f,g,h,a,b,c,d,e,11);
+		STEP(e,f,g,h,a,b,c,d,12);
+		STEP(d,e,f,g,h,a,b,c,13);
+		STEP(c,d,e,f,g,h,a,b,14);
+		STEP(b,c,d,e,f,g,h,a,15);
+
+		STEP(a,b,c,d,e,f,g,h,16);
+		STEP(h,a,b,c,d,e,f,g,17);
+		STEP(g,h,a,b,c,d,e,f,18);
+		STEP(f,g,h,a,b,c,d,e,19);
+		STEP(e,f,g,h,a,b,c,d,20);
+		STEP(d,e,f,g,h,a,b,c,21);
+		STEP(c,d,e,f,g,h,a,b,22);
+		STEP(b,c,d,e,f,g,h,a,23);
+
+		STEP(a,b,c,d,e,f,g,h,24);
+		STEP(h,a,b,c,d,e,f,g,25);
+		STEP(g,h,a,b,c,d,e,f,26);
+		STEP(f,g,h,a,b,c,d,e,27);
+		STEP(e,f,g,h,a,b,c,d,28);
+		STEP(d,e,f,g,h,a,b,c,29);
+		STEP(c,d,e,f,g,h,a,b,30);
+		STEP(b,c,d,e,f,g,h,a,31);
+
+		STEP(a,b,c,d,e,f,g,h,32);
+		STEP(h,a,b,c,d,e,f,g,33);
+		STEP(g,h,a,b,c,d,e,f,34);
+		STEP(f,g,h,a,b,c,d,e,35);
+		STEP(e,f,g,h,a,b,c,d,36);
+		STEP(d,e,f,g,h,a,b,c,37);
+		STEP(c,d,e,f,g,h,a,b,38);
+		STEP(b,c,d,e,f,g,h,a,39);
+
+		STEP(a,b,c,d,e,f,g,h,40);
+		STEP(h,a,b,c,d,e,f,g,41);
+		STEP(g,h,a,b,c,d,e,f,42);
+		STEP(f,g,h,a,b,c,d,e,43);
+		STEP(e,f,g,h,a,b,c,d,44);
+		STEP(d,e,f,g,h,a,b,c,45);
+		STEP(c,d,e,f,g,h,a,b,46);
+		STEP(b,c,d,e,f,g,h,a,47);
+
+		STEP(a,b,c,d,e,f,g,h,48);
+		STEP(h,a,b,c,d,e,f,g,49);
+		STEP(g,h,a,b,c,d,e,f,50);
+		STEP(f,g,h,a,b,c,d,e,51);
+		STEP(e,f,g,h,a,b,c,d,52);
+		STEP(d,e,f,g,h,a,b,c,53);
+		STEP(c,d,e,f,g,h,a,b,54);
+		STEP(b,c,d,e,f,g,h,a,55);
+
+		STEP(a,b,c,d,e,f,g,h,56);
+		STEP(h,a,b,c,d,e,f,g,57);
+		STEP(g,h,a,b,c,d,e,f,58);
+		STEP(f,g,h,a,b,c,d,e,59);
+		STEP(e,f,g,h,a,b,c,d,60);
+		STEP(d,e,f,g,h,a,b,c,61);
+		STEP(c,d,e,f,g,h,a,b,62);
+		STEP(b,c,d,e,f,g,h,a,63);
+
+		STEP(a,b,c,d,e,f,g,h,64);
+		STEP(h,a,b,c,d,e,f,g,65);
+		STEP(g,h,a,b,c,d,e,f,66);
+		STEP(f,g,h,a,b,c,d,e,67);
+		STEP(e,f,g,h,a,b,c,d,68);
+		STEP(d,e,f,g,h,a,b,c,69);
+		STEP(c,d,e,f,g,h,a,b,70);
+		STEP(b,c,d,e,f,g,h,a,71);
+
+		STEP(a,b,c,d,e,f,g,h,72);
+		STEP(h,a,b,c,d,e,f,g,73);
+		STEP(g,h,a,b,c,d,e,f,74);
+		STEP(f,g,h,a,b,c,d,e,75);
+		STEP(e,f,g,h,a,b,c,d,76);
+		STEP(d,e,f,g,h,a,b,c,77);
+		STEP(c,d,e,f,g,h,a,b,78);
+		STEP(b,c,d,e,f,g,h,a,79);
+
+		/* fold this block's result into the chaining state */
+		s[0] += a;
+		s[1] += b;
+		s[2] += c;
+		s[3] += d;
+		s[4] += e;
+		s[5] += f;
+		s[6] += g;
+		s[7] += h;
+	}
+}
--- /dev/null
+++ b/libsec/port/sha2block64.c
@@ -1,0 +1,150 @@
+/*
+ * sha2_256 block compression function - unrolled version
+ *
+ *   note: the following upper and lower case macro names are distinct
+ *	   and reflect the functions defined in FIPS pub. 180-2.
+ */
+
+#include "os.h"
+
+#define ROTR(x,n)	(((x) >> (n)) | ((x) << (32-(n))))
+#define sigma0(x)	(ROTR((x),7) ^ ROTR((x),18) ^ ((x) >> 3))
+#define sigma1(x)	(ROTR((x),17) ^ ROTR((x),19) ^ ((x) >> 10))
+#define SIGMA0(x)	(ROTR((x),2) ^ ROTR((x),13) ^ ROTR((x),22))
+#define SIGMA1(x)	(ROTR((x),6) ^ ROTR((x),11) ^ ROTR((x),25))
+#define Ch(x,y,z)	((z) ^ ((x) & ((y) ^ (z))))
+#define Maj(x,y,z)	(((x) | (y)) & ((z) | ((x) & (y))))
+
+/*
+ * first 32 bits of the fractional parts of cube roots of
+ * first 64 primes (2..311).
+ */
+static u32int K256[64] = {
+	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5,
+	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5,
+	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3,
+	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174,
+	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc,
+	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da,
+	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7,
+	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967,
+	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13,
+	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85,
+	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3,
+	0xd192e819,0xd6990624,0xf40e3585,0x106aa070,
+	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5,
+	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3,
+	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208,
+	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2,
+};
+
+/*
+ * Run the SHA-256 compression function over len bytes at p, updating the
+ * eight 32-bit chaining words in s.  Each pass of the loop consumes one
+ * 64 byte block, so len must be a multiple of 64.
+ *
+ * STEP performs round i.  Rounds 0..15 load the next big-endian message
+ * word from p; later rounds extend the schedule in place in the 16-word
+ * ring w.  The register roles rotate through the macro arguments instead
+ * of moving values between variables.
+ *
+ * Every input byte is converted to u32int before it is shifted, so the
+ * top byte is never shifted into the sign bit of an int.
+ */
+void
+_sha2block64(uchar *p, ulong len, u32int *s)
+{
+	u32int w[16], a, b, c, d, e, f, g, h;
+	uchar *end;
+
+	/* at this point, we have a multiple of 64 bytes */
+	for(end = p+len; p < end;){
+		a = s[0];
+		b = s[1];
+		c = s[2];
+		d = s[3];
+		e = s[4];
+		f = s[5];
+		g = s[6];
+		h = s[7];
+
+#define STEP(a,b,c,d,e,f,g,h,i) \
+	if(i < 16) {\
+		w[i] = (u32int)p[0]<<24 | (u32int)p[1]<<16 | (u32int)p[2]<<8 | (u32int)p[3]; \
+		p += 4; \
+	} else { \
+		w[i&15] += sigma1(w[i-2&15]) + w[i-7&15] + sigma0(w[i-15&15]); \
+	} \
+	h += SIGMA1(e) + Ch(e,f,g) + K256[i] + w[i&15]; \
+	d += h; \
+	h += SIGMA0(a) + Maj(a,b,c);
+
+		STEP(a,b,c,d,e,f,g,h,0);
+		STEP(h,a,b,c,d,e,f,g,1);
+		STEP(g,h,a,b,c,d,e,f,2);
+		STEP(f,g,h,a,b,c,d,e,3);
+		STEP(e,f,g,h,a,b,c,d,4);
+		STEP(d,e,f,g,h,a,b,c,5);
+		STEP(c,d,e,f,g,h,a,b,6);
+		STEP(b,c,d,e,f,g,h,a,7);
+
+		STEP(a,b,c,d,e,f,g,h,8);
+		STEP(h,a,b,c,d,e,f,g,9);
+		STEP(g,h,a,b,c,d,e,f,10);
+		STEP(f,g,h,a,b,c,d,e,11);
+		STEP(e,f,g,h,a,b,c,d,12);
+		STEP(d,e,f,g,h,a,b,c,13);
+		STEP(c,d,e,f,g,h,a,b,14);
+		STEP(b,c,d,e,f,g,h,a,15);
+
+		STEP(a,b,c,d,e,f,g,h,16);
+		STEP(h,a,b,c,d,e,f,g,17);
+		STEP(g,h,a,b,c,d,e,f,18);
+		STEP(f,g,h,a,b,c,d,e,19);
+		STEP(e,f,g,h,a,b,c,d,20);
+		STEP(d,e,f,g,h,a,b,c,21);
+		STEP(c,d,e,f,g,h,a,b,22);
+		STEP(b,c,d,e,f,g,h,a,23);
+
+		STEP(a,b,c,d,e,f,g,h,24);
+		STEP(h,a,b,c,d,e,f,g,25);
+		STEP(g,h,a,b,c,d,e,f,26);
+		STEP(f,g,h,a,b,c,d,e,27);
+		STEP(e,f,g,h,a,b,c,d,28);
+		STEP(d,e,f,g,h,a,b,c,29);
+		STEP(c,d,e,f,g,h,a,b,30);
+		STEP(b,c,d,e,f,g,h,a,31);
+
+		STEP(a,b,c,d,e,f,g,h,32);
+		STEP(h,a,b,c,d,e,f,g,33);
+		STEP(g,h,a,b,c,d,e,f,34);
+		STEP(f,g,h,a,b,c,d,e,35);
+		STEP(e,f,g,h,a,b,c,d,36);
+		STEP(d,e,f,g,h,a,b,c,37);
+		STEP(c,d,e,f,g,h,a,b,38);
+		STEP(b,c,d,e,f,g,h,a,39);
+
+		STEP(a,b,c,d,e,f,g,h,40);
+		STEP(h,a,b,c,d,e,f,g,41);
+		STEP(g,h,a,b,c,d,e,f,42);
+		STEP(f,g,h,a,b,c,d,e,43);
+		STEP(e,f,g,h,a,b,c,d,44);
+		STEP(d,e,f,g,h,a,b,c,45);
+		STEP(c,d,e,f,g,h,a,b,46);
+		STEP(b,c,d,e,f,g,h,a,47);
+
+		STEP(a,b,c,d,e,f,g,h,48);
+		STEP(h,a,b,c,d,e,f,g,49);
+		STEP(g,h,a,b,c,d,e,f,50);
+		STEP(f,g,h,a,b,c,d,e,51);
+		STEP(e,f,g,h,a,b,c,d,52);
+		STEP(d,e,f,g,h,a,b,c,53);
+		STEP(c,d,e,f,g,h,a,b,54);
+		STEP(b,c,d,e,f,g,h,a,55);
+
+		STEP(a,b,c,d,e,f,g,h,56);
+		STEP(h,a,b,c,d,e,f,g,57);
+		STEP(g,h,a,b,c,d,e,f,58);
+		STEP(f,g,h,a,b,c,d,e,59);
+		STEP(e,f,g,h,a,b,c,d,60);
+		STEP(d,e,f,g,h,a,b,c,61);
+		STEP(c,d,e,f,g,h,a,b,62);
+		STEP(b,c,d,e,f,g,h,a,63);
+
+		/* fold this block's result into the chaining state */
+		s[0] += a;
+		s[1] += b;
+		s[2] += c;
+		s[3] += d;
+		s[4] += e;
+		s[5] += f;
+		s[6] += g;
+		s[7] += h;
+	}
+}
--- /dev/null
+++ b/libsec/port/sha2test.c
@@ -1,0 +1,63 @@
+#include <u.h>
+#include <libc.h>
+#include "libsec.h"
+
+/*
+ * Input strings hashed by main, in order; the list ends with a nil entry.
+ * The first string is empty.
+ */
+char *tests[] = {
+	"",
+	"a",
+	"abc",
+	"message digest",
+	"abcdefghijklmnopqrstuvwxyz",
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789",
+	"123456789012345678901234567890123456789012345678901234567890"
+		"12345678901234567890",
+	"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq",
+	"abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmnhi"
+		"jklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu",
+	0
+};
+
+/*
+ * Print the digest of every string in tests under each of the four
+ * SHA-2 variants, one lower-case hex digest per line, with a heading
+ * line before each variant.
+ */
+void
+main(void)
+{
+	static struct {
+		char *heading;
+		DigestState* (*hash)(uchar*, u32, uchar*, DigestState*);
+		int dlen;
+	} variant[] = {
+		{"SHA2_224 tests:\n",	sha2_224,	SHA2_224dlen},
+		{"\nSHA256 tests:\n",	sha2_256,	SHA2_256dlen},
+		{"\nSHA384 tests:\n",	sha2_384,	SHA2_384dlen},
+		{"\nSHA512 tests:\n",	sha2_512,	SHA2_512dlen},
+	};
+	int v, i;
+	char **pp;
+	uchar digest[SHA2_512dlen];	/* large enough for the longest digest */
+
+	for(v = 0; v < sizeof(variant)/sizeof(variant[0]); v++){
+		print("%s", variant[v].heading);
+		for(pp = tests; *pp != nil; pp++){
+			/* one-shot hash: nil state, non-nil digest */
+			(*variant[v].hash)((uchar*)*pp, strlen(*pp), digest, nil);
+			for(i = 0; i < variant[v].dlen; i++)
+				print("%2.2ux", digest[i]);
+			print("\n");
+		}
+	}
+}
--- a/libsec/port/smallprimes.c
+++ b/libsec/port/smallprimes.c
@@ -1,6 +1,6 @@
 #include "os.h"
 
-u32 smallprimes[1000] = {
+ulong smallprimes[1000] = {
 	2,
 	3,
 	5,
--- a/libsec/port/smallprimetest.c
+++ b/libsec/port/smallprimetest.c
@@ -2,7 +2,7 @@
 #include <mp.h>
 #include <libsec.h>
 
-static u32 smallprimes[] = {
+static ulong smallprimes[] = {
 	2,	3,	5,	7,	11,	13,	17,	19,	23,	29,
 	31,	37,	41,	43,	47,	53,	59,	61,	67,	71,
 	73,	79,	83,	89,	97,	101,	103,	107,	109,	113,
@@ -1007,7 +1007,7 @@
 
 //  return 1 if p is divisable by sp, 0 otherwise
 static int
-divides(mpint *dividend, u32 divisor)
+divides(mpint *dividend, ulong divisor)
 {
 	mpdigit d[2], q;
 	int i;
@@ -1026,7 +1026,7 @@
 smallprimetest(mpint *p)
 {
 	int i;
-	u32 sp;
+	ulong sp;
 
 	for(i = 0; i < nelem(smallprimes); i++){
 		sp = smallprimes[i];
--- /dev/null
+++ b/libsec/port/thumb.c
@@ -1,0 +1,170 @@
+#include "os.h"
+#include <bio.h>
+#include <libsec.h>
+
+enum{ ThumbTab = 1<<10 };
+
+/*
+ * Pick the hash bucket for a digest: the first two bytes of hash,
+ * taken as a big-endian number, reduced to the ThumbTab table size.
+ */
+static Thumbprint*
+tablehead(uchar *hash, Thumbprint *table)
+{
+	int slot;
+
+	slot = (hash[0]<<8) + hash[1];
+	slot &= ThumbTab-1;
+	return &table[slot];
+}
+
+/*
+ * Release a thumbprint table.  Each bucket head lives inside the table
+ * array itself; only the entries chained behind it were allocated
+ * separately, so the walk of a bucket stops at nil or on coming back
+ * round to the head.  A nil table is ignored.
+ */
+void
+freeThumbprints(Thumbprint *table)
+{
+	Thumbprint *bucket, *cur, *nxt;
+	int k;
+
+	if(table == nil)
+		return;
+	for(k = 0; k < ThumbTab; k++){
+		bucket = &table[k];
+		cur = bucket->next;
+		while(cur != nil && cur != bucket){
+			nxt = cur->next;
+			free(cur);
+			cur = nxt;
+		}
+	}
+	free(table);
+}
+
+/*
+ * Return 1 if table holds an entry of exactly len bytes equal to hash,
+ * 0 otherwise (including when table is nil).  Only the bucket selected
+ * by tablehead is searched; the search ends at nil or after the bucket
+ * head itself has been examined.
+ */
+int
+okThumbprint(uchar *hash, int len, Thumbprint *table)
+{
+	Thumbprint *head, *cur;
+
+	if(table == nil)
+		return 0;
+	head = tablehead(hash, table);
+	cur = head->next;
+	while(cur != nil){
+		if(cur->len == len && memcmp(hash, cur->hash, len) == 0)
+			return 1;
+		if(cur == head)
+			return 0;
+		cur = cur->next;
+	}
+	return 0;
+}
+
+/*
+ * Check a certificate against a thumbprint table.
+ *
+ * Returns 1 as soon as one of these digests is found in table, tried in
+ * this order: SHA-1 of cert, SHA2-256 of cert, the SHA2-256 digest that
+ * X509digestSPKI computes from cert.  Returns 0 otherwise; werrstr is
+ * called for a nil table, a missing certificate, and the no-match case.
+ */
+int
+okCertificate(uchar *cert, int len, Thumbprint *table)
+{
+	/* sized for the larger digest; also holds the shorter SHA-1 result */
+	uchar hash[SHA2_256dlen];
+	char thumb[2*SHA2_256dlen+1];
+
+	if(table == nil){
+		werrstr("no thumbprints provided");
+		return 0;
+	}
+	if(cert == nil || len <= 0){
+		werrstr("no certificate provided");
+		return 0;
+	}
+
+	sha1(cert, len, hash, nil);
+	if(okThumbprint(hash, SHA1dlen, table))
+		return 1;
+
+	sha2_256(cert, len, hash, nil);
+	if(okThumbprint(hash, SHA2_256dlen, table))
+		return 1;
+
+	/* no errstr is set here on failure; presumably X509digestSPKI sets one */
+	if(X509digestSPKI(cert, len, sha2_256, hash) < 0)
+		return 0;
+	if(okThumbprint(hash, SHA2_256dlen, table))
+		return 1;
+
+	/*
+	 * no match: leave the last digest tried in the error string,
+	 * base64 encoded with trailing '=' padding removed.
+	 */
+	len = enc64(thumb, sizeof(thumb), hash, SHA2_256dlen);
+	while(len > 0 && thumb[len-1] == '=')
+		len--;
+	thumb[len] = '\0';
+	werrstr("sha256=%s", thumb);
+
+	return 0;
+}
+
+/*
+ * Read thumbprints from file into table.
+ *
+ * Each line is tokenized; lines with fewer than two fields are skipped.
+ * A line whose first field is "#include" loads the file named by the
+ * second field recursively (an error once depth exceeds 8).  Other lines
+ * are used only when the first field equals tag and the second starts
+ * with "sha1=" or "sha256="; the rest of that field must decode, as hex
+ * or else as base64, to exactly the digest length.  Digests present in
+ * crltab (when non-nil) are not added.
+ *
+ * Returns 0 on success or when file does not exist, -1 on error.
+ */
+static int
+loadThumbprints(char *file, char *tag, Thumbprint *table, Thumbprint *crltab, int depth)
+{
+	Thumbprint *hd, *entry;
+	char *line, *field[50];
+	uchar hash[SHA2_256dlen];
+	Biobuf *bin;
+	int len, n;
+
+	if(depth > 8){
+		werrstr("too many includes, last file %s", file);
+		return -1;
+	}
+	if(access(file, AEXIST) < 0)
+		return 0;	/* not an error */
+	if((bin = Bopen(file, OREAD|OCEXEC)) == nil)
+		return -1;
+	/* line is freed by the loop step, or at err when we bail out early */
+	for(; (line = Brdstr(bin, '\n', 1)) != nil; free(line)){
+		if(tokenize(line, field, nelem(field)) < 2)
+			continue;
+		if(strcmp(field[0], "#include") == 0){
+			if(loadThumbprints(field[1], tag, table, crltab, depth+1) < 0)
+				goto err;
+			continue;
+		}
+		if(strcmp(field[0], tag) != 0)
+			continue;
+		if(strncmp(field[1], "sha1=", 5) == 0){
+			field[1] += 5;
+			len = SHA1dlen;
+		} else if(strncmp(field[1], "sha256=", 7) == 0){
+			field[1] += 7;
+			len = SHA2_256dlen;
+		} else {
+			continue;
+		}
+		n = strlen(field[1]);
+		/* try hex first (needs exactly 2 chars per byte), then base64 */
+		if((n != len*2 || dec16(hash, len, field[1], n) != len)
+		&& dec64(hash, len, field[1], n) != len){
+			werrstr("malformed %s entry in %s: %s", tag, file, field[1]);
+			goto err;
+		}
+		if(crltab && okThumbprint(hash, len, crltab))
+			continue;
+		hd = tablehead(hash, table);
+		/*
+		 * an unused bucket stores its first digest in the head itself,
+		 * which then points at itself; later digests go in allocated
+		 * entries linked in right after the head.
+		 */
+		if(hd->next == nil)
+			entry = hd;
+		else {
+			if((entry = malloc(sizeof(*entry))) == nil)
+				goto err;
+			entry->next = hd->next;
+		}
+		hd->next = entry;
+		entry->len = len;
+		memcpy(entry->hash, hash, len);
+	}
+	Bterm(bin);
+	return 0;
+err:
+	free(line);
+	Bterm(bin);
+	return -1;
+}
+
+/*
+ * Build a thumbprint table from file ok, keeping entries marked with tag.
+ * When crl is non-nil it is loaded first into a temporary table, and
+ * digests found there are left out of the result.  The temporary table
+ * is always freed before returning.  Returns the new table, or nil if
+ * an allocation or either load fails.
+ */
+Thumbprint *
+initThumbprints(char *ok, char *crl, char *tag)
+{
+	Thumbprint *table, *crltab;
+
+	table = nil;
+	crltab = nil;
+	if(crl != nil){
+		crltab = malloc(ThumbTab * sizeof(*crltab));
+		if(crltab == nil)
+			goto out;
+		memset(crltab, 0, ThumbTab * sizeof(*crltab));
+		if(loadThumbprints(crl, tag, crltab, nil, 0) < 0)
+			goto out;
+	}
+	table = malloc(ThumbTab * sizeof(*table));
+	if(table == nil)
+		goto out;
+	memset(table, 0, ThumbTab * sizeof(*table));
+	if(loadThumbprints(ok, tag, table, crltab, 0) < 0){
+		freeThumbprints(table);
+		table = nil;
+	}
+out:
+	freeThumbprints(crltab);
+	return table;
+}
--- /dev/null
+++ b/libsec/port/tlshand.c
@@ -1,0 +1,3028 @@
+#include <u.h>
+#include <libc.h>
+#include <auth.h>
+#include <mp.h>
+#include <libsec.h>
+
+// The main groups of functions are:
+//		client/server - main handshake protocol definition
+//		message functions - formatting handshake messages
+//		cipher choices - catalog of digest and encrypt algorithms
+//		security functions - PKCS#1, sslHMAC, session keygen
+//		general utility functions - malloc, serialization
+// The handshake protocol builds on the TLS/SSL3 record layer protocol,
+// which is implemented in kernel device #a.  See also /lib/rfc/rfc2246.
+
+enum {
+	TLSFinishedLen = 12,
+	SSL3FinishedLen = MD5dlen+SHA1dlen,
+	MaxKeyData = 160,	// amount of secret we may need
+	MAXdlen = SHA2_512dlen,
+	RandomSize = 32,
+	MasterSecretSize = 48,
+	AQueue = 0,
+	AFlush = 1,
+};
+
+typedef struct Bytes{
+	int len;
+	uchar data[];
+} Bytes;
+
+typedef struct Ints{
+	int len;
+	int data[];
+} Ints;
+
+// one cipher suite: the record layer algorithm names and key material size
+typedef struct Algs{
+	char *enc;	// encryption algorithm name, e.g. "aes_128_cbc"
+	char *digest;	// digest algorithm name, e.g. "sha1", or "clear"
+	int nsecret;	// amount of secret data needed to init keys
+	int tlsid;	// cipher suite number (TLS_* / GOOGLE_* constants)
+	int ok;		// not set by the cipherAlgs initializers; presumably filled in at run time - TODO confirm
+} Algs;
+
+typedef struct Namedcurve{
+	int tlsid;
+	void (*init)(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h);
+} Namedcurve;
+
+typedef struct Finished{
+	uchar verify[SSL3FinishedLen];
+	int n;
+} Finished;
+
+typedef struct HandshakeHash {
+	MD5state	md5;
+	SHAstate	sha1;
+	SHA2_256state	sha2_256;
+} HandshakeHash;
+
+typedef struct TlsSec TlsSec;
+struct TlsSec {
+	RSApub *rsapub;
+	AuthRpc *rpc;	// factotum for rsa private key
+	uchar *psk;	// pre-shared key
+	int psklen;
+	int clientVers;			// version in ClientHello
+	uchar sec[MasterSecretSize];	// master secret
+	uchar srandom[RandomSize];	// server random
+	uchar crandom[RandomSize];	// client random
+
+	Namedcurve *nc; // selected curve for ECDHE
+	// diffie hellman state
+	DHstate dh;
+	struct {
+		ECdomain dom;
+		ECpriv Q;
+	} ec;
+	uchar X[32];
+
+	// byte generation and handshake checksum
+	void (*prf)(uchar*, int, uchar*, int, char*, uchar*, int);
+	void (*setFinished)(TlsSec*, HandshakeHash, uchar*, int);
+	int nfin;
+};
+
+typedef struct TlsConnection{
+	TlsSec sec[1];	// security management goo
+	int hand, ctl;	// record layer file descriptors
+	int erred;		// set when tlsError called
+	int (*trace)(char*fmt, ...); // for debugging
+	int version;	// protocol we are speaking
+	Bytes *cert;	// server certificate; only last - no chain
+
+	int cipher;
+	int nsecret;	// amount of secret data to init keys
+	char *digest;	// name of digest algorithm to use
+	char *enc;	// name of encryption algorithm to use
+
+	// for finished messages
+	HandshakeHash	handhash;
+	Finished	finished;
+
+	uchar *sendp;
+	uchar buf[1<<16];
+} TlsConnection;
+
+typedef struct Msg{
+	int tag;
+	union {
+		struct {
+			int	version;
+			uchar 	random[RandomSize];
+			Bytes*	sid;
+			Ints*	ciphers;
+			Bytes*	compressors;
+			Bytes*	extensions;
+		} clientHello;
+		struct {
+			int	version;
+			uchar	random[RandomSize];
+			Bytes*	sid;
+			int	cipher;
+			int	compressor;
+			Bytes*	extensions;
+		} serverHello;
+		struct {
+			int ncert;
+			Bytes **certs;
+		} certificate;
+		struct {
+			Bytes *types;
+			Ints *sigalgs;
+			int nca;
+			Bytes **cas;
+		} certificateRequest;
+		struct {
+			Bytes *pskid;
+			Bytes *key;
+		} clientKeyExchange;
+		struct {
+			Bytes *pskid;
+			Bytes *dh_p;
+			Bytes *dh_g;
+			Bytes *dh_Ys;
+			Bytes *dh_parameters;
+			Bytes *dh_signature;
+			int sigalg;
+			int curve;
+		} serverKeyExchange;
+		struct {
+			int sigalg;
+			Bytes *signature;
+		} certificateVerify;		
+		Finished finished;
+	} u;
+} Msg;
+
+
+enum {
+	SSL3Version	= 0x0300,
+	TLS10Version	= 0x0301,
+	TLS11Version	= 0x0302,
+	TLS12Version	= 0x0303,
+	ProtocolVersion	= TLS12Version,	// maximum version we speak
+	MinProtoVersion	= 0x0300,	// limits on version we accept
+	MaxProtoVersion	= 0x03ff,
+};
+
+// handshake type
+enum {
+	HHelloRequest,
+	HClientHello,
+	HServerHello,
+	HSSL2ClientHello = 9,  /* local convention;  see devtls.c */
+	HCertificate = 11,
+	HServerKeyExchange,
+	HCertificateRequest,
+	HServerHelloDone,
+	HCertificateVerify,
+	HClientKeyExchange,
+	HFinished = 20,
+	HMax
+};
+
+// alerts
+enum {
+	ECloseNotify = 0,
+	EUnexpectedMessage = 10,
+	EBadRecordMac = 20,
+	EDecryptionFailed = 21,
+	ERecordOverflow = 22,
+	EDecompressionFailure = 30,
+	EHandshakeFailure = 40,
+	ENoCertificate = 41,
+	EBadCertificate = 42,
+	EUnsupportedCertificate = 43,
+	ECertificateRevoked = 44,
+	ECertificateExpired = 45,
+	ECertificateUnknown = 46,
+	EIllegalParameter = 47,
+	EUnknownCa = 48,
+	EAccessDenied = 49,
+	EDecodeError = 50,
+	EDecryptError = 51,
+	EExportRestriction = 60,
+	EProtocolVersion = 70,
+	EInsufficientSecurity = 71,
+	EInternalError = 80,
+	EInappropriateFallback = 86,
+	EUserCanceled = 90,
+	ENoRenegotiation = 100,
+	EUnknownPSKidentity = 115,
+	EMax = 256
+};
+
+// cipher suites
+enum {
+	TLS_RSA_WITH_3DES_EDE_CBC_SHA		= 0X000A,
+	TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA	= 0X0016,
+
+	TLS_RSA_WITH_AES_128_CBC_SHA		= 0X002F,
+	TLS_DHE_RSA_WITH_AES_128_CBC_SHA	= 0X0033,
+	TLS_RSA_WITH_AES_256_CBC_SHA		= 0X0035,
+	TLS_DHE_RSA_WITH_AES_256_CBC_SHA	= 0X0039,
+	TLS_RSA_WITH_AES_128_CBC_SHA256		= 0X003C,
+	TLS_RSA_WITH_AES_256_CBC_SHA256		= 0X003D,
+	TLS_DHE_RSA_WITH_AES_128_CBC_SHA256	= 0X0067,
+
+	TLS_RSA_WITH_AES_128_GCM_SHA256		= 0x009C,
+	TLS_DHE_RSA_WITH_AES_128_GCM_SHA256	= 0x009E,
+
+	TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA	= 0xC013,
+	TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA	= 0xC014,
+	TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256	= 0xC023,
+	TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256	= 0xC027,
+
+	TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 = 0xC02B,
+	TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256	= 0xC02F,
+
+	GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305		= 0xCC13,
+	GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305	= 0xCC14,
+	GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305		= 0xCC15,
+
+	TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305	= 0xCCA8,
+	TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305	= 0xCCA9,
+	TLS_DHE_RSA_WITH_CHACHA20_POLY1305	= 0xCCAA,
+
+	TLS_PSK_WITH_CHACHA20_POLY1305		= 0xCCAB,
+	TLS_PSK_WITH_AES_128_CBC_SHA256		= 0x00AE,
+	TLS_PSK_WITH_AES_128_CBC_SHA		= 0x008C,
+
+	TLS_FALLBACK_SCSV = 0x5600,
+};
+
+// compression methods
+enum {
+	CompressionNull = 0,
+	CompressionMax
+};
+
+
+// curves
+enum {
+	X25519 = 0x001d,
+};
+
+// extensions
+enum {
+	Extsni = 0x0000,
+	Extec = 0x000a,
+	Extecp = 0x000b,
+	Extsigalgs = 0x000d,
+};
+
+// Cipher suites known to this file, grouped by key exchange method.
+// Each row is {enc, digest, nsecret, tlsid}; the ok field is left zero.
+// nsecret is written as 2*(...): presumably the per-direction key, IV and
+// MAC key sizes, doubled for the two directions - TODO confirm.
+// NOTE(review): row order looks like a preference order; verify against
+// the code that scans this table.
+static Algs cipherAlgs[] = {
+	// ECDHE-ECDSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256},
+
+	// ECDHE-RSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA},
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA},
+
+	// DHE-RSA
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_DHE_RSA_WITH_CHACHA20_POLY1305},
+	{"ccpoly64_aead", "clear", 2*32, GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305},
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_DHE_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_DHE_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_DHE_RSA_WITH_AES_128_CBC_SHA},
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_DHE_RSA_WITH_AES_256_CBC_SHA},
+	{"3des_ede_cbc","sha1",	2*(4*8+SHA1dlen), TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA},
+
+	// RSA
+	{"aes_128_gcm_aead", "clear", 2*(16+4), TLS_RSA_WITH_AES_128_GCM_SHA256},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_RSA_WITH_AES_128_CBC_SHA256},
+	{"aes_256_cbc", "sha256", 2*(32+16+SHA2_256dlen), TLS_RSA_WITH_AES_256_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_RSA_WITH_AES_128_CBC_SHA},
+	{"aes_256_cbc", "sha1", 2*(32+16+SHA1dlen), TLS_RSA_WITH_AES_256_CBC_SHA},
+	{"3des_ede_cbc","sha1",	2*(4*8+SHA1dlen), TLS_RSA_WITH_3DES_EDE_CBC_SHA},
+
+	// PSK
+	{"ccpoly96_aead", "clear", 2*(32+12), TLS_PSK_WITH_CHACHA20_POLY1305},
+	{"aes_128_cbc", "sha256", 2*(16+16+SHA2_256dlen), TLS_PSK_WITH_AES_128_CBC_SHA256},
+	{"aes_128_cbc", "sha1", 2*(16+16+SHA1dlen), TLS_PSK_WITH_AES_128_CBC_SHA},
+};
+
+static uchar compressors[] = {	/* compression methods tlsClient2 lists in its ClientHello */
+	CompressionNull,
+};
+
+static Namedcurve namedcurves[] = {	/* (tlsid, init) pairs; checkClientExtensions prefers earlier entries */
+	X25519, nil,		/* no init function: tlsSecECDHEc handles X25519 on its own path */
+	0x0017, secp256r1,
+	0x0018, secp384r1,
+};
+
+static uchar pointformats[] = {	/* ec_point_formats list written by tlsClientExtensions */
+	CompressionNull /* support of uncompressed point format is mandatory;
+			 * the constant is reused here only for its value 0 */
+};
+
+static struct {
+	DigestState* (*fun)(uchar*, u32, uchar*, DigestState*);	/* digest function */
+	int len;	/* digest length, one of the *dlen constants */
+} hashfun[] = {
+/*	Table indexed by hash id; the ids line up with the high byte of
+ *	the sigalgs[] values (0x04 sha2_256, 0x06 sha2_512, ...).
+ *	[0x00]  is reserved for MD5+SHA1 for < TLS1.2 */
+	[0x01]	{md5,		MD5dlen},
+	[0x02]	{sha1,		SHA1dlen},
+	[0x03]	{sha2_224,	SHA2_224dlen},
+	[0x04]	{sha2_256,	SHA2_256dlen},
+	[0x05]	{sha2_384,	SHA2_384dlen},
+	[0x06]	{sha2_512,	SHA2_512dlen},
+};
+
+/*
+ * signature algorithms (only RSA and ECDSA at the moment).
+ * Each value is (hash id << 8) | signature id: the high byte matches a
+ * hashfun[] index, the low byte is 0x03 for ECDSA and 0x01 for RSA.
+ * tlsClientExtensions writes them to the ClientHello in this order.
+ */
+static int sigalgs[] = {
+	0x0603,		/* SHA512 ECDSA */
+	0x0503,		/* SHA384 ECDSA */
+	0x0403,		/* SHA256 ECDSA */
+	0x0203,		/* SHA1 ECDSA */
+
+	0x0601,		/* SHA512 RSA */
+	0x0501,		/* SHA384 RSA */
+	0x0401,		/* SHA256 RSA */
+	0x0201,		/* SHA1 RSA */
+};
+
+static TlsConnection *tlsServer2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	int (*trace)(char*fmt, ...), PEMChain *chain);	/* server handshake; nil on failure */
+static TlsConnection *tlsClient2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	uchar *ext, int extlen, int (*trace)(char*fmt, ...));	/* client handshake; nil on failure */
+static void	msgClear(Msg *m);
+static char* msgPrint(char *buf, int n, Msg *m);
+static int	msgRecv(TlsConnection *c, Msg *m);	/* returns 0 on failure */
+static int	msgSend(TlsConnection *c, Msg *m, int act);	/* act is AQueue or AFlush; 1 on success, 0 on failure */
+static void	tlsError(TlsConnection *c, int err, char *msg, ...);
+#pragma	varargck argpos	tlsError 3
+static int setVersion(TlsConnection *c, int version);
+static int setSecrets(TlsConnection *c, int isclient);
+static int finishedMatch(TlsConnection *c, Finished *f);
+static void tlsConnectionFree(TlsConnection *c);
+
+static int isDHE(int tlsid);	/* predicates on a cipher suite id */
+static int isECDHE(int tlsid);
+static int isPSK(int tlsid);
+static int isECDSA(int tlsid);
+
+static int setAlgs(TlsConnection *c, int a);
+static int okCipher(Ints *cv, int ispsk, int canec);
+static int okCompression(Bytes *cv);
+static int initCiphers(void);
+static Ints* makeciphers(int ispsk);
+
+static AuthRpc* factotum_rsa_open(RSApub *rsapub);
+static mpint* factotum_rsa_decrypt(AuthRpc *rpc, mpint *cipher);
+static void factotum_rsa_close(AuthRpc *rpc);
+
+static void	tlsSecInits(TlsSec *sec, int cvers, uchar *crandom);	/* names ending in s are called by tlsServer2, in c by tlsClient2 */
+static int	tlsSecRSAs(TlsSec *sec, Bytes *epm);
+static Bytes*	tlsSecECDHEs1(TlsSec *sec);
+static int	tlsSecECDHEs2(TlsSec *sec, Bytes *Yc);
+static void	tlsSecInitc(TlsSec *sec, int cvers);
+static Bytes*	tlsSecRSAc(TlsSec *sec, uchar *cert, int ncert);
+static Bytes*	tlsSecDHEc(TlsSec *sec, Bytes *p, Bytes *g, Bytes *Ys);
+static Bytes*	tlsSecECDHEc(TlsSec *sec, int curve, Bytes *Ys);
+static void	tlsSecVers(TlsSec *sec, int v);
+static int	tlsSecFinished(TlsSec *sec, HandshakeHash hsh, uchar *fin, int nfin, int isclient);
+static void	setMasterSecret(TlsSec *sec, Bytes *pm);
+static int	digestDHparams(TlsSec *sec, Bytes *par, uchar digest[MAXdlen], int sigalg);
+static char*	verifyDHparams(TlsSec *sec, Bytes *par, Bytes *cert, Bytes *sig, int sigalg);
+
+static Bytes*	pkcs1_encrypt(Bytes* data, RSApub* key);
+static Bytes*	pkcs1_decrypt(TlsSec *sec, Bytes *data);
+static Bytes*	pkcs1_sign(TlsSec *sec, uchar *digest, int digestlen, int sigalg);
+
+static void* emalloc(int);
+static void* erealloc(void*, int);
+static void put32(uchar *p, u32int);	/* putNN/getNN: store or fetch an NN-bit integer at p */
+static void put24(uchar *p, int);
+static void put16(uchar *p, int);
+static int get24(uchar *p);
+static int get16(uchar *p);
+static Bytes* newbytes(int len);
+static Bytes* makebytes(uchar* buf, int len);
+static Bytes* mptobytes(mpint* big, int len);
+static mpint* bytestomp(Bytes* bytes);
+static void freebytes(Bytes* b);
+static Ints* newints(int len);
+static void freeints(Ints* b);
+static int lookupid(Ints* b, int id);
+
+//================= client/server ========================
+
+/*
+ * tlsServer: push TLS onto fd, acting as the server.
+ *
+ * Clones a connection under #a/tls (its directory name is stored in
+ * conn->dir), hands fd to it through the ctl file and runs the
+ * handshake with tlsServer2.
+ *
+ * On success the caller's fd is closed, conn->cert is freed and
+ * cleared, the session id fields are reset and the new (application)
+ * data file descriptor is returned.  When conn->sessionKey is set and
+ * conn->sessionType is "ttls", the connection's prf is called with
+ * conn->sessionKey/sessionKeylen, the master secret,
+ * conn->sessionConst and the client+server randoms.
+ *
+ * Returns -1 on error; fd is then left open.
+ */
+int
+tlsServer(int fd, TLSconn *conn)
+{
+	char id[8], path[64];
+	uchar seed[2*RandomSize];
+	int nr, ctlfd, handfd, datafd;
+	TlsConnection *tc;
+
+	if(conn == nil)
+		return -1;
+
+	/* what is read from the clone file names the new connection directory */
+	if((ctlfd = open("#a/tls/clone", ORDWR|OCEXEC)) < 0)
+		return -1;
+	if((nr = read(ctlfd, id, sizeof(id)-1)) < 0){
+		close(ctlfd);
+		return -1;
+	}
+	id[nr] = 0;
+	snprint(conn->dir, sizeof(conn->dir), "#a/tls/%s", id);
+
+	snprint(path, sizeof(path), "#a/tls/%s/hand", id);
+	if((handfd = open(path, ORDWR|OCEXEC)) < 0){
+		close(ctlfd);
+		return -1;
+	}
+
+	fprint(ctlfd, "fd %d 0x%x", fd, ProtocolVersion);
+	tc = tlsServer2(ctlfd, handfd,
+		conn->cert, conn->certlen,
+		conn->pskID, conn->psk, conn->psklen,
+		conn->trace, conn->chain);
+
+	/* the data file is opened only after a successful handshake */
+	datafd = -1;
+	if(tc != nil){
+		snprint(path, sizeof(path), "#a/tls/%s/data", id);
+		datafd = open(path, ORDWR);
+	}
+	close(handfd);
+	close(ctlfd);
+	if(datafd < 0){
+		tlsConnectionFree(tc);
+		return -1;
+	}
+
+	free(conn->cert);
+	conn->cert = nil;	// client certificates are not yet implemented
+	conn->certlen = 0;
+	conn->sessionID = nil;
+	conn->sessionIDlen = 0;
+
+	if(conn->sessionKey != nil && conn->sessionType != nil
+	&& strcmp(conn->sessionType, "ttls") == 0){
+		/* seed is client random followed by server random */
+		memmove(&seed[0], tc->sec->crandom, RandomSize);
+		memmove(&seed[RandomSize], tc->sec->srandom, RandomSize);
+		tc->sec->prf(conn->sessionKey, conn->sessionKeylen,
+			tc->sec->sec, MasterSecretSize,
+			conn->sessionConst,
+			seed, sizeof(seed));
+	}
+	tlsConnectionFree(tc);
+	close(fd);
+	return datafd;
+}
+
+/*
+ * tlsClientExtensions: build the extensions block for the ClientHello.
+ *
+ * Appends, in this order: server_name (only when conn->serverName is
+ * set), elliptic_curves and ec_point_formats (ProtocolVersion >=
+ * TLS10Version), and signature algorithms (ProtocolVersion >=
+ * TLS12Version).
+ *
+ * Returns the buffer obtained from erealloc and stores its length in
+ * *plen; tlsClient releases it with free.
+ */
+static uchar*
+tlsClientExtensions(TLSconn *conn, int *plen)
+{
+	uchar *buf, *w;
+	int k, cnt, used;
+
+	buf = nil;
+	used = 0;	/* bytes of buf filled so far */
+
+	// RFC6066 - Server Name Identification
+	if(conn->serverName != nil){
+		cnt = strlen(conn->serverName);
+		buf = erealloc(buf, used + 2+2+2+1+2+cnt);
+		w = buf + used;
+
+		put16(w, Extsni);		/* Type: server_name */
+		put16(w+2, 2+1+2+cnt);		/* Length */
+		put16(w+4, 1+2+cnt);		/* Server Name list length */
+		w[6] = 0;			/* Server Name Type: host_name */
+		put16(w+7, cnt);		/* Server Name length */
+		memmove(w+9, conn->serverName, cnt);
+		used += 2+2+2+1+2+cnt;
+	}
+
+	// Elliptic Curves (also called Supported Groups)
+	if(ProtocolVersion >= TLS10Version){
+		buf = erealloc(buf, used + 2+2+2+nelem(namedcurves)*2 + 2+2+1+nelem(pointformats));
+		w = buf + used;
+
+		cnt = nelem(namedcurves);
+		put16(w, Extec);		/* Type: elliptic_curves / supported_groups */
+		w += 2;
+		put16(w, (cnt+1)*2);		/* Length */
+		w += 2;
+		put16(w, cnt*2);		/* Elliptic Curves Length */
+		w += 2;
+		for(k = 0; k < cnt; k++, w += 2)	/* Elliptic Curves */
+			put16(w, namedcurves[k].tlsid);
+
+		cnt = nelem(pointformats);
+		put16(w, Extecp);		/* Type: ec_point_formats */
+		w += 2;
+		put16(w, cnt+1);		/* Length */
+		w += 2;
+		*w++ = cnt;			/* EC point formats Length */
+		for(k = 0; k < cnt; k++)	/* EC point formats */
+			*w++ = pointformats[k];
+
+		used = w - buf;
+	}
+
+	// signature algorithms
+	if(ProtocolVersion >= TLS12Version){
+		cnt = nelem(sigalgs);
+		buf = erealloc(buf, used + 2+2+2+cnt*2);
+		w = buf + used;
+
+		put16(w, Extsigalgs);
+		w += 2;
+		put16(w, cnt*2 + 2);
+		w += 2;
+		put16(w, cnt*2);
+		w += 2;
+		for(k = 0; k < cnt; k++, w += 2)
+			put16(w, sigalgs[k]);
+
+		used = w - buf;
+	}
+
+	*plen = used;
+	return buf;
+}
+
+/*
+ * tlsClient: push TLS onto fd, acting as the client.
+ *
+ * Clones a connection under #a/tls (its directory name is stored in
+ * conn->dir), opens its hand and data files, hands fd to it through
+ * the ctl file and runs the handshake with tlsClient2.
+ *
+ * On success the caller's fd is closed, conn->cert is replaced by a
+ * copy of the certificate the handshake recorded in tls->cert (or
+ * nil), the session id fields are reset and the new (application)
+ * data file descriptor is returned.  When conn->sessionKey is set and
+ * conn->sessionType is "ttls", the connection's prf is called with
+ * conn->sessionKey/sessionKeylen, the master secret,
+ * conn->sessionConst and the client+server randoms.
+ *
+ * Returns -1 on error; fd is then left open.
+ */
+int
+tlsClient(int fd, TLSconn *conn)
+{
+	char id[8], path[64];
+	uchar seed[2*RandomSize];
+	uchar *exts;
+	int nr, ctlfd, handfd, datafd;
+	TlsConnection *tc;
+
+	if(conn == nil)
+		return -1;
+
+	/* what is read from the clone file names the new connection directory */
+	if((ctlfd = open("#a/tls/clone", ORDWR|OCEXEC)) < 0)
+		return -1;
+	if((nr = read(ctlfd, id, sizeof(id)-1)) < 0){
+		close(ctlfd);
+		return -1;
+	}
+	id[nr] = 0;
+	snprint(conn->dir, sizeof(conn->dir), "#a/tls/%s", id);
+
+	snprint(path, sizeof(path), "#a/tls/%s/hand", id);
+	if((handfd = open(path, ORDWR|OCEXEC)) < 0){
+		close(ctlfd);
+		return -1;
+	}
+	snprint(path, sizeof(path), "#a/tls/%s/data", id);
+	if((datafd = open(path, ORDWR)) < 0){
+		close(handfd);
+		close(ctlfd);
+		return -1;
+	}
+
+	fprint(ctlfd, "fd %d 0x%x", fd, ProtocolVersion);
+	exts = tlsClientExtensions(conn, &nr);	/* nr now holds the extensions length */
+	tc = tlsClient2(ctlfd, handfd,
+		conn->cert, conn->certlen,
+		conn->pskID, conn->psk, conn->psklen,
+		exts, nr, conn->trace);
+	free(exts);
+	close(handfd);
+	close(ctlfd);
+	if(tc == nil){
+		close(datafd);
+		return -1;
+	}
+
+	/* replace our certificate with a copy of the one in tc->cert */
+	free(conn->cert);
+	if(tc->cert == nil){
+		conn->certlen = 0;
+		conn->cert = nil;
+	}else{
+		conn->certlen = tc->cert->len;
+		conn->cert = emalloc(conn->certlen);
+		memcpy(conn->cert, tc->cert->data, conn->certlen);
+	}
+	conn->sessionID = nil;
+	conn->sessionIDlen = 0;
+
+	if(conn->sessionKey != nil && conn->sessionType != nil
+	&& strcmp(conn->sessionType, "ttls") == 0){
+		/* seed is client random followed by server random */
+		memmove(&seed[0], tc->sec->crandom, RandomSize);
+		memmove(&seed[RandomSize], tc->sec->srandom, RandomSize);
+		tc->sec->prf(conn->sessionKey, conn->sessionKeylen,
+			tc->sec->sec, MasterSecretSize,
+			conn->sessionConst,
+			seed, sizeof(seed));
+	}
+	tlsConnectionFree(tc);
+	close(fd);
+	return datafd;
+}
+
+/*
+ * countchain: number of entries on the list starting at p,
+ * following the next links; 0 for a nil list.
+ */
+static int
+countchain(PEMChain *p)
+{
+	int n;
+
+	for(n = 0; p != nil; p = p->next)
+		n++;
+	return n;
+}
+
+/*
+ * checkClientExtensions: scan the ClientHello extensions block and
+ * record in c->sec->nc the first entry of namedcurves[] that the
+ * client also lists in its elliptic_curves (supported_groups)
+ * extension.  Every other extension type is skipped.
+ *
+ * ext may be nil: the SSL2-format ClientHello path in msgRecv fills in
+ * no extensions field.  That is treated as "no extensions".
+ *
+ * Each extension is a 2-byte type, a 2-byte length and that many body
+ * bytes.  All lengths are validated against the remaining input before
+ * the bytes they describe are read, and the cursor always advances by
+ * the declared extension length.
+ *
+ * Returns 0 on success.  On a malformed length, reports EDecodeError
+ * through tlsError and returns -1.
+ */
+static int
+checkClientExtensions(TlsConnection *c, Bytes *ext)
+{
+	uchar *p, *e;
+	int i, j, n, type, nlist;
+
+	if(ext == nil)
+		return 0;
+
+	p = ext->data;
+	e = p+ext->len;
+	while(p < e){
+		/* type and length must both be present before reading them */
+		if(e-p < 4)
+			goto Short;
+		type = get16(p);
+		n = get16(p+2);
+		p += 4;
+		if(e-p < n)
+			goto Short;
+
+		switch(type){
+		case Extec:
+			/* body: 2-byte list length, then 2-byte curve ids */
+			if(n < 2)
+				goto Short;
+			nlist = get16(p);
+			if(nlist > n-2 || nlist & 1 || nlist == 0)
+				goto Short;
+			/* outer loop over our table, so our order is the preference */
+			for(i = 0; i < nelem(namedcurves) && c->sec->nc == nil; i++)
+				for(j = 0; j < nlist; j += 2)
+					if(namedcurves[i].tlsid == get16(p+2+j)){
+						c->sec->nc = &namedcurves[i];
+						break;
+					}
+			break;
+		default:
+			break;
+		}
+		p += n;
+	}
+
+	return 0;
+Short:
+	tlsError(c, EDecodeError, "clienthello extensions has invalid length");
+	return -1;
+}
+/*
+ * tlsServer2: run the server side of the handshake over the hand file.
+ *
+ * Sequence, as coded below: receive ClientHello and settle the
+ * version, load the certificate (certlen > 0) and/or psk, pick cipher
+ * suite and compression, send ServerHello [Certificate]
+ * [ServerKeyExchange] ServerHelloDone, receive ClientKeyExchange,
+ * derive and install the secrets, check the client's Finished, write
+ * "changecipher" to ctl, send our Finished and write "opened" to ctl.
+ *
+ * Returns the connection on success.  After the connection has been
+ * allocated, any failure clears the message, frees the connection and
+ * returns nil.
+ */
+static TlsConnection *
+tlsServer2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	int (*trace)(char*fmt, ...), PEMChain *chp)
+{
+	int cipher, compressor, numcerts, i;
+	TlsConnection *c;
+	Msg m;
+
+	if(trace)
+		trace("tlsServer2\n");
+	if(!initCiphers())
+		return nil;
+
+	c = emalloc(sizeof(TlsConnection));
+	c->ctl = ctl;
+	c->hand = hand;
+	c->trace = trace;
+	c->version = ProtocolVersion;
+	c->sendp = c->buf;	/* nothing queued for sending yet */
+
+	memset(&m, 0, sizeof(m));
+	if(!msgRecv(c, &m)){
+		if(trace)
+			trace("initial msgRecv failed\n");
+		goto Err;
+	}
+	if(m.tag != HClientHello) {
+		tlsError(c, EUnexpectedMessage, "expected a client hello");
+		goto Err;
+	}
+	if(trace)
+		trace("ClientHello version %x\n", m.u.clientHello.version);
+	if(setVersion(c, m.u.clientHello.version) < 0) {
+		tlsError(c, EIllegalParameter, "incompatible version");
+		goto Err;
+	}
+	if(c->version < ProtocolVersion	/* settled below our best version */
+	&& lookupid(m.u.clientHello.ciphers, TLS_FALLBACK_SCSV) >= 0){
+		tlsError(c, EInappropriateFallback, "inappropriate fallback");
+		goto Err;
+	}
+	tlsSecInits(c->sec, m.u.clientHello.version, m.u.clientHello.random);
+	tlsSecVers(c->sec, c->version);
+	if(psklen > 0){
+		c->sec->psk = psk;	/* pointer only; the key is not copied */
+		c->sec->psklen = psklen;
+	}
+	if(certlen > 0){
+		/* server certificate: extract the RSA public key and open a factotum rpc for it */
+		c->sec->rsapub = X509toRSApub(cert, certlen, nil, 0);
+		if(c->sec->rsapub == nil){
+			tlsError(c, EHandshakeFailure, "invalid X509/rsa certificate");
+			goto Err;
+		}
+		c->sec->rpc = factotum_rsa_open(c->sec->rsapub);
+		if(c->sec->rpc == nil){
+			tlsError(c, EHandshakeFailure, "factotum_rsa_open: %r");
+			goto Err;
+		}
+	}
+	if(checkClientExtensions(c, m.u.clientHello.extensions) < 0)	/* may set c->sec->nc */
+		goto Err;
+	cipher = okCipher(m.u.clientHello.ciphers, psklen > 0, c->sec->nc != nil);
+	if(cipher < 0 || !setAlgs(c, cipher)) {
+		tlsError(c, EHandshakeFailure, "no matching cipher suite");
+		goto Err;
+	}
+	compressor = okCompression(m.u.clientHello.compressors);
+	if(compressor < 0) {
+		tlsError(c, EHandshakeFailure, "no matching compressor");
+		goto Err;
+	}
+	if(trace)
+		trace("  cipher %x, compressor %x\n", cipher, compressor);
+	msgClear(&m);
+
+	m.tag = HServerHello;
+	m.u.serverHello.version = c->version;
+	memmove(m.u.serverHello.random, c->sec->srandom, RandomSize);
+	m.u.serverHello.cipher = cipher;
+	m.u.serverHello.compressor = compressor;
+	m.u.serverHello.sid = makebytes(nil, 0);	/* empty session id */
+	if(!msgSend(c, &m, AQueue))
+		goto Err;
+
+	if(certlen > 0){
+		m.tag = HCertificate;
+		numcerts = countchain(chp);
+		m.u.certificate.ncert = 1 + numcerts;	/* our certificate followed by the chain */
+		m.u.certificate.certs = emalloc(m.u.certificate.ncert * sizeof(Bytes*));
+		m.u.certificate.certs[0] = makebytes(cert, certlen);
+		for (i = 0; i < numcerts && chp; i++, chp = chp->next)
+			m.u.certificate.certs[i+1] = makebytes(chp->pem, chp->pemlen);
+		if(!msgSend(c, &m, AQueue))
+			goto Err;
+	}
+
+	if(isECDHE(cipher)){
+		m.tag = HServerKeyExchange;
+		m.u.serverKeyExchange.curve = c->sec->nc->tlsid;
+		m.u.serverKeyExchange.dh_parameters = tlsSecECDHEs1(c->sec);
+		if(m.u.serverKeyExchange.dh_parameters == nil){
+			tlsError(c, EInternalError, "can't set DH parameters");
+			goto Err;
+		}
+
+		/* sign the DH parameters; sigalg stays 0 below TLS 1.2 */
+		if(certlen > 0){
+			uchar digest[MAXdlen];
+			int digestlen;
+
+			if(c->version >= TLS12Version)
+				m.u.serverKeyExchange.sigalg = 0x0401;	/* RSA SHA256 */
+			digestlen = digestDHparams(c->sec, m.u.serverKeyExchange.dh_parameters,
+				digest, m.u.serverKeyExchange.sigalg);
+			if((m.u.serverKeyExchange.dh_signature = pkcs1_sign(c->sec, digest, digestlen,
+				m.u.serverKeyExchange.sigalg)) == nil){
+				tlsError(c, EHandshakeFailure, "pkcs1_sign: %r");
+				goto Err;
+			}
+		}
+		if(!msgSend(c, &m, AQueue))
+			goto Err;
+	}
+
+	m.tag = HServerHelloDone;
+	if(!msgSend(c, &m, AFlush))	/* writes out everything queued above */
+		goto Err;
+
+	if(!msgRecv(c, &m))
+		goto Err;
+	if(m.tag != HClientKeyExchange) {
+		tlsError(c, EUnexpectedMessage, "expected a client key exchange");
+		goto Err;
+	}
+	if(pskid != nil){
+		if(m.u.clientKeyExchange.pskid == nil
+		|| m.u.clientKeyExchange.pskid->len != strlen(pskid)
+		|| memcmp(pskid, m.u.clientKeyExchange.pskid->data, m.u.clientKeyExchange.pskid->len) != 0){
+			tlsError(c, EUnknownPSKidentity, "unknown or missing pskid");
+			goto Err;
+		}
+	}
+	if(isECDHE(cipher)){
+		if(tlsSecECDHEs2(c->sec, m.u.clientKeyExchange.key) < 0){
+			tlsError(c, EHandshakeFailure, "couldn't set keys: %r");
+			goto Err;
+		}
+	} else if(certlen > 0){
+		if(tlsSecRSAs(c->sec, m.u.clientKeyExchange.key) < 0){
+			tlsError(c, EHandshakeFailure, "couldn't set keys: %r");
+			goto Err;
+		}
+	} else if(psklen > 0){
+		setMasterSecret(c->sec, newbytes(psklen));
+	} else {
+		tlsError(c, EInternalError, "no psk or certificate");
+		goto Err;
+	}
+
+	if(trace)
+		trace("tls secrets\n");
+	if(setSecrets(c, 0) < 0){
+		tlsError(c, EHandshakeFailure, "can't set secrets: %r");
+		goto Err;
+	}
+
+	/* no CertificateVerify; skip to Finished (computed with isclient=1, then compared with what the client sent) */
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 1) < 0){
+		tlsError(c, EInternalError, "can't set finished: %r");
+		goto Err;
+	}
+	if(!msgRecv(c, &m))
+		goto Err;
+	if(m.tag != HFinished) {
+		tlsError(c, EUnexpectedMessage, "expected a finished");
+		goto Err;
+	}
+	if(!finishedMatch(c, &m.u.finished)) {
+		tlsError(c, EHandshakeFailure, "finished verification failed");
+		goto Err;
+	}
+	msgClear(&m);
+
+	/* change cipher spec */
+	if(fprint(c->ctl, "changecipher") < 0){
+		tlsError(c, EInternalError, "can't enable cipher: %r");
+		goto Err;
+	}
+
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 0) < 0){	/* our own Finished (isclient=0) */
+		tlsError(c, EInternalError, "can't set finished: %r");
+		goto Err;
+	}
+	m.tag = HFinished;
+	m.u.finished = c->finished;
+	if(!msgSend(c, &m, AFlush))
+		goto Err;
+	if(trace)
+		trace("tls finished\n");
+
+	if(fprint(c->ctl, "opened") < 0)
+		goto Err;
+	return c;
+
+Err:
+	msgClear(&m);
+	tlsConnectionFree(c);
+	return nil;
+}
+
+/*
+ * tlsSecDHEc: client side of a finite-field DHE key exchange.
+ *
+ * p, g and Ys are the prime, generator and server public value taken
+ * from the ServerKeyExchange.  Generates our key pair in sec->dh,
+ * finishes the exchange against Ys and installs the result with
+ * setMasterSecret.
+ *
+ * Returns our public value, padded to the byte length of the prime,
+ * or nil if an argument is missing, the prime is 1024 bits or
+ * shorter, or the exchange fails.
+ */
+static Bytes*
+tlsSecDHEc(TlsSec *sec, Bytes *p, Bytes *g, Bytes *Ys)
+{
+	DHstate *st;
+	mpint *prime, *gen, *srvpub, *shared;
+	Bytes *ours;
+	int plen;
+
+	if(p == nil || g == nil || Ys == nil)
+		return nil;
+	// reject DH primes that are susceptible to logjam
+	if(p->len <= 1024/8)
+		return nil;
+
+	st = &sec->dh;
+	ours = nil;
+	shared = nil;
+	prime = bytestomp(p);
+	gen = bytestomp(g);
+	srvpub = bytestomp(Ys);
+
+	if(dh_new(st, prime, nil, gen) != nil){
+		plen = (mpsignif(prime)+7)/8;
+		ours = mptobytes(st->y, plen);
+		shared = dh_finish(st, srvpub);	/* zeros dh */
+		if(shared != nil)
+			setMasterSecret(sec, mptobytes(shared, plen));
+		else{
+			/* no shared secret: do not hand back a public value */
+			freebytes(ours);
+			ours = nil;
+		}
+	}
+
+	mpfree(shared);
+	mpfree(srvpub);
+	mpfree(gen);
+	mpfree(prime);
+
+	return ours;
+}
+
+/*
+ * tlsSecECDHEc: client side of an ECDHE key exchange.
+ *
+ * curve is the curve id from the ServerKeyExchange and Ys the server's
+ * public value.  X25519 goes through the curve25519_dh_* routines;
+ * any other id is looked up in namedcurves[] and handled with the
+ * generic EC routines on sec->ec.  In both cases the shared secret is
+ * installed with setMasterSecret.
+ *
+ * Returns our encoded public value, or nil if Ys is missing or
+ * malformed or the curve is not one we support.
+ */
+static Bytes*
+tlsSecECDHEc(TlsSec *sec, int curve, Bytes *Ys)
+{
+	ECdomain *dom = &sec->ec.dom;
+	ECpriv *Q = &sec->ec.Q;
+	ECpub *pub;
+	ECpoint K;
+	Namedcurve *nc;
+	Bytes *Yc;
+	Bytes *Z;
+	int n;
+
+	if(Ys == nil)
+		return nil;
+
+	if(curve == X25519){
+		if(Ys->len != 32)
+			return nil;
+		Yc = newbytes(32);
+		curve25519_dh_new(sec->X, Yc->data);
+		Z = newbytes(32);
+		if(!curve25519_dh_finish(sec->X, Ys->data, Z->data)){
+			freebytes(Yc);
+			freebytes(Z);
+			return nil;
+		}
+		setMasterSecret(sec, Z);
+	}else{
+		/*
+		 * Test the table bound before looking at an entry, so an
+		 * unknown curve id never reads past the end of namedcurves[].
+		 */
+		for(nc = namedcurves; nc < &namedcurves[nelem(namedcurves)]; nc++)
+			if(nc->tlsid == curve)
+				break;
+		/* entries without an init function cannot set up an ECdomain */
+		if(nc == &namedcurves[nelem(namedcurves)] || nc->init == nil)
+			return nil;
+		ecdominit(dom, nc->init);
+		pub = ecdecodepub(dom, Ys->data, Ys->len);
+		if(pub == nil)
+			return nil;
+
+		memset(Q, 0, sizeof(*Q));
+		Q->x = mpnew(0);
+		Q->y = mpnew(0);
+		Q->d = mpnew(0);
+
+		memset(&K, 0, sizeof(K));
+		K.x = mpnew(0);
+		K.y = mpnew(0);
+
+		/* our key pair goes in Q; K = Q->d * server public point */
+		ecgen(dom, Q);
+		ecmul(dom, pub, Q->d, &K);
+
+		/* the x coordinate, padded to the field size, is the secret */
+		n = (mpsignif(dom->p)+7)/8;
+		setMasterSecret(sec, mptobytes(K.x, n));
+		Yc = newbytes(1 + 2*n);
+		Yc->len = ecencodepub(dom, Q, Yc->data, Yc->len);
+
+		mpfree(K.x);
+		mpfree(K.y);
+
+		ecpubfree(pub);
+	}
+	return Yc;
+}
+/*
+ * tlsClient2: run the client side of the handshake over the hand file.
+ *
+ * Sequence, as coded below: send ClientHello (with the caller's
+ * extensions), receive ServerHello and settle version and cipher,
+ * receive [Certificate] [ServerKeyExchange] [CertificateRequest]
+ * ServerHelloDone, derive the master secret (DHE/ECDHE, RSA or PSK),
+ * install the secrets, send [Certificate] ClientKeyExchange
+ * [CertificateVerify], write "changecipher" to ctl, send Finished,
+ * check the server's Finished and write "opened" to ctl.
+ *
+ * Returns the connection on success; c->cert then holds a copy of the
+ * first certificate the server sent, if any.  After the connection has
+ * been allocated, any failure clears the message, frees the connection
+ * and returns nil.
+ */
+static TlsConnection *
+tlsClient2(int ctl, int hand,
+	uchar *cert, int certlen,
+	char *pskid, uchar *psk, int psklen,
+	uchar *ext, int extlen,
+	int (*trace)(char*fmt, ...))
+{
+	int creq, dhx, cipher;
+	TlsConnection *c;
+	Bytes *epm;
+	Msg m;
+
+	if(!initCiphers())
+		return nil;
+
+	epm = nil;
+	memset(&m, 0, sizeof(m));
+	c = emalloc(sizeof(TlsConnection));
+
+	c->ctl = ctl;
+	c->hand = hand;
+	c->trace = trace;
+	c->cert = nil;
+	c->sendp = c->buf;	/* nothing queued for sending yet */
+
+	c->version = ProtocolVersion;
+	tlsSecInitc(c->sec, c->version);
+	if(psklen > 0){
+		c->sec->psk = psk;	/* pointer only; the key is not copied */
+		c->sec->psklen = psklen;
+	}
+	if(certlen > 0){
+		/* client certificate: extract the RSA public key and open a factotum rpc for it */
+		c->sec->rsapub = X509toRSApub(cert, certlen, nil, 0);
+		if(c->sec->rsapub == nil){
+			tlsError(c, EInternalError, "invalid X509/rsa certificate");
+			goto Err;
+		}
+		c->sec->rpc = factotum_rsa_open(c->sec->rsapub);
+		if(c->sec->rpc == nil){
+			tlsError(c, EInternalError, "factotum_rsa_open: %r");
+			goto Err;
+		}
+	}
+
+	/* client hello */
+	m.tag = HClientHello;
+	m.u.clientHello.version = c->version;
+	memmove(m.u.clientHello.random, c->sec->crandom, RandomSize);
+	m.u.clientHello.sid = makebytes(nil, 0);	/* empty session id */
+	m.u.clientHello.ciphers = makeciphers(psklen > 0);
+	m.u.clientHello.compressors = makebytes(compressors,sizeof(compressors));
+	m.u.clientHello.extensions = makebytes(ext, extlen);
+	if(!msgSend(c, &m, AFlush))
+		goto Err;
+
+	/* server hello */
+	if(!msgRecv(c, &m))
+		goto Err;
+	if(m.tag != HServerHello) {
+		tlsError(c, EUnexpectedMessage, "expected a server hello");
+		goto Err;
+	}
+	if(setVersion(c, m.u.serverHello.version) < 0) {
+		tlsError(c, EIllegalParameter, "incompatible version: %r");
+		goto Err;
+	}
+	tlsSecVers(c->sec, c->version);
+	memmove(c->sec->srandom, m.u.serverHello.random, RandomSize);
+
+	cipher = m.u.serverHello.cipher;
+	if((psklen > 0) != isPSK(cipher) || !setAlgs(c, cipher)) {	/* PSK suite if and only if we hold a psk */
+		tlsError(c, EIllegalParameter, "invalid cipher suite");
+		goto Err;
+	}
+	if(m.u.serverHello.compressor != CompressionNull) {
+		tlsError(c, EIllegalParameter, "invalid compression");
+		goto Err;
+	}
+	dhx = isDHE(cipher) || isECDHE(cipher);	/* these suites require a ServerKeyExchange, see below */
+	if(!msgRecv(c, &m))
+		goto Err;
+	if(m.tag == HCertificate){
+		if(m.u.certificate.ncert < 1) {
+			tlsError(c, EIllegalParameter, "runt certificate");
+			goto Err;
+		}
+		c->cert = makebytes(m.u.certificate.certs[0]->data, m.u.certificate.certs[0]->len);	/* keep a copy of the first one */
+		if(!msgRecv(c, &m))
+			goto Err;
+	} else if(psklen == 0) {
+		tlsError(c, EUnexpectedMessage, "expected a certificate");
+		goto Err;
+	}
+	if(m.tag == HServerKeyExchange) {
+		if(dhx){
+			char *err = verifyDHparams(c->sec,
+				m.u.serverKeyExchange.dh_parameters,
+				c->cert,
+				m.u.serverKeyExchange.dh_signature,
+				c->version<TLS12Version ? 0x01 : m.u.serverKeyExchange.sigalg);
+			if(err != nil){
+				tlsError(c, EBadCertificate, "can't verify DH parameters: %s", err);
+				goto Err;
+			}
+			if(isECDHE(cipher))
+				epm = tlsSecECDHEc(c->sec,
+					m.u.serverKeyExchange.curve,
+					m.u.serverKeyExchange.dh_Ys);
+			else
+				epm = tlsSecDHEc(c->sec,
+					m.u.serverKeyExchange.dh_p, 
+					m.u.serverKeyExchange.dh_g,
+					m.u.serverKeyExchange.dh_Ys);
+			if(epm == nil){
+				tlsError(c, EHandshakeFailure, "bad DH parameters");
+				goto Err;
+			}
+		} else if(psklen == 0){
+			tlsError(c, EUnexpectedMessage, "got an server key exchange");
+			goto Err;
+		}
+		if(!msgRecv(c, &m))
+			goto Err;
+	} else if(dhx){
+		tlsError(c, EUnexpectedMessage, "expected server key exchange");
+		goto Err;
+	}
+
+	/* certificate request (optional) */
+	creq = 0;
+	if(m.tag == HCertificateRequest) {
+		creq = 1;
+		if(!msgRecv(c, &m))
+			goto Err;
+	}
+
+	if(m.tag != HServerHelloDone) {
+		tlsError(c, EUnexpectedMessage, "expected a server hello done");
+		goto Err;
+	}
+	msgClear(&m);
+
+	if(!dhx){	/* for DHE/ECDHE the master secret was already set above */
+		if(c->cert != nil){
+			epm = tlsSecRSAc(c->sec, c->cert->data, c->cert->len);
+			if(epm == nil){
+				tlsError(c, EBadCertificate, "bad certificate: %r");
+				goto Err;
+			}
+		} else if(psklen > 0){
+			setMasterSecret(c->sec, newbytes(psklen));
+		} else {
+			tlsError(c, EInternalError, "no psk or certificate");
+			goto Err;
+		}
+	}
+
+	if(trace)
+		trace("tls secrets\n");
+	if(setSecrets(c, 1) < 0){
+		tlsError(c, EHandshakeFailure, "can't set secrets: %r");
+		goto Err;
+	}
+
+	if(creq) {
+		m.tag = HCertificate;	/* sent with no entries when we have no certificate */
+		if(certlen > 0){
+			m.u.certificate.ncert = 1;
+			m.u.certificate.certs = emalloc(m.u.certificate.ncert * sizeof(Bytes*));
+			m.u.certificate.certs[0] = makebytes(cert, certlen);
+		}		
+		if(!msgSend(c, &m, AFlush))
+			goto Err;
+	}
+
+	/* client key exchange */
+	m.tag = HClientKeyExchange;
+	if(psklen > 0){
+		if(pskid == nil)
+			pskid = "";
+		m.u.clientKeyExchange.pskid = makebytes((uchar*)pskid, strlen(pskid));
+	}
+	m.u.clientKeyExchange.key = epm;
+	epm = nil;	/* m holds the pointer now; keep Err from freeing it as well */
+	 
+	if(!msgSend(c, &m, AFlush))
+		goto Err;
+
+	/* certificate verify */
+	if(creq && certlen > 0) {
+		HandshakeHash hsave;
+		uchar digest[MAXdlen];
+		int digestlen;
+
+		/* save the state for the Finish message; the digest calls below run on c->handhash itself */
+		hsave = c->handhash;
+		if(c->version < TLS12Version){
+			md5(nil, 0, digest, &c->handhash.md5);
+			sha1(nil, 0, digest+MD5dlen, &c->handhash.sha1);
+			digestlen = MD5dlen+SHA1dlen;
+		} else {
+			m.u.certificateVerify.sigalg = 0x0401;	/* RSA SHA256 */
+			sha2_256(nil, 0, digest, &c->handhash.sha2_256);
+			digestlen = SHA2_256dlen;
+		}
+		c->handhash = hsave;
+
+		if((m.u.certificateVerify.signature = pkcs1_sign(c->sec, digest, digestlen,
+			m.u.certificateVerify.sigalg)) == nil){
+			tlsError(c, EHandshakeFailure, "pkcs1_sign: %r");
+			goto Err;
+		}
+
+		m.tag = HCertificateVerify;
+		if(!msgSend(c, &m, AFlush))
+			goto Err;
+	} 
+
+	/* change cipher spec */
+	if(fprint(c->ctl, "changecipher") < 0){
+		tlsError(c, EInternalError, "can't enable cipher: %r");
+		goto Err;
+	}
+
+	// Cipherchange must occur immediately before Finished to avoid
+	// potential hole;  see section 4.3 of Wagner Schneier 1996.
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 1) < 0){	/* our own Finished (isclient=1) */
+		tlsError(c, EInternalError, "can't set finished 1: %r");
+		goto Err;
+	}
+	m.tag = HFinished;
+	m.u.finished = c->finished;
+	if(!msgSend(c, &m, AFlush)) {
+		tlsError(c, EInternalError, "can't flush after client Finished: %r");
+		goto Err;
+	}
+
+	if(tlsSecFinished(c->sec, c->handhash, c->finished.verify, c->finished.n, 0) < 0){	/* computed with isclient=0, before reading the server's Finished */
+		tlsError(c, EInternalError, "can't set finished 0: %r");
+		goto Err;
+	}
+	if(!msgRecv(c, &m)) {
+		tlsError(c, EInternalError, "can't read server Finished: %r");
+		goto Err;
+	}
+	if(m.tag != HFinished) {
+		tlsError(c, EUnexpectedMessage, "expected a Finished msg from server");
+		goto Err;
+	}
+
+	if(!finishedMatch(c, &m.u.finished)) {
+		tlsError(c, EHandshakeFailure, "finished verification failed");
+		goto Err;
+	}
+	msgClear(&m);
+
+	if(fprint(c->ctl, "opened") < 0){
+		if(trace)
+			trace("unable to do final open: %r\n");
+		goto Err;
+	}
+	return c;
+
+Err:
+	free(epm);
+	msgClear(&m);
+	tlsConnectionFree(c);
+	return nil;
+}
+
+
+//================= message functions ========================
+
+/*
+ * msgHash: fold n bytes at p into the running handshake hashes.
+ * MD5 and SHA1 are always updated; SHA2-256 only once the connection
+ * version is TLS 1.2 or later.
+ */
+static void
+msgHash(TlsConnection *c, uchar *p, int n)
+{
+	HandshakeHash *hh;
+
+	hh = &c->handhash;
+	md5(p, n, nil, &hh->md5);
+	sha1(p, n, nil, &hh->sha1);
+	if(c->version < TLS12Version)
+		return;
+	sha2_256(p, n, nil, &hh->sha2_256);
+}
+
+/*
+ * msgSend: encode handshake message m at c->sendp, behind a 4-byte
+ * header (tag, 24-bit body length), and fold the encoded bytes into
+ * the handshake hash (HHelloRequest excepted).
+ *
+ * act == AFlush writes everything accumulated in c->buf to c->hand
+ * and rewinds c->sendp; any other value (callers pass AQueue) leaves
+ * the message queued behind c->sendp.
+ *
+ * m is cleared with msgClear before returning, on success and on
+ * failure.  Returns 1 on success, 0 on error (unknown tag, message
+ * does not fit in c->buf, or write failure).
+ */
+static int
+msgSend(TlsConnection *c, Msg *m, int act)
+{
+	uchar *p, *e; // sendp = start of new message;  p = write pointer; e = end pointer
+	int n, i;
+
+	p = c->sendp;
+	e = &c->buf[sizeof(c->buf)];
+	if(c->trace)
+		c->trace("send %s", msgPrint((char*)p, e - p, m));
+
+	/* the header itself has to fit before anything is written */
+	if(p+4 > e)
+		goto Overflow;
+	p[0] = m->tag;	// header - fill in size later
+	p += 4;
+
+	switch(m->tag) {
+	default:
+		tlsError(c, EInternalError, "can't encode a %d", m->tag);
+		goto Err;
+	case HClientHello:
+		if(p+2+RandomSize > e)
+			goto Overflow;
+		put16(p, m->u.clientHello.version), p += 2;
+		memmove(p, m->u.clientHello.random, RandomSize);
+		p += RandomSize;
+
+		if(p+1+(n = m->u.clientHello.sid->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.clientHello.sid->data, n);
+		p += n;
+
+		/* n cipher ids of 2 bytes each follow the 2-byte list length */
+		if(p+2+2*(n = m->u.clientHello.ciphers->len) > e)
+			goto Overflow;
+		put16(p, n*2), p += 2;
+		for(i=0; i<n; i++)
+			put16(p, m->u.clientHello.ciphers->data[i]), p += 2;
+
+		if(p+1+(n = m->u.clientHello.compressors->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.clientHello.compressors->data, n);
+		p += n;
+
+		/* the extensions block is left out entirely when empty */
+		if(m->u.clientHello.extensions == nil
+		|| (n = m->u.clientHello.extensions->len) == 0)
+			break;
+		if(p+2+n > e)
+			goto Overflow;
+		put16(p, n), p += 2;
+		memmove(p, m->u.clientHello.extensions->data, n);
+		p += n;
+		break;
+	case HServerHello:
+		if(p+2+RandomSize > e)
+			goto Overflow;
+		put16(p, m->u.serverHello.version), p += 2;
+		memmove(p, m->u.serverHello.random, RandomSize);
+		p += RandomSize;
+
+		if(p+1+(n = m->u.serverHello.sid->len) > e)
+			goto Overflow;
+		*p++ = n;
+		memmove(p, m->u.serverHello.sid->data, n);
+		p += n;
+
+		if(p+2+1 > e)
+			goto Overflow;
+		put16(p, m->u.serverHello.cipher), p += 2;
+		*p++ = m->u.serverHello.compressor;
+
+		/* the extensions block is left out entirely when empty */
+		if(m->u.serverHello.extensions == nil
+		|| (n = m->u.serverHello.extensions->len) == 0)
+			break;
+		if(p+2+n > e)
+			goto Overflow;
+		put16(p, n), p += 2;
+		memmove(p, m->u.serverHello.extensions->data, n);
+		p += n;
+		break;
+	case HServerHelloDone:
+		break;
+	case HCertificate:
+		/* total list length first: 3 length bytes per certificate */
+		n = 0;
+		for(i = 0; i < m->u.certificate.ncert; i++)
+			n += 3 + m->u.certificate.certs[i]->len;
+		if(p+3+n > e)
+			goto Overflow;
+		put24(p, n), p += 3;
+		for(i = 0; i < m->u.certificate.ncert; i++){
+			n = m->u.certificate.certs[i]->len;
+			put24(p, n), p += 3;
+			memmove(p, m->u.certificate.certs[i]->data, n);
+			p += n;
+		}
+		break;
+	case HCertificateVerify:
+		if(p+2+2+(n = m->u.certificateVerify.signature->len) > e)
+			goto Overflow;
+		/* sigalg is written only when set (callers set it for TLS 1.2) */
+		if(m->u.certificateVerify.sigalg != 0)
+			put16(p, m->u.certificateVerify.sigalg), p += 2;
+		put16(p, n), p += 2;
+		memmove(p, m->u.certificateVerify.signature->data, n);
+		p += n;
+		break;
+	case HServerKeyExchange:
+		if(m->u.serverKeyExchange.pskid != nil){
+			if(p+2+(n = m->u.serverKeyExchange.pskid->len) > e)
+				goto Overflow;
+			put16(p, n), p += 2;
+			memmove(p, m->u.serverKeyExchange.pskid->data, n);
+			p += n;
+		}
+		if(m->u.serverKeyExchange.dh_parameters == nil)
+			break;
+		/* dh_parameters is copied as is, with no length prefix */
+		if(p+(n = m->u.serverKeyExchange.dh_parameters->len) > e)
+			goto Overflow;
+		memmove(p, m->u.serverKeyExchange.dh_parameters->data, n);
+		p += n;
+		if(m->u.serverKeyExchange.dh_signature == nil)
+			break;
+		if(p+2+2+(n = m->u.serverKeyExchange.dh_signature->len) > e)
+			goto Overflow;
+		if(c->version >= TLS12Version)
+			put16(p, m->u.serverKeyExchange.sigalg), p += 2;
+		put16(p, n), p += 2;
+		memmove(p, m->u.serverKeyExchange.dh_signature->data, n);
+		p += n;
+		break;
+	case HClientKeyExchange:
+		if(m->u.clientKeyExchange.pskid != nil){
+			if(p+2+(n = m->u.clientKeyExchange.pskid->len) > e)
+				goto Overflow;
+			put16(p, n), p += 2;
+			memmove(p, m->u.clientKeyExchange.pskid->data, n);
+			p += n;
+		}
+		if(m->u.clientKeyExchange.key == nil)
+			break;
+		if(p+2+(n = m->u.clientKeyExchange.key->len) > e)
+			goto Overflow;
+		/* length prefix: 1 byte for ECDHE, 2 bytes otherwise, none for non-DHE SSL3 */
+		if(isECDHE(c->cipher))
+			*p++ = n;
+		else if(isDHE(c->cipher) || c->version != SSL3Version)
+			put16(p, n), p += 2;
+		memmove(p, m->u.clientKeyExchange.key->data, n);
+		p += n;
+		break;
+	case HFinished:
+		if(p+m->u.finished.n > e)
+			goto Overflow;
+		memmove(p, m->u.finished.verify, m->u.finished.n);
+		p += m->u.finished.n;
+		break;
+	}
+
+	// go back and fill in size
+	n = p - c->sendp;
+	put24(c->sendp+1, n-4);
+
+	// remember hash of Handshake messages
+	if(m->tag != HHelloRequest)
+		msgHash(c, c->sendp, n);
+
+	c->sendp = p;
+	if(act == AFlush){
+		/* write out this message and everything queued before it */
+		c->sendp = c->buf;
+		if(write(c->hand, c->buf, p - c->buf) < 0){
+			fprint(2, "write error: %r\n");
+			goto Err;
+		}
+	}
+	msgClear(m);
+	return 1;
+Overflow:
+	tlsError(c, EInternalError, "not enougth send buffer for message (%d)", m->tag);
+Err:
+	msgClear(m);
+	return 0;
+}
+
+/*
+ * tlsReadN: read exactly n bytes from c->hand into the last n bytes
+ * of c->buf and return a pointer to them.
+ *
+ * The tail of the buffer is used so that messages queued for sending
+ * at the front (up to c->sendp) are left alone; a request that would
+ * reach into them, or exceeds the buffer, is reported as
+ * EDecodeError.  Returns nil on that error or when a read fails or
+ * hits end of file.
+ */
+static uchar*
+tlsReadN(TlsConnection *c, int n)
+{
+	uchar *start, *cur, *end;
+	int got;
+
+	end = &c->buf[sizeof(c->buf)];
+	start = end - n;
+	if(n > sizeof(c->buf) || start < c->sendp){
+		tlsError(c, EDecodeError, "handshake message too long %d", n);
+		return nil;
+	}
+
+	/* a read may return less than asked for; keep going until full */
+	cur = start;
+	while(cur < end){
+		got = read(c->hand, cur, end - cur);
+		if(got <= 0)
+			return nil;
+		cur += got;
+	}
+	return start;
+}
+
+/*
+ * Read the next handshake message from c->hand and decode it into m.
+ *
+ * m is cleared first.  Empty HelloRequest messages are skipped; a
+ * HelloRequest with a body is a decode error.  The 4-byte header and
+ * the body are fetched with tlsReadN and fed to msgHash.  A
+ * ClientHello in SSL2 record format is converted to an HClientHello.
+ *
+ * Throughout, p walks the body and n counts the bytes still unparsed;
+ * every length field taken from the wire must be checked against n
+ * before the bytes it describes are touched.  Apart from the hello
+ * messages, leftover bytes after decoding are an error.
+ *
+ * Returns 1 on success.  On failure returns 0; the Short and Err exits
+ * also release whatever was already attached to m.
+ */
+static int
+msgRecv(TlsConnection *c, Msg *m)
+{
+	uchar *p, *s;
+	int type, n, nn, i;
+
+	msgClear(m);
+	for(;;) {
+		p = tlsReadN(c, 4);
+		if(p == nil)
+			return 0;
+		type = p[0];
+		n = get24(p+1);
+
+		if(type != HHelloRequest)
+			break;
+		if(n != 0) {
+			tlsError(c, EDecodeError, "invalid hello request during handshake");
+			return 0;
+		}
+	}
+
+	if(type == HSSL2ClientHello){
+		/* Cope with an SSL3 ClientHello expressed in SSL2 record format.
+			This is sent by some clients that we must interoperate
+			with, such as Java's JSSE and Microsoft's Internet Explorer. */
+		int nsid, nrandom, nciph;
+
+		p = tlsReadN(c, n);
+		if(p == nil)
+			return 0;
+		msgHash(c, p, n);
+		m->tag = HClientHello;
+		if(n < 22)
+			goto Short;
+		m->u.clientHello.version = get16(p+1);
+		p += 3;
+		n -= 3;
+		nn = get16(p); /* cipher_spec_len */
+		nsid = get16(p + 2);
+		nrandom = get16(p + 4);
+		p += 6;
+		n -= 6;
+		if(nsid != 0 	/* no sid's, since shouldn't restart using ssl2 header */
+		|| nrandom < 16 || nn % 3 || n - nrandom < nn)
+			goto Err;
+		/* ignore ssl2 ciphers and look for {0x00, ssl3 cipher} */
+		nciph = 0;
+		for(i = 0; i < nn; i += 3)
+			if(p[i] == 0)
+				nciph++;
+		m->u.clientHello.ciphers = newints(nciph);
+		nciph = 0;
+		for(i = 0; i < nn; i += 3)
+			if(p[i] == 0)
+				m->u.clientHello.ciphers->data[nciph++] = get16(&p[i + 1]);
+		p += nn;
+		m->u.clientHello.sid = makebytes(nil, 0);
+		/* right-align the challenge in the random field, zero padded on the left */
+		if(nrandom > RandomSize)
+			nrandom = RandomSize;
+		memset(m->u.clientHello.random, 0, RandomSize - nrandom);
+		memmove(&m->u.clientHello.random[RandomSize - nrandom], p, nrandom);
+		m->u.clientHello.compressors = newbytes(1);
+		m->u.clientHello.compressors->data[0] = CompressionNull;
+		goto Ok;
+	}
+	msgHash(c, p, 4);
+
+	p = tlsReadN(c, n);
+	if(p == nil)
+		return 0;
+
+	msgHash(c, p, n);
+
+	m->tag = type;
+
+	switch(type) {
+	default:
+		tlsError(c, EUnexpectedMessage, "can't decode a %d", type);
+		goto Err;
+	case HClientHello:
+		if(n < 2)
+			goto Short;
+		m->u.clientHello.version = get16(p);
+		p += 2, n -= 2;
+
+		if(n < RandomSize)
+			goto Short;
+		memmove(m->u.clientHello.random, p, RandomSize);
+		p += RandomSize, n -= RandomSize;
+		if(n < 1 || n < p[0]+1)
+			goto Short;
+		m->u.clientHello.sid = makebytes(p+1, p[0]);
+		p += m->u.clientHello.sid->len+1;
+		n -= m->u.clientHello.sid->len+1;
+
+		if(n < 2)
+			goto Short;
+		nn = get16(p);
+		p += 2, n -= 2;
+
+		if((nn & 1) || n < nn || nn < 2)
+			goto Short;
+		m->u.clientHello.ciphers = newints(nn >> 1);
+		for(i = 0; i < nn; i += 2)
+			m->u.clientHello.ciphers->data[i >> 1] = get16(&p[i]);
+		p += nn, n -= nn;
+
+		if(n < 1 || n < p[0]+1 || p[0] == 0)
+			goto Short;
+		nn = p[0];
+		m->u.clientHello.compressors = makebytes(p+1, nn);
+		p += nn + 1, n -= nn + 1;
+
+		/* extensions are optional */
+		if(n < 2)
+			break;
+		nn = get16(p);
+		if(nn > n-2)
+			goto Short;
+		m->u.clientHello.extensions = makebytes(p+2, nn);
+		n -= nn + 2;
+		break;
+	case HServerHello:
+		if(n < 2)
+			goto Short;
+		m->u.serverHello.version = get16(p);
+		p += 2, n -= 2;
+
+		if(n < RandomSize)
+			goto Short;
+		memmove(m->u.serverHello.random, p, RandomSize);
+		p += RandomSize, n -= RandomSize;
+
+		if(n < 1 || n < p[0]+1)
+			goto Short;
+		m->u.serverHello.sid = makebytes(p+1, p[0]);
+		p += m->u.serverHello.sid->len+1;
+		n -= m->u.serverHello.sid->len+1;
+
+		if(n < 3)
+			goto Short;
+		m->u.serverHello.cipher = get16(p);
+		m->u.serverHello.compressor = p[2];
+		p += 3, n -= 3;
+
+		/* extensions are optional */
+		if(n < 2)
+			break;
+		nn = get16(p);
+		if(nn > n-2)
+			goto Short;
+		m->u.serverHello.extensions = makebytes(p+2, nn);
+		n -= nn + 2;
+		break;
+	case HCertificate:
+		if(n < 3)
+			goto Short;
+		nn = get24(p);
+		p += 3, n -= 3;
+		if(nn == 0 && n > 0)
+			goto Short;
+		/* certs */
+		i = 0;
+		while(n > 0) {
+			if(n < 3)
+				goto Short;
+			nn = get24(p);
+			p += 3, n -= 3;
+			if(nn > n)
+				goto Short;
+			m->u.certificate.ncert = i+1;
+			m->u.certificate.certs = erealloc(m->u.certificate.certs, (i+1)*sizeof(Bytes*));
+			m->u.certificate.certs[i] = makebytes(p, nn);
+			p += nn, n -= nn;
+			i++;
+		}
+		break;
+	case HCertificateRequest:
+		if(n < 1)
+			goto Short;
+		nn = p[0];
+		p++, n--;
+		if(nn > n)
+			goto Short;
+		m->u.certificateRequest.types = makebytes(p, nn);
+		p += nn, n -= nn;
+		if(c->version >= TLS12Version){
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			/*
+			 * the list must consist of whole 16-bit entries and
+			 * must fit in what is left of the message; without
+			 * the second test the loop below reads past the end
+			 * of the received data.
+			 */
+			if((nn & 1) || nn > n)
+				goto Short;
+			m->u.certificateRequest.sigalgs = newints(nn>>1);
+			for(i = 0; i < nn; i += 2)
+				m->u.certificateRequest.sigalgs->data[i >> 1] = get16(&p[i]);
+			p += nn, n -= nn;
+
+		}
+		if(n < 2)
+			goto Short;
+		nn = get16(p);
+		p += 2, n -= 2;
+		/* nn == 0 can happen; yahoo's servers do it */
+		if(nn != n)
+			goto Short;
+		/* cas */
+		i = 0;
+		while(n > 0) {
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.certificateRequest.nca = i+1;
+			m->u.certificateRequest.cas = erealloc(
+				m->u.certificateRequest.cas, (i+1)*sizeof(Bytes*));
+			m->u.certificateRequest.cas[i] = makebytes(p, nn);
+			p += nn, n -= nn;
+			i++;
+		}
+		break;
+	case HServerHelloDone:
+		break;
+	case HServerKeyExchange:
+		if(isPSK(c->cipher)){
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn > n)
+				goto Short;
+			m->u.serverKeyExchange.pskid = makebytes(p, nn);
+			p += nn, n -= nn;
+			if(n == 0)
+				break;
+		}
+		if(n < 2)
+			goto Short;
+		/* remember where the parameters start; they are kept verbatim below */
+		s = p;
+		if(isECDHE(c->cipher)){
+			nn = *p;
+			p++, n--;
+			if(nn != 3 || nn > n) /* not a named curve */
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			m->u.serverKeyExchange.curve = nn;
+
+			nn = *p++, n--;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_Ys = makebytes(p, nn);
+			p += nn, n -= nn;
+		}else if(isDHE(c->cipher)){
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_p = makebytes(p, nn);
+			p += nn, n -= nn;
+
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_g = makebytes(p, nn);
+			p += nn, n -= nn;
+
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn < 1 || nn > n)
+				goto Short;
+			m->u.serverKeyExchange.dh_Ys = makebytes(p, nn);
+			p += nn, n -= nn;
+		} else {
+			/* should not happen */
+			goto Short;
+		}
+		m->u.serverKeyExchange.dh_parameters = makebytes(s, p - s);
+		if(n >= 2){
+			m->u.serverKeyExchange.sigalg = 0;
+			if(c->version >= TLS12Version){
+				m->u.serverKeyExchange.sigalg = get16(p);
+				p += 2, n -= 2;
+				if(n < 2)
+					goto Short;
+			}
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn > 0 && nn <= n){
+				m->u.serverKeyExchange.dh_signature = makebytes(p, nn);
+				n -= nn;
+			}
+		}
+		break;
+	case HClientKeyExchange:
+		if(isPSK(c->cipher)){
+			if(n < 2)
+				goto Short;
+			nn = get16(p);
+			p += 2, n -= 2;
+			if(nn > n)
+				goto Short;
+			m->u.clientKeyExchange.pskid = makebytes(p, nn);
+			p += nn, n -= nn;
+			if(n == 0)
+				break;
+		}
+		if(n < 2)
+			goto Short;
+		/* the key has a 1-byte length, a 2-byte length, or (SSL3 RSA) none at all */
+		if(isECDHE(c->cipher))
+			nn = *p++, n--;
+		else if(isDHE(c->cipher) || c->version != SSL3Version)
+			nn = get16(p), p += 2, n -= 2;
+		else
+			nn = n;
+		if(n < nn)
+			goto Short;
+		m->u.clientKeyExchange.key = makebytes(p, nn);
+		n -= nn;
+		break;
+	case HFinished:
+		m->u.finished.n = c->finished.n;
+		if(n < m->u.finished.n)
+			goto Short;
+		memmove(m->u.finished.verify, p, m->u.finished.n);
+		n -= m->u.finished.n;
+		break;
+	}
+
+	if(n != 0 && type != HClientHello && type != HServerHello)
+		goto Short;
+Ok:
+	if(c->trace)
+		c->trace("recv %s", msgPrint((char*)c->sendp, &c->buf[sizeof(c->buf)] - c->sendp, m));
+	return 1;
+Short:
+	tlsError(c, EDecodeError, "handshake message (%d) has invalid length", type);
+Err:
+	msgClear(m);
+	return 0;
+}
+
+/*
+ * Free whatever is attached to m for its current tag and then zero
+ * the whole Msg, leaving it ready for reuse.
+ */
+static void
+msgClear(Msg *m)
+{
+	int k;
+
+	switch(m->tag) {
+	case HHelloRequest:
+	case HServerHelloDone:
+	case HFinished:
+		/* these carry nothing that needs freeing */
+		break;
+	case HClientHello:
+		freebytes(m->u.clientHello.sid);
+		freeints(m->u.clientHello.ciphers);
+		freebytes(m->u.clientHello.compressors);
+		freebytes(m->u.clientHello.extensions);
+		break;
+	case HServerHello:
+		freebytes(m->u.serverHello.sid);
+		freebytes(m->u.serverHello.extensions);
+		break;
+	case HCertificate:
+		k = 0;
+		while(k < m->u.certificate.ncert){
+			freebytes(m->u.certificate.certs[k]);
+			k++;
+		}
+		free(m->u.certificate.certs);
+		break;
+	case HCertificateRequest:
+		freebytes(m->u.certificateRequest.types);
+		freeints(m->u.certificateRequest.sigalgs);
+		k = 0;
+		while(k < m->u.certificateRequest.nca){
+			freebytes(m->u.certificateRequest.cas[k]);
+			k++;
+		}
+		free(m->u.certificateRequest.cas);
+		break;
+	case HCertificateVerify:
+		freebytes(m->u.certificateVerify.signature);
+		break;
+	case HServerKeyExchange:
+		freebytes(m->u.serverKeyExchange.pskid);
+		freebytes(m->u.serverKeyExchange.dh_p);
+		freebytes(m->u.serverKeyExchange.dh_g);
+		freebytes(m->u.serverKeyExchange.dh_Ys);
+		freebytes(m->u.serverKeyExchange.dh_parameters);
+		freebytes(m->u.serverKeyExchange.dh_signature);
+		break;
+	case HClientKeyExchange:
+		freebytes(m->u.clientKeyExchange.pskid);
+		freebytes(m->u.clientKeyExchange.key);
+		break;
+	}
+	memset(m, 0, sizeof(Msg));
+}
+
+/*
+ * Format b into [bs, be): the optional prefix s0, then either "nil"
+ * or "<len> [ xx xx ... ]" with every byte in two-digit hex, then the
+ * optional suffix s1.  Returns the updated output position as handed
+ * back by seprint.
+ */
+static char *
+bytesPrint(char *bs, char *be, char *s0, Bytes *b, char *s1)
+{
+	uchar *d, *end;
+
+	if(s0 != nil)
+		bs = seprint(bs, be, "%s", s0);
+	if(b != nil){
+		bs = seprint(bs, be, "<%d> [ ", b->len);
+		end = b->data + b->len;
+		for(d = b->data; d < end; d++)
+			bs = seprint(bs, be, "%.2x ", *d);
+		bs = seprint(bs, be, "]");
+	}else
+		bs = seprint(bs, be, "nil");
+	if(s1 != nil)
+		bs = seprint(bs, be, "%s", s1);
+	return bs;
+}
+
+/*
+ * Format b into [bs, be): the optional prefix s0, then either "nil"
+ * or "[ x x ... ]" with every element in hex, then the optional
+ * suffix s1.  Returns the updated output position as handed back by
+ * seprint.
+ */
+static char *
+intsPrint(char *bs, char *be, char *s0, Ints *b, char *s1)
+{
+	int *v, *end;
+
+	if(s0 != nil)
+		bs = seprint(bs, be, "%s", s0);
+	if(b != nil){
+		bs = seprint(bs, be, "[ ");
+		end = b->data + b->len;
+		for(v = b->data; v < end; v++)
+			bs = seprint(bs, be, "%x ", *v);
+		bs = seprint(bs, be, "]");
+	}else
+		bs = seprint(bs, be, "nil");
+	if(s1 != nil)
+		bs = seprint(bs, be, "%s", s1);
+	return bs;
+}
+
+/*
+ * Render a human readable dump of handshake message m into the n
+ * bytes at buf and return buf.  The layout depends on m->tag; tags
+ * without a case here are printed as "unknown <tag>".  All output goes
+ * through seprint, bounded by buf+n.
+ */
+static char*
+msgPrint(char *buf, int n, Msg *m)
+{
+	int i;
+	char *bs = buf, *be = buf+n;
+
+	switch(m->tag) {
+	default:
+		bs = seprint(bs, be, "unknown %d\n", m->tag);
+		break;
+	case HClientHello:
+		bs = seprint(bs, be, "ClientHello\n");
+		bs = seprint(bs, be, "\tversion: %.4x\n", m->u.clientHello.version);
+		bs = seprint(bs, be, "\trandom: ");
+		for(i=0; i<RandomSize; i++)
+			bs = seprint(bs, be, "%.2x", m->u.clientHello.random[i]);
+		bs = seprint(bs, be, "\n");
+		bs = bytesPrint(bs, be, "\tsid: ", m->u.clientHello.sid, "\n");
+		bs = intsPrint(bs, be, "\tciphers: ", m->u.clientHello.ciphers, "\n");
+		bs = bytesPrint(bs, be, "\tcompressors: ", m->u.clientHello.compressors, "\n");
+		if(m->u.clientHello.extensions != nil)
+			bs = bytesPrint(bs, be, "\textensions: ", m->u.clientHello.extensions, "\n");
+		break;
+	case HServerHello:
+		bs = seprint(bs, be, "ServerHello\n");
+		bs = seprint(bs, be, "\tversion: %.4x\n", m->u.serverHello.version);
+		bs = seprint(bs, be, "\trandom: ");
+		for(i=0; i<RandomSize; i++)
+			bs = seprint(bs, be, "%.2x", m->u.serverHello.random[i]);
+		bs = seprint(bs, be, "\n");
+		bs = bytesPrint(bs, be, "\tsid: ", m->u.serverHello.sid, "\n");
+		bs = seprint(bs, be, "\tcipher: %.4x\n", m->u.serverHello.cipher);
+		bs = seprint(bs, be, "\tcompressor: %.2x\n", m->u.serverHello.compressor);
+		if(m->u.serverHello.extensions != nil)
+			bs = bytesPrint(bs, be, "\textensions: ", m->u.serverHello.extensions, "\n");
+		break;
+	case HCertificate:
+		bs = seprint(bs, be, "Certificate\n");
+		for(i=0; i<m->u.certificate.ncert; i++)
+			bs = bytesPrint(bs, be, "\t", m->u.certificate.certs[i], "\n");
+		break;
+	case HCertificateRequest:
+		bs = seprint(bs, be, "CertificateRequest\n");
+		bs = bytesPrint(bs, be, "\ttypes: ", m->u.certificateRequest.types, "\n");
+		if(m->u.certificateRequest.sigalgs != nil)
+			bs = intsPrint(bs, be, "\tsigalgs: ", m->u.certificateRequest.sigalgs, "\n");
+		bs = seprint(bs, be, "\tcertificateauthorities\n");
+		for(i=0; i<m->u.certificateRequest.nca; i++)
+			bs = bytesPrint(bs, be, "\t\t", m->u.certificateRequest.cas[i], "\n");
+		break;
+	case HCertificateVerify:
+		bs = seprint(bs, be, "HCertificateVerify\n");
+		if(m->u.certificateVerify.sigalg != 0)
+			bs = seprint(bs, be, "\tsigalg: %.4x\n", m->u.certificateVerify.sigalg);
+		bs = bytesPrint(bs, be, "\tsignature: ", m->u.certificateVerify.signature,"\n");
+		break;	
+	case HServerHelloDone:
+		bs = seprint(bs, be, "ServerHelloDone\n");
+		break;
+	case HServerKeyExchange:
+		bs = seprint(bs, be, "HServerKeyExchange\n");
+		if(m->u.serverKeyExchange.pskid != nil)
+			bs = bytesPrint(bs, be, "\tpskid: ", m->u.serverKeyExchange.pskid, "\n");
+		/* no key exchange parameters were decoded: nothing more to show */
+		if(m->u.serverKeyExchange.dh_parameters == nil)
+			break;
+		/* a non-zero curve id selects the elliptic curve layout, else show p and g */
+		if(m->u.serverKeyExchange.curve != 0){
+			bs = seprint(bs, be, "\tcurve: %.4x\n", m->u.serverKeyExchange.curve);
+		} else {
+			bs = bytesPrint(bs, be, "\tdh_p: ", m->u.serverKeyExchange.dh_p, "\n");
+			bs = bytesPrint(bs, be, "\tdh_g: ", m->u.serverKeyExchange.dh_g, "\n");
+		}
+		bs = bytesPrint(bs, be, "\tdh_Ys: ", m->u.serverKeyExchange.dh_Ys, "\n");
+		if(m->u.serverKeyExchange.sigalg != 0)
+			bs = seprint(bs, be, "\tsigalg: %.4x\n", m->u.serverKeyExchange.sigalg);
+		bs = bytesPrint(bs, be, "\tdh_parameters: ", m->u.serverKeyExchange.dh_parameters, "\n");
+		bs = bytesPrint(bs, be, "\tdh_signature: ", m->u.serverKeyExchange.dh_signature, "\n");
+		break;
+	case HClientKeyExchange:
+		bs = seprint(bs, be, "HClientKeyExchange\n");
+		if(m->u.clientKeyExchange.pskid != nil)
+			bs = bytesPrint(bs, be, "\tpskid: ", m->u.clientKeyExchange.pskid, "\n");
+		if(m->u.clientKeyExchange.key != nil)
+			bs = bytesPrint(bs, be, "\tkey: ", m->u.clientKeyExchange.key, "\n");
+		break;
+	case HFinished:
+		bs = seprint(bs, be, "HFinished\n");
+		for(i=0; i<m->u.finished.n; i++)
+			bs = seprint(bs, be, "%.2x", m->u.finished.verify[i]);
+		bs = seprint(bs, be, "\n");
+		break;
+	}
+	/* the final write position is not needed; mark it used for the compiler */
+	USED(bs);
+	return buf;
+}
+
+/*
+ * Record a handshake failure: format the message, pass it to the
+ * trace hook if one is set, install it with errstr() unless an error
+ * was already recorded on this connection (in which case it is only
+ * printed to fd 2), mark the connection as erred and write
+ * "alert <err>" to the control file.
+ */
+static void
+tlsError(TlsConnection *c, int err, char *fmt, ...)
+{
+	va_list ap;
+	char text[512];
+
+	va_start(ap, fmt);
+	vseprint(text, text+sizeof(text), fmt, ap);
+	va_end(ap);
+
+	if(c->trace)
+		c->trace("tlsError: %s\n", text);
+	if(!c->erred)
+		errstr(text, sizeof(text));
+	else
+		fprint(2, "double error: %r, %s", text);
+	c->erred = 1;
+	fprint(c->ctl, "alert %d", err);
+}
+
+/*
+ * Commit the connection to a protocol version.  Versions outside
+ * [MinProtoVersion, MaxProtoVersion] are rejected with -1; a version
+ * above the current c->version is lowered to it.  Sets the length of
+ * the Finished verify data to match and writes the choice to the
+ * control file, returning fprint's result.
+ */
+static int
+setVersion(TlsConnection *c, int version)
+{
+	if(version < MinProtoVersion || version > MaxProtoVersion)
+		return -1;
+	if(c->version < version)
+		version = c->version;
+
+	c->version = version;
+	if(version == SSL3Version)
+		c->finished.n = SSL3FinishedLen;
+	else
+		c->finished.n = TLSFinishedLen;
+	return fprint(c->ctl, "version 0x%x", version);
+}
+
+/*
+ * Return non-zero when the verify data in the received Finished
+ * message f equals the value computed locally in c->finished.  The
+ * comparison uses tsmemcmp over f->n bytes.
+ */
+static int
+finishedMatch(TlsConnection *c, Finished *f)
+{
+	int diff;
+
+	diff = tsmemcmp(f->verify, c->finished.verify, f->n);
+	return diff == 0;
+}
+
+/*
+ * Free the memory held by a TlsConnection (the TLS channel itself is
+ * not closed): key exchange state, the factotum rpc handle, the RSA
+ * public key and the certificate.  The structure is zeroed before it
+ * is freed.  A nil c is accepted.
+ */
+static void
+tlsConnectionFree(TlsConnection *c)
+{
+	TlsSec *sec;
+
+	if(c != nil){
+		sec = c->sec;
+
+		dh_finish(&sec->dh, nil);
+
+		mpfree(sec->ec.Q.x);
+		mpfree(sec->ec.Q.y);
+		mpfree(sec->ec.Q.d);
+		ecdomfree(&sec->ec.dom);
+
+		factotum_rsa_close(sec->rpc);
+		rsapubfree(sec->rsapub);
+		freebytes(c->cert);
+
+		memset(c, 0, sizeof(*c));
+		free(c);
+	}
+}
+
+
+//================= cipher choices ========================
+
+/*
+ * Return 1 if tlsid is one of the DHE_RSA cipher suites listed
+ * below, 0 otherwise.
+ */
+static int
+isDHE(int tlsid)
+{
+	int yes;
+
+	switch(tlsid){
+	case TLS_DHE_RSA_WITH_AES_128_GCM_SHA256:
+	case TLS_DHE_RSA_WITH_AES_128_CBC_SHA256:
+	case TLS_DHE_RSA_WITH_AES_128_CBC_SHA:
+	case TLS_DHE_RSA_WITH_AES_256_CBC_SHA:
+	case TLS_DHE_RSA_WITH_3DES_EDE_CBC_SHA:
+	case TLS_DHE_RSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_DHE_RSA_WITH_CHACHA20_POLY1305:
+		yes = 1;
+		break;
+	default:
+		yes = 0;
+		break;
+	}
+	return yes;
+}
+
+/*
+ * Return 1 if tlsid is one of the ECDHE cipher suites listed below
+ * (ECDSA or RSA authenticated), 0 otherwise.
+ */
+static int
+isECDHE(int tlsid)
+{
+	int yes;
+
+	switch(tlsid){
+	case TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_ECDHE_RSA_WITH_CHACHA20_POLY1305:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256:
+	case TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA:
+	case TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA:
+		yes = 1;
+		break;
+	default:
+		yes = 0;
+		break;
+	}
+	return yes;
+}
+
+/*
+ * Return 1 if tlsid is one of the pre-shared-key cipher suites listed
+ * below, 0 otherwise.
+ */
+static int
+isPSK(int tlsid)
+{
+	int yes;
+
+	switch(tlsid){
+	case TLS_PSK_WITH_CHACHA20_POLY1305:
+	case TLS_PSK_WITH_AES_128_CBC_SHA256:
+	case TLS_PSK_WITH_AES_128_CBC_SHA:
+		yes = 1;
+		break;
+	default:
+		yes = 0;
+		break;
+	}
+	return yes;
+}
+
+/*
+ * Return 1 if tlsid is one of the ECDSA-authenticated cipher suites
+ * listed below, 0 otherwise.
+ */
+static int
+isECDSA(int tlsid)
+{
+	int yes;
+
+	switch(tlsid){
+	case TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case GOOGLE_ECDHE_ECDSA_WITH_CHACHA20_POLY1305:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256:
+	case TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256:
+		yes = 1;
+		break;
+	default:
+		yes = 0;
+		break;
+	}
+	return yes;
+}
+
+/*
+ * Select cipher suite a for the connection: look it up in cipherAlgs
+ * and copy its encryption name, digest name and key material size
+ * into c.  Returns 1 on success; 0 if the suite is not in the table
+ * or needs more than MaxKeyData bytes of key material (the fields of
+ * c have been set by then).
+ */
+static int
+setAlgs(TlsConnection *c, int a)
+{
+	int i;
+
+	for(i = 0; i < nelem(cipherAlgs); i++)
+		if(cipherAlgs[i].tlsid == a)
+			break;
+	if(i >= nelem(cipherAlgs))
+		return 0;
+
+	c->cipher = a;
+	c->enc = cipherAlgs[i].enc;
+	c->digest = cipherAlgs[i].digest;
+	c->nsecret = cipherAlgs[i].nsecret;
+	return c->nsecret <= MaxKeyData;
+}
+
+/*
+ * Pick a cipher suite from the list cv.  cipherAlgs is scanned in
+ * table order and the first acceptable entry that also appears in cv
+ * is returned; -1 if there is none.  An entry is acceptable when it
+ * is marked ok, is neither an ECDSA nor a DHE suite, its PSK-ness
+ * equals ispsk, and, if it is an ECDHE suite, canec is set.
+ */
+static int
+okCipher(Ints *cv, int ispsk, int canec)
+{
+	int i, id;
+
+	for(i = 0; i < nelem(cipherAlgs); i++) {
+		id = cipherAlgs[i].tlsid;
+		if(cipherAlgs[i].ok && !isECDSA(id) && !isDHE(id)
+		&& isPSK(id) == ispsk
+		&& (canec || !isECDHE(id))
+		&& lookupid(cv, id) >= 0)
+			return id;
+	}
+	return -1;
+}
+
+/*
+ * Return the first entry of the compressors table that also occurs in
+ * the byte list cv, or -1 when they have none in common.
+ */
+static int
+okCompression(Bytes *cv)
+{
+	int i;
+
+	i = 0;
+	while(i < nelem(compressors)){
+		if(memchr(cv->data, compressors[i], cv->len) != nil)
+			return compressors[i];
+		i++;
+	}
+	return -1;
+}
+
+static Lock	ciphLock;
+static int	nciphers;
+
+/*
+ * Work out which entries of cipherAlgs are usable: an entry is marked
+ * ok when its encryption name is listed in #a/tls/encalgs and its
+ * digest name in #a/tls/hashalgs.  nciphers counts the usable entries
+ * and is returned.  Runs under ciphLock; once nciphers is non-zero
+ * the files are not read again.  Returns 0, with the error string
+ * set, when either file cannot be opened or is empty.
+ */
+static int
+initCiphers(void)
+{
+	enum {MaxAlgF = 1024, MaxAlgs = 10};
+	char text[MaxAlgF], *flds[MaxAlgs];
+	int fd, i, k, nflds, nread;
+
+	lock(&ciphLock);
+	if(nciphers)
+		goto out;
+
+	/* encryption algorithms offered by the tls device */
+	fd = open("#a/tls/encalgs", OREAD|OCEXEC);
+	if(fd < 0){
+		werrstr("can't open #a/tls/encalgs: %r");
+		goto out;
+	}
+	nread = read(fd, text, MaxAlgF-1);
+	close(fd);
+	if(nread <= 0){
+		werrstr("nothing in #a/tls/encalgs: %r");
+		goto out;
+	}
+	text[nread] = 0;
+	nflds = getfields(text, flds, MaxAlgs, 1, " \t\r\n");
+	for(i = 0; i < nelem(cipherAlgs); i++){
+		for(k = 0; k < nflds; k++)
+			if(strcmp(cipherAlgs[i].enc, flds[k]) == 0)
+				break;
+		cipherAlgs[i].ok = (k < nflds);
+	}
+
+	/* digest algorithms offered by the tls device */
+	fd = open("#a/tls/hashalgs", OREAD|OCEXEC);
+	if(fd < 0){
+		werrstr("can't open #a/tls/hashalgs: %r");
+		goto out;
+	}
+	nread = read(fd, text, MaxAlgF-1);
+	close(fd);
+	if(nread <= 0){
+		werrstr("nothing in #a/tls/hashalgs: %r");
+		goto out;
+	}
+	text[nread] = 0;
+	nflds = getfields(text, flds, MaxAlgs, 1, " \t\r\n");
+	for(i = 0; i < nelem(cipherAlgs); i++){
+		for(k = 0; k < nflds; k++)
+			if(strcmp(cipherAlgs[i].digest, flds[k]) == 0)
+				break;
+		cipherAlgs[i].ok &= (k < nflds);
+		if(cipherAlgs[i].ok)
+			nciphers++;
+	}
+out:
+	unlock(&ciphLock);
+	return nciphers;
+}
+
+/*
+ * Build the list of usable cipher suite ids: every cipherAlgs entry
+ * that is marked ok and whose PSK-ness equals ispsk, in table order.
+ * The list is allocated with room for nciphers ids and its len is
+ * set to the number actually stored.
+ */
+static Ints*
+makeciphers(int ispsk)
+{
+	Ints *list;
+	int i, count;
+
+	list = newints(nciphers);
+	count = 0;
+	for(i = 0; i < nelem(cipherAlgs); i++){
+		if(!cipherAlgs[i].ok)
+			continue;
+		if(isPSK(cipherAlgs[i].tlsid) != ispsk)
+			continue;
+		list->data[count] = cipherAlgs[i].tlsid;
+		count++;
+	}
+	list->len = count;
+	return list;
+}
+
+
+//================= security functions ========================
+
+/*
+ * Given an RSA public key, open a factotum rpc session positioned on
+ * the matching private key.  Starts "proto=rsa service=tls
+ * role=client" and reads keys until one whose modulus equals
+ * rsapub->n turns up.  Returns the session, or nil if factotum cannot
+ * be reached or has no such key (the session is closed in that case).
+ */
+static AuthRpc*
+factotum_rsa_open(RSApub *rsapub)
+{
+	int afd, found;
+	char *s;
+	mpint *n;
+	AuthRpc *rpc;
+
+	// start talking to factotum
+	afd = open("/mnt/factotum/rpc", ORDWR|OCEXEC);
+	if(afd < 0)
+		return nil;
+	rpc = auth_allocrpc(afd);
+	if(rpc == nil){
+		close(afd);
+		return nil;
+	}
+	s = "proto=rsa service=tls role=client";
+	if(auth_rpc(rpc, "start", s, strlen(s)) != ARok){
+		factotum_rsa_close(rpc);
+		return nil;
+	}
+
+	// roll factotum keyring around to match public key
+	n = mpnew(0);
+	found = 0;
+	while(!found && auth_rpc(rpc, "read", nil, 0) == ARok)
+		found = strtomp(rpc->arg, nil, 16, n) != nil && mpcmp(n, rsapub->n) == 0;
+	mpfree(n);
+	if(found)
+		return rpc;
+
+	factotum_rsa_close(rpc);
+	return nil;
+}
+
+/*
+ * Have factotum apply the private key to cipher: the number is
+ * written in hex over rpc and the hex reply is parsed into a new
+ * mpint.  cipher is always freed here.  Returns nil when cipher is
+ * nil, the conversion fails or either rpc step is rejected.
+ */
+static mpint*
+factotum_rsa_decrypt(AuthRpc *rpc, mpint *cipher)
+{
+	char *hex;
+	int wrote;
+
+	if(cipher == nil)
+		return nil;
+	hex = mptoa(cipher, 16, nil, 0);
+	mpfree(cipher);
+	if(hex == nil)
+		return nil;
+	wrote = auth_rpc(rpc, "write", hex, strlen(hex)) == ARok;
+	free(hex);
+	if(wrote && auth_rpc(rpc, "read", nil, 0) == ARok)
+		return strtomp(rpc->arg, nil, 16, nil);
+	return nil;
+}
+
+/*
+ * Close the factotum file descriptor of rpc and free the rpc
+ * structure.  A nil rpc is ignored.
+ */
+static void
+factotum_rsa_close(AuthRpc *rpc)
+{
+	if(rpc != nil){
+		close(rpc->afd);
+		auth_freerpc(rpc);
+	}
+}
+
+/*
+ * TLS P_hash data expansion.  With x an HMAC function producing xlen
+ * byte digests:
+ *	a(1) = x(key, label + seed)
+ *	a(i+1) = x(key, a(i))
+ *	block(i) = x(key, a(i) + label + seed)
+ * The blocks are XORed into buf, in order, until nbuf bytes have been
+ * covered.  Because the output is accumulated rather than stored, the
+ * caller must zero buf before the first call; this is what lets
+ * tls10PRF combine two hashes over the same buffer.
+ */
+static void
+tlsP(uchar *buf, int nbuf, uchar *key, int nkey, uchar *label, int nlabel, uchar *seed, int nseed,
+	DigestState* (*x)(uchar*, u32, uchar*, u32, uchar*, DigestState*), int xlen)
+{
+	uchar ai[SHA2_256dlen], tmp[SHA2_256dlen];
+	DigestState *s;
+	int i, n;
+
+	// the digest must fit in the scratch buffers (not the other way round)
+	assert(xlen > 0 && xlen <= sizeof(ai) && xlen <= sizeof(tmp));
+	// generate a1
+	s = x(label, nlabel, key, nkey, nil, nil);
+	x(seed, nseed, key, nkey, ai, s);
+
+	while(nbuf > 0) {
+		// next output block from a(i), label and seed
+		s = x(ai, xlen, key, nkey, nil, nil);
+		s = x(label, nlabel, key, nkey, nil, s);
+		x(seed, nseed, key, nkey, tmp, s);
+		n = xlen;
+		if(n > nbuf)
+			n = nbuf;
+		for(i = 0; i < n; i++)
+			buf[i] ^= tmp[i];
+		buf += n;
+		nbuf -= n;
+		// advance a(i) to a(i+1)
+		x(ai, xlen, key, nkey, tmp, nil);
+		memmove(ai, tmp, xlen);
+	}
+}
+
+/*
+ * TLS 1.0/1.1 PRF: fill buf with P_MD5(S1, label + seed) XOR
+ * P_SHA1(S2, label + seed), where S1 and S2 are the first and last
+ * (nkey+1)/2 bytes of key (they share the middle byte when nkey is
+ * odd).
+ */
+static void
+tls10PRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed, int nseed)
+{
+	int nlabel = strlen(label);
+	int n = (nkey + 1) >> 1;
+
+	// tlsP accumulates by XOR, so start from zero
+	memset(buf, 0, nbuf);
+	tlsP(buf, nbuf, key, n, (uchar*)label, nlabel, seed, nseed,
+		hmac_md5, MD5dlen);
+	tlsP(buf, nbuf, key+nkey-n, n, (uchar*)label, nlabel, seed, nseed,
+		hmac_sha1, SHA1dlen);
+}
+
+/*
+ * TLS 1.2 PRF: fill buf with P_SHA256(key, label + seed).
+ */
+static void
+tls12PRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed, int nseed)
+{
+	// tlsP accumulates by XOR, so start from zero
+	memset(buf, 0, nbuf);
+	tlsP(buf, nbuf, key, nkey, (uchar*)label, strlen(label), seed, nseed,
+		hmac_sha2_256, SHA2_256dlen);
+}
+
+/*
+ * SSLv3 key material generator.  Output block k (k = 1, 2, ...) is
+ *	md5(key + sha1(salt_k + key + seed))
+ * where salt_k is k copies of the k-th capital letter ("A", "BB",
+ * ...).  Blocks are copied to buf until nbuf bytes have been
+ * produced; after 26 blocks the function returns and leaves the rest
+ * of buf untouched.  label is not used.
+ */
+static void
+sslPRF(uchar *buf, int nbuf, uchar *key, int nkey, char *label, uchar *seed, int nseed)
+{
+	uchar inner[SHA1dlen], outer[MD5dlen], salt[26];
+	DigestState *ds;
+	int round, take;
+
+	USED(label);
+	for(round = 1; nbuf > 0; round++){
+		if(round > 26)
+			return;
+		memset(salt, 'A' - 1 + round, round);
+		ds = sha1(salt, round, nil, nil);
+		ds = sha1(key, nkey, nil, ds);
+		sha1(seed, nseed, inner, ds);
+		ds = md5(key, nkey, nil, nil);
+		md5(inner, SHA1dlen, outer, ds);
+		take = MD5dlen;
+		if(nbuf < take)
+			take = nbuf;
+		memmove(buf, outer, take);
+		buf += take;
+		nbuf -= take;
+	}
+}
+
+/*
+ * Compute the SSLv3 Finished value into finished: an MD5 part
+ * (MD5dlen bytes) followed by a SHA1 part (SHA1dlen bytes).  Each
+ * part is
+ *	hash(master + pad(0x5C) + hash(handshake + label + master + pad(0x36)))
+ * with label "CLNT" or "SRVR" depending on isclient, and a pad of 48
+ * bytes for MD5 and 40 bytes for SHA1.  hsh is passed by value, so
+ * the hash updates made here are applied to this function's copy.
+ */
+static void
+sslSetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
+{
+	DigestState *s;
+	uchar h0[MD5dlen], h1[SHA1dlen], pad[48];
+	char *label;
+
+	if(isclient)
+		label = "CLNT";
+	else
+		label = "SRVR";
+
+	/* inner MD5: continue the handshake hash with label, master secret and 0x36 pad */
+	md5((uchar*)label, 4, nil, &hsh.md5);
+	md5(sec->sec, MasterSecretSize, nil, &hsh.md5);
+	memset(pad, 0x36, 48);
+	md5(pad, 48, nil, &hsh.md5);
+	md5(nil, 0, h0, &hsh.md5);
+	/* outer MD5: master secret, 0x5C pad, inner digest */
+	memset(pad, 0x5C, 48);
+	s = md5(sec->sec, MasterSecretSize, nil, nil);
+	s = md5(pad, 48, nil, s);
+	md5(h0, MD5dlen, finished, s);
+
+	/* same construction with SHA1 and a 40 byte pad */
+	sha1((uchar*)label, 4, nil, &hsh.sha1);
+	sha1(sec->sec, MasterSecretSize, nil, &hsh.sha1);
+	memset(pad, 0x36, 40);
+	sha1(pad, 40, nil, &hsh.sha1);
+	sha1(nil, 0, h1, &hsh.sha1);
+	memset(pad, 0x5C, 40);
+	s = sha1(sec->sec, MasterSecretSize, nil, nil);
+	s = sha1(pad, 40, nil, s);
+	sha1(h1, SHA1dlen, finished + MD5dlen, s);
+}
+
+/*
+ * TLS 1.0/1.1 Finished value: run tls10PRF over the master secret
+ * with the label "client finished" or "server finished" and, as seed,
+ * the MD5 digest of the handshake followed by its SHA1 digest.
+ * TLSFinishedLen bytes are written to finished.  hsh is a by-value
+ * copy, so finishing the digests here leaves the caller's state open
+ * for further messages.
+ */
+static void
+tls10SetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
+{
+	uchar digests[MD5dlen+SHA1dlen];
+
+	md5(nil, 0, digests, &hsh.md5);
+	sha1(nil, 0, digests+MD5dlen, &hsh.sha1);
+
+	tls10PRF(finished, TLSFinishedLen, sec->sec, MasterSecretSize,
+		isclient ? "client finished" : "server finished",
+		digests, sizeof(digests));
+}
+
+/*
+ * TLS 1.2 Finished value: run tls12PRF over the master secret with
+ * the label "client finished" or "server finished" and the SHA2-256
+ * digest of the handshake as seed.  TLSFinishedLen bytes are written
+ * to finished.  hsh is a by-value copy, so finishing the digest here
+ * leaves the caller's state open for further messages.
+ */
+static void
+tls12SetFinished(TlsSec *sec, HandshakeHash hsh, uchar *finished, int isclient)
+{
+	uchar digest[SHA2_256dlen];
+
+	sha2_256(nil, 0, digest, &hsh.sha2_256);
+
+	tls12PRF(finished, TLSFinishedLen, sec->sec, MasterSecretSize,
+		isclient ? "client finished" : "server finished",
+		digest, SHA2_256dlen);
+}
+
+/*
+ * Initialise sec for the server side: zero it, record the client's
+ * offered version and random, and generate the server random.  All
+ * RandomSize bytes of the server random come from genrandom; the
+ * older practice of putting time() in the first 4 bytes is
+ * deliberately not followed.
+ */
+static void
+tlsSecInits(TlsSec *sec, int cvers, uchar *crandom)
+{
+	memset(sec, 0, sizeof(*sec));
+
+	sec->clientVers = cvers;
+	memmove(sec->crandom, crandom, RandomSize);
+	genrandom(sec->srandom, RandomSize);
+}
+
+/*
+ * Server side of the RSA key exchange: decrypt the client's encrypted
+ * premaster secret epm and derive the master secret from it.  If the
+ * decryption fails, or the result has the wrong length or version, a
+ * random premaster secret is substituted and the handshake carries
+ * on, so the peer cannot tell well-formed from malformed messages at
+ * this point.  Returns -1 only when epm is nil.
+ */
+static int
+tlsSecRSAs(TlsSec *sec, Bytes *epm)
+{
+	Bytes *pm;
+	int good;
+
+	if(epm == nil){
+		werrstr("no encrypted premaster secret");
+		return -1;
+	}
+	pm = pkcs1_decrypt(sec, epm);
+	good = pm != nil
+		&& pm->len == MasterSecretSize
+		&& get16(pm->data) == sec->clientVers;
+	if(!good){
+		freebytes(pm);
+		pm = newbytes(MasterSecretSize);
+		genrandom(pm->data, pm->len);
+	}
+	setMasterSecret(sec, pm);
+	return 0;
+}
+
+/*
+ * Server side ECDHE, step 1: generate an ephemeral key pair on the
+ * negotiated curve sec->nc and return the encoded parameters:
+ *	byte 3 (named curve), 16-bit curve id, 1-byte length, public key
+ * X25519 uses curve25519_dh_new with a 32 byte public key; other
+ * curves use ecgen and ecencodepub.  Returns nil when no curve has
+ * been negotiated.
+ */
+static Bytes*
+tlsSecECDHEs1(TlsSec *sec)
+{
+	ECdomain *dom;
+	ECpriv *Q;
+	Bytes *par;
+	int publen;
+
+	if(sec->nc == nil)
+		return nil;
+
+	if(sec->nc->tlsid == X25519){
+		par = newbytes(1+2+1+32);
+		par->data[0] = 3;
+		put16(par->data+1, X25519);
+		par->data[3] = 32;
+		curve25519_dh_new(sec->X, par->data+4);
+		return par;
+	}
+
+	dom = &sec->ec.dom;
+	Q = &sec->ec.Q;
+	ecdominit(dom, sec->nc->init);
+	memset(Q, 0, sizeof(*Q));
+	Q->x = mpnew(0);
+	Q->y = mpnew(0);
+	Q->d = mpnew(0);
+	ecgen(dom, Q);
+
+	/* allocate for the size computed from the field prime, then trim to what was encoded */
+	publen = 1 + 2*((mpsignif(dom->p)+7)/8);
+	par = newbytes(1+2+1+publen);
+	par->data[0] = 3;
+	put16(par->data+1, sec->nc->tlsid);
+	publen = ecencodepub(dom, Q, par->data+4, par->len-4);
+	par->data[3] = publen;
+	par->len = 1+2+1+publen;
+	return par;
+}
+
+/*
+ * Server side ECDHE, step 2: combine the client's public key Yc with
+ * the ephemeral private key from step 1 and derive the master secret
+ * from the shared value.  Returns 0 on success; -1 with the error
+ * string set when Yc is missing or malformed, or when the X25519
+ * computation is rejected by curve25519_dh_finish.
+ */
+static int
+tlsSecECDHEs2(TlsSec *sec, Bytes *Yc)
+{
+	ECdomain *dom;
+	ECpriv *Q;
+	ECpoint shared;
+	ECpub *peer;
+	Bytes *Z;
+
+	if(Yc == nil){
+		werrstr("no public key");
+		return -1;
+	}
+
+	if(sec->nc->tlsid == X25519){
+		if(Yc->len != 32){
+			werrstr("bad public key");
+			return -1;
+		}
+		Z = newbytes(32);
+		if(!curve25519_dh_finish(sec->X, Yc->data, Z->data)){
+			werrstr("unlucky shared key");
+			freebytes(Z);
+			return -1;
+		}
+		setMasterSecret(sec, Z);
+		return 0;
+	}
+
+	dom = &sec->ec.dom;
+	Q = &sec->ec.Q;
+	peer = ecdecodepub(dom, Yc->data, Yc->len);
+	if(peer == nil){
+		werrstr("bad public key");
+		return -1;
+	}
+
+	memset(&shared, 0, sizeof(shared));
+	shared.x = mpnew(0);
+	shared.y = mpnew(0);
+
+	ecmul(dom, peer, Q->d, &shared);
+
+	/* the premaster secret is the x coordinate, padded to the size of the field prime */
+	setMasterSecret(sec, mptobytes(shared.x, (mpsignif(dom->p)+7)/8));
+
+	mpfree(shared.x);
+	mpfree(shared.y);
+	ecpubfree(peer);
+	return 0;
+}
+
+/*
+ * Initialise sec for the client side: zero it, record the version
+ * being offered and generate the client random.  As in tlsSecInits,
+ * all RandomSize bytes come from genrandom.
+ */
+static void
+tlsSecInitc(TlsSec *sec, int cvers)
+{
+	memset(sec, 0, sizeof(*sec));
+
+	sec->clientVers = cvers;
+	genrandom(sec->crandom, RandomSize);
+}
+
+/*
+ * Client side of the RSA key exchange: build a premaster secret (the
+ * offered client version followed by random bytes), encrypt it to the
+ * RSA key in the server certificate and derive the master secret.
+ * Returns the encrypted premaster secret as produced by
+ * pkcs1_encrypt, or nil with the error string set when the
+ * certificate yields no RSA key.
+ */
+static Bytes*
+tlsSecRSAc(TlsSec *sec, uchar *cert, int ncert)
+{
+	RSApub *serverkey;
+	Bytes *premaster, *encrypted;
+
+	serverkey = X509toRSApub(cert, ncert, nil, 0);
+	if(serverkey == nil){
+		werrstr("invalid x509/rsa certificate");
+		return nil;
+	}
+
+	premaster = newbytes(MasterSecretSize);
+	put16(premaster->data, sec->clientVers);
+	genrandom(premaster->data+2, MasterSecretSize - 2);
+
+	encrypted = pkcs1_encrypt(premaster, serverkey);
+	/* setMasterSecret consumes the premaster secret */
+	setMasterSecret(sec, premaster);
+	rsapubfree(serverkey);
+	return encrypted;
+}
+
+/*
+ * Compute the Finished verify data for one side of the handshake
+ * into fin.  nfin must equal the length fixed by tlsSecVers,
+ * otherwise -1 is returned with the error string set.  hsh is a
+ * by-value copy of the running handshake hashes; the malloced flags
+ * of the copies are cleared before they are handed to the version
+ * specific setFinished routine.
+ */
+static int
+tlsSecFinished(TlsSec *sec, HandshakeHash hsh, uchar *fin, int nfin, int isclient)
+{
+	if(nfin != sec->nfin){
+		werrstr("invalid finished exchange");
+		return -1;
+	}
+
+	hsh.sha2_256.malloced = 0;
+	hsh.sha1.malloced = 0;
+	hsh.md5.malloced = 0;
+
+	sec->setFinished(sec, hsh, fin, isclient);
+	return 0;
+}
+
+/*
+ * Install the version specific routines in sec: the Finished
+ * computation, the length of its verify data and the PRF.  SSL3 gets
+ * the ssl routines, versions below TLS12Version the tls10 ones,
+ * everything else the tls12 ones.
+ */
+static void
+tlsSecVers(TlsSec *sec, int v)
+{
+	if(v == SSL3Version){
+		sec->prf = sslPRF;
+		sec->nfin = SSL3FinishedLen;
+		sec->setFinished = sslSetFinished;
+		return;
+	}
+
+	sec->nfin = TLSFinishedLen;
+	if(v >= TLS12Version){
+		sec->prf = tls12PRF;
+		sec->setFinished = tls12SetFinished;
+	}else{
+		sec->prf = tls10PRF;
+		sec->setFinished = tls10SetFinished;
+	}
+}
+
+/*
+ * Expand the master secret into c->nsecret bytes of key material and
+ * hand it, base64 encoded, to the record layer through a "secret"
+ * message on the control file.  The expansion uses the negotiated
+ * PRF with the label "key expansion" and the seed server random +
+ * client random; how the material is divided up depends on the
+ * cipher suite, but it is all generated by the same function.  Key
+ * material is wiped from local storage before returning.  Returns
+ * fprint's result.
+ */
+static int
+setSecrets(TlsConnection *c, int isclient)
+{
+	uchar keydata[MaxKeyData], seed[2*RandomSize];
+	char *encoded;
+	int rv;
+
+	assert(c->nsecret <= sizeof(keydata));
+	encoded = emalloc(2*c->nsecret);
+
+	memmove(seed, c->sec->srandom, RandomSize);
+	memmove(seed+RandomSize, c->sec->crandom, RandomSize);
+
+	c->sec->prf(keydata, c->nsecret, c->sec->sec, MasterSecretSize,
+		"key expansion", seed, sizeof(seed));
+
+	enc64(encoded, 2*c->nsecret, keydata, c->nsecret);
+	memset(keydata, 0, c->nsecret);
+
+	rv = fprint(c->ctl, "secret %s %s %d %s", c->digest, c->enc, isclient, encoded);
+
+	memset(encoded, 0, 2*c->nsecret);
+	free(encoded);
+	return rv;
+}
+
+/*
+ * Derive the master secret sec->sec from the premaster secret pm
+ * using the negotiated PRF, the label "master secret" and the seed
+ * client random + server random.  When a pre-shared key is
+ * configured (sec->psklen > 0) the PRF input becomes
+ *	16-bit length of pm, pm, 16-bit length of psk, psk
+ * pm is wiped and freed here; the caller must not use it afterwards.
+ */
+static void
+setMasterSecret(TlsSec *sec, Bytes *pm)
+{
+	uchar seed[2*RandomSize];
+	Bytes *mixed;
+	uchar *w;
+
+	if(sec->psklen > 0){
+		/* concatenate psk to pre-master secret */
+		mixed = newbytes(4 + pm->len + sec->psklen);
+		w = mixed->data;
+		put16(w, pm->len);
+		w += 2;
+		memmove(w, pm->data, pm->len);
+		w += pm->len;
+		put16(w, sec->psklen);
+		w += 2;
+		memmove(w, sec->psk, sec->psklen);
+
+		memset(pm->data, 0, pm->len);
+		freebytes(pm);
+		pm = mixed;
+	}
+
+	memmove(seed, sec->crandom, RandomSize);
+	memmove(seed+RandomSize, sec->srandom, RandomSize);
+	sec->prf(sec->sec, MasterSecretSize, pm->data, pm->len,
+		"master secret", seed, sizeof(seed));
+
+	memset(pm->data, 0, pm->len);
+	freebytes(pm);
+}
+
+/*
+ * Hash the data a ServerKeyExchange signature covers: client random,
+ * server random, then the key exchange parameters par.  The hash is
+ * chosen by the high byte of sigalg: 0 means MD5 followed by SHA1,
+ * anything else indexes hashfun.  The digest is stored in digest and
+ * its length returned, or -1 when hashfun has no function for the
+ * requested algorithm.
+ */
+static int
+digestDHparams(TlsSec *sec, Bytes *par, uchar digest[MAXdlen], int sigalg)
+{
+	Bytes *tbs;
+	uchar *w;
+	int hashalg, digestlen;
+
+	hashalg = (sigalg>>8) & 0xFF;
+
+	tbs = newbytes(2*RandomSize + par->len);
+	w = tbs->data;
+	memmove(w, sec->crandom, RandomSize);
+	w += RandomSize;
+	memmove(w, sec->srandom, RandomSize);
+	w += RandomSize;
+	memmove(w, par->data, par->len);
+
+	if(hashalg == 0){
+		md5(tbs->data, tbs->len, digest, nil);
+		sha1(tbs->data, tbs->len, digest+MD5dlen, nil);
+		digestlen = MD5dlen+SHA1dlen;
+	}else if(hashalg < nelem(hashfun) && hashfun[hashalg].fun != nil){
+		(*hashfun[hashalg].fun)(tbs->data, tbs->len, digest, nil);
+		digestlen = hashfun[hashalg].len;
+	}else
+		digestlen = -1;
+
+	freebytes(tbs);
+	return digestlen;
+}
+
+/*
+ * Check the signature sig over the key exchange parameters par
+ * against the public key in cert.  The low byte of sigalg selects
+ * the signature scheme (0x01 RSA, 0x03 ECDSA); the digest is computed
+ * by digestDHparams.  Returns nil when the signature verifies,
+ * otherwise a string describing the problem.  A missing signature is
+ * accepted only when a pre-shared key is in use.
+ */
+static char*
+verifyDHparams(TlsSec *sec, Bytes *par, Bytes *cert, Bytes *sig, int sigalg)
+{
+	uchar digest[MAXdlen];
+	int digestlen, scheme;
+	ECdomain dom;
+	ECpub *ecpk;
+	RSApub *rsapk;
+	char *err;
+
+	if(par == nil || par->len <= 0)
+		return "no DH parameters";
+
+	if(sig == nil || sig->len <= 0)
+		return sec->psklen > 0 ? nil : "no signature";
+
+	if(cert == nil)
+		return "no certificate";
+
+	digestlen = digestDHparams(sec, par, digest, sigalg);
+	if(digestlen <= 0)
+		return "unknown signature digest algorithm";
+
+	scheme = sigalg & 0xFF;
+	if(scheme == 0x01){
+		rsapk = X509toRSApub(cert->data, cert->len, nil, 0);
+		if(rsapk == nil)
+			return "bad certificate";
+		err = X509rsaverifydigest(sig->data, sig->len, digest, digestlen, rsapk);
+		rsapubfree(rsapk);
+		return err;
+	}
+	if(scheme == 0x03){
+		ecpk = X509toECpub(cert->data, cert->len, nil, 0, &dom);
+		if(ecpk == nil)
+			return "bad certificate";
+		err = X509ecdsaverifydigest(sig->data, sig->len, digest, digestlen, &dom, ecpk);
+		ecdomfree(&dom);
+		ecpubfree(ecpk);
+		return err;
+	}
+	return "signaure algorithm not RSA or ECDSA";
+}
+
+/*
+ * Encrypt data with the RSA public key according to PKCS#1
+ * (/lib/rfc/rfc2437 9.1.2.1), using block type 2 padding.  Returns a
+ * new Bytes as long as the modulus, or nil if padding fails.  The
+ * input is left untouched.
+ */
+static Bytes*
+pkcs1_encrypt(Bytes* data, RSApub* key)
+{
+	mpint *padded, *enc;
+	Bytes *out;
+
+	padded = pkcs1padbuf(data->data, data->len, key->n, 2);
+	if(padded == nil)
+		return nil;
+	enc = rsaencrypt(key, padded, nil);
+	mpfree(padded);
+
+	out = newbytes((mpsignif(key->n)+7)/8);
+	mptober(enc, out->data, out->len);
+	mpfree(enc);
+	return out;
+}
+
+/*
+ * Decrypt data, which must be exactly as long as the modulus of
+ * sec->rsapub, by sending it to factotum over sec->rpc, then strip
+ * the PKCS#1 block type 2 padding.  Returns the recovered message as
+ * a new Bytes, or nil on any failure.
+ */
+static Bytes*
+pkcs1_decrypt(TlsSec *sec, Bytes *data)
+{
+	mpint *plain;
+	Bytes *out;
+
+	if(data->len != (mpsignif(sec->rsapub->n)+7)/8)
+		return nil;
+	plain = factotum_rsa_decrypt(sec->rpc, bytestomp(data));
+	if(plain == nil)
+		return nil;
+	out = mptobytes(plain, (mpsignif(plain)+7)/8);
+	mpfree(plain);
+
+	/* the unpadded length replaces the buffer length; negative means bad padding */
+	out->len = pkcs1unpadbuf(out->data, out->len, sec->rsapub->n, 2);
+	if(out->len < 0){
+		freebytes(out);
+		return nil;
+	}
+	return out;
+}
+
+/*
+ * Produce an RSA signature over digest using the private key held by
+ * factotum.  When the high byte of sigalg names a hashfun entry whose
+ * length equals digestlen, the digest is first wrapped by
+ * asn1encodedigest; a digest of MD5dlen+SHA1dlen bytes is signed as
+ * is.  Anything else fails with "bad digest algorithm".  The input
+ * is padded with block type 1 and the result is as long as the
+ * modulus of sec->rsapub.  Returns nil on failure.
+ */
+static Bytes*
+pkcs1_sign(TlsSec *sec, uchar *digest, int digestlen, int sigalg)
+{
+	uchar buf[128];
+	mpint *sig;
+	Bytes *out;
+	int hashalg, n;
+
+	hashalg = (sigalg>>8)&0xFF;
+	if(hashalg > 0 && hashalg < nelem(hashfun) && hashfun[hashalg].len == digestlen)
+		n = asn1encodedigest(hashfun[hashalg].fun, digest, buf, sizeof(buf));
+	else if(digestlen == MD5dlen+SHA1dlen){
+		memmove(buf, digest, digestlen);
+		n = digestlen;
+	}else
+		n = -1;
+	if(n <= 0){
+		werrstr("bad digest algorithm");
+		return nil;
+	}
+
+	sig = factotum_rsa_decrypt(sec->rpc, pkcs1padbuf(buf, n, sec->rsapub->n, 1));
+	if(sig == nil)
+		return nil;
+	out = mptobytes(sig, (mpsignif(sec->rsapub->n)+7)/8);
+	mpfree(sig);
+	return out;
+}
+
+
+//================= general utility functions ========================
+
+/*
+ * malloc that never returns nil: allocate n bytes (at least 1),
+ * zeroed, or exit through sysfatal.  The block is tagged with the
+ * caller's pc.
+ */
+static void *
+emalloc(int n)
+{
+	void *mem;
+
+	if(n == 0)
+		n = 1;
+	mem = malloc(n);
+	if(mem == nil)
+		sysfatal("out of memory");
+	memset(mem, 0, n);
+	setmalloctag(mem, getcallerpc(&n));
+	return mem;
+}
+
+/*
+ * realloc that never returns nil: resize ReallocP to ReallocN bytes
+ * (at least 1) or exit through sysfatal.  A nil ReallocP is served by
+ * emalloc, so a fresh block is zeroed.  The block is tagged with the
+ * caller's pc.
+ */
+static void *
+erealloc(void *ReallocP, int ReallocN)
+{
+	if(ReallocN == 0)
+		ReallocN = 1;
+	if(ReallocP == nil)
+		ReallocP = emalloc(ReallocN);
+	else{
+		ReallocP = realloc(ReallocP, ReallocN);
+		if(ReallocP == nil)
+			sysfatal("out of memory");
+	}
+	setrealloctag(ReallocP, getcallerpc(&ReallocP));
+	return ReallocP;
+}
+
+/* store x at p as 4 bytes, most significant byte first */
+static void
+put32(uchar *p, u32int x)
+{
+	*p++ = x>>24;
+	*p++ = x>>16;
+	*p++ = x>>8;
+	*p = x;
+}
+
+/* store the low 24 bits of x at p as 3 bytes, most significant byte first */
+static void
+put24(uchar *p, int x)
+{
+	*p++ = x>>16;
+	*p++ = x>>8;
+	*p = x;
+}
+
+/* store the low 16 bits of x at p as 2 bytes, most significant byte first */
+static void
+put16(uchar *p, int x)
+{
+	*p++ = x>>8;
+	*p = x;
+}
+
+/* read 3 bytes at p as a big-endian number */
+static int
+get24(uchar *p)
+{
+	int v;
+
+	v = p[0]<<16;
+	v |= p[1]<<8;
+	v |= p[2];
+	return v;
+}
+
+/* read 2 bytes at p as a big-endian number */
+static int
+get16(uchar *p)
+{
+	int v;
+
+	v = p[0]<<8;
+	v |= p[1];
+	return v;
+}
+
+/*
+ * Allocate a Bytes with room for len data bytes; the storage comes
+ * zeroed from emalloc.  A negative len aborts the program.
+ */
+static Bytes*
+newbytes(int len)
+{
+	Bytes *b;
+
+	if(len < 0)
+		abort();
+	b = emalloc(sizeof(Bytes) + len);
+	b->len = len;
+	return b;
+}
+
+/*
+ * Allocate a Bytes of length len holding a copy of the len bytes at
+ * buf.
+ */
+static Bytes*
+makebytes(uchar* buf, int len)
+{
+	Bytes *copy;
+
+	copy = newbytes(len);
+	memmove(copy->data, buf, len);
+	return copy;
+}
+
+/* release a Bytes obtained from newbytes or makebytes; b is handed straight to free */
+static void
+freebytes(Bytes* b)
+{
+	free(b);
+}
+
+/* convert the big-endian number held in bytes to a newly allocated mpint */
+static mpint*
+bytestomp(Bytes* bytes)
+{
+	mpint *v;
+
+	v = betomp(bytes->data, bytes->len, nil);
+	return v;
+}
+
+/*
+ * Convert big to a new Bytes of len bytes, high order byte first.
+ * A len of 0 is treated as 1.
+ */
+static Bytes*
+mptobytes(mpint *big, int len)
+{
+	Bytes *out;
+
+	if(len == 0)
+		len = 1;
+	out = newbytes(len);
+	mptober(big, out->data, out->len);
+	return out;
+}
+
+/*
+ * Allocate an Ints with room for len ints (len counts ints, not
+ * bytes); the storage comes zeroed from emalloc.  Aborts when len is
+ * negative or so large that the size computation could not be
+ * trusted.
+ */
+static Ints*
+newints(int len)
+{
+	Ints *v;
+
+	if(len < 0 || len > ((uint)-1>>1)/sizeof(int))
+		abort();
+	v = emalloc(sizeof(Ints) + len*sizeof(int));
+	v->len = len;
+	return v;
+}
+
+/* release an Ints obtained from newints; b is handed straight to free */
+static void
+freeints(Ints* b)
+{
+	free(b);
+}
+
+/*
+ * Return the index of the first element of b equal to id, or -1 when
+ * id does not occur.
+ */
+static int
+lookupid(Ints* b, int id)
+{
+	int pos;
+
+	pos = 0;
+	while(pos < b->len){
+		if(b->data[pos] == id)
+			return pos;
+		pos++;
+	}
+	return -1;
+}
--- /dev/null
+++ b/libsec/port/tsmemcmp.c
@@ -1,0 +1,25 @@
+#include "os.h"
+#include <libsec.h>
+
+/*
+ * Timing safe memcmp(): compares the first n bytes of a1 and a2.
+ * All n bytes are always examined and no branch depends on the data.
+ * The result reflects the first differing byte pair: negative when the
+ * byte of a1 is smaller, positive when it is larger, 0 when the buffers
+ * are equal.  Relies on >> of a negative int propagating the sign bit.
+ */
+int
+tsmemcmp(void *a1, void *a2, u32 n)
+{
+	uchar *x, *y;
+	int below, above, d, res, seen;
+	u32 i;
+
+	x = a1;
+	y = a2;
+	res = 0;
+	seen = 0;
+	for(i = 0; i < n; i++){
+		d = x[i] - y[i];
+		below = d >> 8;		/* all ones when x[i] < y[i], else 0 */
+		above = (-d) >> 8;	/* all ones when x[i] > y[i], else 0 */
+		res |= (below - above) & ~seen;	/* only the first difference counts */
+		seen |= below | above;
+	}
+	return res;
+}
--- /dev/null
+++ b/libsec/port/x509.c
@@ -1,0 +1,3214 @@
+#include <u.h>
+#include <libc.h>
+#include <mp.h>
+#include <libsec.h>
+
+/*=============================================================*/
+/*  general ASN1 declarations and parsing
+ *
+ *  For now, this is used only for extracting the key from an
+ *  X509 certificate, so the entire collection is hidden.  But
+ *  someday we should probably make the functions visible and
+ *  give them their own man page.
+ */
+/* forward typedefs for the ASN1 structures defined below */
+typedef struct Elem Elem;
+typedef struct Tag Tag;
+typedef struct Value Value;
+typedef struct Bytes Bytes;
+typedef struct Ints Ints;
+typedef struct Bits Bits;
+typedef struct Elist Elist;
+
+/* tag classes (compared against the CLASS_MASK bits of the first tag byte) */
+#define Universal 0
+#define Context 0x80
+
+/* universal tags (values stored in Tag.num for class Universal) */
+#define BOOLEAN 1
+#define INTEGER 2
+#define BIT_STRING 3
+#define OCTET_STRING 4
+#define NULLTAG 5
+#define OBJECT_ID 6
+#define ObjectDescriptor 7
+#define EXTERNAL 8
+#define REAL 9
+#define ENUMERATED 10
+#define EMBEDDED_PDV 11
+#define UTF8String 12
+#define SEQUENCE 16		/* also SEQUENCE OF */
+#define SETOF 17				/* also SET OF */
+#define NumericString 18
+#define PrintableString 19
+#define TeletexString 20
+#define VideotexString 21
+#define IA5String 22
+#define UTCTime 23
+#define GeneralizedTime 24
+#define GraphicString 25
+#define VisibleString 26
+#define GeneralString 27
+#define UniversalString 28
+#define BMPString 30
+
+/* counted byte string; header and data are allocated together (see newbytes) */
+struct Bytes {
+	int	len;		/* number of bytes in data */
+	uchar	data[];
+};
+
+/* counted array of ints (used for object identifiers); allocated by newints */
+struct Ints {
+	int	len;		/* number of ints in data */
+	int	data[];
+};
+
+/* bit string stored as bytes; allocated by newbits */
+struct Bits {
+	int	len;		/* number of bytes */
+	int	unusedbits;	/* unused bits in last byte */
+	uchar	data[];		/* most-significant bit first */
+};
+
+/* decoded ASN1 tag */
+struct Tag {
+	int	class;		/* class bits of the first tag byte (Universal, Context, ...) */
+	int	num;		/* tag number, e.g. one of the universal tags */
+};
+
+/*
+ * Kinds of decoded value; Value.tag selects which member of Value.u is valid.
+ * NOTE(review): no code visible in this part of the file ever stores VEOC,
+ * although octet_decode and seq_decode test for it -- confirm.
+ */
+enum { VBool, VInt, VOctets, VBigInt, VReal, VOther,
+	VBitString, VNull, VEOC, VObjId, VString, VSeq, VSet };
+struct Value {
+	int	tag;		/* VBool, etc. */
+	union {
+		int	boolval;
+		int	intval;
+		Bytes*	octetsval;
+		Bytes*	bigintval;
+		Bytes*	realval;	/* undecoded; hardly ever used */
+		Bytes*	otherval;
+		Bits*	bitstringval;
+		Ints*	objidval;
+		char*	stringval;
+		Elist*	seqval;
+		Elist*	setval;
+	} u;  /* (Don't use anonymous unions, for ease of porting) */
+};
+
+/* one decoded ASN1 element: its tag and its value */
+struct Elem {
+	Tag	tag;
+	Value	val;
+};
+
+/* singly linked list of elements (contents of a SEQUENCE or SET) */
+struct Elist {
+	Elist*	tl;		/* rest of the list, nil at the end */
+	Elem	hd;		/* element held by value in this node */
+};
+
+/* decoding errors; ASN_OK (zero) means success */
+enum { ASN_OK, ASN_ESHORT, ASN_ETOOBIG, ASN_EVALLEN,
+		ASN_ECONSTR, ASN_EPRIM, ASN_EINVAL, ASN_EUNIMPL };
+
+
+/*
+ * here are the functions to consider making extern someday
+ * (all are currently static to this file)
+ */
+static Bytes*	newbytes(int len);
+static Bytes*	makebytes(uchar* buf, int len);
+static void	freebytes(Bytes* b);
+static Bytes*	catbytes(Bytes* b1, Bytes* b2);
+static Ints*	newints(int len);
+static Ints*	makeints(int* buf, int len);
+static void	freeints(Ints* b);
+static Bits*	newbits(int len);
+static Bits*	makebits(uchar* buf, int len, int unusedbits);
+static void	freebits(Bits* b);
+static Elist*	mkel(Elem e, Elist* tail);
+static void	freeelist(Elist* el);
+static int	elistlen(Elist* el);
+static int	is_seq(Elem* pe, Elist** pseq);
+static int	is_set(Elem* pe, Elist** pset);
+static int	is_int(Elem* pe, int* pint);
+static int	is_bigint(Elem* pe, Bytes** pbigint);
+static int	is_bitstring(Elem* pe, Bits** pbits);
+static int	is_octetstring(Elem* pe, Bytes** poctets);
+static int	is_oid(Elem* pe, Ints** poid);
+static int	is_string(Elem* pe, char** pstring);
+static int	is_time(Elem* pe, char** ptime);
+static int	decode(uchar* a, int alen, Elem* pelem);
+static int	encode(Elem e, Bytes** pbytes);
+static int	oid_lookup(Ints* o, Ints** tab);
+static void	freevalfields(Value* v);
+static mpint	*asn1mpint(Elem *e);
+static void	edump(Elem);
+
+/* fields of the first tag byte, and the limit on decoded object id length */
+#define TAG_MASK 0x1F		/* tag number; all ones means the number follows in base 128 */
+#define CONSTR_MASK 0x20	/* set for constructed encodings */
+#define CLASS_MASK 0xC0		/* tag class */
+#define MAXOBJIDLEN 20		/* size of the subid buffer in value_decode */
+
+/* internal BER decoding and encoding helpers */
+static int ber_decode(uchar** pp, uchar* pend, Elem* pelem);
+static int tag_decode(uchar** pp, uchar* pend, Tag* ptag, int* pisconstr);
+static int length_decode(uchar** pp, uchar* pend, int* plength);
+static int value_decode(uchar** pp, uchar* pend, int length, int kind, int isconstr, Value* pval);
+static int int_decode(uchar** pp, uchar* pend, int count, int unsgned, int* pint);
+static int uint7_decode(uchar** pp, uchar* pend, int* pint);
+static int octet_decode(uchar** pp, uchar* pend, int length, int isconstr, Bytes** pbytes);
+static int seq_decode(uchar** pp, uchar* pend, int length, int isconstr, Elist** pelist);
+static int enc(uchar** pp, Elem e, int lenonly);
+static int val_enc(uchar** pp, Elem e, int *pconstr, int lenonly);
+static void uint7_enc(uchar** pp, int num, int lenonly);
+static void int_enc(uchar** pp, int num, int unsgned, int lenonly);
+
+/*
+ * Allocate n zeroed bytes (a request for 0 bytes is bumped to 1).
+ * Calls sysfatal when malloc fails, so the result is never nil.
+ * The block is tagged with the caller's pc.
+ */
+static void *
+emalloc(int n)
+{
+	void *mem;
+
+	if(n == 0)
+		n = 1;
+	if((mem = malloc(n)) == nil)
+		sysfatal("out of memory");
+	memset(mem, 0, n);
+	setmalloctag(mem, getcallerpc(&n));
+	return mem;
+}
+
+/* Return an emalloc'ed copy of the NUL-terminated string s. */
+static char*
+estrdup(char *s)
+{
+	int size;
+	char *copy;
+
+	size = strlen(s)+1;	/* include the terminating NUL */
+	copy = emalloc(size);
+	memmove(copy, s, size);
+	return copy;
+}
+
+
+/*
+ * Decode the alen bytes a[0..alen) as the BER encoding of a single
+ * ASN1 element, storing it in *pelem.
+ * The return value is one of ASN_OK, etc.; ASN_EVALLEN is returned
+ * when the element decodes but does not use exactly alen bytes.
+ * When ber_decode reports an error, *pelem has been zeroed.
+ */
+static int
+decode(uchar* a, int alen, Elem* pelem)
+{
+	uchar *cur;
+	uchar *end;
+	int err;
+
+	cur = a;
+	end = &a[alen];
+	err = ber_decode(&cur, end, pelem);
+	if(err != ASN_OK)
+		return err;
+	if(cur != end)
+		return ASN_EVALLEN;
+	return ASN_OK;
+}
+
+/*
+ * All of the following decoding routines take arguments:
+ *	uchar **pp;
+ *	uchar *pend;
+ * Where parsing is supposed to start at **pp, and when parsing
+ * is done, *pp is updated to point at next char to be parsed.
+ * The pend pointer is just past end of string; an error should
+ * be returned if parsing hasn't finished by then.
+ *
+ * The returned int is ASN_OK if all went fine, else ASN_ESHORT, etc.
+ * The remaining argument(s) are pointers to where parsed entity goes.
+ */
+
+/*
+ * Decode an ASN1 'Elem' (tag, length, value).
+ * *pelem is zeroed first and only filled in on success.
+ * Values whose tag class is not Universal are decoded as
+ * primitive OCTET STRINGs.
+ */
+static int
+ber_decode(uchar** pp, uchar* pend, Elem* pelem)
+{
+	Tag t;
+	Value v;
+	int constructed;
+	int len;
+	int err;
+
+	memset(pelem, 0, sizeof(*pelem));
+	err = tag_decode(pp, pend, &t, &constructed);
+	if(err != ASN_OK)
+		return err;
+	err = length_decode(pp, pend, &len);
+	if(err != ASN_OK)
+		return err;
+	if(t.class != Universal)
+		err = value_decode(pp, pend, len, OCTET_STRING, 0, &v);
+	else
+		err = value_decode(pp, pend, len, t.num, constructed, &v);
+	if(err != ASN_OK)
+		return err;
+	pelem->tag = t;
+	pelem->val = v;
+	return ASN_OK;
+}
+
+/*
+ * Decode a tag field into *ptag and set *pisconstr to 1 or 0
+ * according to the constructed bit.
+ * At least two bytes must remain before pend, otherwise ASN_ESHORT
+ * is returned and *pp is left where it was.
+ * A tag number of TAG_MASK means the real number follows in base 128.
+ */
+static int
+tag_decode(uchar** pp, uchar* pend, Tag* ptag, int* pisconstr)
+{
+	uchar *cur;
+	int first;
+	int num;
+	int err;
+
+	cur = *pp;
+	if(pend-cur < 2)
+		return ASN_ESHORT;
+
+	first = *cur++;
+	ptag->class = first&CLASS_MASK;
+	*pisconstr = (first&CONSTR_MASK) ? 1 : 0;
+	err = ASN_OK;
+	num = first&TAG_MASK;
+	if(num == TAG_MASK)
+		err = uint7_decode(&cur, pend, &num);
+	ptag->num = num;
+	*pp = cur;
+	return err;
+}
+
+/*
+ * Decode a length field into *plength.
+ * A first byte below 0x80 is the length itself; otherwise its low
+ * seven bits give the number of following bytes holding the length,
+ * which int_decode reads as an unsigned value.
+ * Note that the byte 0x80 (zero following bytes) yields a length of 0.
+ * On ASN_ESHORT *plength is set to 0 and *pp is not advanced.
+ */
+static int
+length_decode(uchar** pp, uchar* pend, int* plength)
+{
+	uchar *cur;
+	int first;
+	int len;
+	int err;
+
+	cur = *pp;
+	if(cur >= pend){
+		*plength = 0;
+		return ASN_ESHORT;
+	}
+	err = ASN_OK;
+	len = 0;
+	first = *cur++;
+	if(first&0x80)
+		err = int_decode(&cur, pend, first&0x7F, 1, &len);
+	else
+		len = first;
+	*pp = cur;
+	*plength = len;
+	return err;
+}
+
+/*
+ * Decode a value field.
+ *
+ *	length		number of value bytes, or -1 for an indefinite length
+ *			(only accepted together with isconstr)
+ *	kind		universal tag number selecting the decoding
+ *	isconstr	non-zero if the encoding is constructed
+ *	pval		receives the decoded value; only meaningful on ASN_OK
+ *
+ * On return *pp points past whatever was consumed.
+ * Space checks compare length with the remaining byte count (pend - p)
+ * instead of forming p + length, which could wrap for a huge length.
+ *
+ * NOTE(review): kind 0 (end-of-contents marker) is stored as VNull,
+ * not VEOC, although octet_decode and seq_decode test for VEOC -- confirm.
+ */
+static int
+value_decode(uchar** pp, uchar* pend, int length, int kind, int isconstr, Value* pval)
+{
+	int err;
+	Bytes* va;
+	int num;
+	int bitsunused;
+	int subids[MAXOBJIDLEN];
+	int isubid;
+	Elist*	vl;
+	uchar* p;
+	uchar* pe;
+
+	err = ASN_OK;
+	p = *pp;
+	if(length == -1) {	/* "indefinite" length spec */
+		if(!isconstr)
+			err = ASN_EINVAL;
+	}
+	else if(length < 0 || length > pend - p)
+		err = ASN_EVALLEN;
+	if(err != ASN_OK)
+		return err;
+
+	switch(kind) {
+	case 0:
+		/* marker for end of indefinite constructions */
+		if(length == 0)
+			pval->tag = VNull;
+		else
+			err = ASN_EINVAL;
+		break;
+
+	case BOOLEAN:
+		if(isconstr)
+			err = ASN_ECONSTR;
+		else if(length != 1)
+			err = ASN_EVALLEN;
+		else {
+			pval->tag = VBool;
+			pval->u.boolval = (*p++ != 0);
+		}
+		break;
+
+	case INTEGER:
+	case ENUMERATED:
+		if(isconstr)
+			err = ASN_ECONSTR;
+		else if(length <= 4) {
+			/* fits in an int */
+			err = int_decode(&p, pend, length, 0, &num);
+			if(err == ASN_OK) {
+				pval->tag = VInt;
+				pval->u.intval = num;
+			}
+		}
+		else {
+			/* too wide for an int: keep the raw bytes */
+			pval->tag = VBigInt;
+			pval->u.bigintval = makebytes(p, length);
+			p += length;
+		}
+		break;
+
+	case BIT_STRING:
+		pval->tag = VBitString;
+		if(isconstr) {
+			if(length == -1 && pend - p >= 2 && *p == 0 && *(p+1) ==0) {
+				pval->u.bitstringval = makebits(0, 0, 0);
+				p += 2;
+			}
+			else	/* TODO: recurse and concat results */
+				err = ASN_EUNIMPL;
+		}
+		else {
+			if(length < 2) {
+				/* only an empty bit string (single 0 byte) is valid here */
+				if(length == 1 && *p == 0) {
+					pval->u.bitstringval = makebits(0, 0, 0);
+					p++;
+				}
+				else
+					err = ASN_EINVAL;
+			}
+			else {
+				/* first byte counts the unused bits of the last byte */
+				bitsunused = *p;
+				if(bitsunused > 7)
+					err = ASN_EINVAL;
+				else if(length > 0x0FFFFFFF)
+					err = ASN_ETOOBIG;
+				else {
+					pval->u.bitstringval = makebits(p+1, length-1, bitsunused);
+					p += length;
+				}
+			}
+		}
+		break;
+
+	case OCTET_STRING:
+	case ObjectDescriptor:
+		err = octet_decode(&p, pend, length, isconstr, &va);
+		if(err == ASN_OK) {
+			pval->tag = VOctets;
+			pval->u.octetsval = va;
+		}
+		break;
+
+	case NULLTAG:
+		if(isconstr)
+			err = ASN_ECONSTR;
+		else if(length != 0)
+			err = ASN_EVALLEN;
+		else
+			pval->tag = VNull;
+		break;
+
+	case OBJECT_ID:
+		if(isconstr)
+			err = ASN_ECONSTR;
+		else if(length == 0)
+			err = ASN_EVALLEN;
+		else {
+			isubid = 0;
+			pe = p+length;
+			while(p < pe && isubid < MAXOBJIDLEN) {
+				err = uint7_decode(&p, pend, &num);
+				if(err != ASN_OK)
+					break;
+				if(isubid == 0) {
+					/* first encoded number carries the first two subids */
+					subids[isubid++] = num / 40;
+					subids[isubid++] = num % 40;
+				}
+				else
+					subids[isubid++] = num;
+			}
+			if(err == ASN_OK) {
+				/* also catches ids longer than MAXOBJIDLEN */
+				if(p != pe)
+					err = ASN_EVALLEN;
+				else {
+					pval->tag = VObjId;
+					pval->u.objidval = makeints(subids, isubid);
+				}
+			}
+		}
+		break;
+
+	case EXTERNAL:
+	case EMBEDDED_PDV:
+		/* TODO: parse this internally */
+		if(length > pend - p)
+			err = ASN_EVALLEN;
+		else {
+			pval->tag = VOther;
+			pval->u.otherval = makebytes(p, length);
+			p += length;
+		}
+		break;
+
+	case REAL:
+		/* Let the application decode */
+		if(isconstr)
+			err = ASN_ECONSTR;
+		else if(length > pend - p)
+			err = ASN_EVALLEN;
+		else {
+			pval->tag = VReal;
+			pval->u.realval = makebytes(p, length);
+			p += length;
+		}
+		break;
+
+	case SEQUENCE:
+		err = seq_decode(&p, pend, length, isconstr, &vl);
+		if(err == ASN_OK) {
+			pval->tag = VSeq ;
+			pval->u.seqval = vl;
+		}
+		break;
+
+	case SETOF:
+		err = seq_decode(&p, pend, length, isconstr, &vl);
+		if(err == ASN_OK) {
+			pval->tag = VSet;
+			pval->u.setval = vl;
+		}
+		break;
+
+	case UTF8String:
+	case NumericString:
+	case PrintableString:
+	case TeletexString:
+	case VideotexString:
+	case IA5String:
+	case UTCTime:
+	case GeneralizedTime:
+	case GraphicString:
+	case VisibleString:
+	case GeneralString:
+	case UniversalString:
+	case BMPString:
+		err = octet_decode(&p, pend, length, isconstr, &va);
+		if(err == ASN_OK) {
+			uchar *s;
+			char *d;
+			Rune r;
+			int n;
+
+			/*
+			 * Convert to a NUL-terminated string.  n counts the
+			 * characters still to convert; an embedded zero character
+			 * stops the loop early, leaving n != 0, which is an error.
+			 */
+			switch(kind){
+			case UniversalString:
+				/* four bytes per character, big-endian */
+				n = va->len / 4;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<24 | s[1]<<16 | s[2]<<8 | s[3];
+					if(r == 0)
+						break;
+					n--;
+					s += 4;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			case BMPString:
+				/* two bytes per character, big-endian */
+				n = va->len / 2;
+				d = emalloc(n*UTFmax+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					r = s[0]<<8 | s[1];
+					if(r == 0)
+						break;
+					n--;
+					s += 2;
+					d += runetochar(d, &r);
+				}
+				*d = 0;
+				break;
+			default:
+				/* bytes are copied unchanged */
+				n = va->len;
+				d = emalloc(n+1);
+				pval->u.stringval = d;
+				s = va->data;
+				while(n > 0){
+					if((*d = *s) == 0)
+						break;
+					n--;
+					s++;
+					d++;
+				}
+				*d = 0;
+				break;
+			}
+			if(n != 0){
+				err = ASN_EINVAL;
+				free(pval->u.stringval);
+			} else
+				pval->tag = VString;
+			free(va);
+		}
+		break;
+
+	default:
+		/* unknown universal tag: keep the raw bytes */
+		if(length > pend - p)
+			err = ASN_EVALLEN;
+		else {
+			pval->tag = VOther;
+			pval->u.otherval = makebytes(p, length);
+			p += length;
+		}
+		break;
+	}
+	*pp = p;
+	return err;
+}
+
+/*
+ * Decode an int in format where count bytes are
+ * concatenated (most significant first) to form the value.
+ * Although ASN1 allows any size integer, ASN_ETOOBIG is returned
+ * if the result doesn't fit in a 32-bit int, and ASN_ESHORT if
+ * fewer than count bytes remain before pend.
+ * If unsgned is not set, the sign bit of the first byte is propagated.
+ * On error *pint is set to 0 and *pp is not advanced.
+ */
+static int
+int_decode(uchar** pp, uchar* pend, int count, int unsgned, int* pint)
+{
+	uchar *cur;
+	int val;
+	int err;
+
+	cur = *pp;
+	val = 0;
+	err = ASN_OK;
+	if(cur+count > pend)
+		err = ASN_ESHORT;
+	else if((count > 4) || (unsgned && count == 4 && (*cur&0x80)))
+		err = ASN_ETOOBIG;
+	else {
+		if(!unsgned && count > 0 && count < 4 && (*cur&0x80))
+			val = -1;	/* start from all ones so the sign extends */
+		while(count-- != 0)
+			val = (val << 8)|(*cur++);
+	}
+	*pint = val;
+	*pp = cur;
+	return err;
+}
+
+/*
+ * Decode an unsigned int in format where each
+ * byte except last has high bit set, and remaining
+ * seven bits of each byte are concatenated to form value.
+ * Although ASN1 allows any size integer, we return
+ * ASN_ETOOBIG if the result doesn't fit in a 32 bit int.
+ * ASN_ESHORT is returned only when pend is reached while a
+ * further byte is still expected; a value whose last byte is
+ * the last byte before pend decodes successfully.
+ * *pint and *pp are always updated.
+ */
+static int
+uint7_decode(uchar** pp, uchar* pend, int* pint)
+{
+	int err;
+	int num;
+	int more;
+	int v;
+	uchar* p;
+
+	p = *pp;
+	err = ASN_OK;
+	num = 0;
+	more = 1;
+	while(more && p < pend) {
+		v = *p++;
+		if(num&0x7F000000) {
+			/* another 7-bit shift would overflow */
+			err = ASN_ETOOBIG;
+			break;
+		}
+		num <<= 7;
+		more = v&0x80;
+		num |= (v&0x7F);
+	}
+	/*
+	 * more is still set only if the input ran out in mid-value
+	 * (or the loop stopped on ASN_ETOOBIG, which is kept as is).
+	 */
+	if(err == ASN_OK && more)
+		err = ASN_ESHORT;
+	*pint = num;
+	*pp = p;
+	return err;
+}
+
+/*
+ * Decode an octet string, recursively if isconstr.
+ * We've already checked that length==-1 implies isconstr==1,
+ * and otherwise that specified length fits within (*pp..pend)
+ *
+ * On success *pbytes is a newly allocated Bytes owned by the caller;
+ * on error it is nil.  *pp is advanced past what was consumed.
+ */
+static int
+octet_decode(uchar** pp, uchar* pend, int length, int isconstr, Bytes** pbytes)
+{
+	int err;
+	uchar* p;
+	Bytes* ans;
+	Bytes* newans;
+	uchar* pstart;
+	uchar* pold;
+	Elem	elem;
+
+	err = ASN_OK;
+	p = *pp;
+	ans = nil;
+	if(length >= 0 && !isconstr) {
+		/* primitive: the value bytes are the string */
+		ans = makebytes(p, length);
+		p += length;
+	}
+	else {
+		/* constructed, either definite or indefinite length */
+		pstart = p;
+		for(;;) {
+			/* definite length: stop once length bytes are consumed */
+			if(length >= 0 && p >= pstart + length) {
+				if(p != pstart + length)
+					err = ASN_EVALLEN;
+				break;
+			}
+			pold = p;
+			err = ber_decode(&p, pend, &elem);
+			if(err != ASN_OK)
+				break;
+			switch(elem.val.tag) {
+			case VOctets:
+				/* append this piece; catbytes copies, so both inputs are freed */
+				newans = catbytes(ans, elem.val.u.octetsval);
+				freevalfields(&elem.val);
+				freebytes(ans);
+				ans = newans;
+				break;
+
+			case VEOC:
+				/* end marker is only valid for indefinite length */
+				if(length == -1)
+					goto cloop_done;
+				/* no break */
+			default:
+				/* anything else is invalid; back up to the offending element */
+				freevalfields(&elem.val);
+				p = pold;
+				err = ASN_EINVAL;
+				goto cloop_done;
+			}
+		}
+cloop_done:
+		if(err != ASN_OK){
+			freebytes(ans);
+			ans = nil;
+		}
+	}
+	*pp = p;
+	*pbytes = ans;
+	return err;
+}
+
+/*
+ * Decode a sequence or set.
+ * We've already checked that length==-1 implies isconstr==1,
+ * and otherwise that specified length fits within (*p..pend)
+ *
+ * On success *pelist is the list of decoded elements in their
+ * original order, owned by the caller.  On error *pelist is nil and
+ * everything decoded so far, including the values held by the
+ * elements, has been freed.  A primitive encoding gives ASN_EPRIM.
+ */
+static int
+seq_decode(uchar** pp, uchar* pend, int length, int isconstr, Elist** pelist)
+{
+	int err;
+	uchar* p;
+	uchar* pstart;
+	uchar* pold;
+	Elist* ans;
+	Elem elem;
+	Elist* lve;
+	Elist* lveold;
+
+	err = ASN_OK;
+	ans = nil;
+	p = *pp;
+	if(!isconstr)
+		err = ASN_EPRIM;
+	else {
+		/* constructed, either definite or indefinite length */
+		lve = nil;
+		pstart = p;
+		for(;;) {
+			/* definite length: stop once length bytes are consumed */
+			if(length >= 0 && p >= pstart + length) {
+				if(p != pstart + length)
+					err = ASN_EVALLEN;
+				break;
+			}
+			pold = p;
+			err = ber_decode(&p, pend, &elem);
+			if(err != ASN_OK)
+				break;
+			if(elem.val.tag == VEOC) {
+				/* end marker is only valid for indefinite length */
+				if(length != -1) {
+					p = pold;
+					err = ASN_EINVAL;
+				}
+				break;
+			}
+			else
+				lve = mkel(elem, lve);	/* builds the list in reverse */
+		}
+		if(err != ASN_OK) {
+			/*
+			 * freeelist only releases the list nodes, so first
+			 * release what the decoded elements themselves own.
+			 */
+			for(lveold = lve; lveold != nil; lveold = lveold->tl)
+				freevalfields(&lveold->hd.val);
+			freeelist(lve);
+		}
+		else {
+			/* reverse back to original order */
+			while(lve != nil) {
+				lveold = lve;
+				lve = lve->tl;
+				lveold->tl = ans;
+				ans = lveold;
+			}
+		}
+	}
+	*pp = p;
+	*pelist = ans;
+	return err;
+}
+
+/*
+ * Encode e by BER rules, putting the answer in *pbytes.
+ * A first pass of enc with lenonly==1 only measures the encoding
+ * (starting from the address of a scratch byte that is never written);
+ * the buffer is then allocated and a second pass fills it.
+ * *pbytes is not set when the measuring pass fails.
+ */
+static int
+encode(Elem e, Bytes** pbytes)
+{
+	uchar probe;
+	uchar *cur;
+	Bytes *out;
+	int err;
+
+	cur = &probe;
+	err = enc(&cur, e, 1);
+	if(err != ASN_OK)
+		return err;
+	out = newbytes(cur-&probe);
+	cur = out->data;
+	err = enc(&cur, e, 0);
+	*pbytes = out;
+	return err;
+}
+
+/*
+ * The various enc functions take a pointer to a pointer
+ * into a buffer, and encode their entity starting there,
+ * updating the pointer afterwards.
+ * If lenonly is 1, only the pointer update is done,
+ * allowing enc to be called first to calculate the needed
+ * buffer length.
+ * If lenonly is 0, it is assumed that the answer will fit.
+ */
+
+/*
+ * Encode the element e (tag, length, value) at *pp and advance *pp.
+ * With lenonly set nothing is written; *pp is only advanced by the
+ * size the encoding would have.
+ * Returns ASN_EINVAL for a negative tag number, otherwise whatever
+ * val_enc reports for the value.
+ */
+static int
+enc(uchar** pp, Elem e, int lenonly)
+{
+	int err;
+	int vlen;
+	int constr;
+	Tag tag;
+	int v;
+	int ilen;
+	uchar* p;
+	uchar* psave;
+
+	tag = e.tag;
+	/* must be rejected up front: a negative number also satisfies tag.num < 31 */
+	if(tag.num < 0)
+		return ASN_EINVAL;
+
+	/* measure the value first; its length is part of the header */
+	p = *pp;
+	err = val_enc(&p, e, &constr, 1);
+	if(err != ASN_OK)
+		return err;
+	vlen = p - *pp;
+	p = *pp;
+	v = tag.class|constr;
+	if(tag.num < 31) {
+		/* tag number fits in the first byte */
+		if(!lenonly)
+			*p = (v|tag.num);
+		p++;
+	}
+	else {
+		/* all ones in the number field, real number follows in base 128 */
+		if(!lenonly)
+			*p = (v|31);
+		p++;
+		uint7_enc(&p, tag.num, lenonly);
+	}
+	if(vlen < 0x80) {
+		/* short form: the length is the byte itself */
+		if(!lenonly)
+			*p = vlen;
+		p++;
+	}
+	else {
+		/* long form: 0x80|count, then count bytes of length */
+		psave = p;
+		int_enc(&p, vlen, 1, 1);
+		ilen = p-psave;
+		p = psave;
+		if(!lenonly) {
+			*p++ = (0x80 | ilen);
+			int_enc(&p, vlen, 1, 0);
+		}
+		else
+			p += 1 + ilen;
+	}
+	if(!lenonly)
+		val_enc(&p, e, &constr, 0);
+	else
+		p += vlen;
+	*pp = p;
+	return err;
+}
+
+/*
+ * Encode just the value part of e at *pp and advance *pp.
+ * With lenonly set nothing is written; *pp is only advanced.
+ * *pconstr is set to CONSTR_MASK for sequences and sets, else 0.
+ * Returns ASN_EINVAL when the value does not match its tag.
+ *
+ * NOTE(review): for a non-Universal class the kind is derived from
+ * e.val.tag, but is_int, is_bigint, is_bitstring and is_oid all insist
+ * on class Universal, so such values appear to be rejected for those
+ * kinds -- confirm whether that is intended.
+ */
+static int
+val_enc(uchar** pp, Elem e, int *pconstr, int lenonly)
+{
+	int err;
+	uchar* p;
+	int kind;
+	int cl;
+	int v;
+	Bytes* bb = nil;
+	Bits* bits;
+	Ints* oid;
+	int k;
+	Elist* el;
+	char* s;
+
+	p = *pp;
+	err = ASN_OK;
+	kind = e.tag.num;
+	cl = e.tag.class;
+	*pconstr = 0;
+	if(cl != Universal) {
+		/* tag number says nothing about the type; go by the value's kind */
+		switch(e.val.tag) {
+		case VBool:
+			kind = BOOLEAN;
+			break;
+		case VInt:
+			kind = INTEGER;
+			break;
+		case VBigInt:
+			kind = INTEGER;
+			break;
+		case VOctets:
+			kind = OCTET_STRING;
+			break;
+		case VReal:
+			kind = REAL;
+			break;
+		case VOther:
+			kind = OCTET_STRING;
+			break;
+		case VBitString:
+			kind = BIT_STRING;
+			break;
+		case VNull:
+			kind = NULLTAG;
+			break;
+		case VObjId:
+			kind = OBJECT_ID;
+			break;
+		case VString:
+			kind = UniversalString;
+			break;
+		case VSeq:
+			kind = SEQUENCE;
+			break;
+		case VSet:
+			kind = SETOF;
+			break;
+		}
+	}
+	switch(kind) {
+	case BOOLEAN:
+		if(is_int(&e, &v)) {
+			/* any non-zero value is written as the single byte 255 */
+			if(v != 0)
+				v = 255;
+			 int_enc(&p, v, 1, lenonly);
+		}
+		else
+			err = ASN_EINVAL;
+		break;
+
+	case INTEGER:
+	case ENUMERATED:
+		if(is_int(&e, &v))
+			int_enc(&p, v, 0, lenonly);
+		else {
+			/* big integers are stored already encoded; copy them */
+			if(is_bigint(&e, &bb)) {
+				if(!lenonly)
+					memmove(p, bb->data, bb->len);
+				p += bb->len;
+			}
+			else
+				err = ASN_EINVAL;
+		}
+		break;
+
+	case BIT_STRING:
+		if(is_bitstring(&e, &bits)) {
+			if(bits->len == 0) {
+				/* empty bit string: just the unused-bits byte, 0 */
+				if(!lenonly)
+					*p = 0;
+				p++;
+			}
+			else {
+				v = bits->unusedbits;
+				if(v < 0 || v > 7)
+					err = ASN_EINVAL;
+				else {
+					/* unused-bits byte followed by the data */
+					if(!lenonly) {
+						*p = v;
+						memmove(p+1, bits->data, bits->len);
+					}
+					p += 1 + bits->len;
+				}
+			}
+		}
+		else
+			err = ASN_EINVAL;
+		break;
+
+	case OCTET_STRING:
+	case ObjectDescriptor:
+	case EXTERNAL:
+	case REAL:
+	case EMBEDDED_PDV:
+		/* all of these are held as raw bytes and copied unchanged */
+		bb = nil;
+		switch(e.val.tag) {
+		case VOctets:
+			bb = e.val.u.octetsval;
+			break;
+		case VReal:
+			bb = e.val.u.realval;
+			break;
+		case VOther:
+			bb = e.val.u.otherval;
+			break;
+		}
+		if(bb != nil) {
+			if(!lenonly)
+				memmove(p, bb->data, bb->len);
+			p += bb->len;
+		}
+		else
+			err = ASN_EINVAL;
+		break;
+
+	case NULLTAG:
+		/* no value bytes */
+		break;
+
+	case OBJECT_ID:
+		if(is_oid(&e, &oid)) {
+			for(k = 0; k < oid->len; k++) {
+				v = oid->data[k];
+				if(k == 0) {
+					/* first two subids share one number: 40*first + second */
+					v *= 40;
+					if(oid->len > 1)
+						v += oid->data[++k];
+				}
+				uint7_enc(&p, v, lenonly);
+			}
+		}
+		else
+			err = ASN_EINVAL;
+		break;
+
+	case SEQUENCE:
+	case SETOF:
+		el = nil;
+		if(e.val.tag == VSeq)
+			el = e.val.u.seqval;
+		else if(e.val.tag == VSet)
+			el = e.val.u.setval;
+		else
+			err = ASN_EINVAL;
+		*pconstr = CONSTR_MASK;
+		/* encode each member in turn; el is nil if the value was invalid */
+		for(; el != nil; el = el->tl) {
+			err = enc(&p, el->hd, lenonly);
+			if(err != ASN_OK)
+				break;
+		}
+		break;
+
+	case UTF8String:
+	case NumericString:
+	case PrintableString:
+	case TeletexString:
+	case VideotexString:
+	case IA5String:
+	case UTCTime:
+	case GeneralizedTime:
+	case GraphicString:
+	case VisibleString:
+	case GeneralString:
+	case UniversalString:
+	case BMPString:
+		/* the string's bytes are copied unchanged, without the NUL */
+		if(e.val.tag == VString) {
+			s = e.val.u.stringval;
+			if(s != nil) {
+				v = strlen(s);
+				if(!lenonly)
+					memmove(p, s, v);
+				p += v;
+			}
+		}
+		else
+			err = ASN_EINVAL;
+		break;
+
+	default:
+		err = ASN_EINVAL;
+	}
+	*pp = p;
+	return err;
+}
+
+/*
+ * Encode num in base 128, most significant group first, with the
+ * top bit set on every byte except the last.
+ * Bytes are only stored if !lenonly; *pp is advanced either way.
+ */
+static void
+uint7_enc(uchar** pp, int num, int lenonly)
+{
+	uchar *out;
+	int nbytes;
+	int rest;
+	int shift;
+
+	/* count the 7-bit groups needed */
+	nbytes = 1;
+	for(rest = num >> 7; rest > 0; rest >>= 7)
+		nbytes++;
+
+	out = *pp;
+	if(!lenonly) {
+		for(shift = (nbytes - 1)*7; shift > 0; shift -= 7)
+			*out++ = ((num >> shift)|0x80);
+		*out++ = (num&0x7F);
+	}
+	else
+		out += nbytes;
+	*pp = out;
+}
+
+/*
+ * Encode num as an unsigned or signed integer using the fewest
+ * bytes, most significant first; a signed value gets one extra
+ * leading byte when the top bit of its magnitude would otherwise
+ * be taken for the sign.
+ * Only the value bytes are produced (no length byte), and they are
+ * only stored if !lenonly; *pp is advanced either way.
+ */
+static void
+int_enc(uchar** pp, int num, int unsgned, int lenonly)
+{
+	uchar *out;
+	int mag;
+	int top;
+	int nbytes;
+	int shift;
+
+	/* work on the magnitude so leading sign bytes are not counted */
+	mag = num;
+	if(mag < 0)
+		mag = -(mag + 1);
+	top = mag;
+	nbytes = 1;
+	for(mag >>= 8; mag > 0; mag >>= 8) {
+		top = mag;
+		nbytes++;
+	}
+	if((top&0x80) && !unsgned)
+		nbytes++;
+
+	out = *pp;
+	if(!lenonly) {
+		for(shift = (nbytes - 1)*8; shift >= 0; shift -= 8)
+			*out++ = (num >> shift);
+	}
+	else
+		out += nbytes;
+	*pp = out;
+}
+
+/* Return 1 if a and b have the same length and the same elements, else 0. */
+static int
+ints_eq(Ints* a, Ints* b)
+{
+	int i;
+
+	if(a->len != b->len)
+		return 0;
+	i = 0;
+	while(i < a->len) {
+		if(a->data[i] != b->data[i])
+			return 0;
+		i++;
+	}
+	return 1;
+}
+
+/*
+ * Search the nil-terminated table tab for an entry equal to o.
+ * Return the index of the first match, or -1 if there is none.
+ */
+static int
+oid_lookup(Ints* o, Ints** tab)
+{
+	int idx;
+
+	idx = 0;
+	while(tab[idx] != nil) {
+		if(ints_eq(o, tab[idx]))
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * If *pe is a universal SEQUENCE holding a VSeq value, store its
+ * element list in *pseq and return 1; otherwise return 0 and
+ * leave *pseq alone.
+ */
+static int
+is_seq(Elem* pe, Elist** pseq)
+{
+	if(pe->tag.class != Universal || pe->tag.num != SEQUENCE || pe->val.tag != VSeq)
+		return 0;
+	*pseq = pe->val.u.seqval;
+	return 1;
+}
+
+/*
+ * If *pe is a universal SET holding a VSet value, store its
+ * element list in *pset and return 1; otherwise return 0.
+ */
+static int
+is_set(Elem* pe, Elist** pset)
+{
+	if(pe->tag.class != Universal || pe->tag.num != SETOF || pe->val.tag != VSet)
+		return 0;
+	*pset = pe->val.u.setval;
+	return 1;
+}
+
+/*
+ * If *pe is a universal INTEGER holding a VInt, or a universal
+ * BOOLEAN holding a VBool, store its value in *pint and return 1;
+ * otherwise return 0.
+ */
+static int
+is_int(Elem* pe, int* pint)
+{
+	if(pe->tag.class != Universal)
+		return 0;
+	if(pe->tag.num == INTEGER && pe->val.tag == VInt) {
+		*pint = pe->val.u.intval;
+		return 1;
+	}
+	if(pe->tag.num == BOOLEAN && pe->val.tag == VBool) {
+		*pint = pe->val.u.boolval;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * If *pe is a universal INTEGER holding a VBigInt, store its bytes
+ * in *pbigint and return 1; otherwise return 0.
+ * Integers small enough to be held as VInt are not accepted here;
+ * use is_int for those.
+ */
+static int
+is_bigint(Elem* pe, Bytes** pbigint)
+{
+	if(pe->tag.class != Universal || pe->tag.num != INTEGER || pe->val.tag != VBigInt)
+		return 0;
+	*pbigint = pe->val.u.bigintval;
+	return 1;
+}
+
+/*
+ * If *pe is a universal BIT STRING holding a VBitString, store it
+ * in *pbits and return 1; otherwise return 0.
+ */
+static int
+is_bitstring(Elem* pe, Bits** pbits)
+{
+	if(pe->tag.class != Universal || pe->tag.num != BIT_STRING || pe->val.tag != VBitString)
+		return 0;
+	*pbits = pe->val.u.bitstringval;
+	return 1;
+}
+
+/*
+ * If *pe is a universal OCTET STRING holding a VOctets, store it
+ * in *poctets and return 1; otherwise return 0.
+ */
+static int
+is_octetstring(Elem* pe, Bytes** poctets)
+{
+	if(pe->tag.class != Universal || pe->tag.num != OCTET_STRING || pe->val.tag != VOctets)
+		return 0;
+	*poctets = pe->val.u.octetsval;
+	return 1;
+}
+
+/*
+ * If *pe is a universal OBJECT IDENTIFIER holding a VObjId, store
+ * it in *poid and return 1; otherwise return 0.
+ */
+static int
+is_oid(Elem* pe, Ints** poid)
+{
+	if(pe->tag.class != Universal || pe->tag.num != OBJECT_ID || pe->val.tag != VObjId)
+		return 0;
+	*poid = pe->val.u.objidval;
+	return 1;
+}
+
+/*
+ * If *pe carries one of the universal character string tags listed
+ * below and holds a VString, store the string in *pstring and
+ * return 1; otherwise return 0.  The time types are handled by
+ * is_time, not here.
+ */
+static int
+is_string(Elem* pe, char** pstring)
+{
+	if(pe->tag.class != Universal || pe->val.tag != VString)
+		return 0;
+	switch(pe->tag.num) {
+	case UTF8String:
+	case NumericString:
+	case PrintableString:
+	case TeletexString:
+	case VideotexString:
+	case IA5String:
+	case GraphicString:
+	case VisibleString:
+	case GeneralString:
+	case UniversalString:
+	case BMPString:
+		*pstring = pe->val.u.stringval;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * If *pe is a universal UTCTime or GeneralizedTime holding a
+ * VString, store the string in *ptime and return 1; otherwise
+ * return 0.
+ */
+static int
+is_time(Elem* pe, char** ptime)
+{
+	if(pe->tag.class != Universal)
+		return 0;
+	if(pe->tag.num != UTCTime && pe->tag.num != GeneralizedTime)
+		return 0;
+	if(pe->val.tag != VString)
+		return 0;
+	*ptime = pe->val.u.stringval;
+	return 1;
+}
+
+
+/*
+ * Allocate and return a new, zeroed Bytes structure able to hold
+ * len bytes, with its len field set.  A negative len aborts.
+ */
+static Bytes*
+newbytes(int len)
+{
+	Bytes *b;
+
+	if(len < 0)
+		abort();
+	b = emalloc(sizeof(*b) + len);
+	b->len = len;
+	return b;
+}
+
+/*
+ * Allocate a Bytes of length len (via newbytes) and copy
+ * len bytes from buf into its data.
+ */
+static Bytes*
+makebytes(uchar* buf, int len)
+{
+	Bytes *copy;
+
+	copy = newbytes(len);
+	memmove(copy->data, buf, len);
+	return copy;
+}
+
+/* Release a Bytes; header and data share one allocation, so one free suffices. */
+static void
+freebytes(Bytes* b)
+{
+	free(b);
+}
+
+/*
+ * Return a newly allocated Bytes holding the bytes of b1 followed
+ * by those of b2.  Either argument (or both) may be nil, in which
+ * case it contributes nothing.  The arguments are not freed.
+ */
+static Bytes*
+catbytes(Bytes* b1, Bytes* b2)
+{
+	Bytes *joined;
+	int total;
+
+	if(b1 != nil && b2 != nil) {
+		total = b1->len + b2->len;
+		joined = newbytes(total);
+		joined->len = total;
+		memmove(joined->data, b1->data, b1->len);
+		memmove(joined->data+b1->len, b2->data, b2->len);
+		return joined;
+	}
+	if(b1 != nil)
+		return makebytes(b1->data, b1->len);
+	if(b2 != nil)
+		return makebytes(b2->data, b2->len);
+	return newbytes(0);
+}
+
+/*
+ * Allocate an Ints with room for len ints and record len in it.
+ * Aborts when len is negative or too large for the byte count
+ * len*sizeof(int) to be computed safely.
+ */
+static Ints*
+newints(int len)
+{
+	Ints *v;
+
+	if(len < 0)
+		abort();
+	if(len > ((uint)-1>>1)/sizeof(int))
+		abort();
+	v = emalloc(sizeof(*v) + len*sizeof(int));
+	v->len = len;
+	return v;
+}
+
+/* Allocate an Ints of len elements (via newints) and copy len ints from buf. */
+static Ints*
+makeints(int* buf, int len)
+{
+	Ints *copy;
+
+	copy = newints(len);
+	memmove(copy->data, buf, len*sizeof(int));
+	return copy;
+}
+
+/* Release an Ints; header and data share one allocation, so one free suffices. */
+static void
+freeints(Ints* b)
+{
+	free(b);
+}
+
+/*
+ * Allocate a Bits able to hold len bytes, with len recorded and
+ * unusedbits set to 0.  A negative len aborts.
+ */
+static Bits*
+newbits(int len)
+{
+	Bits *b;
+
+	if(len < 0)
+		abort();
+	b = emalloc(sizeof(*b) + len);
+	b->unusedbits = 0;
+	b->len = len;
+	return b;
+}
+
+/*
+ * Allocate a Bits of len bytes (via newbits), copy len bytes from
+ * buf into it and record unusedbits.
+ */
+static Bits*
+makebits(uchar* buf, int len, int unusedbits)
+{
+	Bits *copy;
+
+	copy = newbits(len);
+	copy->unusedbits = unusedbits;
+	memmove(copy->data, buf, len);
+	return copy;
+}
+
+/* Release a Bits; header and data share one allocation, so one free suffices. */
+static void
+freebits(Bits* b)
+{
+	free(b);
+}
+
+/*
+ * Return a new list node holding a copy of e, with tail as the rest
+ * of the list.  The node is tagged with the pc of mkel's caller.
+ */
+static Elist*
+mkel(Elem e, Elist* tail)
+{
+	Elist *node;
+
+	node = emalloc(sizeof(*node));
+	setmalloctag(node, getcallerpc(&e));
+	node->tl = tail;
+	node->hd = e;
+	return node;
+}
+
+/* Return the number of nodes in the list el (0 for nil). */
+static int
+elistlen(Elist* el)
+{
+	int count;
+
+	for(count = 0; el != nil; el = el->tl)
+		count++;
+	return count;
+}
+
+/*
+ * Free every node of the list el.  Anything the elements' values
+ * point to is not freed; use freevalfields on them first if needed.
+ */
+static void
+freeelist(Elist* el)
+{
+	Elist *rest;
+
+	for(; el != nil; el = rest) {
+		rest = el->tl;	/* fetch before the node goes away */
+		free(el);
+	}
+}
+
+/*
+ * Free whatever v owns, according to v->tag: byte strings, bit
+ * strings, object ids, strings, and (recursively) the members and
+ * nodes of sequences and sets.  *v itself is not freed but is
+ * zeroed afterwards.  A nil v is ignored.
+ */
+static void
+freevalfields(Value* v)
+{
+	Elist *head;
+	Elist *node;
+
+	if(v == nil)
+		return;
+	switch(v->tag) {
+	case VOctets:
+		freebytes(v->u.octetsval);
+		break;
+	case VBigInt:
+		freebytes(v->u.bigintval);
+		break;
+	case VReal:
+		freebytes(v->u.realval);
+		break;
+	case VOther:
+		freebytes(v->u.otherval);
+		break;
+	case VBitString:
+		freebits(v->u.bitstringval);
+		break;
+	case VObjId:
+		freeints(v->u.objidval);
+		break;
+	case VString:
+		free(v->u.stringval);
+		break;
+	case VSeq:
+	case VSet:
+		head = (v->tag == VSeq) ? v->u.seqval : v->u.setval;
+		/* release what each member owns, then the list nodes */
+		for(node = head; node != nil; node = node->tl)
+			freevalfields(&node->hd.val);
+		freeelist(head);
+		break;
+	}
+	memset(v, 0, sizeof(*v));
+}
+
+/*
+ * Convert an integer element to a new mpint: small integers (and
+ * booleans, which is_int also accepts) through itomp, big integers
+ * through betomp.  Returns nil if e is neither.
+ */
+static mpint*
+asn1mpint(Elem *e)
+{
+	Bytes *big;
+	int small;
+	mpint *m;
+
+	m = nil;
+	if(is_int(e, &small))
+		m = itomp(small, nil);
+	else if(is_bigint(e, &big))
+		m = betomp(big->data, big->len, nil);
+	return m;
+}
+
+/* end of general ASN1 functions */
+
+
+
+
+
+/*=============================================================*/
+/*
+ * Decode and parse an X.509 Certificate, defined by this ASN1:
+ *	Certificate ::= SEQUENCE {
+ *		certificateInfo CertificateInfo,
+ *		signatureAlgorithm AlgorithmIdentifier,
+ *		signature BIT STRING }
+ *
+ *	CertificateInfo ::= SEQUENCE {
+ *		version [0] INTEGER DEFAULT v1 (0),
+ *		serialNumber INTEGER,
+ *		signature AlgorithmIdentifier,
+ *		issuer Name,
+ *		validity Validity,
+ *		subject Name,
+ *		subjectPublicKeyInfo SubjectPublicKeyInfo }
+ *	(version v2 has two more fields, optional unique identifiers for
+ *  issuer and subject; since we ignore these anyway, we won't parse them)
+ *
+ *	Validity ::= SEQUENCE {
+ *		notBefore UTCTime,
+ *		notAfter UTCTime }
+ *
+ *	SubjectPublicKeyInfo ::= SEQUENCE {
+ *		algorithm AlgorithmIdentifier,
+ *		subjectPublicKey BIT STRING }
+ *
+ *	AlgorithmIdentifier ::= SEQUENCE {
+ *		algorithm OBJECT IDENTIFIER,
+ *		parameters ANY DEFINED BY ALGORITHM OPTIONAL }
+ *
+ *	Name ::= SEQUENCE OF RelativeDistinguishedName
+ *
+ *	RelativeDistinguishedName ::= SET SIZE(1..MAX) OF AttributeTypeAndValue
+ *
+ *	AttributeTypeAndValue ::= SEQUENCE {
+ *		type OBJECT IDENTIFIER,
+ *		value DirectoryString }
+ *	(selected attributes have these Object Ids:
+ *		commonName {2 5 4 3}
+ *		countryName {2 5 4 6}
+ *		localityName {2 5 4 7}
+ *		stateOrProvinceName {2 5 4 8}
+ *		organizationName {2 5 4 10}
+ *		organizationalUnitName {2 5 4 11}
+ *	)
+ *
+ *	DirectoryString ::= CHOICE {
+ *		teletexString TeletexString,
+ *		printableString PrintableString,
+ *		universalString UniversalString }
+ *
+ *  See rfc1423, rfc2437 for AlgorithmIdentifier, subjectPublicKeyInfo, signature.
+ *
+ *  Not yet implemented:
+ *   CertificateRevocationList ::= SIGNED SEQUENCE{
+ *           signature       AlgorithmIdentifier,
+ *           issuer          Name,
+ *           lastUpdate      UTCTime,
+ *           nextUpdate      UTCTime,
+ *           revokedCertificates
+ *                           SEQUENCE OF CRLEntry OPTIONAL}
+ *   CRLEntry ::= SEQUENCE{
+ *           userCertificate SerialNumber,
+ *           revocationDate UTCTime}
+ */
+
+/*
+ * Fields of interest pulled out of a certificate.
+ * NOTE(review): the code that fills this in is outside this part of
+ * the file; the remarks below are inferred from field names and
+ * types only -- confirm against the parser.
+ */
+typedef struct CertX509 {
+	int	serial;
+	char*	issuer;
+	char*	validity_start;
+	char*	validity_end;
+	char*	subject;
+	int	publickey_alg;	/* presumably one of the ALG_ constants */
+	Bits*	publickey;
+	int	signature_alg;	/* presumably one of the ALG_ constants */
+	Bits*	signature;
+	int	curve;
+	Bytes*	ext;
+} CertX509;
+
+/*
+ * Algorithm object-ids.
+ * Constants number consecutively from 0; NUMALGS is their count.
+ * NOTE(review): presumably these index a table of the oid_ values
+ * defined below, so the order would have to match -- confirm.
+ */
+enum {
+	ALG_rsaEncryption,
+	ALG_md2WithRSAEncryption,
+	ALG_md4WithRSAEncryption,
+	ALG_md5WithRSAEncryption,
+
+	ALG_sha1WithRSAEncryption,
+	ALG_sha1WithRSAEncryptionOiw,
+
+	ALG_sha256WithRSAEncryption,
+	ALG_sha384WithRSAEncryption,
+	ALG_sha512WithRSAEncryption,
+	ALG_sha224WithRSAEncryption,
+
+	ALG_ecPublicKey,
+	ALG_sha1WithECDSA,
+	ALG_sha256WithECDSA,
+	ALG_sha384WithECDSA,
+	ALG_sha512WithECDSA,
+
+	ALG_md5,
+	ALG_sha1,
+	ALG_sha256,
+	ALG_sha384,
+	ALG_sha512,
+	ALG_sha224,
+
+	NUMALGS
+};
+
+/*
+ * Fixed-capacity object identifier: a count followed by up to 15 arcs.
+ * Instances are cast to Ints* when handed to the ASN.1 helpers.
+ */
+typedef struct Ints15 {
+	int		len;		/* number of entries of data in use */
+	int		data[15];	/* the oid arcs */
+} Ints15;
+
+/* Description of one digest algorithm usable in signatures. */
+typedef struct DigestAlg {
+	int		alg;	/* ALG_* value naming the digest */
+	DigestState*	(*fun)(uchar*,u32,uchar*,DigestState*);	/* hash function */
+	int		len;	/* length of the digest output in bytes */
+} DigestAlg;
+
+/* One descriptor per supported digest: { algorithm id, hash function, digest length }. */
+static DigestAlg alg_md5 = { ALG_md5, md5, MD5dlen};
+static DigestAlg alg_sha1 = { ALG_sha1, sha1, SHA1dlen };
+static DigestAlg alg_sha256 = { ALG_sha256, sha2_256, SHA2_256dlen };
+static DigestAlg alg_sha384 = { ALG_sha384, sha2_384, SHA2_384dlen };
+static DigestAlg alg_sha512 = { ALG_sha512, sha2_512, SHA2_512dlen };
+static DigestAlg alg_sha224 = { ALG_sha224, sha2_224, SHA2_224dlen };
+
+/* maximum length of digest output of the digest algs above */
+enum {
+	MAXdlen = SHA2_512dlen,
+};
+
+/*
+ * Object identifiers of the supported algorithms.
+ * The first initializer is the number of arcs, the rest are the arcs.
+ */
+
+/* RSA public key and RSA signature algorithms */
+static Ints15 oid_rsaEncryption = {7, 1, 2, 840, 113549, 1, 1, 1 };
+
+static Ints15 oid_md2WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 2 };
+static Ints15 oid_md4WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 3 };
+static Ints15 oid_md5WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 4 };
+static Ints15 oid_sha1WithRSAEncryption ={7, 1, 2, 840, 113549, 1, 1, 5 };
+static Ints15 oid_sha1WithRSAEncryptionOiw ={6, 1, 3, 14, 3, 2, 29 };
+static Ints15 oid_sha256WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 11 };
+static Ints15 oid_sha384WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 12 };
+static Ints15 oid_sha512WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 13 };
+static Ints15 oid_sha224WithRSAEncryption = {7, 1, 2, 840, 113549, 1, 1, 14 };
+
+/* elliptic curve public key and ECDSA signature algorithms */
+static Ints15 oid_ecPublicKey = {6, 1, 2, 840, 10045, 2, 1 };
+static Ints15 oid_sha1WithECDSA = {6, 1, 2, 840, 10045, 4, 1 };
+static Ints15 oid_sha256WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 2 };
+static Ints15 oid_sha384WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 3 };
+static Ints15 oid_sha512WithECDSA = {7, 1, 2, 840, 10045, 4, 3, 4 };
+
+/* bare digest algorithms */
+static Ints15 oid_md5 = {6, 1, 2, 840, 113549, 2, 5 };
+static Ints15 oid_sha1 = {6, 1, 3, 14, 3, 2, 26 };
+static Ints15 oid_sha256= {9, 2, 16, 840, 1, 101, 3, 4, 2, 1 };
+static Ints15 oid_sha384= {9, 2, 16, 840, 1, 101, 3, 4, 2, 2 };
+static Ints15 oid_sha512= {9, 2, 16, 840, 1, 101, 3, 4, 2, 3 };
+static Ints15 oid_sha224= {9, 2, 16, 840, 1, 101, 3, 4, 2, 4 };
+
+/*
+ * Object identifier of each algorithm, in the order of the ALG_*
+ * enumeration, terminated by nil.  parse_alg searches this table
+ * and mkalg indexes it with an ALG_* value.
+ */
+static Ints *alg_oid_tab[NUMALGS+1] = {
+	(Ints*)&oid_rsaEncryption,
+	(Ints*)&oid_md2WithRSAEncryption,
+	(Ints*)&oid_md4WithRSAEncryption,
+	(Ints*)&oid_md5WithRSAEncryption,
+
+	(Ints*)&oid_sha1WithRSAEncryption,
+	(Ints*)&oid_sha1WithRSAEncryptionOiw,
+
+	(Ints*)&oid_sha256WithRSAEncryption,
+	(Ints*)&oid_sha384WithRSAEncryption,
+	(Ints*)&oid_sha512WithRSAEncryption,
+	(Ints*)&oid_sha224WithRSAEncryption,
+
+	(Ints*)&oid_ecPublicKey,
+	(Ints*)&oid_sha1WithECDSA,
+	(Ints*)&oid_sha256WithECDSA,
+	(Ints*)&oid_sha384WithECDSA,
+	(Ints*)&oid_sha512WithECDSA,
+
+	(Ints*)&oid_md5,
+	(Ints*)&oid_sha1,
+	(Ints*)&oid_sha256,
+	(Ints*)&oid_sha384,
+	(Ints*)&oid_sha512,
+	(Ints*)&oid_sha224,
+	nil
+};
+
+/*
+ * Digest descriptor used with each ALG_* value, in enumeration order
+ * and terminated by nil.  One row per group of the enumeration:
+ * rsaEncryption and the md2/md4/md5 RSA entries all use alg_md5,
+ * and ecPublicKey uses alg_sha256.
+ */
+static DigestAlg *digestalg[NUMALGS+1] = {
+	&alg_md5, &alg_md5, &alg_md5, &alg_md5,
+	&alg_sha1, &alg_sha1,
+	&alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
+	&alg_sha256, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512,
+	&alg_md5, &alg_sha1, &alg_sha256, &alg_sha384, &alg_sha512, &alg_sha224,
+	nil
+};
+
+/* defined further down; used by the signature verification code */
+static Bytes* encode_digest(DigestAlg *da, uchar *digest);
+
+/* object identifiers of the supported named elliptic curves */
+static Ints15 oid_secp256r1 = {7, 1, 2, 840, 10045, 3, 1, 7};
+static Ints15 oid_secp384r1 = {5, 1, 3, 132, 0, 34};
+
+/* nil-terminated table searched by parse_curve */
+static Ints *namedcurves_oid_tab[] = {
+	(Ints*)&oid_secp256r1,
+	(Ints*)&oid_secp384r1,
+	nil,
+};
+/* curve parameter functions, in the same order as namedcurves_oid_tab */
+static void (*namedcurves[])(mpint *p, mpint *a, mpint *b, mpint *x, mpint *y, mpint *n, mpint *h) = {
+	secp256r1,
+	secp384r1,
+	nil,
+};
+
+/* defined further down; used by the X509to*pub functions */
+static void appendaltnames(char *name, int nname, Bytes *ext, int req);
+
+/*
+ * Release a certificate obtained from decode_cert together with
+ * every string, bit string and byte string it owns.
+ * A nil certificate is ignored.
+ */
+static void
+freecert(CertX509* c)
+{
+	if(c != nil){
+		freebytes(c->ext);
+		freebits(c->signature);
+		freebits(c->publickey);
+		free(c->subject);
+		free(c->validity_end);
+		free(c->validity_start);
+		free(c->issuer);
+		free(c);
+	}
+}
+
+/*
+ * Parse the Name ASN1 type.
+ * A Name is a sequence of RelativeDistinguishedNames, each a set of
+ * attribute/value pairs, ordered from most general to most specific.
+ * Only the values are kept; they are joined from most specific to
+ * least specific with ", " between them, giving a "postal-style"
+ * address.
+ * Returns the allocated string (the caller frees it), or nil when
+ * the element is malformed, has no values, or has more than
+ * MAXPARTS values.
+ */
+static char*
+parse_name(Elem* e)
+{
+	enum { MAXPARTS = 100 };
+	char *parts[MAXPARTS];
+	char *val, *name;
+	Elist *rdns, *attrs, *pair;
+	int nparts, size, k;
+
+	if(!is_seq(e, &rdns))
+		return nil;
+	nparts = 0;
+	size = 0;
+	for(; rdns != nil; rdns = rdns->tl){
+		if(!is_set(&rdns->hd, &attrs))
+			return nil;
+		for(; attrs != nil; attrs = attrs->tl){
+			if(!is_seq(&attrs->hd, &pair) || elistlen(pair) != 2)
+				return nil;
+			if(!is_string(&pair->tl->hd, &val) || nparts >= MAXPARTS)
+				return nil;
+			parts[nparts++] = val;
+			size += strlen(val) + 2;	/* value plus ", " (or the final NUL) */
+		}
+	}
+	if(nparts == 0)
+		return nil;
+
+	name = (char*)emalloc(size);
+	*name = '\0';
+	for(k = nparts-1; k >= 0; k--){
+		strcat(name, parts[k]);
+		if(k > 0)
+			strcat(name, ", ");
+	}
+	return name;
+}
+
+/*
+ * Parse an AlgorithmIdentifier ASN1 type.
+ * The object identifier is looked up in alg_oid_tab and the result
+ * of oid_lookup is returned, which callers compare against the
+ * ALG_* constants and treat as unknown when negative.
+ * Returns -1 when the element is not a non-empty sequence starting
+ * with an object identifier.
+ * The parameters are ignored here.
+ */
+static int
+parse_alg(Elem* e)
+{
+	Elist *fields;
+	Ints *id;
+
+	if(!is_seq(e, &fields))
+		return -1;
+	if(fields == nil)
+		return -1;
+	if(!is_oid(&fields->hd, &id))
+		return -1;
+	return oid_lookup(id, alg_oid_tab);
+}
+
+/*
+ * Find the named curve of an AlgorithmIdentifier: the second element
+ * of the sequence must be an object identifier, which is looked up
+ * in namedcurves_oid_tab.  Returns the result of oid_lookup, or -1
+ * when the element does not have that shape.
+ */
+static int
+parse_curve(Elem* e)
+{
+	Elist *fields;
+	Ints *id;
+
+	if(!is_seq(e, &fields))
+		return -1;
+	if(elistlen(fields) < 2)
+		return -1;
+	if(!is_oid(&fields->tl->hd, &id))
+		return -1;
+	return oid_lookup(id, namedcurves_oid_tab);
+}
+
+/*
+ * Decode the certificate in buf[0..len) into a newly allocated
+ * CertX509.  Returns nil if the data does not decode or does not
+ * have the expected structure; otherwise the caller releases the
+ * result with freecert().
+ *
+ * Values that the certificate keeps (time strings, bit strings, the
+ * extension octets) are moved out of the decoded element tree by
+ * setting the tree's pointer to nil, so that the freevalfields call
+ * at the end does not release them.
+ */
+static CertX509*
+decode_cert(uchar *buf, int len)
+{
+	int ok = 0;
+	int n;
+	Elem  ecert;
+	Elem* ecertinfo;
+	Elem* esigalg;
+	Elem* esig;
+	Elem* eserial;
+	Elem* eissuer;
+	Elem* evalidity;
+	Elem* esubj;
+	Elem* epubkey;
+	Elist* el;
+	Elist* elcert = nil;
+	Elist* elcertinfo = nil;
+	Elist* elvalidity = nil;
+	Elist* elpubkey = nil;
+	Bits* bits = nil;
+	Bytes* b;
+	Elem* e;
+	CertX509* c = nil;
+
+	if(decode(buf, len, &ecert) != ASN_OK)
+		goto errret;
+
+	/* start from a state that freecert can always handle */
+	c = (CertX509*)emalloc(sizeof(CertX509));
+	c->serial = -1;
+	c->issuer = nil;
+	c->validity_start = nil;
+	c->validity_end = nil;
+	c->subject = nil;
+	c->publickey_alg = -1;
+	c->publickey = nil;
+	c->signature_alg = -1;
+	c->signature = nil;
+	c->ext = nil;
+
+	/* Certificate */
+ 	if(!is_seq(&ecert, &elcert) || elistlen(elcert) !=3)
+		goto errret;
+ 	ecertinfo = &elcert->hd;
+ 	el = elcert->tl;
+ 	esigalg = &el->hd;
+	c->signature_alg = parse_alg(esigalg);
+ 	el = el->tl;
+ 	esig = &el->hd;
+
+	/* Certificate Info */
+	if(!is_seq(ecertinfo, &elcertinfo))
+		goto errret;
+	n = elistlen(elcertinfo);
+  	if(n < 6)
+		goto errret;
+	eserial =&elcertinfo->hd;
+ 	el = elcertinfo->tl;
+ 	/* check for optional version, marked by explicit context tag 0 */
+	if(eserial->tag.class == Context && eserial->tag.num == 0) {
+ 		eserial = &el->hd;
+ 		if(n < 7)
+ 			goto errret;
+ 		el = el->tl;
+ 	}
+
+	/* the algorithm inside the certificate info must match the outer one */
+	if(parse_alg(&el->hd) != c->signature_alg)
+		goto errret;
+ 	el = el->tl;
+ 	eissuer = &el->hd;
+ 	el = el->tl;
+ 	evalidity = &el->hd;
+ 	el = el->tl;
+ 	esubj = &el->hd;
+ 	el = el->tl;
+ 	epubkey = &el->hd;
+	/* keep the element after the public key if it is a context [3] octet value */
+	if(el->tl != nil
+	&& el->tl->hd.tag.class == Context
+	&& el->tl->hd.tag.num == 3
+	&& el->tl->hd.val.tag == VOctets){
+		c->ext = el->tl->hd.val.u.octetsval;
+		el->tl->hd.val.u.octetsval = nil;	/* transfer ownership */
+	}
+	if(!is_int(eserial, &c->serial)) {
+		if(!is_bigint(eserial, &b))
+			goto errret;
+		c->serial = -1;	/* else we have to change cert struct */
+  	}
+	c->issuer = parse_name(eissuer);
+	if(c->issuer == nil)
+		goto errret;
+	/* Validity */
+  	if(!is_seq(evalidity, &elvalidity))
+		goto errret;
+	if(elistlen(elvalidity) != 2)
+		goto errret;
+	e = &elvalidity->hd;
+	if(!is_time(e, &c->validity_start))
+		goto errret;
+	e->val.u.stringval = nil;	/* string ownership transfer */
+	e = &elvalidity->tl->hd;
+ 	if(!is_time(e, &c->validity_end))
+		goto errret;
+	e->val.u.stringval = nil;	/* string ownership transfer */
+
+	/* resume CertificateInfo */
+ 	c->subject = parse_name(esubj);
+	if(c->subject == nil)
+		goto errret;
+
+	/* SubjectPublicKeyInfo */
+	if(!is_seq(epubkey, &elpubkey))
+		goto errret;
+	if(elistlen(elpubkey) != 2)
+		goto errret;
+
+	c->publickey_alg = parse_alg(&elpubkey->hd);
+	if(c->publickey_alg < 0)
+		goto errret;
+	c->curve = -1;
+	/* EC keys must also name a curve we know */
+	if(c->publickey_alg == ALG_ecPublicKey){
+		c->curve = parse_curve(&elpubkey->hd);
+		if(c->curve < 0)
+			goto errret;
+	}
+	elpubkey = elpubkey->tl;
+	if(!is_bitstring(&elpubkey->hd, &bits))
+		goto errret;
+	elpubkey->hd.val.u.bitstringval = nil;	/* transfer ownership */
+	c->publickey = bits;
+
+	/*resume Certificate */
+	if(c->signature_alg < 0)
+		goto errret;
+	if(!is_bitstring(esig, &bits))
+		goto errret;
+	esig->val.u.bitstringval = nil;	/* transfer ownership */
+	c->signature = bits;
+	ok = 1;
+
+errret:
+	freevalfields(&ecert.val);	/* recurses through lists, too */
+	if(!ok){
+		freecert(c);
+		c = nil;
+	}
+	return c;
+}
+
+/*
+ *	RSAPublicKey ::= SEQUENCE {
+ *		modulus INTEGER,
+ *		publicExponent INTEGER
+ *	}
+ *
+ * Decode an RSAPublicKey from buf[0..len).  Returns a newly
+ * allocated key, or nil if the data does not decode to a sequence
+ * of exactly two integers.
+ */
+RSApub*
+asn1toRSApub(uchar *buf, int len)
+{
+	Elem top;
+	Elist *fields;
+	RSApub *pub;
+
+	pub = nil;
+	if(decode(buf, len, &top) != ASN_OK)
+		goto bad;
+	if(!is_seq(&top, &fields) || elistlen(fields) != 2)
+		goto bad;
+
+	pub = rsapuballoc();
+	pub->n = asn1mpint(&fields->hd);
+	if(pub->n == nil)
+		goto bad;
+	pub->ek = asn1mpint(&fields->tl->hd);
+	if(pub->ek == nil)
+		goto bad;
+
+	freevalfields(&top.val);
+	return pub;
+
+bad:
+	freevalfields(&top.val);
+	rsapubfree(pub);
+	return nil;
+}
+
+/*
+ *	RSAPrivateKey ::= SEQUENCE {
+ *		version Version,
+ *		modulus INTEGER, -- n
+ *		publicExponent INTEGER, -- e
+ *		privateExponent INTEGER, -- d
+ *		prime1 INTEGER, -- p
+ *		prime2 INTEGER, -- q
+ *		exponent1 INTEGER, -- d mod (p-1)
+ *		exponent2 INTEGER, -- d mod (q-1)
+ *		coefficient INTEGER -- (inverse of q) mod p }
+ *
+ * Decode an RSA private key from buf[0..len).  Besides the nine
+ * element RSAPrivateKey above, a three element wrapper of the form
+ * { version 0, AlgorithmIdentifier rsaEncryption, OCTET STRING } is
+ * accepted, in which case the octet string is decoded recursively.
+ * Returns a newly allocated key, or nil on any error.
+ *
+ * Note that prime1 and exponent1 are stored in key->q and key->kq,
+ * and prime2 and exponent2 in key->p and key->kp.
+ * NOTE(review): presumably this matches how RSApriv defines c2;
+ * confirm against libsec.h.
+ */
+RSApriv*
+asn1toRSApriv(uchar *buf, int len)
+{
+	int version;
+	Elem e;
+	Elist *el;
+	Bytes *b;
+	RSApriv* key = nil;
+
+	if(decode(buf, len, &e) != ASN_OK)
+		goto errret;
+	/* an empty sequence leaves el nil; reject it before touching el->hd */
+	if(!is_seq(&e, &el) || el == nil)
+		goto errret;
+
+	if(!is_int(&el->hd, &version) || version != 0)
+		goto errret;
+
+	if(elistlen(el) != 9){
+		if(elistlen(el) == 3
+		&& parse_alg(&el->tl->hd) == ALG_rsaEncryption
+		&& is_octetstring(&el->tl->tl->hd, &b)){
+			key = asn1toRSApriv(b->data, b->len);
+			if(key != nil)
+				goto done;
+		}
+		goto errret;
+	}
+
+	key = rsaprivalloc();
+	el = el->tl;
+	if((key->pub.n = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->pub.ek = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->dk = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->q = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->p = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->kq = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->kp = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+	el = el->tl;
+	if((key->c2 = asn1mpint(&el->hd)) == nil)
+		goto errret;
+
+done:
+	freevalfields(&e.val);
+	return key;
+errret:
+	freevalfields(&e.val);
+	rsaprivfree(key);
+	return nil;
+}
+
+/*
+ * digest(CertificateInfo)
+ * Our ASN.1 library doesn't return pointers into the original
+ * data array, so we need to do a little hand decoding.
+ *
+ * The outer tag and length of cert[0..ncert) are decoded by hand to
+ * find where the first inner element starts; that element is then
+ * decoded only to learn where it ends, and its bytes are hashed
+ * with da->fun into digest.
+ * Returns da->len on success and -1 if the data is malformed.
+ */
+static int
+digest_certinfo(uchar *cert, int ncert, DigestAlg *da, uchar *digest)
+{
+	uchar *info, *p, *pend;
+	int isconstr, length;
+	Tag tag;
+	Elem elem;
+
+	p = cert;
+	pend = cert + ncert;
+	/*
+	 * Compare the length against the bytes that remain instead of
+	 * forming p+length, which would leave the buffer (undefined
+	 * behaviour) for a bogus length.
+	 */
+	if(tag_decode(&p, pend, &tag, &isconstr) != ASN_OK ||
+	   tag.class != Universal || tag.num != SEQUENCE ||
+	   length_decode(&p, pend, &length) != ASN_OK ||
+	   length < 0 ||
+	   length > pend - p)
+		return -1;
+	info = p;
+	if(ber_decode(&p, pend, &elem) != ASN_OK)
+		return -1;
+	/* only the extent of the element is needed, not its value */
+	freevalfields(&elem.val);
+	if(elem.tag.num != SEQUENCE)
+		return -1;
+	(*da->fun)(info, p - info, digest, nil);
+	return da->len;
+}
+
+/*
+ * Build a padded block from buf[0..len) and return it as an mpint.
+ * The block is one byte shorter than the modulus and is laid out as
+ *	blocktype, padding, 0, data
+ * where the padding is non-zero random bytes for block type 2 and
+ * 0xFF bytes for any other block type.  At least 8 padding bytes
+ * are required; otherwise the error string is set and nil returned.
+ * nil is also returned when the temporary buffer cannot be allocated.
+ */
+mpint*
+pkcs1padbuf(uchar *buf, int len, mpint *modulus, int blocktype)
+{
+	int k, j, npad;
+	uchar *blk;
+	mpint *m;
+
+	k = (mpsignif(modulus)-1)/8;
+	npad = k - 2 - len;
+	if(npad < 8){
+		werrstr("rsa modulus too small");
+		return nil;
+	}
+	blk = malloc(k);
+	if(blk == nil)
+		return nil;
+
+	blk[0] = blocktype;
+	if(blocktype == 2){
+		for(j = 0; j < npad; j++)
+			blk[1+j] = 1 + nfastrand(255);
+	}else
+		memset(blk+1, 0xFF, npad);
+	blk[npad+1] = 0;
+	memmove(blk+npad+2, buf, len);
+
+	m = betomp(blk, k, nil);
+	free(blk);
+	return m;
+}
+
+/*
+ * Undo pkcs1padbuf in place.  buf[0..len) must be one byte shorter
+ * than the modulus and start with blocktype.  The padding (0xFF
+ * bytes, or any non-zero bytes for block type 2) is skipped, the 0
+ * separator is checked, and the data is moved to the start of buf.
+ * Returns the length of the data, or -1 if the block is malformed.
+ */
+int
+pkcs1unpadbuf(uchar *buf, int len, mpint *modulus, int blocktype)
+{
+	uchar *scan, *end;
+
+	if(len < 1)
+		return -1;
+	if(len != (mpsignif(modulus)-1)/8)
+		return -1;
+	if(buf[0] != blocktype)
+		return -1;
+
+	scan = buf + 1;
+	end = buf + len;
+	if(blocktype == 2){
+		for(; scan < end; scan++)
+			if(*scan == 0x00)
+				break;
+	}else{
+		for(; scan < end; scan++)
+			if(*scan != 0xFF)
+				break;
+	}
+	/* the separator must exist and lie beyond the first 9 bytes */
+	if(scan - buf <= 8 || scan >= end || *scan != 0x00)
+		return -1;
+	scan++;
+	len = end - scan;
+	memmove(buf, scan, len);
+	return len;
+}
+
+static char Ebadsig[] = "bad signature";
+
+/*
+ * Check an RSA signature over a digest.
+ * sig[0..siglen) is raised to the public exponent of pk and the
+ * block type 1 padding is removed.  The signature is accepted when
+ * what remains equals either the bare digest edigest[0..edigestlen)
+ * or the encoding produced by encode_digest for any algorithm in
+ * digestalg whose digest length is edigestlen.
+ * Returns nil when the signature is good and an error string
+ * otherwise.
+ */
+char*
+X509rsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, RSApub *pk)
+{
+	mpint *x, *y;
+	DigestAlg **dp;
+	Bytes *digest;
+	uchar *buf;
+	int len;
+	char *err;
+
+	x = betomp(sig, siglen, nil);
+	y = rsaencrypt(pk, x, nil);
+	mpfree(x);
+	len = mptobe(y, nil, 0, &buf);
+	mpfree(y);
+
+	err = Ebadsig;
+	len = pkcs1unpadbuf(buf, len, pk->n, 1);
+	if(len == edigestlen && tsmemcmp(buf, edigest, edigestlen) == 0)
+		err = nil;
+	for(dp = digestalg; err != nil && *dp != nil; dp++){
+		if((*dp)->len != edigestlen)
+			continue;
+		digest = encode_digest(*dp, edigest);
+		/* encode_digest returns nil if encoding fails; nothing to compare then */
+		if(digest == nil)
+			continue;
+		if(digest->len == len && tsmemcmp(digest->data, buf, len) == 0)
+			err = nil;
+		freebytes(digest);
+	}
+	free(buf);
+	return err;
+}
+
+/*
+ * Check an ECDSA signature over a digest.
+ * sig[0..siglen) must decode to a sequence of exactly two integers
+ * r and s, which are handed to ecdsaverify together with the digest.
+ * Returns nil when the signature is good and an error string
+ * otherwise.
+ */
+char*
+X509ecdsaverifydigest(uchar *sig, int siglen, uchar *edigest, int edigestlen, ECdomain *dom, ECpub *pub)
+{
+	Elem sigelem;
+	Elist *pair;
+	mpint *r, *s;
+	char *err;
+
+	err = Ebadsig;
+	r = nil;
+	s = nil;
+	if(decode(sig, siglen, &sigelem) == ASN_OK
+	&& is_seq(&sigelem, &pair) && elistlen(pair) == 2
+	&& (r = asn1mpint(&pair->hd)) != nil
+	&& (s = asn1mpint(&pair->tl->hd)) != nil
+	&& ecdsaverify(dom, pub, edigest, edigestlen, r, s))
+		err = nil;
+
+	freevalfields(&sigelem.val);
+	mpfree(s);
+	mpfree(r);
+	return err;
+}
+
+/*
+ * Copy the first component of subject (everything before the first
+ * comma) into name[0..nname), zero-filling the rest of the buffer.
+ * Nothing is done when name is nil or nname is not positive; when
+ * subject is nil the buffer is left zeroed.
+ */
+static void
+copysubject(char *name, int nname, char *subject)
+{
+	char *e;
+
+	/* a non-positive size would turn into a negative count below */
+	if(name == nil || nname <= 0)
+		return;
+	memset(name, 0, nname);
+	if(subject == nil)
+		return;
+	/* the last byte stays 0 from the memset, so name is terminated */
+	strncpy(name, subject, nname-1);
+	e = strchr(name, ',');
+	if(e != nil)
+		*e = 0;	/* take just CN part of Distinguished Name */
+}
+
+/*
+ * Extract the elliptic curve public key of a certificate.
+ * If name is not nil it receives the first component of the subject
+ * followed by the alternative names found in the extensions.
+ * dom is initialized for the curve named in the certificate; it is
+ * released again if the key cannot be decoded.
+ * Returns nil if the certificate cannot be decoded, does not carry
+ * an EC key, or the key is malformed.
+ */
+ECpub*
+X509toECpub(uchar *cert, int ncert, char *name, int nname, ECdomain *dom)
+{
+	CertX509 *c;
+	ECpub *pub;
+
+	if((c = decode_cert(cert, ncert)) == nil)
+		return nil;
+	copysubject(name, nname, c->subject);
+	appendaltnames(name, nname, c->ext, 0);
+	if(c->publickey_alg != ALG_ecPublicKey){
+		freecert(c);
+		return nil;
+	}
+	ecdominit(dom, namedcurves[c->curve]);
+	pub = ecdecodepub(dom, c->publickey->data, c->publickey->len);
+	if(pub == nil)
+		ecdomfree(dom);
+	freecert(c);
+	return pub;
+}
+
+/*
+ * Verify the signature of a certificate with the EC public key pk
+ * on domain dom.  The certificate info is hashed with the digest
+ * that digestalg associates with the certificate's signature
+ * algorithm.  Returns nil on success, otherwise an error string.
+ */
+char*
+X509ecdsaverify(uchar *cert, int ncert, ECdomain *dom, ECpub *pk)
+{
+	CertX509 *c;
+	uchar digest[MAXdlen];
+	int n;
+	char *err;
+
+	c = decode_cert(cert, ncert);
+	if(c == nil)
+		return "cannot decode cert";
+	n = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(n > 0)
+		err = X509ecdsaverifydigest(c->signature->data, c->signature->len, digest, n, dom, pk);
+	else
+		err = "cannot decode certinfo";
+	freecert(c);
+	return err;
+}
+
+/*
+ * Extract the RSA public key of a certificate.
+ * If name is not nil it receives the first component of the subject
+ * followed by the alternative names found in the extensions.
+ * Returns nil if the certificate cannot be decoded, does not carry
+ * an RSA key, or the key is malformed.
+ */
+RSApub*
+X509toRSApub(uchar *cert, int ncert, char *name, int nname)
+{
+	CertX509 *c;
+	RSApub *pub;
+
+	if((c = decode_cert(cert, ncert)) == nil)
+		return nil;
+	copysubject(name, nname, c->subject);
+	appendaltnames(name, nname, c->ext, 0);
+	if(c->publickey_alg == ALG_rsaEncryption)
+		pub = asn1toRSApub(c->publickey->data, c->publickey->len);
+	else
+		pub = nil;
+	freecert(c);
+	return pub;
+}
+
+/*
+ * Verify the signature of a certificate with the RSA public key pk.
+ * The certificate info is hashed with the digest that digestalg
+ * associates with the certificate's signature algorithm.
+ * Returns nil on success, otherwise an error string.
+ */
+char*
+X509rsaverify(uchar *cert, int ncert, RSApub *pk)
+{
+	CertX509 *c;
+	uchar digest[MAXdlen];
+	int n;
+	char *err;
+
+	c = decode_cert(cert, ncert);
+	if(c == nil)
+		return "cannot decode cert";
+	n = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(n > 0)
+		err = X509rsaverifydigest(c->signature->data, c->signature->len, digest, n, pk);
+	else
+		err = "cannot decode certinfo";
+	freecert(c);
+	return err;
+}
+
+/* ------- Elem constructors ---------- */
+
+/* Make a universal NULL element. */
+static Elem
+Null(void)
+{
+	Elem nul;
+
+	nul.val.tag = VNull;
+	nul.tag.num = NULLTAG;
+	nul.tag.class = Universal;
+	return nul;
+}
+
+/* Make a universal INTEGER element holding the small integer j. */
+static Elem
+mkint(int j)
+{
+	Elem num;
+
+	num.val.tag = VInt;
+	num.val.u.intval = j;
+	num.tag.num = INTEGER;
+	num.tag.class = Universal;
+	return num;
+}
+
+/*
+ * Make a universal INTEGER element holding the value of p as a
+ * byte string.  The buffer is sized from mpsignif(p)+8 bits, i.e.
+ * there is always at least one bit above the most significant one.
+ * For a negative p, 2^(8*len) is added before the conversion.
+ * NOTE(review): presumably this yields the two's complement form
+ * with mpleft acting as a left shift -- confirm against mp(2).
+ * The element owns the new byte string.
+ */
+static Elem
+mkbigint(mpint *p)
+{
+	Elem e;
+
+	e.tag.class = Universal;
+	e.tag.num = INTEGER;
+	e.val.tag = VBigInt;
+	e.val.u.bigintval = newbytes((mpsignif(p)+8)/8);
+	if(p->sign < 0){
+		/* s = 2^(8*len) + p, written right-aligned into the buffer */
+		mpint *s = mpnew(e.val.u.bigintval->len*8+1);
+		mpleft(mpone, e.val.u.bigintval->len*8, s);
+		mpadd(p, s, s);
+		mptober(s, e.val.u.bigintval->data, e.val.u.bigintval->len);
+		mpfree(s);
+	} else {
+		mptober(p, e.val.u.bigintval->data, e.val.u.bigintval->len);
+	}
+	return e;
+}
+
+/*
+ * Report whether s consists only of letters, digits, space and
+ * the characters ' = ( ) + , - . / : ?
+ * Returns 1 if so (also for the empty string), otherwise 0.
+ */
+static int
+printable(char *s)
+{
+	uchar *p;
+
+	for(p = (uchar*)s; *p != 0; p++){
+		if(*p >= 'a' && *p <= 'z')
+			continue;
+		if(*p >= 'A' && *p <= 'Z')
+			continue;
+		if(*p >= '0' && *p <= '9')
+			continue;
+		if(strchr("'=()+,-./:? ", *p) == nil)
+			return 0;
+	}
+	return 1;
+}
+
+/* pseudo string type: let mkstring choose the encoding */
+#define DirectoryString 0
+
+/*
+ * Make a universal string element of type t holding a copy of s.
+ * With t == DirectoryString the type becomes PrintableString when
+ * printable(s) holds and UTF8String otherwise.
+ */
+static Elem
+mkstring(char *s, int t)
+{
+	Elem str;
+
+	if(t == DirectoryString){
+		if(printable(s))
+			t = PrintableString;
+		else
+			t = UTF8String;
+	}
+	str.val.tag = VString;
+	str.val.u.stringval = estrdup(s);
+	str.tag.num = t;
+	str.tag.class = Universal;
+	return str;
+}
+
+/* Make a universal OCTET STRING element from buf[0..buflen) via makebytes. */
+static Elem
+mkoctet(uchar *buf, int buflen)
+{
+	Elem oct;
+
+	oct.val.tag = VOctets;
+	oct.val.u.octetsval = makebytes(buf, buflen);
+	oct.tag.num = OCTET_STRING;
+	oct.tag.class = Universal;
+	return oct;
+}
+
+/*
+ * Make a universal BIT STRING element from buf[0..buflen) via
+ * makebits, passing 0 as its last argument.
+ */
+static Elem
+mkbits(uchar *buf, int buflen)
+{
+	Elem bs;
+
+	bs.val.tag = VBitString;
+	bs.val.u.bitstringval = makebits(buf, buflen, 0);
+	bs.tag.num = BIT_STRING;
+	bs.tag.class = Universal;
+	return bs;
+}
+
+/*
+ * Make a universal UTCTime element for time t, formatted from
+ * gmtime(t) as YYMMDDhhmmssZ with a two digit year.
+ */
+static Elem
+mkutc(long t)
+{
+	Tm *tm;
+	char stamp[50];
+	Elem when;
+
+	tm = gmtime(t);
+	snprint(stamp, sizeof(stamp), "%.2d%.2d%.2d%.2d%.2d%.2dZ",
+		tm->year % 100, tm->mon+1, tm->mday, tm->hour, tm->min, tm->sec);
+
+	when.val.tag = VString;
+	when.val.u.stringval = estrdup(stamp);
+	when.tag.num = UTCTime;
+	when.tag.class = Universal;
+	return when;
+}
+
+/* Make a universal OBJECT IDENTIFIER element from oid via makeints. */
+static Elem
+mkoid(Ints *oid)
+{
+	Elem id;
+
+	id.val.tag = VObjId;
+	id.val.u.objidval = makeints(oid->data, oid->len);
+	id.tag.num = OBJECT_ID;
+	id.tag.class = Universal;
+	return id;
+}
+
+/* Make a universal SEQUENCE element whose members are the list el. */
+static Elem
+mkseq(Elist *el)
+{
+	Elem seq;
+
+	seq.val.tag = VSeq;
+	seq.val.u.seqval = el;
+	seq.tag.num = SEQUENCE;
+	seq.tag.class = Universal;
+	return seq;
+}
+
+/* Make a universal SET OF element whose members are the list el. */
+static Elem
+mkset(Elist *el)
+{
+	Elem set;
+
+	set.val.tag = VSet;
+	set.val.u.setval = el;
+	set.tag.num = SETOF;
+	set.tag.class = Universal;
+	return set;
+}
+
+/*
+ * Make an AlgorithmIdentifier for the ALG_* value alg: a sequence
+ * of the algorithm's object identifier and a NULL parameter.
+ */
+static Elem
+mkalg(int alg)
+{
+	Elist *params;
+
+	params = mkel(Null(), nil);
+	return mkseq(mkel(mkoid(alg_oid_tab[alg]), params));
+}
+
+/*
+ * Distinguished name attribute: an object identifier (laid out like
+ * Ints, so it can be cast to Ints*), the prefix that introduces the
+ * attribute in a textual name, and the string type used to encode
+ * its value.
+ */
+typedef struct Ints7pref {
+	int	len;		/* number of entries of data in use */
+	int	data[7];	/* the oid arcs, zero padded */
+	char	prefix[4];	/* textual prefix such as "CN=" */
+	int	stype;		/* string type handed to mkstring */
+} Ints7pref;
+/* attributes recognized by mkDN and mkaltname */
+Ints7pref DN_oid[] = {
+	{4, 2, 5, 4, 6, 0, 0, 0,        "C=", PrintableString},
+	{4, 2, 5, 4, 8, 0, 0, 0,        "ST=",DirectoryString},
+	{4, 2, 5, 4, 7, 0, 0, 0,        "L=", DirectoryString},
+	{4, 2, 5, 4, 10, 0, 0, 0,       "O=", DirectoryString},
+	{4, 2, 5, 4, 11, 0, 0, 0,       "OU=",DirectoryString},
+	{4, 2, 5, 4, 3, 0, 0, 0,        "CN=",DirectoryString},
+	{7, 1,2,840,113549,1,9,1,       "E=", IA5String},
+	{7, 0,9,2342,19200300,100,1,25,	"DC=",IA5String},
+};
+
+/*
+ * Make one RelativeDistinguishedName: a set containing a single
+ * sequence of the attribute's object identifier and the value subj
+ * encoded with the attribute's string type.
+ */
+static Elem
+mkname(Ints7pref *oid, char *subj)
+{
+	Elist *pair;
+
+	pair = mkel(mkstring(subj, oid->stype), nil);
+	pair = mkel(mkoid((Ints*)oid), pair);
+	return mkset(mkel(mkseq(pair), nil));
+}
+
+/*
+ * Make a Name from the textual distinguished name dn.
+ * dn is split with tokenize into at most 20 fields; every field
+ * that starts with one of the DN_oid prefixes becomes one
+ * RelativeDistinguishedName, other fields are dropped.  The
+ * resulting sequence keeps the order of the fields in dn.
+ */
+static Elem
+mkDN(char *dn)
+{
+	char *copy, *field[20];
+	Elist *rdns;
+	Ints7pref *t;
+	int n, k;
+
+	copy = estrdup(dn);
+	rdns = nil;
+	n = tokenize(copy, field, nelem(field));
+	/* walk backwards so that prepending restores the original order */
+	while(n-- > 0){
+		for(t = DN_oid; t < DN_oid+nelem(DN_oid); t++){
+			k = strlen(t->prefix);
+			if(strncmp(field[n], t->prefix, k) == 0){
+				rdns = mkel(mkname(t, field[n]+k), rdns);
+				break;
+			}
+		}
+	}
+	free(copy);
+	return mkseq(rdns);
+}
+
+/*
+ * DigestInfo ::= SEQUENCE {
+ *	digestAlgorithm AlgorithmIdentifier,
+ *	digest OCTET STRING }
+ *
+ * Encode the da->len bytes at digest as a DigestInfo for algorithm
+ * da.  Returns the encoding, which the caller frees with freebytes,
+ * or nil if encode did not produce one.
+ */
+static Bytes*
+encode_digest(DigestAlg *da, uchar *digest)
+{
+	Bytes *out;
+	Elist *fields;
+	Elem info;
+
+	out = nil;
+	fields = mkel(mkoctet(digest, da->len), nil);
+	fields = mkel(mkalg(da->alg), fields);
+	info = mkseq(fields);
+	encode(info, &out);
+	freevalfields(&info.val);
+	return out;
+}
+
+/*
+ * Encode digest as a DigestInfo for the digest function fun and
+ * store the result in buf[0..len).  The first entry of digestalg
+ * using fun determines the algorithm and the digest length.
+ * Returns the length of the encoding, or -1 if fun is unknown, the
+ * encoding fails, or it does not fit into buf.
+ */
+int
+asn1encodedigest(DigestState* (*fun)(uchar*, u32, uchar*, DigestState*), uchar *digest, uchar *buf, int len)
+{
+	DigestAlg **dp;
+	Bytes *enc;
+	int n;
+
+	for(dp = digestalg; *dp != nil; dp++)
+		if((*dp)->fun == fun)
+			break;
+	if(*dp == nil)
+		return -1;
+
+	enc = encode_digest(*dp, digest);
+	if(enc == nil)
+		return -1;
+	n = enc->len;
+	if(n > len)
+		n = -1;
+	else
+		memmove(buf, enc->data, n);
+	freebytes(enc);
+	return n;
+}
+
+/*
+ * Make a constructed element with context-specific tag num whose
+ * members are the list l.
+ */
+static Elem
+mkcont(int num, Elist *l)
+{
+	Elem c;
+
+	c = mkseq(l);
+	c.tag.num = num;
+	c.tag.class = Context;
+	return c;
+}
+
+/*
+ * Make one alternative name from s.  If s contains any of the
+ * DN_oid prefixes it is encoded as a distinguished name under
+ * context tag 4; otherwise it becomes an IA5 string under context
+ * tag 1 when it contains '@' (email) and tag 2 when not (DNS).
+ */
+static Elem
+mkaltname(char *s)
+{
+	Ints7pref *t;
+	Elem alt;
+
+	for(t = DN_oid; t < DN_oid+nelem(DN_oid); t++)
+		if(strstr(s, t->prefix) != nil)
+			return mkcont(4, mkel(mkDN(s), nil)); /* DN */
+
+	alt = mkstring(s, IA5String);
+	alt.tag.class = Context;
+	if(strchr(s, '@') != nil)
+		alt.tag.num = 1;	/* email */
+	else
+		alt.tag.num = 2;	/* DNS */
+	return alt;
+}
+
+/*
+ * Turn the comma separated list alts into a list of alternative
+ * name elements.  Leading spaces of each name are skipped and the
+ * scan stops at an empty name.  Each element is prepended, so the
+ * list is in reverse order of the input.  Returns nil if alts is
+ * nil or holds no names.
+ */
+static Elist*
+mkaltnames(char *alts)
+{
+	Elist *list;
+	char *copy, *cur, *next;
+
+	if(alts == nil)
+		return nil;
+
+	list = nil;
+	copy = estrdup(alts);
+	cur = copy;
+	while(cur != nil){
+		while(*cur == ' ')
+			cur++;
+		if(*cur == '\0')
+			break;
+		next = strchr(cur, ',');
+		if(next != nil){
+			*next = 0;
+			next++;
+		}
+		list = mkel(mkaltname(cur), list);
+		cur = next;
+	}
+	free(copy);
+	return list;
+}
+
+/*
+ * Prepend an extension to the list el: a sequence of the object
+ * identifier oid and an octet string holding the encoding of e.
+ * The value of e is released in any case.  If e cannot be encoded,
+ * el is returned unchanged.
+ */
+static Elist*
+mkextel(Elem e, Ints *oid, Elist *el)
+{
+	Bytes *enc;
+	Elist *ext;
+
+	enc = nil;
+	if(encode(e, &enc) == ASN_OK){
+		ext = mkel(mkoctet(enc->data, enc->len), nil);
+		ext = mkel(mkoid(oid), ext);
+		el = mkel(mkseq(ext), el);
+		freebytes(enc);
+	}
+	freevalfields(&e.val);
+	return el;
+}
+
+static Ints15 oid_subjectAltName = {4, 2, 5, 29, 17 };
+static Ints15 oid_extensionRequest = { 7, 1, 2, 840, 113549, 1, 9, 14};
+
+/*
+ * Build the trailing list element carrying the extensions made
+ * from the alternative names alts.
+ * For a certificate (isreq == 0) the extensions are wrapped in
+ * context tag 3, and nil is returned when there are none.
+ * For a request (isreq != 0) they are wrapped in an
+ * extensionRequest attribute inside context tag 0; the context tag
+ * 0 element is produced even when it is empty.
+ */
+static Elist*
+mkextensions(char *alts, int isreq)
+{
+	Elist *names, *exts, *attr;
+
+	exts = nil;
+	names = mkaltnames(alts);
+	if(names != nil)
+		exts = mkextel(mkseq(names), (Ints*)&oid_subjectAltName, nil);
+
+	if(exts == nil){
+		if(!isreq)
+			return nil;
+		return mkel(mkcont(0, nil), nil);
+	}
+
+	exts = mkel(mkseq(exts), nil);
+	if(!isreq)
+		return mkel(mkcont(3, exts), nil);
+
+	attr = mkel(mkset(exts), nil);
+	attr = mkel(mkoid((Ints*)&oid_extensionRequest), attr);
+	return mkel(mkcont(0, mkel(mkseq(attr), nil)), nil);
+}
+
+/*
+ * Split s at the first comma that is not inside single quotes.
+ * The comma is overwritten with 0 and a pointer to the text after
+ * it is returned; nil is returned when there is no such comma.
+ */
+static char*
+splitalts(char *s)
+{
+	int inquote;
+
+	inquote = 0;
+	while(*s != '\0'){
+		if(*s == '\'')
+			inquote = !inquote;
+		else if(*s == ',' && !inquote){
+			*s = 0;
+			return s+1;
+		}
+		s++;
+	}
+	return nil;
+}
+
+/*
+ * Append the subject alternative names found in the extensions ext
+ * to the NUL terminated string in name[0..nname), separated by ", ".
+ *
+ * With isreq set, ext holds an extensionRequest attribute of a
+ * certificate request; otherwise it holds the extension list of a
+ * certificate.  Each extension is
+ *	SEQUENCE { extnID, critical BOOLEAN OPTIONAL, extnValue }
+ * so both the two and the three element form are accepted.
+ * Only email (1), DNS (2) and directory name (4) entries are used;
+ * for a directory name only the part before the first comma.
+ * Names equal to what name already starts with are skipped.
+ * The result is truncated to fit; name always stays terminated.
+ * Nothing is done if name or ext is nil, nname is not positive, or
+ * the extensions are malformed.
+ */
+static void
+appendaltnames(char *name, int nname, Bytes *ext, int isreq)
+{
+	Elem eext, ealt, edn;
+	Elist *el, *l;
+	Ints *oid;
+	char *alt, *e;
+	int len, n, room;
+
+	if(name == nil || nname <= 0 || ext == nil)
+		return;
+	if(decode(ext->data, ext->len, &eext) != ASN_OK)
+		return;
+	if(isreq){
+		if(!is_seq(&eext, &el) || elistlen(el) != 2)
+			goto errext;
+		if(!is_oid(&el->hd, &oid) || !ints_eq(oid, (Ints*)&oid_extensionRequest))
+			goto errext;
+		el = el->tl;
+		/* an empty set leaves el nil; do not look at el->hd then */
+		if(!is_set(&el->hd, &el) || el == nil)
+			goto errext;
+		if(!is_seq(&el->hd, &el))
+			goto errext;
+	} else {
+		if(!is_seq(&eext, &el))
+			goto errext;
+	}
+	for(; el != nil; el = el->tl){
+		if(!is_seq(&el->hd, &l))
+			goto errext;
+		n = elistlen(l);
+		if(n != 2 && n != 3)
+			goto errext;
+		if(!is_oid(&l->hd, &oid) || !ints_eq(oid, (Ints*)&oid_subjectAltName))
+			continue;
+		el = l->tl;
+		if(n == 3)
+			el = el->tl;	/* skip the critical flag */
+		break;
+	}
+	if(el == nil)
+		goto errext;
+	if(!is_octetstring(&el->hd, &ext))
+		goto errext;
+	if(decode(ext->data, ext->len, &ealt) != ASN_OK)
+		goto errext;
+	if(!is_seq(&ealt, &el))
+		goto erralt;
+	for(; el != nil; el = el->tl){
+		ext = el->hd.val.u.octetsval;
+		switch(el->hd.tag.num){
+		default:
+			continue;
+		case 1:	/* email */
+		case 2:	/* DNS */
+			if(el->hd.val.tag != VOctets)
+				goto erralt;
+			alt = smprint("%.*s", ext->len, (char*)ext->data);
+			break;
+		case 4:	/* DN */
+			if(el->hd.val.tag != VOctets
+			|| decode(ext->data, ext->len, &edn) != ASN_OK)
+				goto erralt;
+			alt = parse_name(&edn);
+			freevalfields(&edn.val);
+			break;
+		}
+		if(alt == nil)
+			goto erralt;
+		/* take just CN part of Distinguished Name */
+		if((e = strchr(alt, ',')) != nil)
+			*e = '\0';
+		len = strlen(alt);
+		if(strncmp(name, alt, len) == 0 && strchr(",", name[len]) != nil){
+			free(alt);	/* same as the subject */
+			continue;
+		}
+		/*
+		 * strncat limits the number of bytes appended, not the
+		 * size of the result, so hand it the space that is left.
+		 */
+		room = nname - 1 - (int)strlen(name);
+		if(name[0] != '\0' && room > 0){
+			strncat(name, ", ", room);
+			room = nname - 1 - (int)strlen(name);
+		}
+		if(room > 0)
+			strncat(name, alt, room);
+		free(alt);
+	}
+erralt:
+	freevalfields(&ealt.val);
+errext:
+	freevalfields(&eext.val);
+}
+	
+/*
+ * Encode pk as a sequence of modulus and public exponent.  The
+ * exponent is written with mkint when it has fewer than 32
+ * significant bits and with mkbigint otherwise.
+ * Returns the encoding, which the caller frees with freebytes, or
+ * nil if encode did not produce one.
+ */
+static Bytes*
+encode_rsapubkey(RSApub *pk)
+{
+	Bytes *out;
+	Elem expo, key;
+
+	out = nil;
+	if(mpsignif(pk->ek) < 32)
+		expo = mkint(mptoi(pk->ek));
+	else
+		expo = mkbigint(pk->ek);
+	key = mkseq(mkel(mkbigint(pk->n), mkel(expo, nil)));
+	encode(key, &out);
+	freevalfields(&key.val);
+	return out;
+}
+
+/*
+ * Encode k as an RSAPrivateKey:
+ *	version 0, n, e, d, prime1, prime2, exponent1, exponent2,
+ *	coefficient
+ * The public exponent is written with mkint when it has fewer than
+ * 32 significant bits and with mkbigint otherwise.
+ *
+ * The primes and exponents are written in the positions from which
+ * asn1toRSApriv reads them (prime1 = k->q, prime2 = k->p,
+ * exponent1 = k->kq, exponent2 = k->kp), so that a key survives
+ * being encoded and decoded again.
+ * NOTE(review): presumably k->c2 is (inverse of p) mod q, which is
+ * the coefficient of the standard only with this assignment --
+ * confirm against the RSApriv declaration.
+ *
+ * Returns the encoding, which the caller frees with freebytes, or
+ * nil if encode did not produce one.
+ */
+static Bytes*
+encode_rsaprivkey(RSApriv *k)
+{
+	Bytes *b = nil;
+	RSApub *pk = &k->pub;
+	Elem e = mkseq(
+		mkel(mkint(0),
+		mkel(mkbigint(pk->n),
+		mkel(mpsignif(pk->ek)<32 ? mkint(mptoi(pk->ek)) : mkbigint(pk->ek),
+		mkel(mkbigint(k->dk),
+		mkel(mkbigint(k->q),
+		mkel(mkbigint(k->p),
+		mkel(mkbigint(k->kq),
+		mkel(mkbigint(k->kp),
+		mkel(mkbigint(k->c2),
+		nil))))))))));
+	encode(e, &b);
+	freevalfields(&e.val);
+	return b;
+}
+
+/*
+ * Encode the public key pk into buf[0..len).
+ * Returns the length of the encoding, or -1 if the key cannot be
+ * encoded or the buffer is too small (the error string is set in
+ * the latter case).
+ */
+int
+asn1encodeRSApub(RSApub *pk, uchar *buf, int len)
+{
+	Bytes *enc;
+	int n;
+
+	enc = encode_rsapubkey(pk);
+	if(enc == nil)
+		return -1;
+	n = enc->len;
+	if(n > len){
+		werrstr("buffer too small");
+		n = -1;
+	}else
+		memmove(buf, enc->data, n);
+	freebytes(enc);
+	return n;
+}
+
+/*
+ * Encode the private key k into buf[0..len).
+ * Returns the length of the encoding, or -1 if the key cannot be
+ * encoded or the buffer is too small (the error string is set in
+ * the latter case).
+ */
+int
+asn1encodeRSApriv(RSApriv *k, uchar *buf, int len)
+{
+	Bytes *enc;
+	int n;
+
+	enc = encode_rsaprivkey(k);
+	if(enc == nil)
+		return -1;
+	n = enc->len;
+	if(n > len){
+		werrstr("buffer too small");
+		n = -1;
+	}else
+		memmove(buf, enc->data, n);
+	freebytes(enc);
+	return n;
+}
+
+/*
+ * Generate a self-signed certificate for the key priv.
+ * subj is a distinguished name in the form understood by mkDN; it
+ * is used as both issuer and subject.  Text after the first comma
+ * outside single quotes is taken as a list of alternative names.
+ * valid[0] and valid[1] are the start and end of the validity
+ * period, as times accepted by gmtime.
+ * The certificate is signed with sha256WithRSAEncryption.
+ * Returns the encoded certificate and stores its length in
+ * *certlen if certlen is not nil; returns nil on failure.
+ * NOTE(review): the result reuses the allocation made by encode;
+ * presumably the caller releases it with free -- confirm.
+ */
+uchar*
+X509rsagen(RSApriv *priv, char *subj, u32 valid[2], int *certlen)
+{
+	int serial = 0, sigalg = ALG_sha256WithRSAEncryption;
+	uchar *cert = nil;
+	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
+	int buflen;
+	mpint *pkcs1;
+	char *alts;
+
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	/* work on a copy: splitalts cuts the string in two */
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
+		mkel(mkcont(0, mkel(mkint(2), nil)),
+		mkel(mkint(serial),
+		mkel(mkalg(sigalg),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkutc(valid[0]),
+			mkel(mkutc(valid[1]),
+			nil))),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 0)))))))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
+		goto errret;
+
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
+	freebytes(certinfobytes);
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
+		goto errret;
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
+	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
+	rsadecrypt(priv, pkcs1, pkcs1);
+	/*
+	 * The signature must be as long as the modulus.  A minimal
+	 * length conversion would drop leading zero bytes, so write
+	 * the value right-aligned into a buffer of the full length.
+	 */
+	buflen = (mpsignif(priv->pub.n)+7)/8;
+	buf = emalloc(buflen);
+	mptober(pkcs1, buf, buflen);
+	mpfree(pkcs1);
+	e = mkseq(
+		mkel(certinfo,
+		mkel(mkalg(sigalg),
+		mkel(mkbits(buf, buflen),
+		nil))));
+	free(buf);
+	if(encode(e, &certbytes) != ASN_OK)
+		goto errret;
+	if(certlen != nil)
+		*certlen = certbytes->len;
+	/* reuse the Bytes allocation: slide the data to its start */
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
+errret:
+	freevalfields(&e.val);
+	free(subj);
+	return cert;
+}
+
+/*
+ * Generate a certification request for the key priv.
+ * subj is a distinguished name in the form understood by mkDN.
+ * Text after the first comma outside single quotes is taken as a
+ * list of alternative names, which are requested as extensions.
+ * The request is signed with sha256WithRSAEncryption.
+ * Returns the encoded request and stores its length in *certlen if
+ * certlen is not nil; returns nil on failure.
+ * NOTE(review): the result reuses the allocation made by encode;
+ * presumably the caller releases it with free -- confirm.
+ */
+uchar*
+X509rsareq(RSApriv *priv, char *subj, int *certlen)
+{
+	/* RFC 2314, PKCS #10 Certification Request Syntax */
+	int version = 0, sigalg = ALG_sha256WithRSAEncryption;
+	uchar *cert = nil;
+	Bytes *certbytes, *pkbytes, *certinfobytes, *sigbytes;
+	Elem e, certinfo;
+	DigestAlg *da;
+	uchar digest[MAXdlen], *buf;
+	int buflen;
+	mpint *pkcs1;
+	char *alts;
+
+	if((pkbytes = encode_rsapubkey(&priv->pub)) == nil)
+		return nil;
+
+	/* work on a copy: splitalts cuts the string in two */
+	subj = estrdup(subj);
+	alts = splitalts(subj);
+
+	e = mkseq(
+		mkel(mkint(version),
+		mkel(mkDN(subj),
+		mkel(mkseq(
+			mkel(mkalg(ALG_rsaEncryption),
+			mkel(mkbits(pkbytes->data, pkbytes->len),
+			nil))),
+		mkextensions(alts, 1)))));
+	freebytes(pkbytes);
+	if(encode(e, &certinfobytes) != ASN_OK)
+		goto errret;
+	da = digestalg[sigalg];
+	(*da->fun)(certinfobytes->data, certinfobytes->len, digest, 0);
+	freebytes(certinfobytes);
+	certinfo = e;
+
+	sigbytes = encode_digest(da, digest);
+	if(sigbytes == nil)
+		goto errret;
+	pkcs1 = pkcs1padbuf(sigbytes->data, sigbytes->len, priv->pub.n, 1);
+	freebytes(sigbytes);
+	if(pkcs1 == nil)
+		goto errret;
+
+	rsadecrypt(priv, pkcs1, pkcs1);
+	/*
+	 * The signature must be as long as the modulus.  A minimal
+	 * length conversion would drop leading zero bytes, so write
+	 * the value right-aligned into a buffer of the full length.
+	 */
+	buflen = (mpsignif(priv->pub.n)+7)/8;
+	buf = emalloc(buflen);
+	mptober(pkcs1, buf, buflen);
+	mpfree(pkcs1);
+	e = mkseq(
+		mkel(certinfo,
+		mkel(mkalg(sigalg),
+		mkel(mkbits(buf, buflen),
+		nil))));
+	free(buf);
+	if(encode(e, &certbytes) != ASN_OK)
+		goto errret;
+	if(certlen != nil)
+		*certlen = certbytes->len;
+	/* reuse the Bytes allocation: slide the data to its start */
+	cert = (uchar*)certbytes;
+	memmove(cert, certbytes->data, certbytes->len);
+errret:
+	freevalfields(&e.val);
+	free(subj);
+	return cert;
+}
+
+/*
+ * Extract the RSA public key from a certification request.
+ * If name is not nil it receives the first component of the subject
+ * followed by the alternative names found in a context [0] element
+ * after the public key.
+ * Returns nil if the request is malformed or does not carry an RSA
+ * key.
+ */
+RSApub*
+X509reqtoRSApub(uchar *req, int nreq, char *name, int nname)
+{
+	Elem top;
+	Elist *l, *attrs;
+	char *subj;
+	Bits *keybits;
+	RSApub *pub;
+
+	pub = nil;
+	if(decode(req, nreq, &top) != ASN_OK)
+		goto out;
+	if(!is_seq(&top, &l) || elistlen(l) != 3)
+		goto out;
+	if(!is_seq(&l->hd, &l) || elistlen(l) < 3)
+		goto out;
+
+	l = l->tl;	/* second element: the subject */
+	subj = parse_name(&l->hd);
+	if(subj == nil)
+		goto out;
+	copysubject(name, nname, subj);
+	free(subj);
+
+	l = l->tl;	/* third element: the public key info */
+	attrs = l->tl;
+	if(attrs != nil
+	&& attrs->hd.tag.class == Context
+	&& attrs->hd.tag.num == 0
+	&& attrs->hd.val.tag == VOctets)
+		appendaltnames(name, nname, attrs->hd.val.u.octetsval, 1);
+
+	if(!is_seq(&l->hd, &l) || elistlen(l) != 2)
+		goto out;
+	if(parse_alg(&l->hd) != ALG_rsaEncryption)
+		goto out;
+	if(!is_bitstring(&l->tl->hd, &keybits))
+		goto out;
+	pub = asn1toRSApub(keybits->data, keybits->len);
+out:
+	freevalfields(&top.val);
+	return pub;
+}
+
+/*
+ * Hash a SubjectPublicKeyInfo built from the algorithm alg (an
+ * ALG_* value, encoded by mkalg with a NULL parameter) and the
+ * public key bits pubkey[0..npubkey), storing the result of fun in
+ * digest.
+ * Returns 0 on success and -1 if the structure cannot be encoded,
+ * in which case digest is not written.
+ */
+static int
+digestSPKI(int alg, uchar *pubkey, int npubkey, DigestState* (*fun)(uchar*, u32, uchar*, DigestState*), uchar *digest)
+{
+	Bytes *b = nil;
+	Elem e = mkseq(mkel(mkalg(alg), mkel(mkbits(pubkey, npubkey), nil)));
+	encode(e, &b);
+	freevalfields(&e.val);
+	/* b stays nil when encode produces nothing */
+	if(b == nil)
+		return -1;
+	(*fun)(b->data, b->len, digest, nil);
+	freebytes(b);
+	return 0;
+}
+
+/*
+ * Hash the SubjectPublicKeyInfo of a certificate with fun and store
+ * the result in digest.
+ * Returns 0 on success; returns -1 and sets the error string if
+ * the certificate cannot be decoded or the key info cannot be
+ * encoded again.
+ */
+int
+X509digestSPKI(uchar *cert, int ncert, DigestState* (*fun)(uchar*, u32, uchar*, DigestState*), uchar *digest)
+{
+	CertX509 *c;
+	int r;
+
+	c = decode_cert(cert, ncert);
+	if(c == nil){
+		werrstr("cannot decode cert");
+		return -1;
+	}
+	r = digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, fun, digest);
+	freecert(c);
+	if(r < 0)
+		werrstr("cannot encode public key info");
+	return r;
+}
+
+/*
+ * Return a printable name for tag.
+ * Known universal tags map to constant strings.  Other tags are
+ * formatted into a static buffer, so that result is overwritten by
+ * the next such call.
+ */
+static char*
+tagdump(Tag tag)
+{
+	static char buf[32];
+
+	if(tag.class != Universal){
+		snprint(buf, sizeof(buf), "class%d,num%d", tag.class, tag.num);
+		return buf;
+	}
+	switch(tag.num){
+	case BOOLEAN: return "BOOLEAN";
+	case INTEGER: return "INTEGER";
+	case BIT_STRING: return "BIT STRING";
+	case OCTET_STRING: return "OCTET STRING";
+	case NULLTAG: return "NULLTAG";
+	case OBJECT_ID: return "OID";
+	case ObjectDescriptor: return "OBJECT_DES";
+	case EXTERNAL: return "EXTERNAL";
+	case REAL: return "REAL";
+	case ENUMERATED: return "ENUMERATED";
+	case EMBEDDED_PDV: return "EMBEDDED PDV";
+	case SEQUENCE: return "SEQUENCE";
+	case SETOF: return "SETOF";
+	case UTF8String: return "UTF8String";
+	case NumericString: return "NumericString";
+	case PrintableString: return "PrintableString";
+	case TeletexString: return "TeletexString";
+	case VideotexString: return "VideotexString";
+	case IA5String: return "IA5String";
+	case UTCTime: return "UTCTime";
+	case GeneralizedTime: return "GeneralizedTime";
+	case GraphicString: return "GraphicString";
+	case VisibleString: return "VisibleString";
+	case GeneralString: return "GeneralString";
+	case UniversalString: return "UniversalString";
+	case BMPString: return "BMPString";
+	default:
+		/* universal tag without a name above */
+		snprint(buf, sizeof(buf), "Universal,num%d", tag.num);
+		return buf;
+	}
+}
+
+/*
+ * Debugging aid: print a decoded ASN.1 element and, recursively, the
+ * members of sequences and sets.  Each element is written as
+ * "tag{value}" followed by a newline.
+ *
+ * For octet strings and big integers only the leading two bytes are
+ * shown.  Bytes beyond the recorded length are never read, so a value
+ * shorter than two bytes prints fewer hex digits instead of reading
+ * past the end of its data.
+ */
+static void
+edump(Elem e)
+{
+	Value v;
+	Elist *el;
+	int i, n;
+
+	print("%s{", tagdump(e.tag));
+	v = e.val;
+	switch(v.tag){
+	case VBool: print("Bool %d",v.u.boolval); break;
+	case VInt: print("Int %d",v.u.intval); break;
+	case VOctets:
+		n = v.u.octetsval->len;
+		print("Octets[%d] ", n);
+		/* show at most two bytes, and only bytes that exist */
+		for(i = 0; i < n && i < 2; i++)
+			print("%.2x", v.u.octetsval->data[i]);
+		print("...");
+		break;
+	case VBigInt:
+		n = v.u.bigintval->len;
+		print("BigInt[%d] ", n);
+		/* show at most two bytes, and only bytes that exist */
+		for(i = 0; i < n && i < 2; i++)
+			print("%.2x", v.u.bigintval->data[i]);
+		print("...");
+		break;
+	case VReal: print("Real..."); break;
+	case VOther: print("Other..."); break;
+	case VBitString: print("BitString[%d]...", v.u.bitstringval->len*8 - v.u.bitstringval->unusedbits); break;
+	case VNull: print("Null"); break;
+	case VEOC: print("EOC..."); break;
+	case VObjId: print("ObjId");
+		for(i = 0; i<v.u.objidval->len; i++)
+			print(" %d", v.u.objidval->data[i]);
+		break;
+	case VString: print("String \"%s\"",v.u.stringval); break;
+	case VSeq: print("Seq\n");
+		for(el = v.u.seqval; el!=nil; el = el->tl)
+			edump(el->hd);
+		break;
+	case VSet: print("Set\n");
+		for(el = v.u.setval; el!=nil; el = el->tl)
+			edump(el->hd);
+		break;
+	}
+	print("}\n");
+}
+
+/*
+ * Debugging aid: decode len bytes of DER at der and print the resulting
+ * element tree with edump().  If the input does not decode, a message
+ * is printed and the process exits with status "didn't parse".
+ */
+void
+asn1dump(uchar *der, int len)
+{
+	Elem e;
+
+	if(decode(der, len, &e) != ASN_OK){
+		print("didn't parse\n");
+		exits("didn't parse");
+	}
+	edump(e);
+	/* release the decoded value tree; it is not used after dumping */
+	freevalfields(&e.val);
+}
+
+/*
+ * Debugging aid: decode a certificate and print its fields, the digest
+ * of its certinfo, the result of checking the signature against the
+ * certificate's own public key (RSA or EC), and SHA-256/SHA-1
+ * thumbprints of the public key info and of the whole certificate.
+ *
+ * Output goes through print() using the %H, %[ and %B verbs.
+ * NOTE(review): presumably the caller must have installed those verbs
+ * with fmtinstall; confirm against callers.
+ */
+void
+X509dump(uchar *cert, int ncert)
+{
+	CertX509 *c;
+	RSApub *rsa;
+	ECpub *ec;
+	ECdomain dom;
+	char *res;
+	uchar digest[MAXdlen];
+	int ndigest;
+
+	print("begin X509dump\n");
+	c = decode_cert(cert, ncert);
+	if(c == nil){
+		print("cannot decode cert\n");
+		return;
+	}
+
+	/* digest of the certinfo, using the certificate's signature algorithm */
+	ndigest = digest_certinfo(cert, ncert, digestalg[c->signature_alg], digest);
+	if(ndigest <= 0){
+		freecert(c);
+		print("cannot decode certinfo\n");
+		return;
+	}
+
+	print("serial %d\n", c->serial);
+	print("issuer %s\n", c->issuer);
+	print("validity %s %s\n", c->validity_start, c->validity_end);
+	print("subject %s\n", c->subject);
+	print("sigalg=%d digest=%.*H\n", c->signature_alg, ndigest, digest);
+	print("publickey_alg=%d pubkey[%d] %.*H\n", c->publickey_alg, c->publickey->len,
+		c->publickey->len, c->publickey->data);
+
+	/* check the signature against the certificate's own key */
+	if(c->publickey_alg == ALG_rsaEncryption){
+		rsa = asn1toRSApub(c->publickey->data, c->publickey->len);
+		if(rsa != nil){
+			print("rsa pubkey e=%B n(%d)=%B\n", rsa->ek, mpsignif(rsa->n), rsa->n);
+			res = X509rsaverifydigest(c->signature->data, c->signature->len,
+				digest, ndigest, rsa);
+			if(res == nil)
+				res = "nil (meaning ok)";
+			print("self-signed X509rsaverifydigest returns: %s\n", res);
+			rsapubfree(rsa);
+		}
+	}else if(c->publickey_alg == ALG_ecPublicKey){
+		ecdominit(&dom, namedcurves[c->curve]);
+		ec = ecdecodepub(&dom, c->publickey->data, c->publickey->len);
+		if(ec != nil){
+			res = X509ecdsaverifydigest(c->signature->data, c->signature->len,
+				digest, ndigest, &dom, ec);
+			if(res == nil)
+				res = "nil (meaning ok)";
+			print("self-signed X509ecdsaverifydigest returns: %s\n", res);
+			ecpubfree(ec);
+		}
+		ecdomfree(&dom);
+	}
+
+	/* the digest buffer is reused for each thumbprint below */
+	digestSPKI(c->publickey_alg, c->publickey->data, c->publickey->len, sha2_256, digest);
+	print("publickey_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha2_256(cert, ncert, digest, nil);
+	print("cert_thumbprint sha256=%.*[\n", SHA2_256dlen, digest);
+
+	sha1(cert, ncert, digest, nil);
+	print("cert_thumbprint sha1=%.*H\n", SHA1dlen, digest);
+
+	freecert(c);
+	print("end X509dump\n");
+}