code: plan9front

Download patch

ref: fff070f2cbb01b7c0879e9dcb13ee4e3ed2497f0
parent: 9126ee3eea90d639f4e877c01400248581d10f65
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sun May 8 12:50:29 EDT 2022

imx8: add work in progress i.MX8MQ kernel for the mntreform2 laptop

This is a work in progress port to the mntreform2 laptop.

Working so far:

- mmu (same as raspberry pi 3b+)
- arm generic timer
- gicv3
- uart1
- enet

With access to the uart, one can netboot this kernel in u-boot
using the following commands:

> dhcp
> bootm

diff: cannot open b/sys/src/9/imx8//null: file does not exist: 'b/sys/src/9/imx8//null'
--- /dev/null
+++ b/sys/src/9/imx8/cache.v8.s
@@ -1,0 +1,212 @@
+#include "sysreg.h"
+
+#undef	SYSREG
+#define	SYSREG(op0,op1,Cn,Cm,op2)	SPR(((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5))
+
+/*
+ * instruction cache operations
+ */
+TEXT cacheiinvse(SB), 1, $-4	// IC IVAU on every instruction cache line of [R0, R0+len)
+	MOVWU	len+8(FP), R2
+	ADD	R0, R2		// R2 = end address
+
+	MRS	DAIF, R11	// save interrupt mask state
+	MSR	$0x2, DAIFSet	// keep the CSSELR write and CCSIDR read together
+	MOVWU	$1, R10
+	MSR	R10, CSSELR_EL1	// cache selector 1 for the CCSIDR_EL1 read below
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+	LSL	R4, R10		// R10 = 1<<log2(linelen), the line length in bytes
+	LSR	R4, R0
+	LSL	R4, R0		// round the start address down to a line boundary
+
+_iinvse:
+	IC	R0, 3,7,5,1	// IVAU
+	ADD	R10, R0		// advance one line
+	CMP	R0, R2
+	BGT	_iinvse		// loop while end > current address
+	DSB	$NSH
+	ISB	$SY
+	MSR	R11, DAIF	// restore interrupt mask state
+	RETURN
+
+TEXT cacheiinv(SB), 1, $-4	// invalidate the instruction cache without an address range
+	IC	R0, 0,7,5,0	// IALLU
+	DSB	$NSH
+	ISB	$SY
+	RETURN
+
+TEXT cacheuwbinv(SB), 1, $0	// cachedwbinv() followed by cacheiinv()
+	BL	cachedwbinv(SB)
+	BL	cacheiinv(SB)
+	RETURN
+
+/*
+ * data cache operations
+ */
+TEXT cachedwbse(SB), 1, $-4	// apply dccvac to every data cache line of [R0, R0+len)
+	MOV	LR, R29		// cachedva<> returns to our caller through R29
+	BL	cachedva<>(SB)	// LR now points at dccvac below; cachedva<> calls it once per line
+TEXT dccvac(SB), 1, $-4
+	DC	R0, 3,7,10,1	// CVAC
+	RETURN
+
+TEXT cacheduwbse(SB), 1, $-4	// apply dccvau to every data cache line of [R0, R0+len)
+	MOV	LR, R29		// cachedva<> returns to our caller through R29
+	BL	cachedva<>(SB)	// LR now points at dccvau below; cachedva<> calls it once per line
+TEXT dccvau(SB), 1, $-4
+	DC	R0, 3,7,11,1	// CVAU
+	RETURN
+
+TEXT cachedinvse(SB), 1, $-4	// apply dcivac to every data cache line of [R0, R0+len)
+	MOV	LR, R29		// cachedva<> returns to our caller through R29
+	BL	cachedva<>(SB)	// LR now points at dcivac below; cachedva<> calls it once per line
+TEXT dcivac(SB), 1, $-4
+	DC	R0, 0,7,6,1	// IVAC
+	RETURN
+
+TEXT cachedwbinvse(SB), 1, $-4	// apply dccivac to every data cache line of [R0, R0+len)
+	MOV	LR, R29		// cachedva<> returns to our caller through R29
+	BL	cachedva<>(SB)	// LR now points at dccivac below; cachedva<> calls it once per line
+TEXT dccivac(SB), 1, $-4
+	DC	R0, 3,7,14,1	// CIVAC
+	RETURN
+
+TEXT cachedva<>(SB), 1, $-4	// call the op at LR once per data cache line of [R0, R0+len)
+	MOV	LR, R1		// R1 = per-line operation (the instruction after the caller's BL)
+	MOVWU	len+8(FP), R2
+	ADD	R0, R2		// R2 = end address
+
+	MRS	DAIF, R11	// save interrupt mask state
+	MSR	$0x2, DAIFSet	// keep the CSSELR write and CCSIDR read together
+	MOVWU	$0, R10
+	MSR	R10, CSSELR_EL1	// cache selector 0 for the CCSIDR_EL1 read below
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+	MOVWU	$1, R10
+	LSL	R4, R10		// R10 = line length in bytes
+	LSR	R4, R0
+	LSL	R4, R0		// round the start address down to a line boundary
+
+	DSB	$SY
+	ISB	$SY
+_cachedva:
+	BL	(R1)		// op(R0)
+	ADD	R10, R0		// advance one line
+	CMP	R0, R2
+	BGT	_cachedva	// loop while end > current address
+	DSB	$SY
+	ISB	$SY
+	MSR	R11, DAIF	// restore interrupt mask state
+	RET	R29		// return to the original caller saved by the entry stub
+
+/*
+ * l1 cache operations
+ */
+TEXT cachedwb(SB), 1, $-4	// apply dccsw to every set/way of cache level index 0
+	MOVWU	$0, R0		// R0 = cache level index for cachedsw<>
+_cachedwb:			// l2cacheuwb branches here with R0 = 1
+	MOV	LR, R29		// cachedsw<> returns to our caller through R29
+	BL	cachedsw<>(SB)	// LR now points at dccsw below, used as the per-set/way op
+TEXT dccsw(SB), 1, $-4
+	DC	R0, 0,7,10,2	// CSW
+	RETURN
+
+TEXT cachedinv(SB), 1, $-4	// apply dcisw to every set/way of cache level index 0
+	MOVWU	$0, R0		// R0 = cache level index for cachedsw<>
+_cachedinv:			// l2cacheuinv branches here with R0 = 1
+	MOV	LR, R29		// cachedsw<> returns to our caller through R29
+	BL	cachedsw<>(SB)	// LR now points at dcisw below, used as the per-set/way op
+TEXT dcisw(SB), 1, $-4
+	DC	R0, 0,7,6,2	// ISW
+	RETURN
+
+TEXT cachedwbinv(SB), 1, $-4	// apply dccisw to every set/way of cache level index 0
+	MOVWU	$0, R0		// R0 = cache level index for cachedsw<>
+_cachedwbinv:			// l2cacheuwbinv branches here with R0 = 1
+	MOV	LR, R29		// cachedsw<> returns to our caller through R29
+	BL	cachedsw<>(SB)	// LR now points at dccisw below, used as the per-set/way op
+TEXT dccisw(SB), 1, $-4
+	DC	R0, 0,7,14,2	// CISW
+	RETURN
+
+/*
+ * l2 cache operations
+ */
+TEXT l2cacheuwb(SB), 1, $-4	// cachedwb for cache level index 1
+	MOVWU	$1, R0
+	B	_cachedwb	// shares the tail of cachedwb
+TEXT l2cacheuinv(SB), 1, $-4	// cachedinv for cache level index 1
+	MOVWU	$1, R0
+	B	_cachedinv	// shares the tail of cachedinv
+TEXT l2cacheuwbinv(SB), 1, $-4	// cachedwbinv for cache level index 1
+	MOVWU	$1, R0
+	B	_cachedwbinv	// shares the tail of cachedwbinv
+
+TEXT cachesize(SB), 1, $-4	// return the raw CCSIDR_EL1 value for the selector passed in R0
+	MRS	DAIF, R11	// save interrupt mask state
+	MSR	$0x2, DAIFSet	// keep the CSSELR write and CCSIDR read together
+	MSR	R0, CSSELR_EL1	// NOTE(review): R0 is written unshifted, unlike cachedsw<> which doubles its level -- confirm callers
+	ISB	$SY
+	MRS	CCSIDR_EL1, R0
+	MSR	R11, DAIF	// restore interrupt mask state
+	RETURN
+
+TEXT cachedsw<>(SB), 1, $-4	// call the op at LR for every set/way of cache level index R0
+	MOV	LR, R1		// R1 = per-set/way operation (the instruction after the caller's BL)
+
+	MRS	DAIF, R11	// save interrupt mask state
+	MSR	$0x2, DAIFSet
+	ADDW	R0, R0, R8	// R8 = level*2, used both as CSSELR value and as the level bits of the operand
+	MSR	R8, CSSELR_EL1
+	ISB	$SY
+	MRS	CCSIDR_EL1, R4
+
+	LSR	$3, R4, R7
+	ANDW	$1023, R7	// lastway
+	ADDW	$1, R7, R5	// #ways
+
+	LSR	$13, R4, R2
+	ANDW	$32767, R2	// lastset
+	ADDW	$1, R2		// #sets
+
+	ANDW	$7, R4
+	ADDW	$4, R4		// log2(linelen)
+
+	MOVWU	$32, R3		// wayshift = 32 - log2(#ways)
+_countlog2ways:
+	CBZ	R7, _loop	// lastway == 0?
+	LSR	$1, R7		// lastway >>= 1
+	SUB	$1, R3		// wayshift--
+	B _countlog2ways
+_loop:				// R7 is 0 here and is reused as the way counter
+	DSB	$SY
+	ISB	$SY
+_nextway:
+	MOVWU	$0, R6		// set
+_nextset:
+	LSL	R3, R7, R0	// way<<wayshift
+	LSL	R4, R6, R9	// set<<log2(linelen)
+	ORRW	R8, R0		// level
+	ORRW	R9, R0		// setway
+
+	BL	(R1)		// op(setway)
+
+	ADDW	$1, R6		// set++
+	CMPW	R2, R6
+	BLT	_nextset	// while set < #sets
+
+	ADDW	$1, R7		// way++
+	CMPW	R5, R7
+	BLT	_nextway	// while way < #ways
+
+	DSB	$SY
+	ISB	$SY
+	MSR	R11, DAIF	// restore interrupt mask state
+	RET	R29		// return to the original caller saved by the entry stub
--- /dev/null
+++ b/sys/src/9/imx8/clock.c
@@ -1,0 +1,114 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+#include "sysreg.h"
+
+static uvlong freq;
+
+enum {
+	Enable	= 1<<0,		/* written to CNTP_CTL_EL0 by clockinit */
+	Imask	= 1<<1,		/* unused in this file; presumably the CNTP_CTL_EL0 interrupt mask bit -- confirm */
+	Istatus = 1<<2,		/* unused in this file; presumably the CNTP_CTL_EL0 interrupt status bit -- confirm */
+};
+
+void
+clockshutdown(void)	/* intentionally empty: nothing is undone on shutdown */
+{
+}
+
+static void
+localclockintr(Ureg *ureg, void *)	/* handler registered for IRQcntpns by clockinit */
+{
+	timerintr(ureg, 0);	/* hand the tick to the portable timer code */
+}
+
+void
+clockinit(void)	/* set up performance counter and physical timer registers, then hook the timer interrupt */
+{
+	syswr(PMCR_EL0, 1<<6 | 7);	/* NOTE(review): presumably LC|C|P|E (64-bit cycle counter, reset, enable) -- confirm */
+	syswr(PMCNTENSET, 1<<31);	/* presumably enables the cycle counter (bit 31) -- confirm */
+	syswr(PMUSERENR_EL0, 1<<2);	/* presumably lets user mode read the cycle counter -- confirm */
+	syswr(CNTKCTL_EL1, 1<<1);	/* presumably lets user mode read the virtual counter -- confirm */
+
+	syswr(CNTP_TVAL_EL0, ~0UL);	/* start with the largest countdown before enabling */
+	syswr(CNTP_CTL_EL0, Enable);
+
+	if(m->machno == 0){		/* only cpu0 records and reports the counter frequency */
+		freq = sysrd(CNTFRQ_EL0);
+		print("timer frequency %lld Hz\n", freq);
+	}
+
+	intrenable(IRQcntpns, localclockintr, nil, BUSUNKNOWN, "clock");
+}
+
+void
+timerset(uvlong next)
+{
+	long ticks;
+
+	/* distance from the current counter value to the requested expiry */
+	ticks = next - fastticks(nil);
+	/* the timer value register is loaded with that difference, not with next itself */
+	syswr(CNTP_TVAL_EL0, ticks);
+}
+
+uvlong
+fastticks(uvlong *hz)	/* return the CNTPCT_EL0 counter; optionally report its frequency */
+{
+	if(hz)
+		*hz = freq;	/* freq is read from CNTFRQ_EL0 in clockinit; 0 before that */
+	return sysrd(CNTPCT_EL0);
+}
+
+ulong
+perfticks(void)	/* fastticks() truncated to ulong */
+{
+	return fastticks(nil);
+}
+
+ulong
+µs(void)	/* counter value converted to microseconds, truncated to ulong */
+{
+	uvlong hz, t = fastticks(&hz);
+	/* whole seconds and remainder are scaled separately: t*1000000 overflows 64 bits after weeks of uptime */
+	return (t / hz) * 1000000ULL + ((t % hz) * 1000000ULL) / hz;
+}
+
+void
+microdelay(int n)
+{
+	ulong start = µs();
+	/* busy-wait until n microseconds have elapsed; the difference is taken in unsigned arithmetic */
+	while(µs() - start < n)
+		;
+}
+
+void
+delay(int n)
+{
+	for(n--; n >= 0; n--)		/* n busy-waits of 1000 microseconds each */
+		microdelay(1000);
+}
+
+void
+synccycles(void)	/* two-stage rendezvous of all conf.nmach cpus with interrupts off */
+{
+	static Ref r1, r2;
+	int s;
+
+	s = splhi();
+	r2.ref = 0;			/* re-arm the second stage on entry */
+	incref(&r1);
+	while(r1.ref != conf.nmach)	/* stage 1: spin until every cpu has arrived */
+		;
+//	syswr(PMCR_EL0, 1<<6 | 7);
+	incref(&r2);
+	while(r2.ref != conf.nmach)	/* stage 2: spin until every cpu has passed stage 1 */
+		;
+	r1.ref = 0;			/* re-arm the first stage for the next call */
+	splx(s);
+}
--- /dev/null
+++ b/sys/src/9/imx8/dat.h
@@ -1,0 +1,218 @@
+/*
+ * Time.
+ *
+ * HZ should divide 1000 evenly, ideally.
+ * 100, 125, 200, 250 and 333 are okay.
+ */
+#define	HZ		100			/* clock frequency */
+#define	MS2HZ		(1000/HZ)		/* millisec per clock tick */
+#define	TK2SEC(t)	((t)/HZ)		/* ticks to seconds */
+
+enum {
+	Mhz	= 1000 * 1000,		/* multiplier for frequencies given in megahertz */
+};
+
+typedef struct Conf	Conf;
+typedef struct Confmem	Confmem;
+typedef struct FPsave	FPsave;
+typedef struct PFPU	PFPU;
+typedef struct ISAConf	ISAConf;
+typedef struct Label	Label;
+typedef struct Lock	Lock;
+typedef struct Memcache	Memcache;
+typedef struct MMMU	MMMU;
+typedef struct Mach	Mach;
+typedef struct Page	Page;
+typedef struct PhysUart	PhysUart;
+typedef struct Pcidev	Pcidev;
+typedef struct PMMU	PMMU;
+typedef struct Proc	Proc;
+typedef u64int		PTE;
+typedef struct Soc	Soc;
+typedef struct Uart	Uart;
+typedef struct Ureg	Ureg;
+typedef uvlong		Tval;
+typedef void		KMap;
+
+#pragma incomplete Pcidev
+#pragma incomplete Ureg
+
+#define MAXSYSARG	5	/* for mount(fd, mpt, flag, arg, srv) */
+
+/*
+ *  parameters for sysproc.c
+ */
+#define AOUT_MAGIC	(R_MAGIC)
+
+struct Lock	/* NOTE(review): field meanings are inferred from their names -- confirm against the lock code */
+{
+	ulong	key;		/* presumably the word that is test-and-set */
+	u32int	sr;		/* presumably the saved interrupt state for ilock */
+	uintptr	pc;		/* presumably the pc of the acquirer */
+	Proc*	p;		/* presumably the owning process */
+	Mach*	m;		/* presumably the owning cpu */
+	int	isilock;	/* presumably nonzero when taken with ilock */
+};
+
+struct Label	/* presumably the context saved by setlabel and resumed by gotolabel -- confirm */
+{
+	uintptr	sp;
+	uintptr	pc;
+};
+
+struct FPsave	/* floating point context as stored by fpsave() and reloaded by fprestore() */
+{
+	uvlong	regs[32][2];	/* 32 registers of two uvlongs each, filled by fpsaveregs() */
+
+	ulong	control;	/* value of getfcr() */
+	ulong	status;		/* value of getfsr() */
+};
+
+struct PFPU	/* presumably the per-process floating point state embedded in Proc -- confirm */
+{
+	FPsave	fpsave[1];
+
+	int	fpstate;	/* FPinit, FPactive or FPinactive, possibly or'd with FPillegal */
+};
+
+enum	/* values of fpstate, as dispatched on by mathtrap() */
+{
+	FPinit,			/* mathtrap calls fpinit() and moves to FPactive */
+	FPactive,		/* a trap in this state is reported as a floating point error */
+	FPinactive,		/* mathtrap reloads the saved context and moves to FPactive */
+
+	/* bits or'd with the state */
+	FPillegal= 0x100,	/* mathtrap posts "floating point in note handler" when set */
+};
+
+struct Confmem	/* one bank of physical memory; NOTE(review): field meanings inferred from names -- confirm */
+{
+	uintptr	base;		/* presumably the physical start address */
+	ulong	npage;		/* presumably the number of pages in the bank */
+	uintptr	limit;		/* presumably the physical end address */
+	uintptr	kbase;		/* presumably the kernel virtual start */
+	uintptr	klimit;		/* presumably the kernel virtual end */
+};
+
+struct Conf	/* system-wide configuration; synccycles() waits for nmach cpus */
+{
+	ulong	nmach;		/* processors */
+	ulong	nproc;		/* processes */
+	Confmem	mem[3];		/* physical memory */
+	ulong	npage;		/* total physical pages of memory */
+	ulong	upages;		/* user page pool */
+	ulong	copymode;	/* 0 is copy on write, 1 is copy on reference */
+	ulong	ialloc;		/* max interrupt time allocation in bytes */
+	ulong	pipeqsize;	/* size in bytes of pipe queues */
+	ulong	nimage;		/* number of page cache image headers */
+	ulong	nswap;		/* number of swap pages */
+	int	nswppo;		/* max # of pageouts per segment pass */
+	ulong	hz;		/* processor cycle freq */
+	ulong	mhz;		/* presumably hz expressed in megahertz -- confirm */
+	int	monitor;	/* flag */
+};
+
+/*
+ *  MMU stuff in Mach.
+ */
+struct MMMU	/* embedded anonymously in Mach */
+{
+	PTE*	mmutop;		/* first level user page table */
+};
+
+/*
+ *  MMU stuff in proc
+ */
+#define NCOLOR	1		/* 1 level cache, don't worry about VCE's */
+
+struct PMMU	/* per-process page table bookkeeping, one list per page table level */
+{
+	union {
+	Page	*mmufree;	/* mmuhead[0] is freelist head */
+	Page	*mmuhead[PTLEVELS];
+	};
+	Page	*mmutail[PTLEVELS];
+	int	asid;		/* presumably the address space id assigned to the process -- confirm */
+	uintptr	tpidr;		/* presumably the saved thread pointer register -- confirm */
+};
+
+#include "../port/portdat.h"
+
+struct Mach	/* per-cpu state, reached through the register variable m */
+{
+	int	machno;			/* physical id of processor */
+	uintptr	splpc;			/* pc of last caller to splhi */
+	Proc*	proc;			/* current process on this processor */
+	/* end of offsets known to asm */
+
+	MMMU;
+
+	PMach;
+
+	int	cputype;
+	ulong	delayloop;
+	int	cpumhz;
+	uvlong	cpuhz;			/* speed of cpu */
+
+	int	stack[1];		/* presumably marks where the per-cpu stack area begins -- confirm */
+};
+
+struct	/* global cpu activity state */
+{
+	char	machs[MAXMACH];		/* active CPUs */
+	int	exiting;		/* shutdown */
+}active;
+
+#define MACHP(n)	((Mach*)MACHADDR(n))
+
+extern register Mach* m;			/* R27 */
+extern register Proc* up;			/* R26 */
+extern int normalprint;
+
+/*
+ *  a parsed plan9.ini line
+ */
+#define NISAOPT		8
+
+struct ISAConf {	/* filled in by isaconfig() */
+	char	*type;
+	uvlong	port;
+	int	irq;
+	ulong	dma;
+	ulong	mem;
+	ulong	size;
+	ulong	freq;
+
+	int	nopt;			/* presumably the number of opt[] entries in use -- confirm */
+	char	*opt[NISAOPT];		/* room for NISAOPT option strings */
+};
+
+/*
+ * Horrid. But the alternative is 'defined'.
+ */
+#ifdef _DBGC_
+#define DBGFLG		(dbgflg[_DBGC_])
+#else
+#define DBGFLG		(0)
+#endif /* _DBGC_ */
+
+int vflag;
+extern char dbgflg[256];
+
+#define dbgprint	print		/* for now */
+
+/*
+ *  hardware info about a device
+ */
+typedef struct {	/* one entry of DevConf.ports */
+	ulong	port;		/* presumably the base address of the range -- confirm */
+	int	size;		/* presumably the length of the range -- confirm */
+} Devport;
+
+struct DevConf	/* hardware description of one device */
+{
+	ulong	intnum;			/* interrupt number */
+	char	*type;			/* card type, malloced */
+	int	nports;			/* Number of ports */
+	Devport	*ports;			/* The ports themselves */
+};
--- /dev/null
+++ b/sys/src/9/imx8/etherimx.c
@@ -1,0 +1,706 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/netif.h"
+#include "../port/etherif.h"
+#include "../port/ethermii.h"
+
+enum {
+	Moduleclk	= 125000000,	/* 125Mhz */
+	Maxtu		= 1518,		/* largest frame accepted; also programmed as RCR max frame length */
+
+	R_BUF_SIZE	= ((Maxtu+BLOCKALIGN-1)&~(BLOCKALIGN-1)),	/* Maxtu rounded up to a multiple of BLOCKALIGN (a power of two) */
+};
+
+enum {	/* ENET registers as u32int indices (byte offset/4) for rr()/wr(); bit fields are indented beneath their register */
+	ENET_EIR	= 0x004/4,	/* Interrupt Event Register */
+	ENET_EIMR	= 0x008/4,	/* Interrupt Mask Register */
+		INT_BABR	=1<<30,	/* Babbling Receive Error */
+		INT_BABT	=1<<31,	/* Babbling Transmit Error */
+		INT_GRA		=1<<28,	/* Graceful Stop Complete */
+		INT_TXF		=1<<27,	/* Transmit Frame Interrupt */
+		INT_TXB		=1<<26,	/* Transmit Buffer Interrupt */
+		INT_RXF		=1<<25,	/* Receive Frame Interrupt */
+		INT_RXB		=1<<24,	/* Receive Buffer Interrupt */
+		INT_MII		=1<<23,	/* MII Interrupt */
+		INT_EBERR	=1<<22,	/* Ethernet Bus Error */
+		INT_LC		=1<<21,	/* Late Collision */
+		INT_RL		=1<<20,	/* Collision Retry Limit */
+		INT_UN		=1<<19,	/* Transmit FIFO Underrun */
+		INT_PLR		=1<<18,	/* Payload Receive Error */
+		INT_WAKEUP	=1<<17,	/* Node Wakeup Request Indication */
+		INT_TS_AVAIL	=1<<16,	/* Transmit Timestamp Available */
+		INT_TS_TIMER	=1<<15,	/* Timestamp Timer */
+		INT_RXFLUSH_2	=1<<14,	/* RX DMA Ring 2 flush indication */
+		INT_RXFLUSH_1	=1<<13,	/* RX DMA Ring 1 flush indication */
+		INT_RXFLUSH_0	=1<<12,	/* RX DMA Ring 0 flush indication */
+		INT_TXF2	=1<<7,	/* Transmit frame interrupt, class 2 */
+		INT_TXB2	=1<<6,	/* Transmit buffer interrupt, class 2 */
+		INT_RXF2	=1<<5,	/* Receive frame interrupt, class 2 */
+		INT_RXB2	=1<<4,	/* Receive buffer interrupt, class 2 */
+		INT_TXF1	=1<<3,	/* Transmit frame interrupt, class 1 */
+		INT_TXB1	=1<<2,	/* Transmit buffer interrupt, class 1 */
+		INT_RXF1	=1<<1,	/* Receive frame interrupt, class 1 */
+		INT_RXB1	=1<<0,	/* Receive buffer interrupt, class 1 */
+
+	ENET_RDAR	= 0x010/4,	/* Receive Descriptor Active Register */
+		RDAR_ACTIVE	=1<<24,	/* Descriptor Active */
+	ENET_TDAR	= 0x014/4,	/* Transmit Descriptor Active Register */
+		TDAR_ACTIVE	=1<<24,	/* Descriptor Active */
+
+	ENET_ECR	= 0x024/4,	/* Ethernet Control Register */
+		ECR_RESERVED	=7<<28,
+		ECR_SVLANDBL	=1<<11,	/* S-VLAN double tag */
+		ECR_VLANUSE2ND	=1<<10,	/* VLAN use second tag */
+		ECR_SVLANEN	=1<<9,	/* S-VLAN enable */
+		ECR_DBSWP	=1<<8,	/* Descriptor Byte Swapping Enable */
+		ECR_DBGEN	=1<<6,	/* Debug Enable */
+		ECR_SPEED_100M	=0<<5,
+		ECR_SPEED_1000M	=1<<5,
+		ECR_EN1588	=1<<4,	/* Enables enhanced functionality of the MAC */
+		ECR_SLEEP	=1<<3,	/* Sleep Mode Enable */
+		ECR_MAGICEN	=1<<2,	/* Magic Packet Detection Enable */
+		ECR_ETHEREN	=1<<1,	/* Ethernet Enable */
+		ECR_RESET	=1<<0,	/* Ethernet MAC Reset */
+
+	ENET_MMFR	= 0x040/4,	/* MII Management Frame Register */
+		MMFR_ST		=1<<30,
+		MMFR_RD		=2<<28,
+		MMFR_WR		=1<<28,
+		MMFR_PA_SHIFT	=23,
+		MMFR_TA		=2<<16,
+		MMFR_RA_SHIFT	=18,
+
+	ENET_MSCR	= 0x044/4,	/* MII Speed Control Register */
+		MSCR_SPEED_SHIFT=1,	/* MII speed = module_clock/((SPEED+1)*2) */
+		MSCR_DIS_PRE	=1<<7,	/* disable preamble */
+		MSCR_HOLD_SHIFT	=8,	/* hold cycles in module_clock */
+
+	ENET_MIBC	= 0x064/4,	/* MIB Control Register */
+	ENET_RCR	= 0x084/4,	/* Receive Control Register */
+		RCR_GRS		=1<<31,	/* Graceful Receive Stopped */
+		RCR_NLC		=1<<30,	/* Payload Length Check Disable */
+		RCR_MAX_FL_SHIFT=16,	/* Maximum Frame Length */
+		RCR_CFEN	=1<<15,	/* MAC Control Frame Enable */
+		RCR_CRCFWD	=1<<14,	/* Forward Received CRC */
+		RCR_PAUFWD	=1<<13,	/* Forward Pause Frames */
+		RCR_PADEN	=1<<12,	/* Enable Frame Padding Remove */
+		RCR_RMII_10T	=1<<9,	/* Enables 10-Mbit/s mode of the RMII/RGMII */
+		RCR_RMII_MODE	=1<<8,	/* RMII Mode Enable */
+		RCR_RGMII_EN	=1<<6,	/* RGMII Mode Enable */
+		RCR_FCE		=1<<5,	/* Flow Control Enable */
+		RCR_REJ		=1<<4,	/* Broadcast Frame Reject */
+		RCR_PROM	=1<<3,	/* Promiscuous Mode */
+		RCR_MII_MODE	=1<<2,	/* Media Independent Interface Mode (must always be set) */
+		RCR_DRT		=1<<1,	/* Disable Receive On Timeout */
+		RCR_LOOP	=1<<0,	/* Internal Loopback */
+
+	ENET_TCR	= 0x0C4/4,	/* Transmit Control Register */
+		TCR_CRCFWD	=1<<9,	/* Forward Frame From Application With CRC */
+		TCR_ADDINS	=1<<8,	/* Set MAC Address on Transmit */
+		TCR_RFC_PAUSE	=1<<4,	/* Receive Frame Control Pause */
+		TCR_TFC_PAUSE	=1<<3,	/* Transmit Frame Control Pause */
+		TCR_FDEN	=1<<2,	/* Full-Duplex Enable */
+		TCR_GTS		=1<<0,	/* Graceful Transmit Stop */
+
+	ENET_PALR	= 0x0E4/4,	/* Physical Address Lower Register */
+	ENET_PAUR	= 0x0E8/4,	/* Physical Address Upper Register */
+
+	ENET_OPD	= 0x0EC/4,	/* Opcode/Pause Duration Register */
+
+	ENET_TXIC0	= 0x0F0/4,	/* Transmit Interrupt Coalescing Register */
+	ENET_TXIC1	= 0x0F4/4,	/* Transmit Interrupt Coalescing Register */
+	ENET_TXIC2	= 0x0F8/4,	/* Transmit Interrupt Coalescing Register */
+	ENET_RXIC0	= 0x100/4,	/* Receive Interrupt Coalescing Register */
+	ENET_RXIC1	= 0x104/4,	/* Receive Interrupt Coalescing Register */
+	ENET_RXIC2	= 0x108/4,	/* Receive Interrupt Coalescing Register */
+		IC_EN		= 1<<31,
+		IC_CS		= 1<<30,
+		IC_FT_SHIFT	= 20,
+		IC_TT_SHIFT	= 0,
+
+	ENET_IAUR	= 0x118/4,	/* Descriptor Individual Upper Address Register */
+	ENET_IALR	= 0x11C/4,	/* Descriptor Individual Lower Address Register */
+	ENET_GAUR	= 0x120/4,	/* Descriptor Group Upper Address Register */
+	ENET_GALR	= 0x124/4,	/* Descriptor Group Lower Address Register */
+	ENET_TFWR	= 0x144/4,	/* Transmit FIFO Watermark Register */
+		TFWR_STRFWD	= 1<<8,
+
+	ENET_RDSR1	= 0x160/4,	/* Receive Descriptor Ring 1 Start Register */
+	ENET_TDSR1	= 0x164/4,	/* Transmit Buffer Descriptor Ring 1 Start Register */
+	ENET_MRBR1	= 0x168/4,	/* Maximum Receive Buffer Size Register Ring 1 */
+
+	ENET_RDSR2	= 0x16C/4,	/* Receive Descriptor Ring 2 Start Register */
+	ENET_TDSR2	= 0x170/4,	/* Transmit Buffer Descriptor Ring 2 Start Register */
+	ENET_MRBR2	= 0x174/4,	/* Maximum Receive Buffer Size Register Ring 2 */
+
+	ENET_RDSR	= 0x180/4,	/* Receive Descriptor Ring 0 Start Register */
+	ENET_TDSR	= 0x184/4,	/* Transmit Buffer Descriptor Ring 0 Start Register */
+	ENET_MRBR	= 0x188/4,	/* Maximum Receive Buffer Size Register Ring 0 */
+
+	ENET_RSFL	= 0x190/4,	/* Receive FIFO Section Full Threshold */
+	ENET_RSEM	= 0x194/4,	/* Receive FIFO Section Empty Threshold */
+	ENET_RAEM	= 0x198/4,	/* Receive FIFO Almost Empty Threshold */
+	ENET_RAFL	= 0x19C/4,	/* Receive FIFO Almost Full Threshold */
+
+	ENET_TSEM	= 0x1A0/4,	/* Transmit FIFO Section Empty Threshold */
+	ENET_TAEM	= 0x1A4/4,	/* Transmit FIFO Almost Empty Threshold */
+	ENET_TAFL	= 0x1A8/4,	/* Transmit FIFO Almost Full Threshold */
+
+	ENET_TIPG	= 0x1AC/4,	/* Transmit Inter-Packet Gap */
+	ENET_FTRL	= 0x1B0/4,	/* Frame Truncation Length */
+	ENET_TACC	= 0x1C0/4,	/* Transmit Accelerator Function Configuration */
+	ENET_RACC	= 0x1C4/4,	/* Receive Accelerator Function Configuration */
+
+	ENET_RCMR1	= 0x1C8/4,	/* Receive Classification Match Register */
+	ENET_RCMR2	= 0x1CC/4,	/* Receive Classification Match Register */
+
+	ENET_DMA1CFG	= 0x1D8/4,	/* DMA Class Based Configuration */
+	ENET_DMA2CFG	= 0x1DC/4,	/* DMA Class Based Configuration */
+
+	ENET_RDAR1	= 0x1E0/4,	/* Receive Descriptor Active Register - Ring 1 */
+	ENET_TDAR1	= 0x1E4/4,	/* Transmit Descriptor Active Register - Ring 1 */
+	ENET_RDAR2	= 0x1E8/4,	/* Receive Descriptor Active Register - Ring 2 */
+	ENET_TDAR2	= 0x1EC/4,	/* Transmit Descriptor Active Register - Ring 2 */
+
+	ENET_QOS	= 0x1F0/4,	/* QOS Scheme */
+};
+
+enum {	/* layout of Descr.status: flag bits in the upper half, length in the low 16 bits */
+	/* transmit descriptor status bits */
+	TD_R		= 1<<(15+16),	/* Ready */
+	TD_OWN		= 1<<(14+16),	/* Ownership */
+	TD_W		= 1<<(13+16),	/* Wrap */
+	TD_L		= 1<<(11+16),	/* Last in a frame */
+
+	TD_TC		= 1<<(10+16),	/* Transmit CRC */
+	TD_ERR		= TD_TC,
+
+	TD_LEN		= 0xFFFF,
+
+	/* receive descriptor status bits */
+	RD_E		= 1<<(15+16),	/* Empty */
+	RD_W		= 1<<(13+16),	/* Wrap */
+	RD_L		= 1<<(11+16),	/* Last in a frame */
+
+	RD_M		= 1<<(8+16),	/* Miss */
+	RD_BC		= 1<<(7+16),	/* broadcast */
+	RD_MC		= 1<<(6+16),	/* multicast */
+
+	RD_LG		= 1<<(5+16),	/* length violation */
+	RD_NO		= 1<<(4+16),	/* non octet aligned frame */
+	RD_CR		= 1<<(2+16),	/* crc error */
+	RD_OV		= 1<<(1+16),	/* overrun */
+	RD_TR		= 1<<(0+16),	/* truncated */
+	RD_ERR		= RD_LG | RD_NO | RD_CR | RD_OV | RD_TR,	/* any of these makes rxproc drop the frame */
+
+	RD_LEN		= 0xFFFF,
+};
+
+typedef struct Descr Descr;
+struct Descr	/* one entry of a transmit or receive descriptor ring */
+{
+	u32int	status;		/* TD_* or RD_* flags or'd with the length in the low 16 bits */
+	u32int	addr;		/* PADDR() of the buffer */
+};
+
+typedef struct Ctlr Ctlr;
+struct Ctlr	/* driver state for the single controller instance created in pnp() */
+{
+	u32int	*regs;		/* register file, indexed with the ENET_* word offsets via rr()/wr() */
+	u32int	intmask;	/* value last written to ENET_EIMR by attach() */
+
+	struct {
+		Block	*b[256];	/* buffer currently attached to each descriptor */
+		Descr	*d;		/* descriptor ring with nelem(b) entries */
+		Rendez;			/* rxproc sleeps here; woken on INT_RXF */
+	}	rx[1];
+
+	struct {
+		Block	*b[256];	/* block queued on each descriptor, freed by frproc */
+		Descr	*d;		/* descriptor ring with nelem(b) entries */
+		Rendez;			/* frproc sleeps here; woken on INT_TXF */
+	}	tx[1];
+
+	struct {
+		Rendez;			/* txproc sleeps here for a free descriptor; woken by frproc */
+	}	free[1];
+
+	struct {
+		Mii;
+		Rendez;			/* mdiowait and linkproc sleep here; woken on INT_MII */
+	}	mii[1];
+
+	int	attached;	/* set at the end of a successful attach() */
+	QLock;			/* held for the duration of attach() */
+};
+
+#define rr(c, r)	((c)->regs[r])
+#define wr(c, r, v)	((c)->regs[r] = (v))
+
+static int
+mdiodone(void *arg)	/* sleep condition: nonzero while INT_MII is set in ENET_EIR */
+{
+	Ctlr *ctlr = arg;
+	return rr(ctlr, ENET_EIR) & INT_MII;
+}
+static int
+mdiowait(Ctlr *ctlr)
+{
+	int tries;
+	/* up to 200 rounds of tsleep(..., 5), each followed by a recheck; 0 once done, -1 when all rounds are used up */
+	for(tries = 200; tries > 0; tries--){
+		tsleep(ctlr->mii, mdiodone, ctlr, 5);
+		if(mdiodone(ctlr))
+			return 0;
+	}
+	return -1;
+}
+static int
+mdiow(Mii* mii, int phy, int addr, int data)	/* write a phy register; returns the 16-bit data written, or -1 on timeout */
+{
+	Ctlr *ctlr = mii->ctlr;
+
+	data &= 0xFFFF;
+	wr(ctlr, ENET_EIR, INT_MII);	/* clear a stale completion event before starting */
+	wr(ctlr, ENET_MMFR, MMFR_WR | MMFR_ST | MMFR_TA | phy<<MMFR_PA_SHIFT | addr<<MMFR_RA_SHIFT | data);
+	if(mdiowait(ctlr) < 0) return -1;
+	return data;
+}
+static int
+mdior(Mii* mii, int phy, int addr)	/* read a phy register; returns its 16-bit value, or -1 on timeout */
+{
+	Ctlr *ctlr = mii->ctlr;
+
+	wr(ctlr, ENET_EIR, INT_MII);	/* clear a stale completion event before starting */
+	wr(ctlr, ENET_MMFR, MMFR_RD | MMFR_ST | MMFR_TA | phy<<MMFR_PA_SHIFT | addr<<MMFR_RA_SHIFT);
+	if(mdiowait(ctlr) < 0) return -1;
+	return rr(ctlr, ENET_MMFR) & 0xFFFF;	/* result is in the low half of the frame register */
+}
+
+static void
+interrupt(Ureg*, void *arg)	/* acknowledge all pending events and wake the procs waiting on them */
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	u32int e;
+
+	e = rr(ctlr, ENET_EIR);
+	wr(ctlr, ENET_EIR, e);	/* NOTE(review): this also acknowledges INT_MII, which mdiodone() polls afterwards -- confirm the MDIO wait cannot miss it */
+
+	if(e & INT_RXF) wakeup(ctlr->rx);	/* rxproc */
+	if(e & INT_TXF) wakeup(ctlr->tx);	/* frproc */
+	if(e & INT_MII) wakeup(ctlr->mii);	/* mdiowait */
+}
+
+static void
+shutdown(Ether *edev)	/* reset the MAC and leave all interrupts masked and cleared */
+{
+	Ctlr *ctlr = edev->ctlr;
+	coherence();
+
+	wr(ctlr, ENET_ECR, ECR_RESERVED | ECR_RESET);
+	while(rr(ctlr, ENET_ECR) & ECR_RESET) delay(1);	/* poll until the reset bit reads back clear */
+
+	/* mask and clear interrupt events */
+	wr(ctlr, ENET_EIMR, 0);
+	wr(ctlr, ENET_EIR, ~0);
+}
+
+static int
+tdfree(void *arg)
+{
+	u32int st = ((Descr*)arg)->status;	/* slot is free when neither TD_OWN nor TD_R is set */
+	return !(st & TD_OWN) && !(st & TD_R);
+}
+
+static void
+txproc(void *arg)	/* kproc: moves blocks from edev->oq onto the transmit descriptor ring */
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	Block *b;
+	Descr *d;
+	uint i = 0;		/* next ring slot to fill */
+
+	while(waserror())	/* after an error, fall back into the loop below */
+		;
+
+	for(;;){
+		if((b = qbread(edev->oq, 100000)) == nil)
+			break;
+
+		d = &ctlr->tx->d[i];
+		while(!tdfree(d))	/* slot still in use: wait until frproc has recycled it */
+			sleep(ctlr->free, tdfree, d);
+
+		ctlr->tx->b[i] = b;	/* remembered so frproc can free it once sent */
+
+		dmaflush(1, b->rp, BLEN(b));
+		d->addr = PADDR(b->rp);
+		coherence();		/* order the address store before the status store */
+		if(i == nelem(ctlr->tx->b)-1){
+			d->status = BLEN(b) | TD_OWN | TD_R | TD_L | TD_TC | TD_W;	/* last slot carries the wrap bit */
+			i = 0;
+		} else {
+			d->status = BLEN(b) | TD_OWN | TD_R | TD_L | TD_TC;
+			i++;
+		}
+		wr(ctlr, ENET_TDAR, TDAR_ACTIVE);	/* mark the transmit ring active */
+	}
+}
+
+static int
+tddone(void *arg)
+{
+	u32int st = ((Descr*)arg)->status;	/* done: TD_OWN still set by txproc, TD_R no longer set */
+	return (st & TD_OWN) != 0 && (st & TD_R) == 0;
+}
+
+static void
+frproc(void *arg)	/* kproc: reclaims transmit descriptors in ring order and frees their blocks */
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	Block *b;
+	Descr *d;
+	uint i = 0;		/* next ring slot to reclaim */
+
+	while(waserror())	/* after an error, fall back into the loop below */
+		;
+
+	for(;;){
+		d = &ctlr->tx->d[i];
+		while(!tddone(d))	/* woken by interrupt() on INT_TXF */
+			sleep(ctlr->tx, tddone, d);
+
+		b = ctlr->tx->b[i];
+		ctlr->tx->b[i] = nil;
+		coherence();
+
+		if(i == nelem(ctlr->tx->b)-1){
+			d->status = TD_W;	/* keep the wrap bit on the last slot */
+			i = 0;
+		} else {
+			d->status = 0;		/* now satisfies tdfree() */
+			i++;
+		}
+
+		wakeup(ctlr->free);	/* txproc may be waiting for this slot */
+		freeb(b);
+	}
+}
+
+static int
+rdfull(void *arg)
+{
+	Descr *rd = arg;	/* a descriptor is full once its empty bit is no longer set */
+	return !(rd->status & RD_E);
+}
+
+static void
+rxproc(void *arg)	/* kproc: passes received frames to etheriq() and re-arms the receive ring */
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	Block *b;
+	Descr *d;
+	uint s, i = 0;		/* s: status snapshot, i: next ring slot to inspect */
+
+	while(waserror())	/* after an error, fall back into the loop below */
+		;
+
+	for(;;){
+		d = &ctlr->rx->d[i];
+		s = d->status;
+		if(s & RD_E){		/* still empty: wait for INT_RXF, then look again */
+			sleep(ctlr->rx, rdfull, d);
+			continue;
+		}
+		if(((s^RD_L) & (RD_L|RD_ERR)) == 0){	/* RD_L set and no RD_ERR bit set */
+			b = ctlr->rx->b[i];
+			b->wp = b->rp + (s & RD_LEN);
+			dmaflush(0, b->rp, BLEN(b));
+			etheriq(edev, b);
+
+			/* replenish */
+			b = allocb(R_BUF_SIZE);
+			ctlr->rx->b[i] = b;
+			dmaflush(1, b->rp, R_BUF_SIZE);
+			d->addr = PADDR(b->rp);	/* otherwise the old buffer stays on the slot */
+			coherence();
+		}
+		if(i == nelem(ctlr->rx->b)-1) {
+			d->status = RD_E | RD_W;	/* last slot carries the wrap bit */
+			i = 0;
+		} else {
+			d->status = RD_E;
+			i++;
+		}
+		wr(ctlr, ENET_RDAR, RDAR_ACTIVE);	/* mark the receive ring active */
+	}
+}
+
+static void
+linkproc(void *arg)	/* kproc: polls the phy and reprograms the MAC whenever the link state changes */
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	MiiPhy *phy;
+	int link = -1;		/* last reported state; -1 forces a report on the first pass */
+
+	while(waserror())	/* after an error, fall back into the code below */
+		;
+
+	miiane(ctlr->mii, ~0, AnaAP|AnaP, ~0);
+
+	for(;;){
+		miistatus(ctlr->mii);
+		phy = ctlr->mii->curphy;
+		if(phy->link == link){	/* unchanged: sleep up to 5000ms and poll again */
+			tsleep(ctlr->mii, return0, nil, 5000);
+			continue;
+		}
+		link = phy->link;
+		if(link){
+			u32int ecr = rr(ctlr, ENET_ECR) & ~ECR_SPEED_1000M;	/* start with speed and duplex bits cleared */
+			u32int rcr = rr(ctlr, ENET_RCR) & ~(RCR_RMII_10T|RCR_FCE);
+			u32int tcr = rr(ctlr, ENET_TCR) & ~(TCR_RFC_PAUSE|TCR_TFC_PAUSE|TCR_FDEN);
+
+			switch(phy->speed){
+			case 1000:
+				ecr |= ECR_SPEED_1000M;
+				rcr |= RCR_FCE;
+
+				/* receive fifo thresholds */
+				wr(ctlr, ENET_RSFL, 16);
+				wr(ctlr, ENET_RSEM, 132);
+				wr(ctlr, ENET_RAEM, 8);
+				wr(ctlr, ENET_RAFL, 8);
+
+				/* opcode/pause duration */
+				wr(ctlr, ENET_OPD, 0xFFF0);
+				break;
+			case 100:
+				ecr |= ECR_SPEED_100M;	/* ECR_SPEED_100M is 0, so this only documents intent */
+				break;
+			case 10:
+				rcr |= RCR_RMII_10T;
+				break;
+			}
+			if(phy->fd)
+				tcr |= TCR_FDEN;
+			if(phy->rfc)
+				tcr |= TCR_RFC_PAUSE;
+			if(phy->tfc)
+				tcr |= TCR_TFC_PAUSE;
+
+			wr(ctlr, ENET_ECR, ecr);
+			wr(ctlr, ENET_RCR, rcr);
+			wr(ctlr, ENET_TCR, tcr);
+
+			edev->mbps = phy->speed;
+
+			wr(ctlr, ENET_RDAR, RDAR_ACTIVE);	/* mark the receive ring active again */
+		}
+		edev->link = link;
+		print("#l%d: link %d speed %d\n", edev->ctlrno, edev->link, edev->mbps);
+	}
+}
+
+static void
+attach(Ether *edev)	/* one-time bring-up: phy probe, rings, interrupts, MAC enable, worker kprocs */
+{
+	Ctlr *ctlr = edev->ctlr;
+	Descr *d;
+	int i;
+
+	eqlock(ctlr);
+	if(ctlr->attached){	/* already brought up by an earlier call */
+		qunlock(ctlr);
+		return;
+	}
+	if(waserror()){		/* e.g. error("no phy") below: release the lock and propagate */
+		qunlock(ctlr);
+		nexterror();
+	}
+
+	/* RGMII mode, max frame length */
+	wr(ctlr, ENET_RCR, RCR_MII_MODE | RCR_RGMII_EN | Maxtu<<RCR_MAX_FL_SHIFT);
+
+	/* set MII clock to 2.5Mhz, 10ns hold time; NOTE(review): the hold term evaluates to 11 -- confirm it fits the MSCR hold field and matches 10ns */
+	wr(ctlr, ENET_MSCR, ((Moduleclk/(2*2500000))-1)<<MSCR_SPEED_SHIFT | ((Moduleclk/10000000)-1)<<MSCR_HOLD_SHIFT);
+
+	ctlr->intmask |= INT_MII;	/* needed by mdiowait() during the phy probe */
+	wr(ctlr, ENET_EIMR, ctlr->intmask);
+	mii(ctlr->mii, ~0);
+
+	if(ctlr->mii->curphy == nil)
+		error("no phy");
+
+	print("#l%d: phy%d id %.8ux oui %x\n", 
+		edev->ctlrno, ctlr->mii->curphy->phyno, 
+		ctlr->mii->curphy->id, ctlr->mii->curphy->oui);
+
+	/* clear mac filter hash table */
+	wr(ctlr, ENET_IALR, 0);
+	wr(ctlr, ENET_IAUR, 0);
+	wr(ctlr, ENET_GALR, 0);
+	wr(ctlr, ENET_GAUR, 0);
+
+	/* set MAC address */
+	wr(ctlr, ENET_PALR, (u32int)edev->ea[0]<<24 | (u32int)edev->ea[1]<<16 | (u32int)edev->ea[2]<<8 | edev->ea[3]<<0);
+	wr(ctlr, ENET_PAUR, (u32int)edev->ea[4]<<24 | (u32int)edev->ea[5]<<16);
+
+	if(ctlr->rx->d == nil)
+		ctlr->rx->d = ucalloc(sizeof(Descr) * nelem(ctlr->rx->b));
+	for(i=0; i<nelem(ctlr->rx->b); i++){	/* give every receive slot an empty buffer */
+		Block *b = allocb(R_BUF_SIZE);
+		ctlr->rx->b[i] = b;
+		d = &ctlr->rx->d[i];
+		dmaflush(1, b->rp, R_BUF_SIZE);
+		d->addr = PADDR(b->rp);
+		d->status = RD_E;
+	}
+	ctlr->rx->d[nelem(ctlr->rx->b)-1].status = RD_E | RD_W;	/* last slot wraps */
+	wr(ctlr, ENET_MRBR, R_BUF_SIZE);
+	coherence();
+	wr(ctlr, ENET_RDSR, PADDR(ctlr->rx->d));
+
+	if(ctlr->tx->d == nil)
+		ctlr->tx->d = ucalloc(sizeof(Descr) * nelem(ctlr->tx->b));
+	for(i=0; i<nelem(ctlr->tx->b); i++){	/* transmit ring starts out with every slot free */
+		ctlr->tx->b[i] = nil;
+		d = &ctlr->tx->d[i];
+		d->addr = 0;
+		d->status = 0;
+	}
+	ctlr->tx->d[nelem(ctlr->tx->b)-1].status = TD_W;	/* last slot wraps */
+	coherence();
+	wr(ctlr, ENET_TDSR, PADDR(ctlr->tx->d));
+
+	/* store and forward tx fifo */
+	wr(ctlr, ENET_TFWR, TFWR_STRFWD);
+
+	/* interrupt coalescing: 200 pkts, 1000 µs in units of 64 module clocks; scaled via Mhz because 1000*Moduleclk overflows a 32-bit int */
+	wr(ctlr, ENET_RXIC0, IC_EN | 200<<IC_FT_SHIFT | (((Moduleclk/1000000)*1000)/64)<<IC_TT_SHIFT);
+	wr(ctlr, ENET_TXIC0, IC_EN | 200<<IC_FT_SHIFT | (((Moduleclk/1000000)*1000)/64)<<IC_TT_SHIFT);
+
+	ctlr->intmask |= INT_TXF | INT_RXF;
+	wr(ctlr, ENET_EIMR, ctlr->intmask);
+
+	/* enable ethernet */
+	wr(ctlr, ENET_ECR, rr(ctlr, ENET_ECR) | ECR_ETHEREN | ECR_DBSWP);
+
+	ctlr->attached = 1;
+
+	kproc("ether-rx", rxproc, edev);
+	kproc("ether-tx", txproc, edev);
+	kproc("ether-fr", frproc, edev);
+
+	kproc("ether-link", linkproc, edev);
+
+	qunlock(ctlr);
+	poperror();
+}
+
+static void
+prom(void *arg, int on)
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	u32int rcr;
+
+	/* read-modify-write of the promiscuous bit only */
+	rcr = rr(ctlr, ENET_RCR) & ~RCR_PROM;
+	wr(ctlr, ENET_RCR, on ? rcr | RCR_PROM : rcr);
+}
+
+static void
+multi(void *arg, uchar*, int)
+{
+	Ether *edev = arg;
+	Ctlr *ctlr = edev->ctlr;
+	u64int filter = 0;	/* 64-bit group filter, rebuilt from the whole address list */
+	Netaddr *ma;
+
+	/* the top 6 bits of each address's CRC select one filter bit */
+	for(ma = edev->maddr; ma != nil; ma = ma->next)
+		filter |= 1ULL << ((ethercrc(ma->addr, edev->alen) >> (32 - 6)) & 0x3F);
+
+	wr(ctlr, ENET_GALR, filter & 0xFFFFFFFF);
+	wr(ctlr, ENET_GAUR, filter >> 32);
+}
+
+static long
+ctl(Ether*, void*, long len)	/* accepts and ignores every control message */
+{
+	return len;
+}
+
+static int
+reset(Ether *edev)
+{
+	Ctlr *ctlr = edev->ctlr;
+	u32int lo, hi;
+	int i;
+
+	/* steal mac address from uboot: PALR holds bytes 0-3, the top half of PAUR bytes 4-5 */
+	lo = rr(ctlr, ENET_PALR);
+	hi = rr(ctlr, ENET_PAUR);
+	for(i = 0; i < 4; i++)
+		edev->ea[i] = lo >> (24 - 8*i);
+	for(i = 4; i < 6; i++)
+		edev->ea[i] = hi >> (24 - 8*(i-4));
+
+	/* only after the address is saved: reset the MAC and mask its interrupts */
+	shutdown(edev);
+
+	return 0;
+}
+
+static int
+pnp(Ether *edev)	/* claim the single controller instance; returns -1 if it is already claimed */
+{
+	static Ctlr ctlr[1];
+
+	if(ctlr->regs != nil)	/* regs doubles as the "already claimed" marker */
+		return -1;
+
+	ctlr->regs = (u32int*)(VIRTIO + 0xbe0000);
+
+	ctlr->mii->ctlr = ctlr;
+	ctlr->mii->mir = mdior;
+	ctlr->mii->miw = mdiow;
+
+	edev->port = (uintptr)ctlr->regs - KZERO;
+	edev->irq = IRQenet1;
+	edev->ctlr = ctlr;
+	edev->attach = attach;
+	edev->shutdown = shutdown;
+	edev->promiscuous = prom;
+	edev->multicast = multi;
+	edev->ctl = ctl;
+	edev->arg = edev;	/* passed as arg to prom() and multi() */
+	edev->mbps = 1000;	/* initial value; linkproc updates it from the phy */
+	edev->maxmtu = Maxtu;
+
+	if(reset(edev) < 0)
+		return -1;
+
+	intrenable(edev->irq+0, interrupt, edev, BUSUNKNOWN, edev->name);	/* four consecutive irqs share one handler */
+	intrenable(edev->irq+1, interrupt, edev, BUSUNKNOWN, edev->name);
+	intrenable(edev->irq+2, interrupt, edev, BUSUNKNOWN, edev->name);
+	intrenable(edev->irq+3, interrupt, edev, BUSUNKNOWN, edev->name);
+
+	return 0;
+}
+
+void
+etherimxlink(void)	/* registers this driver under the name "imx" */
+{
+	addethercard("imx", pnp);
+}
--- /dev/null
+++ b/sys/src/9/imx8/fns.h
@@ -1,0 +1,139 @@
+#include "../port/portfns.h"
+
+/* l.s */
+extern void sev(void);
+extern int tas(void *);
+extern int cmpswap(long*, long, long);
+extern void coherence(void);
+extern void idlehands(void);
+extern uvlong vcycles(void);
+#define cycles(ip) *(ip) = vcycles()
+extern int splfhi(void);
+extern void splflo(void);
+extern void touser(uintptr sp);
+extern void forkret(void);
+extern void noteret(void);
+extern void returnto(void*);
+extern void fpsaveregs(void*);
+extern void fploadregs(void*);
+
+extern void setttbr(uintptr pa);
+extern uintptr getfar(void);
+
+extern void flushasidva(uintptr asidva);
+extern void tlbivae1is(uintptr asidva);
+
+extern void flushasidvall(uintptr asidva);
+extern void tlbivale1is(uintptr asidva);
+
+extern void flushasid(uintptr asid);
+extern void tlbiaside1is(uintptr asid);
+
+extern void flushtlb(void);
+extern void tlbivmalle1(void);
+
+extern void flushlocaltlb(void);
+extern void tlbivmalle1(void);
+
+/* cache */
+extern ulong cachesize(int level);
+
+extern void cacheiinvse(void*, int);
+extern void cacheuwbinv(void);
+extern void cacheiinv(void);
+
+extern void cachedwbse(void*, int);
+extern void cacheduwbse(void*, int);
+extern void cachedinvse(void*, int);
+extern void cachedwbinvse(void*, int);
+
+extern void cachedwb(void);
+extern void cachedinv(void);
+extern void cachedwbinv(void);
+
+extern void l2cacheuwb(void);
+extern void l2cacheuinv(void);
+extern void l2cacheuwbinv(void);
+
+/* mmu */
+#define	getpgcolor(a)	0
+extern uintptr paddr(void*);
+#define PADDR(a) paddr((void*)(a))
+extern uintptr cankaddr(uintptr);
+extern void* kaddr(uintptr);
+#define KADDR(a) kaddr(a)
+extern void kmapinval(void);
+#define	VA(k)	((uintptr)(k))
+extern KMap *kmap(Page*);
+extern void kunmap(KMap*);
+extern uintptr mmukmap(uintptr, uintptr, usize);
+extern void* vmap(uvlong, vlong);
+extern void vunmap(void*, vlong);
+
+extern void mmu0init(uintptr*);
+extern void mmu0clear(uintptr*);
+extern void mmuidmap(uintptr*);
+extern void mmu1init(void);
+extern void meminit(void);
+
+extern void putasid(Proc*);
+
+extern void* ucalloc(usize);
+
+/* clock */
+extern void clockinit(void);
+extern void synccycles(void);
+extern void armtimerset(int);
+extern void clockshutdown(void);
+
+/* fpu */
+extern void fpuinit(void);
+extern void fpoff(void);
+extern void fpinit(void);
+extern void fpclear(void);
+extern void fpsave(FPsave*);
+extern void fprestore(FPsave*);
+extern void mathtrap(Ureg*);
+
+/* trap */
+extern void trapinit(void);
+extern int userureg(Ureg*);
+extern void evenaddr(uintptr);
+extern void setkernur(Ureg*, Proc*);
+extern void procfork(Proc*);
+extern void procsetup(Proc*);
+extern void procsave(Proc*);
+extern void procrestore(Proc *);
+extern void trap(Ureg*);
+extern void syscall(Ureg*);
+extern void noted(Ureg*, ulong);
+extern void faultarm64(Ureg*);
+extern void dumpstack(void);
+extern void dumpregs(Ureg*);
+
+/* irq */
+extern void intrinit(void);
+extern void intrcpushutdown(void);
+extern void intrsoff(void);
+extern void intrenable(int, void (*)(Ureg*, void*), void*, int, char*);
+extern void intrdisable(int, void (*)(Ureg*, void*), void*, int, char*);
+extern int irq(Ureg*);
+extern void fiq(Ureg*);
+
+/* sysreg */
+extern uvlong	sysrd(ulong);
+extern void	syswr(ulong, uvlong);
+
+/* uartimx */
+extern void uartconsinit(void);
+
+/* dma */
+extern void dmaflush(int, void*, ulong);
+
+/* main */
+extern char *getconf(char *name);
+extern void setconfenv(void);
+extern void writeconf(void);
+
+extern int isaconfig(char*, int, ISAConf*);
+extern void links(void);
--- /dev/null
+++ b/sys/src/9/imx8/fpu.c
@@ -1,0 +1,92 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+#include "ureg.h"
+#include "sysreg.h"
+
+/* libc */
+extern ulong getfcr(void);
+extern void setfcr(ulong fcr);
+extern ulong getfsr(void);
+extern void setfsr(ulong fsr);
+
+void
+fpuinit(void)
+{
+	fpoff();	/* leave FP access off; mathtrap() turns it on via fpinit()/fprestore() */
+}
+
+void
+fpon(void)
+{
+	syswr(CPACR_EL1, 3<<20);	/* CPACR_EL1 bits 21:20 (FPEN) = 3: FP/SIMD use is not trapped */
+}
+
+void
+fpoff(void)
+{
+	syswr(CPACR_EL1, 0<<20);	/* CPACR_EL1 bits 21:20 (FPEN) = 0: FP/SIMD use traps */
+}
+
+void
+fpinit(void)
+{
+	fpon();	/* access must be enabled before the control/status registers are written */
+	setfcr(0);	/* control register starts zeroed */
+	setfsr(0);	/* status register starts zeroed */
+}
+
+void
+fpclear(void)
+{
+	fpoff();	/* only disables access; no register contents are changed here */
+}
+
+void
+fpsave(FPsave *p)
+{
+	p->control = getfcr();	/* control and status first, then the registers */
+	p->status = getfsr();
+	fpsaveregs(p->regs);	/* assembly helper in l.s, stores V0-V31 */
+	fpoff();	/* FP access is disabled once the state is saved */
+}
+
+void
+fprestore(FPsave *p)
+{
+	fpon();	/* enable access before touching any FP register */
+	setfcr(p->control);
+	setfsr(p->status);
+	fploadregs(p->regs);	/* assembly helper in l.s, reloads V0-V31 */
+}
+
+void
+mathtrap(Ureg*)
+{
+	int s;
+
+	if((up->fpstate & FPillegal) != 0){	/* FP is not allowed in this state: post a note, enable nothing */
+		postnote(up, 1, "sys: floating point in note handler", NDebug);
+		return;
+	}
+	switch(up->fpstate){
+	case FPinit:	/* first use by this process: enable FP with zeroed control/status */
+		s = splhi();
+		fpinit();
+		up->fpstate = FPactive;
+		splx(s);
+		break;
+	case FPinactive:	/* state was saved earlier: enable FP and reload it */
+		s = splhi();
+		fprestore(up->fpsave);
+		up->fpstate = FPactive;
+		splx(s);
+		break;
+	case FPactive:	/* FP was already enabled, so the trap is reported as an error */
+		postnote(up, 1, "sys: floating point error", NDebug);
+		break;
+	}
+}
--- /dev/null
+++ b/sys/src/9/imx8/gic.c
@@ -1,0 +1,316 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "ureg.h"
+#include "sysreg.h"
+#include "../port/error.h"
+
+enum {
+	GICD_CTLR	= 0x000/4,	/* RW, Distributor Control Register */
+	GICD_TYPER	= 0x004/4,	/* RO, Interrupt Controller Type */
+	GICD_IIDR	= 0x008/4,	/* RO, Distributor Implementer Identification Register */
+
+	GICD_IGROUPR0	= 0x080/4,	/* RW, Interrupt Group Registers (0x80-0xBC) */
+
+	GICD_ISENABLER0	= 0x100/4,	/* RW, Interrupt Set-Enable Registers (0x100-0x13C) */
+	GICD_ICENABLER0	= 0x180/4,	/* RW, Interrupt Clear-Enable Registers (0x180-0x1BC) */
+
+	GICD_ISPENDR0	= 0x200/4,	/* RW, Interrupt Set-Pending Registers (0x200-0x23C) */
+	GICD_ICPENDR0	= 0x280/4,	/* RW, Interrupt Clear-Pending Registers (0x280-0x2BC) */
+
+	GICD_ISACTIVER0	= 0x300/4,	/* RW, Interrupt Set-Active Registers (0x300-0x33C) */
+	GICD_ICACTIVER0 = 0x380/4,	/* RW, Interrupt Clear-Active Registers (0x380-0x3BC) */
+
+	GICD_IPRIORITYR0= 0x400/4,	/* RW, Interrupt Priority Registers (0x400-0x5FC) */
+	GICD_TARGETSR0	= 0x800/4,	/* RW, Interrupt Target Registers (0x800-0x9FC) */
+	GICD_ICFGR0	= 0xC00/4,	/* RW, Interrupt Configuration Registers (0xC00-0xC7C) */
+
+	GICD_ISR0	= 0xD00/4,
+	GICD_PPISR	= GICD_ISR0,	/* RO, Private Peripheral Interrupt Status Register */
+	GICD_SPISR0	= GICD_ISR0+1,	/* RO, Shared Peripheral Interrupt Status Register */
+	GICD_SGIR	= 0xF00/4,	/* WO, Software Generated Interrupt Register */
+
+	GICD_CPENDSGIR0	= 0xF10/4,	/* RW, SGI Clear-Pending Registers (0xF10-0xF1C) */
+	GICD_SPENDSGIR0	= 0xF20/4,	/* RW, SGI Set-Pending Registers (0xF20-0xF2C) */
+
+	GICD_PIDR4	= 0xFD0/4,	/* RO, Peripheral ID Registers */
+	GICD_PIDR5	= 0xFD4/4,
+	GICD_PIDR6	= 0xFD8/4,
+	GICD_PIDR7	= 0xFDC/4,
+	GICD_PIDR0	= 0xFE0/4,
+	GICD_PIDR1	= 0xFE4/4,
+	GICD_PIDR2	= 0xFE8/4,
+	GICD_PIDR3	= 0xFEC/4,
+
+	GICD_CIDR0	= 0xFF0/4,	/* RO, Component ID Registers */
+	GICD_CIDR1	= 0xFF4/4,
+	GICD_CIDR2	= 0xFF8/4,
+	GICD_CIDR3	= 0xFFC/4,
+
+	RD_base		= 0x00000,	/* byte offset of the RD frame inside a re-distributor */
+	GICR_CTLR	= (RD_base+0x000)/4,
+	GICR_IIDR	= (RD_base+0x004)/4,
+	GICR_TYPER	= (RD_base+0x008)/4,
+	GICR_STATUSR	= (RD_base+0x010)/4,
+	GICR_WAKER	= (RD_base+0x014)/4,
+	GICR_SETLPIR	= (RD_base+0x040)/4,
+	GICR_CLRLPIR	= (RD_base+0x048)/4,
+	GICR_PROPBASER	= (RD_base+0x070)/4,
+	GICR_PENDBASER	= (RD_base+0x078)/4,
+	GICR_INVLPIR	= (RD_base+0x0A0)/4,
+	GICR_INVALLR	= (RD_base+0x0B0)/4,
+	GICR_SYNCR	= (RD_base+0x0C0)/4,
+
+	SGI_base	= 0x10000,	/* byte offset of the SGI frame inside a re-distributor */
+	GICR_IGROUPR0	= (SGI_base+0x080)/4,
+	GICR_ISENABLER0	= (SGI_base+0x100)/4,
+	GICR_ICENABLER0	= (SGI_base+0x180)/4,
+	GICR_ISPENDR0	= (SGI_base+0x200)/4,
+	GICR_ICPENDR0	= (SGI_base+0x280)/4,
+	GICR_ISACTIVER0	= (SGI_base+0x300)/4,
+	GICR_ICACTIVER0	= (SGI_base+0x380)/4,
+	GICR_IPRIORITYR0= (SGI_base+0x400)/4,
+	GICR_ICFGR0	= (SGI_base+0xC00)/4,
+	GICR_ICFGR1	= (SGI_base+0xC04)/4,
+	GICR_IGRPMODR0	= (SGI_base+0xD00)/4,
+	GICR_NSACR	= (SGI_base+0xE00)/4,
+};
+
+typedef struct Vctl Vctl;
+struct Vctl {
+	Vctl	*next;	/* next handler in the same vctl[][] bucket */
+	void	(*f)(Ureg*, void*);	/* handler function */
+	void	*a;	/* argument passed to f */
+	int	irq;	/* irq number as given to intrenable() */
+	u32int	intid;	/* same value as unsigned; matched against the acknowledged id */
+};
+
+static Lock vctllock;	/* held by intrenable() while it updates vctl/vfiq and the GIC */
+static Vctl *vctl[MAXMACH][32], *vfiq;	/* per-cpu handler chains bucketed by intid%32; one fiq handler */
+static u32int *dregs = (u32int*)(VIRTIO + 0x8800000);	/* distributor registers, indexed by GICD_* */
+
+static u32int*
+getrregs(int machno)
+{
+	u32int *rregs = (u32int*)(VIRTIO + 0x8880000);	/* first re-distributor frame */
+
+	for(;;){
+		if((rregs[GICR_TYPER] & 0xFFFF00) == (machno << 8))	/* bits 8-23 of GICR_TYPER equal machno */
+			return rregs;
+		if(rregs[GICR_TYPER] & (1<<4))	/* bit 4 set: this was the last frame */
+			break;
+		rregs += (0x20000/4);	/* frames are 0x20000 bytes apart */
+	}
+	panic("getrregs: no re-distributor for cpu %d\n", machno);
+	return nil;
+}
+
+void
+intrcpushutdown(void)
+{
+	/* disable cpu interface */
+	syswr(ICC_IGRPEN0_EL1, 0);	/* group 0 enable cleared */
+	syswr(ICC_IGRPEN1_EL1, 0);	/* group 1 enable cleared */
+	coherence();
+}
+
+void
+intrsoff(void)
+{
+	/* disable distributor */
+	dregs[GICD_CTLR] = 0;
+	coherence();
+	while(dregs[GICD_CTLR]&(1<<31))	/* spin while bit 31 (register write pending) is set */
+		;
+}
+
+void
+intrinit(void)
+{
+	u32int *rregs;
+	int i, n;
+
+	if(m->machno == 0){	/* the shared distributor is programmed by cpu0 only */
+		intrsoff();
+
+		/* clear all interrupts */
+		n = ((dregs[GICD_TYPER] & 0x1F)+1) << 5;	/* 32 interrupt ids per unit of the low 5 bits */
+print("nirq %d\n", n);
+		for(i = 32; i < n; i += 32){	/* ids 32 and up; ids 0-31 are handled per cpu below */
+			dregs[GICD_IGROUPR0 + (i/32)] = -1;
+
+			dregs[GICD_ISENABLER0 + (i/32)] = -1;	/* enable everything, to report bits that stay off */
+			while(dregs[GICD_CTLR]&(1<<31))
+				;
+print("%d: distributor stuck disabled: %.8ux\n", i, ~dregs[GICD_ISENABLER0 + (i/32)]);
+			dregs[GICD_ICENABLER0 + (i/32)] = -1;	/* disable everything, to report bits that stay on */
+			while(dregs[GICD_CTLR]&(1<<31))
+				;
+print("%d: distributor stuck enabled:  %.8ux\n", i, dregs[GICD_ISENABLER0 + (i/32)]);
+			dregs[GICD_ICACTIVER0 + (i/32)] = -1;
+		}
+		for(i = 0; i < n; i += 4){	/* four ids per priority/target word */
+			dregs[GICD_IPRIORITYR0 + (i/4)] = 0;
+			dregs[GICD_TARGETSR0 + (i/4)] = 0;
+		}
+		for(i = 32; i < n; i += 16){	/* sixteen ids per configuration word */
+			dregs[GICD_ICFGR0 + (i/16)] = 0;
+		}
+		coherence();
+		while(dregs[GICD_CTLR]&(1<<31))
+			;
+		dregs[GICD_CTLR] = (1<<0) | (1<<1) | (1<<4);	/* turn the distributor back on */
+	}
+
+	rregs = getrregs(m->machno);	/* this cpu's re-distributor: ids 0-31 */
+	n = 32;
+	for(i = 0; i < n; i += 32){
+		rregs[GICR_IGROUPR0 + (i/32)] = -1;
+
+		rregs[GICR_ISENABLER0 + (i/32)] = -1;
+		while(rregs[GICR_CTLR]&(1<<3))
+			;
+print("%d: re-distributor stuck disabled: %.8ux\n", i, ~rregs[GICR_ISENABLER0 + (i/32)]);
+		rregs[GICR_ICENABLER0 + (i/32)] = -1;
+		while(rregs[GICR_CTLR]&(1<<3))	/* re-distributor write: poll GICR_CTLR bit 3, not GICD_CTLR */
+			;
+print("%d: re-distributor stuck enabled:  %.8ux\n", i, rregs[GICR_ISENABLER0 + (i/32)]);
+		rregs[GICR_ICACTIVER0 + (i/32)] = -1;
+	}
+	for(i = 0; i < n; i += 4){
+		rregs[GICR_IPRIORITYR0 + (i/4)] = 0;
+	}
+	coherence();
+	while(rregs[GICR_CTLR]&(1<<3))
+		;
+
+	coherence();
+
+	/* enable cpu interface */
+	syswr(ICC_CTLR_EL1, 0);
+	syswr(ICC_BPR1_EL1, 7);
+	syswr(ICC_PMR_EL1, 0xFF);	/* priority mask fully open */
+
+	coherence();
+}
+
+
+/*
+ *  called by trap to handle irq interrupts.
+ *  returns true iff a clock interrupt, thus maybe reschedule.
+ */
+int
+irq(Ureg* ureg)
+{
+	Vctl *v;
+	int clockintr;
+	u32int intid;
+
+	m->intr++;
+	intid = sysrd(ICC_IAR1_EL1) & 0xFFFFFF;	/* acknowledge; keep the low 24 bits as the id */
+// iprint("i<%d>", intid);
+	if((intid & ~3) == 1020)	/* ids 1020-1023 */
+		return 0; // spurious
+	clockintr = 0;
+	for(v = vctl[m->machno][intid%32]; v != nil; v = v->next)	/* every handler registered for this id runs */
+		if(v->intid == intid){
+			coherence();
+			v->f(ureg, v->a);
+			coherence();
+			if(v->irq == IRQcntpns)
+				clockintr = 1;
+		}
+	coherence();
+	syswr(ICC_EOIR1_EL1, intid);	/* end of interrupt, written even if no handler matched */
+	return clockintr;
+}
+
+/*
+ * called direct from lexception.s to handle fiq interrupt.
+ */
+void
+fiq(Ureg *ureg)
+{
+	Vctl *v;
+	u32int intid;
+
+	m->intr++;
+	intid = sysrd(ICC_IAR1_EL1) & 0xFFFFFF;	/* NOTE(review): same group 1 register as irq() — confirm intended for fiq */
+// iprint("f<%d>", intid);
+	if((intid & ~3) == 1020)	/* ids 1020-1023 */
+		return;	// spurious
+	v = vfiq;
+	if(v != nil && v->intid == intid && m->machno == 0){	/* the single fiq handler only ever runs on cpu0 */
+		coherence();
+		v->f(ureg, v->a);
+		coherence();
+	}
+	syswr(ICC_EOIR1_EL1, intid);	/* end of interrupt, written whether or not the handler ran */
+}
+
+void
+intrenable(int irq, void (*f)(Ureg*, void*), void *a, int tbdf, char *)
+{
+	Vctl *v;
+	u32int intid;
+	int cpu, prio;
+
+	if(tbdf != BUSUNKNOWN)	/* any other tbdf is silently ignored */
+		return;
+
+	prio = 0x80;
+	intid = irq;	/* NOTE(review): IRQfiq (-1) wraps to 0xFFFFFFFF and then indexes dregs[] below — confirm no caller uses it */
+	if((v = xalloc(sizeof(Vctl))) == nil)
+		panic("irqenable: no mem");
+	v->irq = irq;
+	v->intid = intid;
+	v->f = f;
+	v->a = a;
+
+	lock(&vctllock);
+	if(intid < SPI)	/* ids below SPI are per-cpu: bind to the calling cpu */
+		cpu = m->machno;
+	else	/* ids from SPI up are always given to cpu0 */
+		cpu = 0;
+	if(irq == IRQfiq){
+		vfiq = v;	/* single slot: replaces any earlier fiq handler */
+		prio = 0;
+	}else{
+		v->next = vctl[cpu][intid%32];	/* push on the front of the bucket that irq() walks */
+		vctl[cpu][intid%32] = v;
+	}
+	syswr(ICC_IGRPEN1_EL1, sysrd(ICC_IGRPEN1_EL1)|1);	/* set bit 0 of the group 1 enable on this cpu */
+	coherence();
+
+	syswr(ICC_EOIR1_EL1, intid);
+	coherence();
+
+	/* setup */
+	if(intid < 32){	/* ids 0-31 are programmed in the cpu's re-distributor */
+		u32int *rregs = getrregs(cpu);
+		rregs[GICR_IPRIORITYR0 + (intid/4)] |= prio << ((intid%4) << 3);	/* one priority byte per id */
+		coherence();
+		rregs[GICR_ISENABLER0] = 1 << (intid%32);
+		coherence();
+		while(rregs[GICR_CTLR]&(1<<3))	/* wait for the re-distributor write to finish */
+			;
+	} else {	/* everything else goes through the distributor */
+		dregs[GICD_IPRIORITYR0 + (intid/4)] |= prio << ((intid%4) << 3);
+		dregs[GICD_TARGETSR0 + (intid/4)] |= (1<<cpu) << ((intid%4) << 3);	/* one target byte per id */
+		coherence();
+		dregs[GICD_ISENABLER0 + (intid/32)] = 1 << (intid%32);
+		coherence();
+		while(dregs[GICD_CTLR]&(1<<31))	/* wait for the distributor write to finish */
+			;
+	}
+	unlock(&vctllock);
+}
+
+void
+intrdisable(int, void (*)(Ureg*, void*), void *, int, char*)
+{	/* stub: does nothing, so enabled handlers stay installed */
+}
--- /dev/null
+++ b/sys/src/9/imx8/init9.s
@@ -1,0 +1,4 @@
+TEXT main(SB), 1, $8
+	MOV	$setSB(SB), R28		/* load the SB */
+	MOV	$boot(SB), R0	/* R0 = address of boot */
+	B	startboot(SB)	/* plain branch, not a call */
--- /dev/null
+++ b/sys/src/9/imx8/io.h
@@ -1,0 +1,31 @@
+enum {
+	IRQfiq		= -1,	/* pseudo number: intrenable() stores this handler in vfiq */
+
+	PPI		= 16,	/* first private peripheral interrupt id */
+	SPI		= 32,	/* first shared peripheral interrupt id */
+
+	IRQcntps	= PPI+13,
+	IRQcntpns	= PPI+14,	/* irq() reports this one as the clock interrupt */
+
+	IRQuart1	= SPI+26,
+	IRQuart2	= SPI+27,
+	IRQuart3	= SPI+28,
+	IRQuart4	= SPI+29,
+
+	IRQi2c1		= SPI+35,
+	IRQi2c2		= SPI+36,
+	IRQi2c3		= SPI+37,
+	IRQi2c4		= SPI+38,
+
+	IRQrdc		= SPI+39,
+
+	IRQusb1		= SPI+40,
+	IRQusb2		= SPI+41,
+
+	IRQsctr0	= SPI+47,
+	IRQsctr1	= SPI+48,
+
+	IRQenet1	= SPI+118,
+};
+
+#define BUSUNKNOWN (-1)
--- /dev/null
+++ b/sys/src/9/imx8/l.s
@@ -1,0 +1,681 @@
+#include "mem.h"
+#include "sysreg.h"
+
+#undef	SYSREG
+#define	SYSREG(op0,op1,Cn,Cm,op2)	SPR(((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5))
+
+TEXT _start(SB), 1, $-4
+	MOV	R0, R26		/* save */
+
+	MOV	$setSB-KZERO(SB), R28	/* static base as a KZERO-relative (physical) address */
+	BL	svcmode<>(SB)	/* mask exceptions; drop from EL2 to EL1 if needed */
+
+	/* use dedicated stack pointer per exception level */
+	MOVWU	$1, R1
+	MSR	R1, SPSel
+
+	BL	mmudisable<>(SB)
+
+	/* invalidate local caches */
+	BL	cachedwbinv(SB)
+	BL	cacheiinv(SB)
+
+	MOV	$(MACHADDR(0)-KZERO), R27	/* R27 = physical address of Mach 0 */
+	MRS	MPIDR_EL1, R1
+	ANDW	$(MAXMACH-1), R1	/* cpu number = low bits of MPIDR_EL1 */
+	MOVWU	$MACHSIZE, R2
+	MULW	R1, R2, R2
+	SUB	R2, R27	/* Mach n lies n*MACHSIZE below Mach 0 */
+
+	ADD	$(MACHSIZE-16), R27, R2	/* boot stack starts 16 bytes below the top of this Mach */
+	MOV	R2, SP
+
+	CBNZ	R1, _startup	/* cpus other than 0 skip the one-time setup */
+
+	/* clear page table and machs */
+	MOV	$(L1-KZERO), R1
+	MOV	$(MACHADDR(-1)-KZERO), R2	/* MACHADDR(-1) is KTZERO, the end of the Mach area */
+_zerol1:
+	MOV	ZR, (R1)8!
+	CMP	R1, R2
+	BNE	_zerol1
+
+	/* clear BSS */
+	MOV	$edata-KZERO(SB), R1
+	MOV	$end-KZERO(SB), R2
+_zerobss:
+	MOV	ZR, (R1)8!
+	CMP	R1, R2
+	BNE	_zerobss
+
+	/* setup page tables */
+	MOV	$(L1-KZERO), R0
+	BL	mmu0init(SB)
+
+	SEVL	/* cpu0 sets its local event so the WFE below falls through */
+_startup:
+	WFE	/* other cpus wait here for an event */
+	BL	mmuenable<>(SB)
+
+	MOV	R26, R0	/* restore the R0 value saved on entry */
+	MOV	$0, R26
+	ORR	$KZERO, R27	/* Mach pointer, now a virtual address */
+	MSR	R27, TPIDR_EL1	/* the trap entry code reloads R27 from here */
+	MOV	$setSB(SB), R28
+
+	BL	main(SB)	/* if main returns, execution falls into stop<> below */
+
+TEXT	stop<>(SB), 1, $-4
+_stop:
+	WFE	/* park this cpu forever, waking only to loop again */
+	B	_stop
+
+TEXT	aaa<>(SB), 1, $-4
+xxx:
+	MOV $(0x860040+VIRTIO), R1	/* NOTE(review): presumably the uart1 transmit register — debug aid, confirm */
+	MOVW $'A', R2
+	MOVW R2, (R1)	/* store 'A' there, endlessly */
+	B xxx
+
+TEXT sev(SB), 1, $-4
+	SEV	/* signal an event to all cpus */
+	WFE	/* then consume the event on this cpu */
+	RETURN
+
+TEXT svcmode<>(SB), 1, $-4
+	MSR	$0xF, DAIFSet	/* mask D, A, I and F */
+	MRS	CurrentEL, R0
+	ANDW	$(3<<2), R0	/* exception level is in bits 3:2 */
+	CMPW	$(1<<2), R0
+	BEQ	el1	/* already at EL1: nothing to do */
+	CMPW	$(2<<2), R0
+	BEQ	el2
+	B	stop<>(SB)	/* any other level is not handled */
+el2:
+	MOV	$0, R0
+	MSR	R0, MDCR_EL2
+	ISB	$SY
+
+	/* HCR = RW, HCD, SWIO, BSU, FB */
+	MOVWU	$(1<<31 | 1<<29 | 1<<2 | 0<<10 | 0<<9), R0
+	MSR	R0, HCR_EL2
+	ISB	$SY
+
+	/* SCTLR = RES1 */
+	MOVWU	$(3<<4 | 1<<11 | 1<<16 | 1<<18 | 3<<22 | 3<<28), R0
+	ISB	$SY
+	MSR	R0, SCTLR_EL2
+	ISB	$SY
+
+	/* set VMID to zero */
+	MOV	$0, R0
+	MSR	R0, VTTBR_EL2
+	ISB	$SY
+
+	MOVWU	$(0xF<<6 | 4), R0	/* target state: DAIF all masked, mode field 4 (EL1) */
+	MSR	R0, SPSR_EL2
+	MSR	LR, ELR_EL2	/* ERET resumes at our caller's return address */
+	ERET
+el1:
+	RETURN
+
+TEXT mmudisable<>(SB), 1, $-4
+#define SCTLRCLR \
+	/* RES0 */	( 3<<30 \
+	/* RES0 */	| 1<<27 \
+	/* UCI */	| 1<<26 \
+	/* EE */	| 1<<25 \
+	/* RES0 */	| 1<<21 \
+	/* E0E */	| 1<<24 \
+	/* WXN */	| 1<<19 \
+	/* nTWE */	| 1<<18 \
+	/* RES0 */	| 1<<17 \
+	/* nTWI */	| 1<<16 \
+	/* UCT */	| 1<<15 \
+	/* DZE */	| 1<<14 \
+	/* RES0 */	| 1<<13 \
+	/* RES0 */	| 1<<10 \
+	/* UMA */	| 1<<9 \
+	/* SA0 */	| 1<<4 \
+	/* SA */	| 1<<3 \
+	/* A */		| 1<<1 )
+#define SCTLRSET \
+	/* RES1 */	( 3<<28 \
+	/* RES1 */	| 3<<22 \
+	/* RES1 */	| 1<<20 \
+	/* RES1 */	| 1<<11 )
+#define SCTLRMMU \
+	/* I */		( 1<<12 \
+	/* C */		| 1<<2 \
+	/* M */		| 1<<0 )
+
+	/* initialise SCTLR, MMU and caches off */
+	ISB	$SY
+	MRS	SCTLR_EL1, R0
+	BIC	$(SCTLRCLR | SCTLRMMU), R0	/* clear the listed control bits plus I, C and M */
+	ORR	$SCTLRSET, R0	/* set the RES1 bits */
+	ISB	$SY
+	MSR	R0, SCTLR_EL1
+	ISB	$SY
+
+	B	flushlocaltlb(SB)	/* tail branch: its RETURN goes back to our caller */
+
+TEXT mmuenable<>(SB), 1, $-4
+	/* return to virtual */
+	ORR	$KZERO, LR
+	MOV	LR, -16(RSP)!	/* push it: the BL below overwrites LR */
+
+	BL	flushlocaltlb(SB)
+
+	/* memory attributes */
+#define MAIRINIT \
+	( 0xFF << MA_MEM_WB*8 \
+	| 0x33 << MA_MEM_WT*8 \
+	| 0x44 << MA_MEM_UC*8 \
+	| 0x00 << MA_DEV_nGnRnE*8 \
+	| 0x04 << MA_DEV_nGnRE*8 \
+	| 0x08 << MA_DEV_nGRE*8 \
+	| 0x0C << MA_DEV_GRE*8 )
+	MOV	$MAIRINIT, R1
+	MSR	R1, MAIR_EL1
+	ISB	$SY
+
+	/* translation control */
+#define TCRINIT \
+	/* TBI1 */	( 0<<38 \
+	/* TBI0 */	| 0<<37 \
+	/* AS */	| 0<<36 \
+	/* TG1 */	| (((3<<16|1<<14|2<<12)>>PGSHIFT)&3)<<30 \
+	/* SH1 */	| SHARE_INNER<<28 \
+	/* ORGN1 */	| CACHE_WB<<26 \
+	/* IRGN1 */	| CACHE_WB<<24 \
+	/* EPD1 */	| 0<<23 \
+	/* A1 */	| 0<<22 \
+	/* T1SZ */	| (64-EVASHIFT)<<16 \
+	/* TG0 */	| (((1<<16|2<<14|0<<12)>>PGSHIFT)&3)<<14 \
+	/* SH0 */	| SHARE_INNER<<12 \
+	/* ORGN0 */	| CACHE_WB<<10 \
+	/* IRGN0 */	| CACHE_WB<<8 \
+	/* EPD0 */	| 0<<7 \
+	/* T0SZ */	| (64-EVASHIFT)<<0 )
+	MOV	$TCRINIT, R1
+	MRS	ID_AA64MMFR0_EL1, R2
+	ANDW	$0x7, R2	// PARange
+	ADD	R2<<32, R1	// IPS
+	MSR	R1, TCR_EL1
+	ISB	$SY
+
+	/* load the page tables */
+	MOV	$(L1TOP-KZERO), R0
+	ISB	$SY
+	MSR	R0, TTBR0_EL1	/* the same table is loaded into both base registers */
+	MSR	R0, TTBR1_EL1
+	ISB	$SY
+
+	/* enable MMU and caches */
+	MRS	SCTLR_EL1, R1
+	ORR	$SCTLRMMU, R1	/* SCTLRMMU (I, C, M) is defined in mmudisable above */
+	ISB	$SY
+	MSR	R1, SCTLR_EL1
+	ISB	$SY
+
+	MOV	RSP, R1
+	ORR	$KZERO, R1
+	MOV	R1, RSP	/* the stack pointer becomes virtual too */
+	MOV	(RSP)16!, LR	/* pop the virtual return address pushed at entry */
+	B	cacheiinv(SB)	/* tail branch: returns to our caller at its virtual address */
+
+TEXT touser(SB), 1, $-4
+	MSR	$0x3, DAIFSet	// interrupts off
+	MOVWU	$0x10028, R1	// entry
+	MOVWU	$0, R2		// psr
+	MSR	R0, SP_EL0	// sp
+	MSR	R1, ELR_EL1
+	MSR	R2, SPSR_EL1
+	ERET	// resume at the entry address with psr 0; does not return
+
+TEXT cas(SB), 1, $-4
+TEXT cmpswap(SB), 1, $-4	/* two names for the same body */
+	MOVWU	ov+8(FP), R1
+	MOVWU	nv+16(FP), R2
+_cas1:
+	LDXRW	(R0), R3	/* exclusive load of the current 32-bit value */
+	CMP	R3, R1
+	BNE	_cas0	/* differs from ov: fail without storing */
+	STXRW	R2, (R0), R4	/* try to store nv; R4 is nonzero if the store did not happen */
+	CBNZ	R4, _cas1	/* store failed: retry from the load */
+	MOVW	$1, R0	/* success */
+	DMB	$ISH
+	RETURN
+_cas0:
+	CLREX	/* drop the exclusive reservation */
+	MOVW	$0, R0	/* failure */
+	RETURN
+
+TEXT tas(SB), 1, $-4
+TEXT _tas(SB), 1, $-4	/* two names for the same body */
+	MOVW	$0xdeaddead, R2	/* value stored to mark the word as taken */
+_tas1:
+	LDXRW	(R0), R1	/* old value, returned to the caller */
+	STXRW	R2, (R0), R3
+	CBNZ	R3, _tas1	/* exclusive store failed: retry */
+	MOVW	R1, R0	/* no RETURN here: falls through into coherence below */
+
+TEXT coherence(SB), 1, $-4
+	DMB	$ISH
+	RETURN
+
+TEXT islo(SB), 1, $-4
+	MRS	DAIF, R0
+	AND	$(0x2<<6), R0	/* keep only bit 7, the irq mask bit */
+	EOR	$(0x2<<6), R0	/* invert it: result is nonzero when irqs are not masked */
+	RETURN
+
+TEXT splhi(SB), 1, $-4
+	MRS	DAIF, R0	/* return the previous mask, for splx() */
+	MSR	$0x2, DAIFSet	/* mask irq */
+	RETURN
+
+TEXT splfhi(SB), 1, $-4
+	MRS	DAIF, R0	/* return the previous mask, for splx() */
+	MSR	$0x3, DAIFSet	/* mask irq and fiq */
+	RETURN
+
+TEXT spllo(SB), 1, $-4
+	MSR	$0x3, DAIFClr	/* unmask irq and fiq; DAIF is not read first, so R0 is left as passed in */
+	RETURN
+
+TEXT splflo(SB), 1, $-4
+	MSR	$0x1, DAIFClr	/* unmask fiq only */
+	RETURN
+
+TEXT splx(SB), 1, $-4
+	MSR	R0, DAIF	/* restore a mask value previously returned by splhi()/splfhi() */
+	RETURN
+
+TEXT idlehands(SB), 1, $-4
+	DMB	$ISH
+	MOV	$nrdy(SB), R1
+	LDXRW	(R1), R0	/* exclusive load of nrdy */
+	CBZ	R0, _goodnight	/* nrdy is zero: wait for an event */
+	CLREX
+	SEVL	/* nrdy is nonzero: set the local event so the WFE returns at once */
+_goodnight:
+	WFE
+	RETURN
+
+TEXT vcycles(SB), 1, $-4
+	MRS	CNTVCT_EL0, R0	/* return the virtual counter value */
+	RETURN
+
+TEXT lcycles(SB), 1, $-4
+	MRS	PMCCNTR_EL0, R0	/* return the performance monitor cycle counter */
+	RETURN
+
+TEXT setlabel(SB), 1, $-4
+	MOV	LR, 8(R0)	/* second word of the label: return address */
+	MOV	SP, R1
+	MOV	R1, 0(R0)	/* first word of the label: stack pointer */
+	MOVW	$0, R0	/* direct return yields 0 */
+	RETURN
+
+TEXT gotolabel(SB), 1, $-4
+	MOV	8(R0), LR	/* link */
+	MOV	0(R0), R1	/* sp */
+	MOV	R1, SP
+	MOVW	$1, R0	/* returns 1 at the place setlabel() was called */
+	RETURN
+
+TEXT returnto(SB), 1, $-4
+	MOV	R0, 0(SP)	/* NOTE(review): presumably overwrites the caller's saved return address — confirm */
+	RETURN
+
+TEXT getfar(SB), 1, $-4
+	MRS	FAR_EL1, R0	/* return the fault address register */
+	RETURN
+
+TEXT setttbr(SB), 1, $-4
+	DSB	$ISHST	/* complete earlier stores before switching tables */
+	MSR	R0, TTBR0_EL1	/* only TTBR0 is changed; TTBR1 is left alone */
+	DSB	$ISH
+	ISB	$SY
+	RETURN
+
+/*
+ * TLB maintenance operations.
+ * these broadcast to all cpus in the cluster
+ * (inner shareable domain).
+ */
+TEXT flushasidva(SB), 1, $-4
+TEXT tlbivae1is(SB), 1, $-4	/* two names for the same body */
+	DSB	$ISHST	/* complete earlier stores before the invalidate */
+	TLBI	R0, 0,8,3,1	/* VAE1IS */
+	DSB	$ISH	/* wait for the invalidate to complete */
+	ISB	$SY
+	RETURN
+
+TEXT flushasidvall(SB), 1, $-4
+TEXT tlbivale1is(SB), 1, $-4	/* two names for the same body */
+	DSB	$ISHST	/* complete earlier stores before the invalidate */
+	TLBI	R0, 0,8,3,5	/* VALE1IS */
+	DSB	$ISH	/* wait for the invalidate to complete */
+	ISB	$SY
+	RETURN
+
+TEXT flushasid(SB), 1, $-4
+TEXT tlbiaside1is(SB), 1, $-4	/* two names for the same body */
+	DSB	$ISHST	/* complete earlier stores before the invalidate */
+	TLBI	R0, 0,8,3,2	/* ASIDE1IS */
+	DSB	$ISH	/* wait for the invalidate to complete */
+	ISB	$SY
+	RETURN
+
+TEXT flushtlb(SB), 1, $-4
+TEXT tlbivmalle1is(SB), 1, $-4	/* two names for the same body */
+	DSB	$ISHST	/* complete earlier stores before the invalidate */
+	TLBI	R0, 0,8,3,0	/* VMALLE1IS */
+	DSB	$ISH	/* wait for the invalidate to complete */
+	ISB	$SY
+	RETURN
+
+/*
+ * flush the tlb of this cpu. no broadcast.
+ */
+TEXT flushlocaltlb(SB), 1, $-4
+TEXT tlbivmalle1(SB), 1, $-4	/* two names for the same body */
+	DSB	$NSHST	/* non-shareable barriers: only this cpu is involved */
+	TLBI	R0, 0,8,7,0	/* VMALLE1 */
+	DSB	$NSH
+	ISB	$SY
+	RETURN
+
+TEXT fpsaveregs(SB), 1, $-4	/* hand-encoded stores; R0 advances 64 bytes per WORD, eight WORDs in all */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 0)  /* MOV { V0, V1, V2, V3  }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 4)  /* MOV { V4, V5, V6, V7  }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 8)  /* MOV { V8, V9, V10,V11 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 12) /* MOV { V12,V13,V14,V15 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 16) /* MOV { V16,V17,V18,V19 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 20) /* MOV { V20,V21,V22,V23 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 24) /* MOV { V24,V25,V26,V27 }, (R0)64! */
+	WORD	$(1<<30 | 3 << 26 | 2<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 28) /* MOV { V28,V29,V30,V31 }, (R0)64! */
+	RETURN
+
+TEXT fploadregs(SB), 1, $-4	/* hand-encoded loads; R0 advances 64 bytes per WORD, eight WORDs in all */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 0)  /* MOV (R0)64!, { V0, V1, V2, V3  } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 4)  /* MOV (R0)64!, { V4, V5, V6, V7  } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 8)  /* MOV (R0)64!, { V8, V9, V10,V11 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 12) /* MOV (R0)64!, { V12,V13,V14,V15 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 16) /* MOV (R0)64!, { V16,V17,V18,V19 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 20) /* MOV (R0)64!, { V20,V21,V22,V23 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 24) /* MOV (R0)64!, { V24,V25,V26,V27 } */
+	WORD	$(1<<30 | 3 << 26 | 3<<22 | 0x1F<<16 | 3<<10 | 0<<5 | 28) /* MOV (R0)64!, { V28,V29,V30,V31 } */
+	RETURN
+
+// syscall or trap from EL0
+TEXT vsys0(SB), 1, $-4
+	LSRW	$26, R0, R17	// ec
+	CMPW	$0x15, R17	// SVC trap?
+	BNE	_itsatrap	// nope.
+
+	MOVP	R26, R27, 224(RSP)	// save R26-R29 before they are reloaded below
+	MOVP	R28, R29, 240(RSP)
+
+	MRS	SP_EL0, R1
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	MOV	$setSB(SB), R28
+	MRS	TPIDR_EL1, R27	// Mach pointer stored there by _start
+	MOV	16(R27), R26	// presumably the current Proc (up) — confirm against dat.h
+
+	ADD	$16, RSP, R0	// ureg
+	BL	syscall(SB)	// on return, execution falls through into forkret
+
+TEXT forkret(SB), 1, $-4
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	ADD	$16, RSP, R0	// ureg
+
+	MOV	16(RSP), R0	// ret
+	MOV	264(RSP), R1	// sp
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R1, SP_EL0
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOVP	224(RSP), R26, R27
+	MOVP	240(RSP), R28, R29
+
+	MOV	256(RSP), R30	// link
+
+	ADD	$TRAPFRAMESIZE, RSP	// pop the trap frame
+	ERET
+
+TEXT itsatrap<>(SB), 1, $-4
+_itsatrap:
+	MOVP	R1, R2, 24(RSP)	// vsys saved only R0, R17 and R30; store the rest up to R25
+	MOVP	R3, R4, 40(RSP)
+	MOVP	R5, R6, 56(RSP)
+	MOVP	R7, R8, 72(RSP)
+	MOVP	R9, R10, 88(RSP)
+	MOVP	R11, R12, 104(RSP)
+	MOVP	R13, R14, 120(RSP)
+	MOVP	R15, R16, 136(RSP)
+
+	MOVP	R18, R19, 160(RSP)
+	MOVP	R20, R21, 176(RSP)
+	MOVP	R22, R23, 192(RSP)
+	MOVP	R24, R25, 208(RSP)	// no RETURN: falls through into vtrap0
+
+// trap/irq/fiq/serr from EL0
+TEXT vtrap0(SB), 1, $-4
+	MOVP	R26, R27, 224(RSP)
+	MOVP	R28, R29, 240(RSP)
+
+	MRS	SP_EL0, R1
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	MOV	$setSB(SB), R28
+	MRS	TPIDR_EL1, R27	// Mach pointer stored there by _start
+	MOV	16(R27), R26	// presumably the current Proc (up) — confirm against dat.h
+
+	ADD	$16, RSP, R0	// ureg
+	BL	trap(SB)	// on return, execution falls through into noteret
+
+TEXT noteret(SB), 1, $-4
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	ADD	$16, RSP, R0	// ureg
+
+	MOV	264(RSP), R1	// sp
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R1, SP_EL0
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOVP	224(RSP), R26, R27
+	MOVP	240(RSP), R28, R29
+
+_intrreturn:
+	MOVP	16(RSP), R0, R1	// reload R0-R25 from the trap frame; vtrap1 also branches here
+	MOVP	32(RSP), R2, R3
+	MOVP	48(RSP), R4, R5
+	MOVP	64(RSP), R6, R7
+	MOVP	80(RSP), R8, R9
+	MOVP	96(RSP), R10, R11
+	MOVP	112(RSP), R12, R13
+	MOVP	128(RSP), R14, R15
+	MOVP	144(RSP), R16, R17
+	MOVP	160(RSP), R18, R19
+	MOVP	176(RSP), R20, R21
+	MOVP	192(RSP), R22, R23
+	MOVP	208(RSP), R24, R25
+
+	MOV	256(RSP), R30	// link
+
+	ADD	$TRAPFRAMESIZE, RSP	// pop the trap frame
+	ERET
+
+// irq/fiq/trap/serr from EL1
+TEXT vtrap1(SB), 1, $-4
+	MOV	R29, 248(RSP)	// special
+
+	ADD	$TRAPFRAMESIZE, RSP, R1	// stack pointer value from before the frame was pushed
+	MRS	ELR_EL1, R2
+	MRS	SPSR_EL1, R3
+
+	MOV	R0, 288(RSP)	// type
+	MOV	R1, 264(RSP)	// sp
+	MOV	R2, 272(RSP)	// pc
+	MOV	R3, 280(RSP)	// psr
+
+	ADD	$16, RSP, R0	// ureg
+	BL	trap(SB)
+
+	MSR	$0x3, DAIFSet	// interrupts off
+
+	MOV	272(RSP), R2	// pc
+	MOV	280(RSP), R3	// psr
+
+	MSR	R2, ELR_EL1
+	MSR	R3, SPSR_EL1
+
+	MOV	248(RSP), R29	// special
+	B	_intrreturn	// shared register reload and ERET in noteret
+
+// vector tables
+TEXT vsys(SB), 1, $-4
+	SUB	$TRAPFRAMESIZE, RSP	// make room for the trap frame
+
+	MOV	R0, 16(RSP)	// only R0, R30 and R17 are saved here
+	MOV	R30, 256(RSP)	// link
+
+	MOV	R17, 152(RSP)	// temp
+
+	MRS	ESR_EL1, R0	// type
+
+_vsyspatch:
+	B	_vsyspatch	// branch to vsys0() patched in
+
+TEXT vtrap(SB), 1, $-4
+	SUB	$TRAPFRAMESIZE, RSP	// make room for the trap frame
+
+	MOVP	R0, R1, 16(RSP)	// save R0-R25 into the frame
+	MOVP	R2, R3, 32(RSP)
+	MOVP	R4, R5, 48(RSP)
+	MOVP	R6, R7, 64(RSP)
+	MOVP	R8, R9, 80(RSP)
+	MOVP	R10, R11, 96(RSP)
+	MOVP	R12, R13, 112(RSP)
+	MOVP	R14, R15, 128(RSP)
+	MOVP	R16, R17, 144(RSP)
+	MOVP	R18, R19, 160(RSP)
+	MOVP	R20, R21, 176(RSP)
+	MOVP	R22, R23, 192(RSP)
+	MOVP	R24, R25, 208(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MRS	ESR_EL1, R0	// type
+
+_vtrappatch:
+	B	_vtrappatch	// branch to vtrapX() patched in
+
+TEXT virq(SB), 1, $-4
+	SUB	$TRAPFRAMESIZE, RSP	// make room for the trap frame
+
+	MOVP	R0, R1, 16(RSP)	// save R0-R25 into the frame
+	MOVP	R2, R3, 32(RSP)
+	MOVP	R4, R5, 48(RSP)
+	MOVP	R6, R7, 64(RSP)
+	MOVP	R8, R9, 80(RSP)
+	MOVP	R10, R11, 96(RSP)
+	MOVP	R12, R13, 112(RSP)
+	MOVP	R14, R15, 128(RSP)
+	MOVP	R16, R17, 144(RSP)
+	MOVP	R18, R19, 160(RSP)
+	MOVP	R20, R21, 176(RSP)
+	MOVP	R22, R23, 192(RSP)
+	MOVP	R24, R25, 208(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MOV	$(1<<32), R0	// type irq
+
+_virqpatch:
+	B	_virqpatch	// branch to vtrapX() patched in
+
+TEXT vfiq(SB), 1, $-4
+	SUB	$TRAPFRAMESIZE, RSP	// make room for the trap frame
+
+	MOVP	R0, R1, 16(RSP)	// save R0-R25 into the frame
+	MOVP	R2, R3, 32(RSP)
+	MOVP	R4, R5, 48(RSP)
+	MOVP	R6, R7, 64(RSP)
+	MOVP	R8, R9, 80(RSP)
+	MOVP	R10, R11, 96(RSP)
+	MOVP	R12, R13, 112(RSP)
+	MOVP	R14, R15, 128(RSP)
+	MOVP	R16, R17, 144(RSP)
+	MOVP	R18, R19, 160(RSP)
+	MOVP	R20, R21, 176(RSP)
+	MOVP	R22, R23, 192(RSP)
+	MOVP	R24, R25, 208(RSP)
+
+	MOV	R30, 256(RSP)	// link
+	MOV	$(2<<32), R0	// type fiq
+
+_vfiqpatch:
+	B	_vfiqpatch	// branch to vtrapX() patched in
+
+TEXT vserr(SB), 1, $-4
+	SUB	$TRAPFRAMESIZE, RSP	// make room for the trap frame
+
+	MOVP	R0, R1, 16(RSP)	// save R0-R25 into the frame
+	MOVP	R2, R3, 32(RSP)
+	MOVP	R4, R5, 48(RSP)
+	MOVP	R6, R7, 64(RSP)
+	MOVP	R8, R9, 80(RSP)
+	MOVP	R10, R11, 96(RSP)
+	MOVP	R12, R13, 112(RSP)
+	MOVP	R14, R15, 128(RSP)
+	MOVP	R16, R17, 144(RSP)
+	MOVP	R18, R19, 160(RSP)
+	MOVP	R20, R21, 176(RSP)
+	MOVP	R22, R23, 192(RSP)
+	MOVP	R24, R25, 208(RSP)
+
+	MOV	R30, 256(RSP)	// link
+
+	MRS	ESR_EL1, R0	// syndrome in the low bits
+	ORR	$(3<<32), R0	// type
+_vserrpatch:
+	B	_vserrpatch	// branch to vtrapX() patched in
+
+/* fault-proof memcpy */
+TEXT peek(SB), 1, $-4
+	MOV	R0, R1	/* R1 = source pointer (first argument) */
+	MOV	dst+8(FP), R2
+	MOVWU	len+16(FP), R0	/* R0 = bytes left; it is 0 when the loop finishes */
+TEXT _peekinst(SB), 1, $-4	/* names the copy loop; presumably used by the trap code to spot faults here — confirm */
+_peekloop:
+	MOVBU	(R1)1!, R3
+	MOVBU	R3, (R2)1!
+	SUBS	$1, R0
+	BNE	_peekloop	/* NOTE(review): a len of 0 would wrap and keep copying — confirm callers never pass 0 */
+	RETURN
--- /dev/null
+++ b/sys/src/9/imx8/main.c
@@ -1,0 +1,218 @@
+#include "u.h"
+#include "tos.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "pool.h"
+#include "io.h"
+#include "sysreg.h"
+
+Conf conf;
+
+/*
+ *  starting place for first process
+ */
+void
+init0(void)
+{
+	char **sp;
+
+	chandevinit();
+
+	if(!waserror()){	/* environment setup is best effort: an error just skips the rest */
+		ksetenv("cputype", "arm64", 0);
+		if(cpuserver)
+			ksetenv("service", "cpu", 0);
+		else
+			ksetenv("service", "terminal", 0);
+		ksetenv("console", "0", 0);
+		poperror();
+	}
+	kproc("alarm", alarmkproc, 0);
+	sp = (char**)(USTKTOP-sizeof(Tos) - 8 - sizeof(sp[0])*4);	/* four pointer slots plus 8 bytes, below Tos */
+	sp[3] = sp[2] = sp[1] = nil;
+	strcpy(sp[1] = (char*)&sp[4], "boot");	/* the string goes in the 8 bytes after the slots */
+	sp[0] = (void*)&sp[1];	/* sp[0] points at the slot holding the string pointer */
+	touser((uintptr)sp);	/* enter user mode with this stack pointer */
+}
+
+void
+confinit(void)
+{
+	int userpcnt;
+	ulong kpages;
+	char *p;
+	int i;
+
+	conf.nmach = 1;	/* single cpu for now; mpinit() is commented out in main() */
+
+	if(p = getconf("service")){	/* getconf() in this file always returns nil, so this is skipped */
+		if(strcmp(p, "cpu") == 0)
+			cpuserver = 1;
+		else if(strcmp(p,"terminal") == 0)
+			cpuserver = 0;
+	}
+
+	if(p = getconf("*kernelpercent"))
+		userpcnt = 100 - strtol(p, 0, 0);
+	else
+		userpcnt = 0;
+
+	if(userpcnt < 10)	/* unset or too small: default to 60%, or 70% on a cpu server */
+		userpcnt = 60 + cpuserver*10;
+
+	conf.npage = 0;
+	for(i = 0; i < nelem(conf.mem); i++)	/* total pages over all memory banks */
+		conf.npage += conf.mem[i].npage;
+
+	kpages = conf.npage - (conf.npage*userpcnt)/100;
+	if(kpages > ((uintptr)-VDRAM)/BY2PG)	/* cap at the size of the VDRAM window */
+		kpages = ((uintptr)-VDRAM)/BY2PG;
+
+	conf.upages = conf.npage - kpages;
+	conf.ialloc = (kpages/2)*BY2PG;
+
+	/* set up other configuration parameters */
+	conf.nproc = 100 + ((conf.npage*BY2PG)/MB)*5;
+	if(cpuserver)
+		conf.nproc *= 3;
+	if(conf.nproc > 2000)
+		conf.nproc = 2000;
+	conf.nswap = conf.npage*3;
+	conf.nswppo = 4096;
+	conf.nimage = 200;
+
+	conf.copymode = conf.nmach > 1;	/* 0 here, since nmach is 1 */
+
+	/*
+	 * Guess how much is taken by the large permanent
+	 * datastructures. Mntcache and Mntrpc are not accounted for.
+	 */
+	kpages = conf.npage - conf.upages;
+	kpages *= BY2PG;	/* from here on kpages is a byte count */
+	kpages -= conf.upages*sizeof(Page)
+		+ conf.nproc*sizeof(Proc)
+		+ conf.nimage*sizeof(Image)
+		+ conf.nswap
+		+ conf.nswppo*sizeof(Page*);
+	mainmem->maxsize = kpages;
+	if(!cpuserver)
+		/*
+		 * give terminals lots of image memory, too; the dynamic
+		 * allocation will balance the load properly, hopefully.
+		 * be careful with 32-bit overflow.
+		 */
+		imagmem->maxsize = kpages;
+
+}
+
+void
+machinit(void)
+{
+	m->ticks = 1;
+	m->perf.period = 1;
+	active.machs[m->machno] = 1;	/* mark this cpu as active */
+}
+
+void
+main(void)
+{
+	machinit();
+	if(m->machno){	/* any cpu other than 0: per-cpu setup only, then schedule */
+		trapinit();
+		fpuinit();
+		intrinit();
+		clockinit();
+		// cpuidprint();
+		synccycles();
+		timersinit();
+		flushtlb();
+		mmu1init();
+		m->ticks = MACHP(0)->ticks;	/* take over cpu0's tick count */
+		schedinit();
+		return;
+	}
+	quotefmtinstall();	/* cpu0: full system bring-up from here on */
+	meminit();
+	confinit();
+	xinit();
+	uartconsinit();
+	printinit();
+	print("\nPlan 9\n");
+	trapinit();
+	fpuinit();
+	intrinit();
+	clockinit();
+	timersinit();
+	pageinit();
+	procinit0();
+	initseg();
+	links();
+	chandevreset();
+	userinit();
+//	mpinit();
+	mmu0clear((uintptr*)L1);
+	flushtlb();
+	mmu1init();
+	schedinit();
+}
+
+void
+exit(int)
+{
+	cpushutdown();
+	splfhi();	/* mask irq and fiq */
+	for(;;);	/* spin forever: there is no reset or power-off here */
+}
+
+int
+isaconfig(char *, int, ISAConf *)
+{
+	return 0;	/* stub: always returns 0 and ignores its arguments */
+}
+
+char*
+getconf(char *)
+{
+	return nil;	/* stub: no configuration variables are available yet */
+}
+
+void
+writeconf(void)
+{	/* stub: does nothing */
+}
+
+void
+reboot(void *, void *, ulong)
+{	/* stub: does nothing and simply returns */
+}
+
+void
+dmaflush(int clean, void *p, ulong len)
+{
+	uintptr s = (uintptr)p;
+	uintptr e = (uintptr)p + len;
+
+	if(clean){	/* clean: write back every BLOCKALIGN block overlapping [p, p+len) */
+		s &= ~(BLOCKALIGN-1);
+		e += BLOCKALIGN-1;
+		e &= ~(BLOCKALIGN-1);
+		cachedwbse((void*)s, e - s);
+		return;
+	}
+	if(s & BLOCKALIGN-1){	/* partial first block: write back and invalidate it */
+		s &= ~(BLOCKALIGN-1);
+		cachedwbinvse((void*)s, BLOCKALIGN);
+		s += BLOCKALIGN;
+	}
+	if(e & BLOCKALIGN-1){	/* partial last block: same treatment */
+		e &= ~(BLOCKALIGN-1);
+		if(e < s)	/* start and end were in the same block, already done */
+			return;
+		cachedwbinvse((void*)e, BLOCKALIGN);
+	}
+	if(s < e)	/* whole blocks in between are invalidated without write-back */
+		cachedinvse((void*)s, e - s);
+}
--- /dev/null
+++ b/sys/src/9/imx8/mem.h
@@ -1,0 +1,138 @@
+/*
+ * Memory and machine-specific definitions.  Used in C and assembler.
+ */
+#define KiB		1024u			/* Kibi 0x0000000000000400 */
+#define MiB		1048576u		/* Mebi 0x0000000000100000 */
+#define GiB		1073741824u		/* Gibi 0x0000000040000000 */
+
+/*
+ * Sizes:
+ * 	L0	L1	L2	L3
+ *	4K	2M	1G	512G
+ *	16K	32M	64G	128T
+ *	64K	512M	4T	-
+ */
+#define	PGSHIFT		16		/* log(BY2PG) */
+#define	BY2PG		(1ULL<<PGSHIFT)	/* bytes per page */
+#define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))	/* round s up to a multiple of sz (a power of two) */
+#define	PGROUND(s)	ROUND(s, BY2PG)
+
+/* effective virtual address space */
+#define EVASHIFT	34
+#define EVAMASK		((1ULL<<EVASHIFT)-1)
+
+#define PTSHIFT		(PGSHIFT-3)
+#define PTLEVELS	(((EVASHIFT-PGSHIFT)+PTSHIFT-1)/PTSHIFT)	
+#define PTLX(v, l)	((((v) & EVAMASK) >> (PGSHIFT + (l)*PTSHIFT)) & ((1 << PTSHIFT)-1))
+#define PGLSZ(l)	(1ULL << (PGSHIFT + (l)*PTSHIFT))
+
+#define PTL1X(v, l)	(L1TABLEX(v, l) | PTLX(v, l))
+#define L1TABLEX(v, l)	(L1TABLE(v, l) << PTSHIFT)
+#define L1TABLES	((-KSEG0+PGLSZ(2)-1)/PGLSZ(2))
+#define L1TABLE(v, l)	(L1TABLES - ((PTLX(v, 2) % L1TABLES) >> (((l)-1)*PTSHIFT)) + (l)-1)
+#define L1TOPSIZE	(1ULL << (EVASHIFT - PTLEVELS*PTSHIFT))
+
+#define	MAXMACH		4			/* max # cpus system can run */
+#define	MACHSIZE	(8*KiB)
+
+#define KSTACK		(8*KiB)
+#define STACKALIGN(sp)	((sp) & ~7)		/* bug: assure with alloc */
+#define TRAPFRAMESIZE	(38*8)
+
+/* reserved dram for ucalloc() at the end of KZERO (physical) */
+#define	UCRAMBASE	(-KZERO - UCRAMSIZE)
+#define	UCRAMSIZE	(1*MiB)
+
+#define VDRAM		(0xFFFFFFFFC0000000ULL)	/* 0x40000000 - 0x80000000 */
+#define	KTZERO		(VDRAM + 0x100000)	/* kernel text start */
+
+#define	ARMLOCAL	(0xFFFFFFFFB1000000ULL)	/* 0x31000000 */
+#define	VIRTIO		(0xFFFFFFFFB0000000ULL)	/* 0x30000000 */
+
+#define	KZERO		(0xFFFFFFFF80000000ULL)	/* 0x00000000 - kernel address space */
+
+#define VMAP		(0xFFFFFFFF00000000ULL)	/* 0x00000000 - 0x40000000 */
+
+#define KMAPEND		(0xFFFFFFFF00000000ULL)	/* 0x140000000 */
+#define KMAP		(0xFFFFFFFE00000000ULL)	/* 0x40000000 */
+
+#define KSEG0		(0xFFFFFFFE00000000ULL)
+
+#define L1		(L1TOP-L1SIZE)
+#define L1SIZE		((L1TABLES+PTLEVELS-2)*BY2PG)
+#define L1TOP		((MACHADDR(MAXMACH-1)-L1TOPSIZE)&-BY2PG)
+
+#define MACHADDR(n)	(KTZERO-((n)+1)*MACHSIZE)
+
+#define	UZERO		0ULL			/* user segment */
+#define	UTZERO		(UZERO+0x10000)		/* user text start */
+#define	USTKTOP		((EVAMASK>>1)-0xFFFF)	/* user segment end +1 */
+#define	USTKSIZE	(16*1024*1024)		/* user stack size */
+
+#define BLOCKALIGN	64			/* only used in allocb.c */
+
+/*
+ * Sizes
+ */
+#define BI2BY		8			/* bits per byte */
+#define BY2SE		4
+#define BY2WD		8
+#define BY2V		8			/* only used in xalloc.c */
+
+#define	PTEMAPMEM	(1024*1024)
+#define	PTEPERTAB	(PTEMAPMEM/BY2PG)
+#define	SEGMAPSIZE	8192
+#define	SSEGMAPSIZE	16
+#define	PPN(x)		((x)&~(BY2PG-1))
+
+#define SHARE_NONE	0
+#define SHARE_OUTER	2
+#define SHARE_INNER	3
+
+#define CACHE_UC	0
+#define CACHE_WB	1
+#define CACHE_WT	2
+#define CACHE_WB_NA	3
+
+#define MA_MEM_WB	0
+#define MA_MEM_WT	1
+#define MA_MEM_UC	2
+#define MA_DEV_nGnRnE	3
+#define MA_DEV_nGnRE	4
+#define MA_DEV_nGRE	5
+#define MA_DEV_GRE	6
+
+#define	PTEVALID	1
+#define PTEBLOCK	0
+#define PTETABLE	2
+#define PTEPAGE		2
+
+#define PTEMA(x)	((x)<<2)
+#define PTEAP(x)	((x)<<6)
+#define PTESH(x)	((x)<<8)
+
+#define PTEAF		(1<<10)
+#define PTENG		(1<<11)
+#define PTEPXN		(1ULL<<53)
+#define PTEUXN		(1ULL<<54)
+
+#define PTEKERNEL	PTEAP(0)
+#define PTEUSER		PTEAP(1)
+#define PTEWRITE	PTEAP(0)
+#define PTERONLY	PTEAP(2)
+#define PTENOEXEC	(PTEPXN|PTEUXN)
+
+#define PTECACHED	PTEMA(MA_MEM_WB)
+#define PTEWT		PTEMA(MA_MEM_WT)
+#define PTEUNCACHED	PTEMA(MA_MEM_UC)
+#define PTEDEVICE	PTEMA(MA_DEV_nGnRE)
+
+/*
+ * Physical machine information from here on.
+ *	PHYS addresses as seen from the arm cpu.
+ *	BUS  addresses as seen from peripherals
+ */
+#define	PHYSDRAM	0
+
+#define MIN(a, b)	((a) < (b)? (a): (b))
+#define MAX(a, b)	((a) > (b)? (a): (b))
--- /dev/null
+++ b/sys/src/9/imx8/mkfile
@@ -1,0 +1,94 @@
+CONF=reform
+CONFLIST=reform
+
+kzero=0xffffffff80000000
+loadaddr=0xffffffffc0100000
+
+objtype=arm64
+</$objtype/mkfile
+p=9
+
+DEVS=`{rc ../port/mkdevlist $CONF}
+
+PORT=\
+	alarm.$O\
+	alloc.$O\
+	allocb.$O\
+	auth.$O\
+	cache.$O\
+	chan.$O\
+	dev.$O\
+	edf.$O\
+	fault.$O\
+	mul64fract.$O\
+	page.$O\
+	parse.$O\
+	pgrp.$O\
+	portclock.$O\
+	print.$O\
+	proc.$O\
+	qio.$O\
+	qlock.$O\
+	rdb.$O\
+	rebootcmd.$O\
+	segment.$O\
+	syscallfmt.$O\
+	sysfile.$O\
+	sysproc.$O\
+	taslock.$O\
+	tod.$O\
+	xalloc.$O\
+	userinit.$O\
+
+OBJ=\
+	l.$O\
+	cache.v8.$O\
+	clock.$O\
+	fpu.$O\
+	main.$O\
+	mmu.$O\
+	sysreg.$O\
+	random.$O\
+	trap.$O\
+	$CONF.root.$O\
+	$CONF.rootc.$O\
+	$DEVS\
+	$PORT\
+
+# HFILES=
+
+LIB=\
+#	/$objtype/lib/libmemlayer.a\
+#	/$objtype/lib/libmemdraw.a\
+#	/$objtype/lib/libdraw.a\
+	/$objtype/lib/libip.a\
+	/$objtype/lib/libsec.a\
+#	/$objtype/lib/libmp.a\
+	/$objtype/lib/libc.a\
+#	/$objtype/lib/libdtracy.a\
+
+# virtual target: the kernel itself plus its aout2uimage output
+9:V:	$p$CONF $p$CONF.u
+
+# convert the linked kernel with aux/aout2uimage, passing $kzero via -Z
+$p$CONF.u:D:	$p$CONF
+	aux/aout2uimage -Z$kzero $p$CONF
+
+# link the kernel with its text at $loadaddr
+$p$CONF:D:	$OBJ $CONF.$O $LIB
+	$LD -o $target -T$loadaddr -l $prereq
+	size $target
+
+$OBJ: $HFILES
+
+# install the kernel and its uimage under /$objtype.
+# the prerequisites are exactly the files the recipe copies;
+# there is no rule for an s$p$CONF target in this mkfile.
+install:V: /$objtype/$p$CONF
+
+/$objtype/$p$CONF:D: $p$CONF $p$CONF.u
+	cp -x $p$CONF $p$CONF.u /$objtype/
+
+<../boot/bootmkfile
+<../port/portmkfile
+<|../port/mkbootrules $CONF
+
+initcode.out:		init9.$O initcode.$O /$objtype/lib/libc.a
+	$LD -l -R1 -s -o $target $prereq
+
+$CONF.clean:
+	rm -rf $p$CONF $p$CONF.u errstr.h $CONF.c boot$CONF.c
--- /dev/null
+++ b/sys/src/9/imx8/mmu.c
@@ -1,0 +1,492 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "sysreg.h"
+
+/*
+ * Fill in the initial kernel page tables at l1.
+ *
+ * The physical range [VDRAM-KZERO, -KZERO) is mapped twice with
+ * level 1 blocks: once at its kernel virtual address starting at
+ * VDRAM and once at its identity (physical) address.
+ * The physical range [VIRTIO-KZERO, VDRAM-KZERO) is mapped at VIRTIO
+ * with the PTEDEVICE attribute and no execute permission.
+ * Finally the upper table levels (when PTLEVELS > 2) are linked to
+ * the tables below them for all addresses from KSEG0 upwards.
+ */
+void
+mmu0init(uintptr *l1)
+{
+	uintptr va, pa, pe, attr;
+
+	/* VDRAM */
+	attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTESH(SHARE_INNER);
+	pe = -KZERO;
+	for(pa = VDRAM - KZERO, va = VDRAM; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
+		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
+		l1[PTL1X(pa, 1)] = pa | PTEVALID | PTEBLOCK | attr;	/* identity map */
+	}
+
+	/* device registers */
+	attr = PTEWRITE | PTEAF | PTEKERNEL | PTEUXN | PTEPXN | PTESH(SHARE_OUTER) | PTEDEVICE;
+	pe = VDRAM - KZERO;
+	for(pa = VIRTIO - KZERO, va = VIRTIO; pa < pe; pa += PGLSZ(1), va += PGLSZ(1)){
+		if(((pa|va) & PGLSZ(1)-1) != 0){
+			/*
+			 * not block aligned: point the level 1 entry at the
+			 * start of l1, use that as the level 0 table and map
+			 * page by page up to the next block boundary.
+			 */
+			l1[PTL1X(va, 1)] = (uintptr)l1 | PTEVALID | PTETABLE;
+			for(; pa < pe && ((va|pa) & PGLSZ(1)-1) != 0; pa += PGLSZ(0), va += PGLSZ(0)){
+				assert(l1[PTLX(va, 0)] == 0);
+				l1[PTLX(va, 0)] = pa | PTEVALID | PTEPAGE | attr;
+			}
+			break;
+		}
+		l1[PTL1X(va, 1)] = pa | PTEVALID | PTEBLOCK | attr;
+	}
+
+	/* the loops below end when va wraps around to 0 */
+	if(PTLEVELS > 2)
+	for(va = KSEG0; va != 0; va += PGLSZ(2))
+		l1[PTL1X(va, 2)] = (uintptr)&l1[L1TABLEX(va, 1)] | PTEVALID | PTETABLE;
+	if(PTLEVELS > 3)
+	for(va = KSEG0; va != 0; va += PGLSZ(3))
+		l1[PTL1X(va, 3)] = (uintptr)&l1[L1TABLEX(va, 2)] | PTEVALID | PTETABLE;
+}
+
+/*
+ * Zero the identity (physical address) entries in l1, at every
+ * table level, for the range starting at VDRAM-KZERO.  An entry is
+ * kept when the physical and the kernel virtual address share the
+ * same table index.
+ *
+ * NOTE(review): the upper bound here is -VDRAM while mmu0init()
+ * maps the same range up to -KZERO -- confirm this is intended.
+ */
+void
+mmu0clear(uintptr *l1)
+{
+	uintptr va, pa, pe;
+
+	pe = -VDRAM;
+	for(pa = VDRAM - KZERO, va = VDRAM; pa < pe; pa += PGLSZ(1), va += PGLSZ(1))
+		if(PTL1X(pa, 1) != PTL1X(va, 1))
+			l1[PTL1X(pa, 1)] = 0;
+
+	if(PTLEVELS > 2)
+	for(pa = VDRAM - KZERO, va = VDRAM; pa < pe; pa += PGLSZ(2), va += PGLSZ(2))
+		if(PTL1X(pa, 2) != PTL1X(va, 2))
+			l1[PTL1X(pa, 2)] = 0;
+	if(PTLEVELS > 3)
+	for(pa = VDRAM - KZERO, va = VDRAM; pa < pe; pa += PGLSZ(3), va += PGLSZ(3))
+		if(PTL1X(pa, 3) != PTL1X(va, 3))
+			l1[PTL1X(pa, 3)] = 0;
+}
+
+/*
+ * Allocate and clear the top level page table of this cpu
+ * (m->mmutop) and switch to it with no user mappings.
+ */
+void
+mmu1init(void)
+{
+	uintptr *top;
+
+	top = mallocalign(L1TOPSIZE, BY2PG, 0, 0);
+	m->mmutop = top;
+	if(top == nil)
+		panic("mmu1init: no memory for mmutop");
+	memset(top, 0, L1TOPSIZE);
+	mmuswitch(nil);
+}
+
+/*
+ * KZERO maps the first 1GB of ram.
+ * Convert a kernel virtual address at or above KZERO to its
+ * physical address; panics for any other address.
+ */
+uintptr
+paddr(void *va)
+{
+	uintptr a;
+
+	a = (uintptr)va;
+	if(a < KZERO){
+		panic("paddr: va=%#p pc=%#p", va, getcallerpc(&va));
+		return 0;
+	}
+	return a-KZERO;
+}
+
+/*
+ * Return the number of bytes starting at physical address pa
+ * that kaddr() can convert, or 0 when pa is out of its range.
+ */
+uintptr
+cankaddr(uintptr pa)
+{
+	uintptr top;
+
+	top = (uintptr)-KZERO;
+	return pa < top? top - pa: 0;
+}
+
+/*
+ * Convert a physical address below -KZERO to its kernel
+ * virtual address; panics for any other address.
+ */
+void*
+kaddr(uintptr pa)
+{
+	if(pa >= (uintptr)-KZERO){
+		panic("kaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
+		return nil;
+	}
+	return (void*)(pa + KZERO);
+}
+
+/*
+ * Like kaddr(), but also handles physical addresses at or above
+ * -KZERO by translating them into the KMAP window: physical address
+ * VDRAM-KZERO corresponds to KMAP, and the window is KMAPEND-KMAP
+ * bytes long.  Panics when pa lies outside both ranges.
+ */
+static void*
+kmapaddr(uintptr pa)
+{
+	if(pa < (uintptr)-KZERO)
+		return (void*)(pa + KZERO);
+	if(pa < (VDRAM - KZERO) || pa >= (VDRAM - KZERO) + (KMAPEND - KMAP))
+		panic("kmapaddr: pa=%#p pc=%#p", pa, getcallerpc(&pa));
+	return (void*)(pa + KMAP - (VDRAM - KZERO));
+}
+
+/*
+ * Return a kernel address for page p.  No temporary mapping is
+ * made: the address comes straight from kmapaddr().
+ */
+KMap*
+kmap(Page *p)
+{
+	return kmapaddr(p->pa);
+}
+
+/* nothing to undo: kmap() does not create a mapping */
+void
+kunmap(KMap*)
+{
+}
+
+/* no-op on this port */
+void
+kmapinval(void)
+{
+}
+
+/*
+ * physical address of the end of the initial mapping: the kernel
+ * image end plus one page, rounded up to a level 1 block.
+ */
+#define INITMAP	(ROUND((uintptr)end + BY2PG, PGLSZ(1))-KZERO)
+
+/*
+ * Get one page for a page table.
+ * Once conf.npage is set the page comes from mallocalign();
+ * before that it is taken from the start of conf.mem[0],
+ * which must still lie below INITMAP.
+ */
+static void*
+rampage(void)
+{
+	uintptr pa;
+
+	if(conf.npage)
+		return mallocalign(BY2PG, BY2PG, 0, 0);
+
+	pa = conf.mem[0].base;
+	assert((pa % BY2PG) == 0);
+	assert(pa < INITMAP);
+	conf.mem[0].base += BY2PG;
+	return KADDR(pa);
+}
+
+/*
+ * Map the physical range [pa, pe) at kernel virtual address va
+ * in the kernel page tables at L1, using attributes attr
+ * (PTEKERNEL and PTEAF are always added).
+ * The range is widened to page boundaries.  Level 1 blocks are
+ * used where the entry is free and both addresses are block aligned
+ * with at least a block left; otherwise single pages are mapped
+ * through a level 0 table, which is allocated on demand.
+ * Mapping over an existing page entry trips an assert.
+ */
+static void
+l1map(uintptr va, uintptr pa, uintptr pe, uintptr attr)
+{
+	uintptr *l1, *l0;
+
+	assert(pa < pe);
+
+	va &= -BY2PG;
+	pa &= -BY2PG;
+	pe = PGROUND(pe);
+
+	attr |= PTEKERNEL | PTEAF;
+
+	l1 = (uintptr*)L1;
+
+	while(pa < pe){
+		if(l1[PTL1X(va, 1)] == 0 && (pe-pa) >= PGLSZ(1) && ((va|pa) & PGLSZ(1)-1) == 0){
+			l1[PTL1X(va, 1)] = PTEVALID | PTEBLOCK | pa | attr;
+			va += PGLSZ(1);
+			pa += PGLSZ(1);
+			continue;
+		}
+		if(l1[PTL1X(va, 1)] & PTEVALID) {
+			/* must already be a table, not a block */
+			assert((l1[PTL1X(va, 1)] & PTETABLE) == PTETABLE);
+			l0 = KADDR(l1[PTL1X(va, 1)] & -PGLSZ(0));
+		} else {
+			l0 = rampage();
+			memset(l0, 0, BY2PG);
+			l1[PTL1X(va, 1)] = PTEVALID | PTETABLE | PADDR(l0);
+		}
+		assert(l0[PTLX(va, 0)] == 0);
+		l0[PTLX(va, 0)] = PTEVALID | PTEPAGE | pa | attr;
+		va += BY2PG;
+		pa += BY2PG;
+	}
+}
+
+/*
+ * Map the ram in physical range [base, limit) at the address
+ * kmapaddr() assigns to it, writable and not executable.
+ * A range crossing -KZERO is split there because the two halves
+ * land in different virtual windows.  Memory below INITMAP is
+ * skipped.
+ */
+static void
+kmapram(uintptr base, uintptr limit)
+{
+	if(base < (uintptr)-KZERO && limit > (uintptr)-KZERO){
+		kmapram(base, (uintptr)-KZERO);
+		kmapram((uintptr)-KZERO, limit);
+		return;
+	}
+	if(base < INITMAP)
+		base = INITMAP;
+	if(base >= limit || limit <= INITMAP)
+		return;
+
+	l1map((uintptr)kmapaddr(base), base, limit,
+		PTEWRITE | PTEPXN | PTEUXN | PTESH(SHARE_INNER));
+}
+
+/*
+ * Set up the conf.mem[] banks and map them.
+ * The initial mappings above INITMAP (both the kernel virtual and
+ * the identity entries) are removed first, then three banks with
+ * hard-coded limits are described and mapped with kmapram().
+ * The [UCRAMBASE, UCRAMBASE+UCRAMSIZE) window is left out of the
+ * banks; ucalloc() hands it out.
+ */
+void
+meminit(void)
+{
+	uintptr va, pa;
+
+	/*
+	 * now we know the real memory regions, unmap
+	 * everything above INITMAP and map again with
+	 * the proper sizes.
+	 */
+	coherence();
+	/* loop ends when va wraps around to 0 */
+	for(va = INITMAP+KZERO; va != 0; va += PGLSZ(1)){
+		pa = va-KZERO;
+		((uintptr*)L1)[PTL1X(pa, 1)] = 0;
+		((uintptr*)L1)[PTL1X(va, 1)] = 0;
+	}
+	flushtlb();
+
+	/* DDR Memory (All modules) */
+	conf.mem[0].base = PGROUND((uintptr)end - KZERO);
+
+	/* exclude uncached dram for ucalloc() */
+	conf.mem[0].limit = UCRAMBASE;
+	conf.mem[1].base = UCRAMBASE+UCRAMSIZE;
+
+	conf.mem[1].limit = 0x100000000ULL;
+
+	/* DDR Memory (Quad-A53 only) */
+	conf.mem[2].base =  0x100000000ULL;
+	conf.mem[2].limit = 0x140000000ULL;
+
+	kmapram(conf.mem[0].base, conf.mem[0].limit);
+	kmapram(conf.mem[1].base, conf.mem[1].limit);
+	kmapram(conf.mem[2].base, conf.mem[2].limit);
+
+	conf.mem[0].npage = (conf.mem[0].limit - conf.mem[0].base)/BY2PG;
+	conf.mem[1].npage = (conf.mem[1].limit - conf.mem[1].base)/BY2PG;
+	conf.mem[2].npage = (conf.mem[2].limit - conf.mem[2].base)/BY2PG;
+}
+
+/*
+ * Map size bytes of physical memory at pa to kernel virtual
+ * address va and return the virtual address of pa itself
+ * (page aligned va plus pa's offset within its page).
+ * The memory type is passed in the low bits of va as a PTEMA()
+ * value (see the callers vmap() and ucalloc()).
+ * Returns 0 when va is 0.
+ */
+uintptr
+mmukmap(uintptr va, uintptr pa, usize size)
+{
+	uintptr attr, off;
+
+	if(va == 0)
+		return 0;
+
+	off = pa & BY2PG-1;
+
+	/* extract the memory type before va is page aligned */
+	attr = va & PTEMA(7);
+	attr |= PTEWRITE | PTEUXN | PTEPXN | PTESH(SHARE_OUTER);
+
+	va &= -BY2PG;
+	pa &= -BY2PG;
+
+	l1map(va, pa, pa + off + size, attr);
+	flushtlb();
+
+	return va + off;
+}
+
+/*
+ * Map size bytes of device registers at physical address pa
+ * and return their kernel virtual address.
+ * Virtual space is handed out sequentially from VMAP by
+ * advancing a static cursor; it is never given back
+ * (vunmap() is empty) and the cursor is not locked.
+ */
+void*
+vmap(uvlong pa, vlong size)
+{
+	static uintptr base = VMAP;
+	uvlong pe = pa + size;
+	uintptr va;
+
+	va = base;
+	/* advance by the page rounded extent of [pa, pe) */
+	base += PGROUND(pe) - (pa & -BY2PG);
+	
+	return (void*)mmukmap(va | PTEDEVICE, pa, size);
+}
+
+/* not implemented: mappings made by vmap() stay in place */
+void
+vunmap(void *, vlong)
+{
+}
+
+/*
+ * Walk the page tables of the current cpu (m->mmutop) for user
+ * address va and return a pointer to the entry at the given level.
+ * Missing intermediate tables are taken from up->mmufree, cleared,
+ * and queued on up->mmuhead[] for their level.
+ * Returns nil when up->mmufree is empty; the caller is expected
+ * to supply a page and retry (see putmmu()).
+ */
+static uintptr*
+mmuwalk(uintptr va, int level)
+{
+	uintptr *table, pte;
+	Page *pg;
+	int i, x;
+
+	x = PTLX(va, PTLEVELS-1);
+	table = m->mmutop;
+	for(i = PTLEVELS-2; i >= level; i--){
+		pte = table[x];
+		if(pte & PTEVALID) {
+			if(pte & (0xFFFFULL<<48))
+				iprint("strange pte %#p va %#p\n", pte, va);
+			/* strip the attribute bits, leaving the table's physical address */
+			pte &= ~(0xFFFFULL<<48 | BY2PG-1);
+		} else {
+			pg = up->mmufree;
+			if(pg == nil)
+				return nil;
+			up->mmufree = pg->next;
+			pg->va = va & -PGLSZ(i+1);
+			if((pg->next = up->mmuhead[i+1]) == nil)
+				up->mmutail[i+1] = pg;
+			up->mmuhead[i+1] = pg;
+			pte = pg->pa;
+			memset(kmapaddr(pte), 0, BY2PG);
+			/* the table must be cleared before it becomes visible */
+			coherence();
+			table[x] = pte | PTEVALID | PTETABLE;
+		}
+		table = kmapaddr(pte);
+		x = PTLX(va, (uintptr)i);
+	}
+	return &table[x];
+}
+
+/* owner of each asid; slot 0 is never handed out */
+static Proc *asidlist[256];
+
+/*
+ * Assign an asid to p and record it in asidlist.
+ * The search starts at the asid p had before (a negative p->asid
+ * marks one released by putasid()), or at p->pid when it had none.
+ * A slot can be taken when it is free, already belongs to p, or
+ * belongs to a process that released it and is not on a cpu.
+ * Returns non-zero when the slot did not already belong to p,
+ * i.e. when the caller has to flush the asid.
+ */
+static int
+allocasid(Proc *p)
+{
+	static Lock lk;
+	Proc *x;
+	int a;
+
+	lock(&lk);
+	a = p->asid;
+	if(a < 0)
+		a = -a;
+	if(a == 0)
+		a = p->pid;
+	for(;; a++){
+		a %= nelem(asidlist);
+		if(a == 0)
+			continue;	// reserved
+		x = asidlist[a];
+		if(x == p || x == nil || (x->asid < 0 && x->mach == nil))
+			break;
+	}
+	p->asid = a;
+	asidlist[a] = p;
+	unlock(&lk);
+
+	return x != p;
+}
+
+/*
+ * Give up the asid of p: clear its asidlist slot if p still
+ * owns it and reset p->asid to 0.
+ */
+static void
+freeasid(Proc *p)
+{
+	int slot;
+
+	slot = p->asid;
+	slot = slot < 0? -slot: slot;	/* released asids are stored negated */
+	if(slot > 0){
+		if(asidlist[slot] == p)
+			asidlist[slot] = nil;
+	}
+	p->asid = 0;
+}
+
+/*
+ * Mark the asid of p as released by negating it, so allocasid()
+ * may hand the slot to another process.  On multiprocessors the
+ * user entries in mmutop are cleared as well (see below).
+ */
+void
+putasid(Proc *p)
+{
+	/*
+	 * Prevent the following scenario:
+	 *	pX sleeps on cpuA, leaving its page tables in mmutop
+	 *	pX wakes up on cpuB, and exits, freeing its page tables
+	 *	pY on cpuB allocates a freed page table page and overwrites with data
+	 *	cpuA takes an interrupt, and is now running with bad page tables
+	 * In theory this shouldn't hurt because only user address space tables
+	 * are affected, and mmuswitch will clear mmutop before a user process is
+	 * dispatched.  But empirically it correlates with weird problems, eg
+	 * resetting of the core clock at 0x4000001C which confuses local timers.
+	 *
+	 * NOTE(review): the address above presumably dates from the
+	 * raspberry pi port this file was derived from -- confirm.
+	 */
+	if(conf.nmach > 1)
+		mmuswitch(nil);
+
+	if(p->asid > 0)
+		p->asid = -p->asid;
+}
+
+/*
+ * Enter the user mapping va -> pa for the current process.
+ * The low bits of pa carry the pte attributes supplied by the
+ * caller; pg is the page being mapped and is used for the
+ * instruction cache maintenance of text pages.
+ */
+void
+putmmu(uintptr va, uintptr pa, Page *pg)
+{
+	uintptr *pte, old;
+	int s;
+
+	s = splhi();
+	/* mmuwalk fails when out of page table pages: get one and retry */
+	while((pte = mmuwalk(va, 0)) == nil){
+		spllo();
+		up->mmufree = newpage(0, nil, 0);
+		splhi();
+	}
+	/* clear the entry and flush the tlb before writing the new one */
+	old = *pte;
+	*pte = 0;
+	if((old & PTEVALID) != 0)
+		flushasidvall((uvlong)up->asid<<48 | va>>12);
+	else
+		flushasidva((uvlong)up->asid<<48 | va>>12);
+	*pte = pa | PTEPAGE | PTEUSER | PTEPXN | PTENG | PTEAF |
+		(((pa & PTEMA(7)) == PTECACHED)? PTESH(SHARE_INNER): PTESH(SHARE_OUTER));
+	if(needtxtflush(pg)){
+		cachedwbinvse(kmap(pg), BY2PG);
+		cacheiinvse((void*)va, BY2PG);
+		donetxtflush(pg);
+	}
+	splx(s);
+}
+
+/*
+ * Release the asid of p and move its page table pages from the
+ * per level lists (mmuhead/mmutail) onto p->mmufree.
+ * The loop stops at the first level that has no pages.
+ */
+static void
+mmufree(Proc *p)
+{
+	int i;
+
+	freeasid(p);
+
+	for(i=1; i<PTLEVELS; i++){
+		if(p->mmuhead[i] == nil)
+			break;
+		/* splice the whole list in front of mmufree */
+		p->mmutail[i]->next = p->mmufree;
+		p->mmufree = p->mmuhead[i];
+		p->mmuhead[i] = p->mmutail[i] = nil;
+	}
+}
+
+/*
+ * Switch the user address space of this cpu to that of p.
+ * The user part of m->mmutop is cleared first; with p == nil
+ * that is all, leaving only kernel mappings.  Otherwise p gets
+ * an asid (flushed when newly assigned), the translation table
+ * base is loaded with that asid, and p's top level tables are
+ * entered into m->mmutop.
+ */
+void
+mmuswitch(Proc *p)
+{
+	uintptr va;
+	Page *t;
+
+	for(va = UZERO; va < USTKTOP; va += PGLSZ(PTLEVELS-1))
+		m->mmutop[PTLX(va, PTLEVELS-1)] = 0;
+
+	if(p == nil){
+		setttbr(PADDR(m->mmutop));
+		return;
+	}
+
+	/* flushmmu() asked for the page tables to be thrown away */
+	if(p->newtlb){
+		mmufree(p);
+		p->newtlb = 0;
+	}
+
+	if(allocasid(p))
+		flushasid((uvlong)p->asid<<48);
+
+	setttbr((uvlong)p->asid<<48 | PADDR(m->mmutop));
+
+	for(t = p->mmuhead[PTLEVELS-1]; t != nil; t = t->next){
+		va = t->va;
+		m->mmutop[PTLX(va, PTLEVELS-1)] = t->pa | PTEVALID | PTETABLE;
+	}
+}
+
+/*
+ * Give back all mmu resources of p: drop the user mappings of
+ * this cpu, collect p's page table pages and free them.
+ */
+void
+mmurelease(Proc *p)
+{
+	mmuswitch(nil);
+	mmufree(p);
+	freepages(p->mmufree, nil, 0);
+	p->mmufree = nil;
+}
+
+/*
+ * Discard the page tables of the current process: setting
+ * newtlb makes mmuswitch() free them before reloading.
+ */
+void
+flushmmu(void)
+{
+	int s;
+
+	s = splhi();
+	up->newtlb = 1;
+	mmuswitch(up);
+	splx(s);
+}
+
+/* no consistency checking is done on this port */
+void
+checkmmu(uintptr, uintptr)
+{
+}
+
+/*
+ * Allocate size bytes (rounded up to whole pages) of uncached
+ * memory.  Space is carved downwards from the top of the
+ * [UCRAMBASE, UCRAMBASE+UCRAMSIZE) window and is never freed.
+ * Returns the kernel virtual address of the allocation, mapped
+ * with the PTEUNCACHED attribute; panics when the window cannot
+ * satisfy the request.
+ */
+void*
+ucalloc(usize size)
+{
+	static uintptr top = UCRAMBASE + UCRAMSIZE;
+	static Lock lk;
+	uintptr va;
+
+	size = PGROUND(size);
+
+	lock(&lk);
+	/*
+	 * compare against the space that is left instead of
+	 * subtracting first: top is unsigned, so an oversized
+	 * request would wrap around and slip past a
+	 * "top < UCRAMBASE" test.
+	 */
+	if(size > top - UCRAMBASE)
+		panic("ucalloc: %p needs %zd bytes\n", getcallerpc(&size), size);
+	top -= size;
+	va = KZERO + top;
+	unlock(&lk);
+
+	return (void*)mmukmap(va | PTEUNCACHED, PADDR(va), size);
+}
--- /dev/null
+++ b/sys/src/9/imx8/reform
@@ -1,0 +1,39 @@
+dev
+	root
+	cons
+	swap
+	env
+	pipe
+	proc
+	mnt
+	srv
+	shr
+	dup
+	tls
+	cap
+	fs
+	ether	netif
+	ip	arp chandial ip ipv6 ipaux iproute netlog nullmedium pktmedium inferno
+	uart
+link
+	etherimx	ethermii
+	ethermedium
+	loopbackmedium
+ip
+	tcp
+	udp
+	il
+	ipifc
+	icmp
+	icmp6
+	ipmux
+misc
+	gic
+	uartimx
+port
+	int cpuserver = 0;
+bootdir
+	/$objtype/bin/paqfs
+	/$objtype/bin/auth/factotum
+	bootfs.paq
+	boot
--- /dev/null
+++ b/sys/src/9/imx8/sysreg.c
@@ -1,0 +1,58 @@
+/*
+ * ARMv8 system registers
+ * mainly to cope with arm hard-wiring register numbers into instructions.
+ *
+ * these routines must be callable from KZERO.
+ *
+ * on a multiprocessor, process switching to another cpu is assumed
+ * to be inhibited by the caller as these registers are local to the cpu.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+
+/*
+ * Return a pointer to a two instruction routine consisting of
+ * the instruction word wd followed by a return, creating it in
+ * the static buffer ib when it does not exist yet.
+ *
+ * ep[MAXMACH] marks the end of the routines written so far.
+ * ep[machno] marks how far this cpu has already looked (and
+ * invalidated its instruction cache for), so routines below it
+ * can be found without taking the lock.
+ */
+static void*
+mkinstr(ulong wd)
+{
+	static ulong ib[256], *ep[MAXMACH+1];
+	static Lock lk;
+	ulong *ip, *ie;
+
+	/* fast path: routines this cpu already knows about */
+	ie = ep[m->machno];
+	for(ip = ib; ip < ie; ip += 2)
+		if(*ip == wd)
+			return ip;
+
+	/* continue the search through what other cpus have added */
+	ilock(&lk);
+	ie = ep[MAXMACH];
+	for(; ip < ie; ip += 2)
+		if(*ip == wd)
+			goto Found;
+	if(ip >= &ib[nelem(ib)])
+		panic("mkinstr: out of instrucuction buffer");
+	ip[0] = wd;
+	ip[1] = 0xd65f03c0;	// RETURN
+	ep[MAXMACH] = ie = ip + 2;
+	/* push the new instructions out of the data cache */
+	cachedwbinvse(ip, 2*sizeof(*ip));
+Found:
+	iunlock(&lk);
+	cacheiinv();
+	ep[m->machno] = ie;
+	return ip;
+}
+
+/*
+ * Read the system register numbered spr (a SYSREG() value)
+ * by running an instruction built for it by mkinstr().
+ */
+uvlong
+sysrd(ulong spr)
+{
+	uvlong (*rd)(void);
+
+	rd = mkinstr(0xd5380000UL | spr);
+	return rd();
+}
+
+/*
+ * Write val to the system register numbered spr (a SYSREG()
+ * value) by running an instruction built for it by mkinstr().
+ */
+void
+syswr(ulong spr, uvlong val)
+{
+	void (*wr)(uvlong);
+
+	wr = mkinstr(0xd5180000UL | spr);
+	wr(val);
+}
--- /dev/null
+++ b/sys/src/9/imx8/sysreg.h
@@ -1,0 +1,89 @@
+#define MIDR_EL1			SYSREG(3,0,0,0,0)
+#define MPIDR_EL1			SYSREG(3,0,0,0,5)
+#define ID_AA64AFR0_EL1			SYSREG(3,0,0,5,4)
+#define ID_AA64AFR1_EL1			SYSREG(3,0,0,5,5)
+#define ID_AA64DFR0_EL1			SYSREG(3,0,0,5,0)
+#define ID_AA64DFR1_EL1			SYSREG(3,0,0,5,1)
+#define ID_AA64ISAR0_EL1		SYSREG(3,0,0,6,0)
+#define ID_AA64ISAR1_EL1		SYSREG(3,0,0,6,1)
+#define ID_AA64MMFR0_EL1		SYSREG(3,0,0,7,0)
+#define ID_AA64MMFR1_EL1		SYSREG(3,0,0,7,1)
+#define ID_AA64PFR0_EL1			SYSREG(3,0,0,4,0)
+#define ID_AA64PFR1_EL1			SYSREG(3,0,0,4,1)
+#define SCTLR_EL1			SYSREG(3,0,1,0,0)
+#define CPACR_EL1			SYSREG(3,0,1,0,2)
+#define MAIR_EL1			SYSREG(3,0,10,2,0)
+#define TCR_EL1				SYSREG(3,0,2,0,2)
+#define TTBR0_EL1			SYSREG(3,0,2,0,0)
+#define TTBR1_EL1			SYSREG(3,0,2,0,1)
+#define ESR_EL1				SYSREG(3,0,5,2,0)
+#define FAR_EL1				SYSREG(3,0,6,0,0)
+#define VBAR_EL1			SYSREG(3,0,12,0,0)
+#define VTTBR_EL2			SYSREG(3,4,2,1,0)
+#define SP_EL0				SYSREG(3,0,4,1,0)
+#define SP_EL1				SYSREG(3,4,4,1,0)
+#define SP_EL2				SYSREG(3,6,4,1,0)
+#define SCTLR_EL2			SYSREG(3,4,1,0,0)
+#define HCR_EL2				SYSREG(3,4,1,1,0)
+#define MDCR_EL2			SYSREG(3,4,1,1,1)
+#define PMCR_EL0			SYSREG(3,3,9,12,0)
+#define PMCNTENSET			SYSREG(3,3,9,12,1)
+#define PMCCNTR_EL0			SYSREG(3,3,9,13,0)
+#define PMUSERENR_EL0			SYSREG(3,3,9,14,0)
+
+#define CNTPCT_EL0			SYSREG(3,3,14,0,1)
+#define CNTVCT_EL0			SYSREG(3,3,14,0,2)
+#define CNTKCTL_EL1			SYSREG(3,0,14,1,0)
+#define	CNTFRQ_EL0			SYSREG(3,3,14,0,0)
+#define CNTP_TVAL_EL0			SYSREG(3,3,14,2,0)
+#define CNTP_CTL_EL0			SYSREG(3,3,14,2,1)
+#define CNTP_CVAL_EL0			SYSREG(3,3,14,2,2)
+
+#define TPIDR_EL0			SYSREG(3,3,13,0,2)
+#define TPIDR_EL1			SYSREG(3,0,13,0,4)
+
+#define CCSIDR_EL1			SYSREG(3,1,0,0,0)
+#define CSSELR_EL1			SYSREG(3,2,0,0,0)
+
+#define ACTLR_EL2			SYSREG(3,4,1,0,1)
+#define CPUACTLR_EL1			SYSREG(3,1,15,2,0)
+#define CPUECTLR_EL1			SYSREG(3,1,15,2,1)
+#define CBAR_EL1			SYSREG(3,1,15,3,0)
+
+#define	ICC_AP0R_EL1(m)			SYSREG(3,0,12,8,4|(m))
+#define	ICC_AP1R_EL1(m)			SYSREG(3,0,12,9,0|(m))
+#define	ICC_ASGI1R_EL1			SYSREG(3,0,12,11,6)
+#define	ICC_BPR0_EL1			SYSREG(3,0,12,8,3)
+#define	ICC_BPR1_EL1			SYSREG(3,0,12,12,3)
+#define	ICC_CTLR_EL1			SYSREG(3,0,12,12,4)
+#define	ICC_DIR_EL1			SYSREG(3,0,12,11,1)
+#define	ICC_EOIR0_EL1			SYSREG(3,0,12,8,1)
+#define	ICC_EOIR1_EL1			SYSREG(3,0,12,12,1)
+#define	ICC_HPPIR0_EL1			SYSREG(3,0,12,8,2)
+#define	ICC_HPPIR1_EL1			SYSREG(3,0,12,12,2)
+#define ICC_IAR0_EL1			SYSREG(3,0,12,8,0)
+#define	ICC_IAR1_EL1			SYSREG(3,0,12,12,0)
+#define	ICC_IGRPEN0_EL1			SYSREG(3,0,12,12,6)
+#define	ICC_IGRPEN1_EL1			SYSREG(3,0,12,12,7)
+#define	ICC_NMIAR1_EL1			SYSREG(3,0,12,9,5)
+#define	ICC_PMR_EL1			SYSREG(3,0,4,6,0)
+#define	ICC_RPR_EL1			SYSREG(3,0,12,11,3)
+#define	ICC_SGI0R_EL1			SYSREG(3,0,12,11,7)
+#define	ICC_SGI1R_EL1			SYSREG(3,0,12,11,5)
+#define ICC_SRE_EL1			SYSREG(3,0,12,12,5)
+
+/*
+ * Pack the register fields into the bit positions they occupy
+ * in the instruction word, so the value can be or'ed into an
+ * instruction by sysrd()/syswr().
+ * l.s redefines this for the assembler
+ */
+#define SYSREG(op0,op1,Cn,Cm,op2)	((op0)<<19|(op1)<<16|(Cn)<<12|(Cm)<<8|(op2)<<5)
+
+/*
+ * barrier options, used as operands of the barrier instructions
+ * in the assembler sources (e.g. DSB $NSH, ISB $SY).
+ * the upper two bits select the domain, the lower two the access type.
+ */
+#define	OSHLD	(0<<2 | 1)
+#define OSHST	(0<<2 | 2)
+#define	OSH	(0<<2 | 3)
+#define NSHLD	(1<<2 | 1)
+#define NSHST	(1<<2 | 2)
+#define NSH	(1<<2 | 3)
+#define ISHLD	(2<<2 | 1)
+#define ISHST	(2<<2 | 2)
+#define ISH	(2<<2 | 3)
+#define LD	(3<<2 | 1)
+#define ST	(3<<2 | 2)
+#define SY	(3<<2 | 3)
--- /dev/null
+++ b/sys/src/9/imx8/trap.c
@@ -1,0 +1,716 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "../port/error.h"
+#include "../port/systab.h"
+
+#include <tos.h>
+#include "ureg.h"
+#include "sysreg.h"
+
+int	(*buserror)(Ureg*);
+
+/* SPSR bits user can modify */
+#define USPSRMASK	(0xFULL<<28)
+
+/*
+ * Fill one 0x80 byte vector slot at v by copying the template
+ * code at t word by word.  The first word equal to 0x14000000
+ * (a branch with zero offset) is treated as a placeholder: its
+ * low 26 bits are set to the word distance from that slot to f,
+ * and copying stops there.  Panics when the template has no
+ * placeholder within the slot.
+ */
+static void
+setupvector(u32int *v, void (*t)(void), void (*f)(void))
+{
+	int i;
+
+	for(i = 0; i < 0x80/4; i++){
+		v[i] = ((u32int*)t)[i];
+		if(v[i] == 0x14000000){
+			v[i] |= ((u32int*)f - &v[i]) & 0x3ffffff;
+			return;
+		}
+	}
+	panic("bug in vector code");
+}
+
+/*
+ * Install the exception vector table on this cpu.
+ * The table (16 slots of 0x80 bytes, aligned to 2K) is built
+ * once, on the first call; every call invalidates the
+ * instruction cache for it and loads its address into VBAR_EL1.
+ */
+void
+trapinit(void)
+{
+	extern void vsys(void);
+	extern void vtrap(void);
+	extern void virq(void);
+	extern void vfiq(void);
+	extern void vserr(void);
+
+	extern void vsys0(void);
+	extern void vtrap0(void);
+	extern void vtrap1(void);
+
+	static u32int *v;
+
+	intrcpushutdown();
+	if(v == nil){
+		/* disable everything */
+		intrsoff();
+
+		v = mallocalign(0x80*4*4, 1<<11, 0, 0);
+		if(v == nil)
+			panic("no memory for vector table");
+
+		/* four groups of four slots: trap, irq, fiq, serror */
+		setupvector(&v[0x000/4], vtrap,	vtrap0);
+		setupvector(&v[0x080/4], virq,	vtrap0);
+		setupvector(&v[0x100/4], vfiq,	vtrap0);
+		setupvector(&v[0x180/4], vserr,	vtrap0);
+
+		setupvector(&v[0x200/4], vtrap,	vtrap1);
+		setupvector(&v[0x280/4], virq,	vtrap1);
+		setupvector(&v[0x300/4], vfiq,	vtrap1);
+		setupvector(&v[0x380/4], vserr,	vtrap1);
+
+		/* the only group whose first slot goes to the system call entry */
+		setupvector(&v[0x400/4], vsys,	vsys0);
+		setupvector(&v[0x480/4], virq,	vtrap0);
+		setupvector(&v[0x500/4], vfiq,	vtrap0);
+		setupvector(&v[0x580/4], vserr, vtrap0);
+
+		setupvector(&v[0x600/4], vtrap,	vtrap0);
+		setupvector(&v[0x680/4], virq,	vtrap0);
+		setupvector(&v[0x700/4], vfiq,	vtrap0);
+		setupvector(&v[0x780/4], vserr,	vtrap0);
+
+		/* write the freshly built code out of the data cache */
+		cacheduwbse(v, 0x80*4*4);
+	}
+	cacheiinvse(v, 0x80*4*4);
+	syswr(VBAR_EL1, (uintptr)v);
+	splx(0x3<<6);	// unmask serr and debug
+}
+
+/*
+ * note text posted to a user process by trap(), indexed by the
+ * 6 bit exception type; missing entries fall back to [0x00].
+ */
+static char *traps[64] = {
+	[0x00]	"sys: trap: unknown",
+	[0x01]	"sys: trap: WFI or WFE instruction execution",
+	[0x0E]	"sys: trap: illegal execution state",
+	[0x18]	"sys: trap: illegal MSR/MRS access",
+	[0x22]	"sys: trap: misaligned pc",
+	[0x26]	"sys: trap: stack pointer misaligned",
+	[0x30]	"sys: trap: breakpoint",
+	[0x32]	"sys: trap: software step",
+	[0x34]	"sys: trap: watchpoint",
+	[0x3C]	"sys: trap: BRK instruction",
+};
+
+/*
+ * Common handler for exceptions and interrupts.
+ * The upper 32 bits of ureg->type tell which kind of vector was
+ * taken (1 irq, 2 fiq, 3 serror, as tested below); the exception
+ * type is taken from bits 26-31 of the lower 32 bits.
+ * Faults and fpu traps are dispatched to their handlers; anything
+ * else panics when it came from the kernel and posts a note to
+ * the process when it came from user mode.
+ */
+void
+trap(Ureg *ureg)
+{
+	u32int type, intr;
+	int user;
+
+	intr = ureg->type >> 32;
+	if(intr == 2){
+		fiq(ureg);
+		return;
+	}
+	splflo();
+	user = kenter(ureg);
+	type = (u32int)ureg->type >> 26;
+	switch(type){
+	case 0x20:	// instruction abort from lower level
+	case 0x21:	// instruction abort from same level
+	case 0x24:	// data abort from lower level
+	case 0x25:	// data abort from same level
+		faultarm64(ureg);
+		break;
+	case 0x07:	// SIMD/FP
+	case 0x2C:	// FPU exception (A64 only)
+		mathtrap(ureg);
+		break;
+	case 0x00:	// unknown
+		if(intr == 1){
+			if(irq(ureg) && up != nil && up->delaysched)
+				sched();
+			break;
+		}
+		/* case 0x2F deliberately jumps into the body of this if */
+		if(intr == 3){
+	case 0x2F:	// SError interrupt
+			if(buserror != nil && (*buserror)(ureg))
+				break;
+			dumpregs(ureg);
+			panic("SError interrupt");
+			break;
+		}
+		/* wet floor */
+	case 0x01:	// WFI or WFE instruction execution
+	case 0x03:	// MCR or MRC access to CP15 (A32 only)
+	case 0x04:	// MCRR or MRC access to CP15 (A32 only)
+	case 0x05:	// MCR or MRC access to CP14 (A32 only)
+	case 0x06:	// LDC or STD access to CP14 (A32 only)
+	case 0x08:	// MCR or MRC to CP10 (A32 only)
+	case 0x0C:	// MRC access to CP14 (A32 only)
+	case 0x0E:	// Illegal Execution State
+	case 0x11:	// SVC instruction execution (A32 only)
+	case 0x12:	// HVC instruction execution (A32 only)
+	case 0x13:	// SMC instruction execution (A32 only)
+	case 0x15:	// SVC instruction execution (A64 only)
+	case 0x16:	// HVC instruction execution (A64 only)
+	case 0x17:	// SMC instruction execution (A64 only)
+	case 0x18:	// MSR/MRS (A64)
+	case 0x22:	// misaligned pc
+	case 0x26:	// stack pointer misaligned
+	case 0x28:	// FPU exception (A32 only)
+	case 0x30:	// breakpoint from lower level
+	case 0x31:	// breakpoint from same level
+	case 0x32:	// software step from lower level
+	case 0x33:	// software step from same level
+	case 0x34:	// watchpoint execution from lower level
+	case 0x35:	// watchpoint exception from same level
+	case 0x38:	// breakpoint (A32 only)
+	case 0x3A:	// vector catch exception (A32 only)
+	case 0x3C:	// BRK instruction (A64 only)
+	default:
+		if(!userureg(ureg)){
+			dumpregs(ureg);
+			panic("unhandled trap");
+		}
+		if(traps[type] == nil) type = 0;	// unknown
+		postnote(up, 1, traps[type], NDebug);
+		break;
+	}
+	splhi();
+	if(user){
+		if(up->procctl || up->nnote)
+			notify(ureg);
+		kexit(ureg);
+	}
+}
+
+/*
+ * System call entry.
+ * The call number is taken from r0 and the arguments are copied
+ * from the user stack (one word above sp).  The result, or -1
+ * on error, is returned in r0.  Handles syscall tracing, the
+ * special return path for NOTED, and pending notes.
+ */
+void
+syscall(Ureg *ureg)
+{
+	vlong startns, stopns;
+	uintptr sp, ret;
+	ulong scallnr;
+	int i, s;
+	char *e;
+
+	if(!kenter(ureg))
+		panic("syscall from  kernel");
+	
+	m->syscall++;
+	up->insyscall = 1;
+	up->pc = ureg->pc;
+	
+	sp = ureg->sp;
+	up->scallnr = scallnr = ureg->r0;
+
+	spllo();
+	
+	up->nerrlab = 0;
+	startns = 0;
+	ret = -1;
+	if(!waserror()){
+		/* only validate when the arguments are not within the top stack page */
+		if(sp < USTKTOP - BY2PG || sp > USTKTOP - sizeof(Sargs) - BY2WD){
+			validaddr(sp, sizeof(Sargs)+BY2WD, 0);
+			evenaddr(sp);
+		}
+		up->s = *((Sargs*) (sp + BY2WD));
+
+		if(up->procctl == Proc_tracesyscall){
+			syscallfmt(scallnr, ureg->pc, (va_list) up->s.args);
+			s = splhi();
+			up->procctl = Proc_stopme;
+			procctl();
+			splx(s);
+			startns = todget(nil);
+		}
+		
+		if(scallnr >= nsyscall || systab[scallnr] == nil){
+			pprint("bad sys call number %lud pc %#p", scallnr, ureg->pc);
+			postnote(up, 1, "sys: bad sys call", NDebug);
+			error(Ebadarg);
+		}
+		up->psstate = sysctab[scallnr];
+		ret = systab[scallnr]((va_list)up->s.args);
+		poperror();
+	}else{
+		/* swap the two error string buffers */
+		e = up->syserrstr;
+		up->syserrstr = up->errstr;
+		up->errstr = e;
+	}
+	if(up->nerrlab){
+		print("bad errstack [%lud]: %d extra\n", scallnr, up->nerrlab);
+		for(i = 0; i < NERR; i++)
+			print("sp=%#p pc=%#p\n", up->errlab[i].sp, up->errlab[i].pc);
+		panic("error stack");
+	}
+	ureg->r0 = ret;
+	if(up->procctl == Proc_tracesyscall){
+		stopns = todget(nil);
+		sysretfmt(scallnr, (va_list) up->s.args, ret, startns, stopns);
+		s = splhi();
+		up->procctl = Proc_stopme;
+		procctl();
+		splx(s);
+	}
+	
+	up->insyscall = 0;
+	up->psstate = 0;
+	if(scallnr == NOTED){
+		noted(ureg, *((ulong*) up->s.args));
+		/*
+		 * normally, syscall() returns to forkret()
+		 * not restoring general registers when going
+		 * to userspace. to completely restore the
+		 * interrupted context, we have to return thru
+		 * noteret(). we override return pc to jump
+		 * to it when returning from syscall()
+		 */
+		returnto(noteret);
+	}
+
+	if(scallnr != RFORK && (up->procctl || up->nnote)){
+		splhi();
+		notify(ureg);
+	}
+	if(up->delaysched)
+		sched();
+	kexit(ureg);
+}
+
+/*
+ * Deliver the first pending note to the current process.
+ * Returns 1 when the user context in ureg has been redirected to
+ * the note handler, 0 when there was nothing to deliver or a
+ * handler is already running.  The process exits when the note
+ * cannot be handled.
+ *
+ * On delivery the interrupted Ureg is copied onto the user stack,
+ * followed (going down) by the previous up->ureg, the note text
+ * and the two handler arguments.
+ */
+int
+notify(Ureg *ureg)
+{
+	int l;
+	uintptr s, sp;
+	Note *n;
+
+	if(up->procctl)
+		procctl();
+	if(up->nnote == 0)
+		return 0;
+	if(up->fpstate == FPactive){
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive;
+	}
+	/* cleared again by noted() */
+	up->fpstate |= FPillegal;
+
+	s = spllo();
+	qlock(&up->debug);
+	up->notepending = 0;
+	n = &up->note[0];
+	if(strncmp(n->msg, "sys:", 4) == 0){
+		l = strlen(n->msg);
+		if(l > ERRMAX-23)	/* " pc=0x0123456789abcdef\0" */
+			l = ERRMAX-23;
+		sprint(n->msg+l, " pc=%#p", ureg->pc);
+	}
+
+	if(n->flag!=NUser && (up->notified || up->notify==0)){
+		qunlock(&up->debug);
+		if(n->flag == NDebug)
+			pprint("suicide: %s\n", n->msg);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+
+	if(up->notified){
+		qunlock(&up->debug);
+		splhi();
+		return 0;
+	}
+
+	if(!up->notify){
+		qunlock(&up->debug);
+		pexit(n->msg, n->flag!=NDebug);
+	}
+	sp = ureg->sp;
+	sp -= 256;	/* debugging: preserve context causing problem */
+	sp -= sizeof(Ureg);
+	sp = STACKALIGN(sp);
+
+	if(!okaddr((uintptr)up->notify, 1, 0)
+	|| !okaddr(sp-ERRMAX-4*BY2WD, sizeof(Ureg)+ERRMAX+4*BY2WD, 1)
+	|| ((uintptr) up->notify & 3) != 0
+	|| (sp & 7) != 0){
+		qunlock(&up->debug);
+		pprint("suicide: bad address in notify: handler=%#p sp=%#p\n",
+			up->notify, sp);
+		pexit("Suicide", 0);
+	}
+
+	memmove((Ureg*)sp, ureg, sizeof(Ureg));
+	*(Ureg**)(sp-BY2WD) = up->ureg;	/* word under Ureg is old up->ureg */
+	up->ureg = (void*)sp;
+	sp -= BY2WD+ERRMAX;
+	memmove((char*)sp, up->note[0].msg, ERRMAX);
+	sp -= 3*BY2WD;
+	*(uintptr*)(sp+2*BY2WD) = sp+3*BY2WD;	/* address of the note text */
+	*(uintptr*)(sp+1*BY2WD) = (uintptr)up->ureg;	/* address of the saved Ureg */
+	ureg->r0 = (uintptr) up->ureg;
+	ureg->sp = sp;
+	ureg->pc = (uintptr) up->notify;
+	ureg->link = 0;
+	up->notified = 1;
+	/* pop the delivered note off the queue, keeping a copy in lastnote */
+	up->nnote--;
+	memmove(&up->lastnote, &up->note[0], sizeof(Note));
+	memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
+
+	qunlock(&up->debug);
+	splx(s);
+	return 1;
+}
+
+/*
+ * Return from a note handler (the noted system call).
+ * The Ureg saved on the user stack by notify() is validated and
+ * copied back into ureg, keeping all psr bits outside USPSRMASK
+ * from the current context.  arg0 selects what happens next:
+ * continue, re-enter the handler context (NSAVE) or exit.
+ */
+void
+noted(Ureg *ureg, ulong arg0)
+{
+	Ureg *nureg;
+	uintptr oureg, sp;
+	
+	qlock(&up->debug);
+	if(arg0 != NRSTR && !up->notified){
+		qunlock(&up->debug);
+		pprint("call to noted() when not notified\n");
+		pexit("Suicide", 0);
+	}
+	up->notified = 0;
+	
+	nureg = up->ureg;
+	up->fpstate &= ~FPillegal;
+	
+	oureg = (uintptr) nureg;
+	if(!okaddr(oureg - BY2WD, BY2WD + sizeof(Ureg), 0) || (oureg & 7) != 0){
+		qunlock(&up->debug);
+		pprint("bad ureg in noted or call to noted when not notified\n");
+		pexit("Suicide", 0);
+	}
+
+	/* the user may only change the psr bits in USPSRMASK */
+	nureg->psr = (nureg->psr & USPSRMASK) | (ureg->psr & ~USPSRMASK);
+	memmove(ureg, nureg, sizeof(Ureg));
+	
+	switch(arg0){
+	case NCONT: case NRSTR:
+		if(!okaddr(nureg->pc, BY2WD, 0) || !okaddr(nureg->sp, BY2WD, 0) ||
+				(nureg->pc & 3) != 0 || (nureg->sp & 7) != 0){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		/* restore the up->ureg that notify() stored below the Ureg */
+		up->ureg = (Ureg *) (*(uintptr*) (oureg - BY2WD));
+		qunlock(&up->debug);
+		break;
+	
+	case NSAVE:
+		if(!okaddr(nureg->pc, BY2WD, 0) || !okaddr(nureg->sp, BY2WD, 0) ||
+				(nureg->pc & 3) != 0 || (nureg->sp & 7) != 0){
+			qunlock(&up->debug);
+			pprint("suicide: trap in noted\n");
+			pexit("Suicide", 0);
+		}
+		qunlock(&up->debug);
+		sp = oureg - 4 * BY2WD - ERRMAX;
+		splhi();
+		ureg->sp = sp;
+		ureg->r0 = (uintptr) oureg;
+		((uintptr *) sp)[1] = oureg;
+		((uintptr *) sp)[0] = 0;
+		break;
+	
+	default:
+		up->lastnote.flag = NDebug;
+		/* wet floor */
+	
+	case NDFLT:
+		qunlock(&up->debug);
+		if(up->lastnote.flag == NDebug)
+			pprint("suicide: %s\n", up->lastnote.msg);
+		pexit(up->lastnote.msg, up->lastnote.flag != NDebug);
+	}
+}
+
+/*
+ * Handle an instruction or data abort.
+ * The fault status code is the low 6 bits of ureg->type and bit 6
+ * tells a write from a read.  Translation, access flag, permission
+ * and tlb conflict faults go to the portable fault(); if that fails,
+ * or for any other status, the kernel panics on a kernel fault and
+ * posts a note on a user fault.
+ * A kernel fault at _peekinst is not an error: execution resumes
+ * at the link register.
+ */
+void
+faultarm64(Ureg *ureg)
+{
+	extern void checkpages(void);
+	char buf[ERRMAX];
+	int read, insyscall;
+	uintptr addr;
+
+	insyscall = up->insyscall;
+	up->insyscall = 1;
+
+	if(!userureg(ureg)){
+		extern void _peekinst(void);
+
+		if(ureg->pc == (uintptr)_peekinst){
+			ureg->pc = ureg->link;
+			goto out;
+		}
+
+		if(waserror()){
+			if(up->nerrlab == 0){
+				pprint("suicide: sys: %s\n", up->errstr);
+				pexit(up->errstr, 1);
+			}
+			up->insyscall = insyscall;
+			nexterror();
+		}
+	}
+
+	addr = getfar();
+	read = (ureg->type & (1<<6)) == 0;
+
+	switch((u32int)ureg->type & 0x3F){
+	case  4: case  5: case  6: case  7:	// Translation fault.
+	case  8: case  9: case 10: case 11:	// Access flag fault.
+	case 12: case 13: case 14: case 15:	// Permission fault.
+	case 48:				// tlb conflict fault.
+		if(fault(addr, ureg->pc, read) == 0)
+			break;
+
+		/* wet floor */
+	case  0: case  1: case  2: case  3:	// Address size fault.
+	case 16: 				// synchronous external abort.
+	case 24: 				// synchronous parity error on a memory access.
+	case 20: case 21: case 22: case 23:	// synchronous external abort on a table walk.
+	case 28: case 29: case 30: case 31:	// synchronous parity error on table walk.
+	case 33:				// alignment fault.
+	case 52:				// implementation defined, lockdown abort.
+	case 53:				// implementation defined, unsupported exclusive.
+	case 61:				// first level domain fault
+	case 62:				// second level domain fault
+	default:
+		if(!userureg(ureg)){
+			dumpregs(ureg);
+			panic("fault: %s addr=%#p", read ? "read" : "write", addr);
+		}
+		checkpages();
+		sprint(buf, "sys: trap: fault %s addr=%#p", read ? "read" : "write", addr);
+		postnote(up, 1, buf, NDebug);
+	}
+
+	/* matches the waserror() above */
+	if(!userureg(ureg))
+		poperror();
+
+out:
+	up->insyscall = insyscall;
+}
+
+/*
+ * Return 1 when ureg was saved from user mode, that is when
+ * the low four bits of its psr are all zero, else 0.
+ */
+int
+userureg(Ureg* ureg)
+{
+	if((ureg->psr & 15) != 0)
+		return 0;
+	return 1;
+}
+
+/* return the pc recorded in the debug registers of the current process */
+uintptr
+userpc(void)
+{
+	Ureg *dbg;
+
+	dbg = up->dbgreg;
+	return dbg->pc;
+}
+
+/*
+ * Return the pc recorded in the debug registers of process p,
+ * or 0 when p has none.
+ */
+uintptr
+dbgpc(Proc *p)
+{
+	Ureg *ur;
+
+	/* look at the process asked about, not at the caller (up) */
+	ur = p->dbgreg;
+	if(ur == nil)
+		return 0;
+	return ur->pc;
+}
+
+/*
+ * Copy the machine dependent state of the current process to
+ * its new child p: the saved fpu registers (saving them first
+ * when they are live) and the tpidr value.
+ */
+void
+procfork(Proc *p)
+{
+	int s;
+
+	s = splhi();
+	switch(up->fpstate & ~FPillegal){
+	case FPactive:
+		fpsave(up->fpsave);
+		up->fpstate = FPinactive;
+		/* wet floor */
+	case FPinactive:
+		memmove(p->fpsave, up->fpsave, sizeof(FPsave));
+		p->fpstate = FPinactive;
+	}
+	splx(s);
+
+	p->tpidr = up->tpidr;
+}
+
+/*
+ * Reset the machine dependent state of p: the fpu state goes
+ * back to FPinit with the fpu turned off, and tpidr is cleared
+ * both in p and in TPIDR_EL0.
+ */
+void
+procsetup(Proc *p)
+{
+	p->fpstate = FPinit;
+	fpoff();
+
+	p->tpidr = 0;
+	syswr(TPIDR_EL0, p->tpidr);
+}
+
+/*
+ * Save the machine dependent state of p:
+ * live fpu registers are saved (or just discarded when the
+ * process is Moribund), TPIDR_EL0 is recorded unless p->kp is
+ * set, and the asid is released.
+ */
+void
+procsave(Proc *p)
+{
+	if(p->fpstate == FPactive){
+		if(p->state == Moribund)
+			fpclear();
+		else
+			fpsave(p->fpsave);
+		p->fpstate = FPinactive;
+	}
+
+	if(p->kp == 0)
+		p->tpidr = sysrd(TPIDR_EL0);
+
+	putasid(p);	// release asid
+}
+
+/* reload TPIDR_EL0 with the value saved by procsave(), unless p->kp is set */
+void
+procrestore(Proc *p)
+{
+	if(p->kp == 0)
+		syswr(TPIDR_EL0, p->tpidr);
+}
+
+/*
+ * Set up the scheduling context of a new kernel process so it
+ * starts executing at entry on its own kernel stack.
+ */
+void
+kprocchild(Proc *p, void (*entry)(void))
+{
+	p->sched.pc = (uintptr) entry;
+	p->sched.sp = (uintptr) p->kstack + KSTACK - 16;
+	*(void**)p->sched.sp = kprocchild;	/* fake */
+}
+
+/*
+ * Set up the scheduling context of a forked child so it resumes
+ * in forkret with a copy of the parent's user registers placed
+ * 16 bytes above its stack pointer, and r0 set to 0.
+ */
+void
+forkchild(Proc *p, Ureg *ureg)
+{
+	Ureg *cureg;
+
+	p->sched.pc = (uintptr) forkret;
+	p->sched.sp = (uintptr) p->kstack + KSTACK - TRAPFRAMESIZE;
+
+	cureg = (Ureg*) (p->sched.sp + 16);
+	memmove(cureg, ureg, sizeof(Ureg));
+	cureg->r0 = 0;
+}
+
+/*
+ * Set the user registers of the current process for starting a
+ * new program at entry: nargs is stored just below the ssize
+ * bytes at the top of the stack, sp points at it and the link
+ * register is cleared.  Returns USTKTOP-sizeof(Tos).
+ */
+uintptr
+execregs(uintptr entry, ulong ssize, ulong nargs)
+{
+	uintptr *sp;
+	Ureg *ureg;
+
+	sp = (uintptr*)(USTKTOP - ssize);
+	*--sp = nargs;
+
+	ureg = up->dbgreg;
+	ureg->sp = (uintptr)sp;
+	ureg->pc = entry;
+	ureg->link = 0;
+	return USTKTOP-sizeof(Tos);
+}
+
+/*
+ * Raise an error (after posting a note to the current process)
+ * unless addr is a multiple of 4.
+ */
+void
+evenaddr(uintptr addr)
+{
+	if((addr & 3) == 0)
+		return;
+	postnote(up, 1, "sys: odd address", NDebug);
+	error(Ebadarg);
+}
+
+/*
+ * Call f with a Ureg describing the caller of this function.
+ * Only pc and sp are filled in; the other fields are left
+ * uninitialized.
+ */
+void
+callwithureg(void (*f) (Ureg *))
+{
+	Ureg u;
+	
+	u.pc = getcallerpc(&f);
+	u.sp = (uintptr) &f;
+	f(&u);
+}
+
+/*
+ * Fill ureg with the saved scheduling context of p (its pc
+ * and sp), using sched as the link register value.
+ */
+void
+setkernur(Ureg *ureg, Proc *p)
+{
+	ureg->link = (uintptr)sched;
+	ureg->sp = p->sched.sp;
+	ureg->pc = p->sched.pc;
+}
+
+/* watchpoints are not implemented on this port */
+void
+setupwatchpts(Proc*, Watchpt*, int)
+{
+}
+
+/*
+ * Copy n bytes of register values from uva to pureg, then repair
+ * the psr in ureg so that only the bits in USPSRMASK take the new
+ * value; all other bits keep the value they had before the copy.
+ *
+ * NOTE(review): the old psr is held in a ulong; if ulong is
+ * narrower than ureg->psr the upper bits are not preserved --
+ * confirm this is harmless.
+ */
+void
+setregisters(Ureg* ureg, char* pureg, char* uva, int n)
+{
+	ulong v;
+
+	v = ureg->psr;
+	memmove(pureg, uva, n);
+	ureg->psr = (ureg->psr & USPSRMASK) | (v & ~USPSRMASK);
+}
+
+/*
+ * Print a stack trace starting from the context in ureg:
+ * first a ktrace command line with pc, sp and link, then every
+ * word on the stack that looks like a kernel text address.
+ * Does nothing but print a message when the *nodumpstack
+ * configuration variable is set to anything other than "0".
+ */
+static void
+dumpstackwithureg(Ureg *ureg)
+{
+	uintptr v, estack, sp;
+	char *s;
+	int i;
+
+	if((s = getconf("*nodumpstack")) != nil && strcmp(s, "0") != 0){
+		iprint("dumpstack disabled\n");
+		return;
+	}
+	iprint("ktrace /kernel/path %#p %#p %#p # pc, sp, link\n",
+		ureg->pc, ureg->sp, ureg->link);
+	delay(2000);
+
+	/* fall back to our own stack when sp is not a plausible kernel stack pointer */
+	sp = ureg->sp;
+	if(sp < KZERO || (sp & 7) != 0)
+		sp = (uintptr)&ureg;
+
+	/* end of stack: the Mach stack, or the process kernel stack when sp is within it */
+	estack = (uintptr)m+MACHSIZE;
+	if(up != nil && sp <= (uintptr)up->kstack+KSTACK)
+		estack = (uintptr)up->kstack+KSTACK;
+
+	if(sp > estack){
+		if(up != nil)
+			iprint("&up->kstack %#p sp %#p\n", up->kstack, sp);
+		else
+			iprint("&m %#p sp %#p\n", m, sp);
+		return;
+	}
+
+	/* print matches four to a line; only the low 32 bits are shown */
+	i = 0;
+	for(; sp < estack; sp += sizeof(uintptr)){
+		v = *(uintptr*)sp;
+		if(KTZERO < v && v < (uintptr)etext && (v & 3) == 0){
+			iprint("%#8.8lux=%#8.8lux ", (ulong)sp, (ulong)v);
+			i++;
+		}
+		if(i == 4){
+			i = 0;
+			iprint("\n");
+		}
+	}
+	if(i)
+		iprint("\n");
+}
+
+/* print a stack trace of the caller */
+void
+dumpstack(void)
+{
+	callwithureg(dumpstackwithureg);
+}
+
+/*
+ * Print the registers in ureg on the console with interrupts
+ * disabled: R0 to R29 three per line, then pc, sp, link, psr
+ * and type.
+ */
+void
+dumpregs(Ureg *ureg)
+{
+	u64int *r;
+	int i, x;
+
+	x = splhi();
+	if(up != nil)
+		iprint("cpu%d: dumpregs ureg %#p process %lud: %s\n", m->machno, ureg,
+			up->pid, up->text);
+	else
+		iprint("cpu%d: dumpregs ureg %#p\n", m->machno, ureg);
+	/* assumes the general registers are laid out contiguously starting at r0 */
+	r = &ureg->r0;
+	for(i = 0; i < 30; i += 3)
+		iprint("R%d %.16llux  R%d %.16llux  R%d %.16llux\n", i, r[i], i+1, r[i+1], i+2, r[i+2]);
+	iprint("PC %#p  SP %#p  LR %#p  PSR %llux  TYPE %llux\n",
+		ureg->pc, ureg->sp, ureg->link,
+		ureg->psr, ureg->type);
+	splx(x);
+}
--- /dev/null
+++ b/sys/src/9/imx8/uartimx.c
@@ -1,0 +1,383 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "../port/error.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+
+enum {	/* UART registers as u32int indices (byte offset/4), each followed by its bit fields */
+	URXD	= 0x00/4,	/* UART Receiver Register */
+		RX_CHARRDY	= 1<<15,	/* Character Ready */
+		RX_ERR		= 1<<14,	/* Error Detect */
+		RX_OVRRUN	= 1<<13,	/* Receiver Overrun */
+		RX_FRMERR	= 1<<12,	/* Frame Error */
+		RX_BRK		= 1<<11,	/* BREAK Detect */
+		RX_PRERR	= 1<<10,	/* Parity Error */
+		RX_DATA		= 0xFF,		/* Received Data */
+
+	UTXD	= 0x40/4,	/* UART Transmitter Register */
+		TX_DATA		= 0xFF,		/* Transmit Data */
+
+	UCR1	= 0x80/4,	/* UART Control Register 1 */
+		CR1_ADEN	= 1<<15,	/* Automatic Baud Rate Detection Interrupt Enable */
+		CR1_ADNR	= 1<<14,	/* Automatic Detection of Baud Rate */
+		CR1_TRDYEN	= 1<<13,	/* Transmitter Ready Interrupt Enable */
+		CR1_IDEN	= 1<<12,	/* Idle Condition Detected Interrupt Enable */
+
+		CR1_ICD_SHIFT	= 10,		/* Idle Condition Detect Mask */
+		CR1_ICD_MASK	= 3<<CR1_ICD_SHIFT,
+
+		CR1_RRDYEN	= 1<<9,		/* Receiver Ready Interrupt Enable */
+		CR1_RXDMAEN	= 1<<8,		/* Receive Ready DMA Enable */
+		CR1_IREN	= 1<<7,		/* Infrared Interface Enable */
+		CR1_TXMPTYEN	= 1<<6,		/* Transmitter Empty Interrupt Enable */
+		CR1_RTSDEN	= 1<<5,		/* RTS Delta Interrupt Enable */
+		CR1_SNDBRK	= 1<<4,		/* Send BREAK */
+		CR1_TXDMAEN	= 1<<3,		/* Transmitter Ready DMA Enable */
+		CR1_ATDMAEN	= 1<<2,		/* Aging DMA Timer Enable */
+		CR1_DOZE	= 1<<1,		/* DOZE */
+		CR1_UARTEN	= 1<<0,		/* Uart Enable */
+
+	UCR2	= 0x84/4,	/* UART Control Register 2 */
+		CR2_ESCI	= 1<<15,	/* Escape Sequence Interrupt Enable */
+		CR2_IRTS	= 1<<14,	/* Ignore RTS Pin */
+		CR2_CTSC	= 1<<13,	/* CTS Pin Control */
+		CR2_CTS		= 1<<12,	/* Clear to Send */
+		CR2_ESCEN	= 1<<11,	/* Escape Enable */
+
+		CR2_RTEC_RAISING= 0<<9,		/* Request to Send Edge Control: rising edge */
+		CR2_RTEC_FALLING= 1<<9,		/* falling edge */
+		CR2_RTEC_ANY	= 2<<9,		/* any edge */
+		CR2_RTEC_MASK	= 3<<9,
+
+		CR2_PREN	= 1<<8,		/* Parity Enable */
+		CR2_PREVEN	= 0<<7,		/* Parity Even */
+		CR2_PRODD	= 1<<7,		/* Parity Odd */
+		CR2_STPB	= 1<<6,		/* Stop */
+		CR2_WS8		= 1<<5,		/* Word Size */
+		CR2_WS7		= 0<<5,
+		CR2_RTSEN	= 1<<4,		/* Request to Send Interrupt Enable */
+		CR2_ATEN	= 1<<3,		/* Aging Timer Enable */
+		CR2_TXEN	= 1<<2,		/* Transmitter Enable */
+		CR2_RXEN	= 1<<1,		/* Receiver Enable */
+		CR2_SRST	= 1<<0,		/* Software Reset */
+
+	UCR3	= 0x88/4,	/* UART Control Register 3 */
+		CR3_PARERREN	= 1<<12,	/* Parity Error Interrupt Enable */
+		CR3_FRAERREN	= 1<<11,	/* Frame Error Interrupt Enable */
+		CR3_ADNIMP	= 1<<7,		/* Autobaud Detection Not Improved */
+		CR3_RXDSEN	= 1<<6,		/* Receive Status Interrupt Enable */
+		CR3_AIRINTEN	= 1<<5,		/* Asynchronous IR WAKE Interrupt Enable */
+		CR3_AWAKEN	= 1<<4,		/* Asynchronous WAKE Interrupt Enable */
+		CR3_RXDMUXSEL	= 1<<2,		/* RXD Muxed Input Selected */
+		CR3_INVT	= 1<<1,		/* Invert TXD output in RS-232/RS-485 mode */
+		CR3_ACIEN	= 1<<0,		/* Autobaud Counter Interrupt Enable */
+
+	UCR4	= 0x8C/4,	/* UART Control Register 4 */
+		CR4_CTSTL_SHIFT	= 10,		/* CTS Trigger Level */
+		CR4_CTSTL_MASK	= 0x3F<<CR4_CTSTL_SHIFT,
+
+		CR4_INVR	= 1<<9,		/* Invert RXD Input in RS-232/RS-485 Mode */
+		CR4_ENIRI	= 1<<8,		/* Serial Infrared Interrupt Enable */
+		CR4_WKEN	= 1<<7,		/* WAKE Interrupt Enable */
+		CR4_IDDMAEN	= 1<<6,		/* DMA IDLE Condition Detected Interrupt Enable */
+		CR4_IRSC	= 1<<5,		/* IR Special Case */
+		CR4_LPBYP	= 1<<4,		/* Low Power Bypass */
+		CR4_TCEN	= 1<<3,		/* Transmit Complete Interrupt Enable */
+		CR4_BKEN	= 1<<2,		/* BREAK Condition Detected Interrupt Enable */
+		CR4_OREN	= 1<<1,		/* Receiver Overrun Interrupt Enable */
+		CR4_DREN	= 1<<0,		/* Receive Data Interrupt Enable */
+
+	UFCR	= 0x90/4,	/* UART FIFO Control Register */
+		FCR_TXTL_SHIFT	= 10,		/* Transmitter Trigger Level */
+		FCR_TXTL_MASK	= 0x3F<<FCR_TXTL_SHIFT,
+
+		FCR_RFDIV_SHIFT	= 7,		/* Reference Frequency Divider */
+		FCR_RFDIV_MASK	= 0x7<<FCR_RFDIV_SHIFT,
+
+		FCR_DCE		= 0<<6,		/* DCE/DTE mode select */
+		FCR_DTE		= 1<<6,
+
+		FCR_RXTL_SHIFT	= 0,		/* Receive Trigger Level */
+		FCR_RXTL_MASK	= 0x3F<<FCR_RXTL_SHIFT,
+
+	USR1	= 0x94/4,	/* UART Status Register 1 */
+		SR1_PARITYERR	= 1<<15,	/* Parity Error Interrupt Flag */
+		SR1_RTSS	= 1<<14,	/* RTS_B Pin Status */
+		SR1_TRDY	= 1<<13,	/* Transmitter Ready Interrupt / DMA Flag */
+		SR1_RTSD	= 1<<12,	/* RTS Delta */
+		SR1_ESCF	= 1<<11,	/* Escape Sequence Interrupt Flag */
+		SR1_FRAMEERR	= 1<<10,	/* Frame Error Interrupt Flag */
+		SR1_RRDY	= 1<<9,		/* Receiver Ready Interrupt / DMA Flag */
+		SR1_AGTIM	= 1<<8,		/* Aging Timer Interrupt Flag */
+		SR1_DTRD	= 1<<7,		/* DTR Delta */
+		SR1_RXDS	= 1<<6,		/* Receiver IDLE Interrupt Flag */
+		SR1_AIRINT	= 1<<5,		/* Asynchronous IR WAKE Interrupt Flag */
+		SR1_AWAKE	= 1<<4,		/* Asynchronous WAKE Interrupt Flag */
+		SR1_SAD		= 1<<3,		/* RS-485 Slave Address Detected Interrupt Flag */
+
+	USR2	= 0x98/4,	/* UART Status Register 2 */
+		SR2_ADET	= 1<<15,	/* Automatic Baud Rate Detected Complete */
+		SR2_TXFE	= 1<<14,	/* Transmit Buffer FIFO Empty */
+		SR2_DTRF	= 1<<13,	/* DTR Edge Triggered Interrupt Flag */
+		SR2_IDLE	= 1<<12,	/* Idle Condition */
+		SR2_ACST	= 1<<11,	/* Autobaud Counter Stopped */
+		SR2_RIDELT	= 1<<10,	/* Ring Indicator Delta */
+		SR2_RIIN	= 1<<9,		/* Ring Indicator Input */
+		SR2_IRINT	= 1<<8,		/* Serial Infrared Interrupt Flag */
+		SR2_WAKE	= 1<<7,		/* Wake */
+		SR2_DCDDELT	= 1<<6,		/* Data Carrier Detect Delta */
+		SR2_DCDIN	= 1<<5,		/* Data Carrier Detect Input */
+		SR2_RTSF	= 1<<4,		/* RTS Edge Triggered Interrupt Flag */
+		SR2_TXDC	= 1<<3,		/* Transmitter Complete */
+		SR2_BRCD	= 1<<2,		/* BREAK Condition Detected */
+		SR2_ORE		= 1<<1,		/* Overrun Error */
+		SR2_RDR		= 1<<0,		/* Receive Data Ready */
+
+	UESC	= 0x9C/4,	/* UART Escape Character Register */
+	UTIM	= 0xA0/4,	/* UART Escape Timer Register */
+	UBIR	= 0xA4/4,	/* UART BRM Incremental Modulator Register */
+	UBMR	= 0xA8/4,	/* UART BRM Modulator Register */
+	UBRC	= 0xAC/4,	/* UART Baud Rate Count Register */
+	ONEMS	= 0xB0/4,	/* UART One-Millisecond Register */
+	UTS	= 0xB4/4,	/* UART Test Register */
+	UMCR	= 0xB8/4,	/* UART RS-485 Mode Control Register */
+};
+
+extern PhysUart imxphysuart;	/* forward declaration; the table is defined at the bottom of this file */
+
+static Uart uart1 = {	/* the single port this driver describes; pnp() returns it */
+	.regs	= (u32int*)(VIRTIO + 0x860000ULL),	/* register block at offset 0x860000 from VIRTIO */
+	.name	= "uart1",
+	.baud	= 115200,	/* initial line speed */
+	.freq	= 25*Mhz,	/* clock rate config() uses to compute UBMR */
+	.phys	= &imxphysuart,
+};
+
+static Uart*
+pnp(void)	/* PhysUart pnp hook: always returns the statically defined uart1 */
+{
+	return &uart1;
+}
+
+/* kick: move queued output into the transmitter while USR1 reports TRDY. */
+static void
+kick(Uart *u)
+{
+	u32int *r = (u32int*)u->regs;
+
+	if(u->blocked)
+		return;	/* nothing is sent while u->blocked is set */
+	/* each pass sends one byte; the send is the loop's step expression */
+	for(; r[USR1] & SR1_TRDY; r[UTXD] = *u->op++ & TX_DATA)
+		if(u->op >= u->oe && uartstageoutput(u) == 0)
+			break;	/* buffer consumed and uartstageoutput() returned 0 */
+}
+
+static void
+config(Uart *u)	/* program the control, FIFO and baud registers from the settings stored in u */
+{
+	u32int cr2, *regs = u->regs;
+
+	/* enable uart */
+	regs[UCR1] = CR1_UARTEN;	/* no interrupt enables yet; the final UCR1 write adds them */
+
+	cr2 = CR2_SRST | CR2_IRTS | CR2_RXEN | CR2_TXEN;
+	switch(u->parity){	/* any value other than 'e' or 'o' leaves CR2_PREN clear */
+	case 'e': cr2 |= CR2_PREN | CR2_PREVEN; break;
+	case 'o': cr2 |= CR2_PREN | CR2_PRODD; break;
+	}
+	cr2 |= u->bits == 7 ? CR2_WS7 : CR2_WS8;	/* anything but 7 selects 8-bit words */
+	if(u->stop == 2) cr2 |= CR2_STPB;
+	regs[UCR2] = cr2;
+	regs[UCR3] = 0x7<<8 | CR3_RXDMUXSEL;	/* NOTE(review): bits 8-10 have no named constant in this file -- confirm their meaning */
+	regs[UCR4] = 31<<CR4_CTSTL_SHIFT;	/* CTS trigger level field set to 31 */
+
+	/* baud = clock / (16 * (ubmr+1)/(ubir+1)) */
+	regs[UFCR] = (6 - 1)<<FCR_RFDIV_SHIFT | 32<<FCR_TXTL_SHIFT | 32<<FCR_RXTL_SHIFT;	/* RFDIV field 5; TX and RX trigger levels 32 */
+	regs[UBIR] = ((16*u->baud) / 1600)-1;	/* 16*baud and freq are both scaled down by 1600 */
+	regs[UBMR] = (u->freq / 1600)-1;
+
+	regs[UCR1] = CR1_UARTEN | CR1_TRDYEN | CR1_RRDYEN;	/* keep the UART on, add TX-ready and RX-ready interrupts */
+}
+
+/*
+ * bits: select the character size.
+ * Only 7 and 8 are accepted; any other n returns -1 and
+ * leaves the port untouched.  Otherwise n is stored in
+ * u->bits, config() reprograms the port and 0 is returned.
+ */
+static int
+bits(Uart *u, int n)
+{
+	if(n != 7 && n != 8)
+		return -1;
+	u->bits = n;
+	config(u);
+	return 0;
+}
+
+/*
+ * stop: select the number of stop bits.
+ * Only 1 and 2 are accepted; any other n returns -1 and
+ * leaves the port untouched.  Otherwise n is stored in
+ * u->stop, config() reprograms the port and 0 is returned.
+ */
+static int
+stop(Uart *u, int n)
+{
+	if(n != 1 && n != 2)
+		return -1;
+	u->stop = n;
+	config(u);
+	return 0;
+}
+
+/* parity: select parity 'n', 'e' or 'o'; any other n returns -1 with the port untouched. */
+/* On success n is stored in u->parity, config() reprograms the port and 0 is returned. */
+static int
+parity(Uart *u, int n)
+{
+	switch(n){
+	case 'n':
+	case 'e':
+	case 'o':
+		break;
+	default:
+		return -1;
+	}
+	u->parity = n;
+	config(u);
+	return 0;
+}
+
+static int	/* baud: set the line speed to n; returns 0, or -1 when n cannot be programmed */
+baud(Uart *u, int n)
+{
+	if(u->freq == 0 || n < 100)	/* config() writes 16*baud/1600-1 to UBIR, which underflows below 100 baud */
+		return -1;
+	u->baud = n;
+	config(u);	/* reprogram the port with the new rate */
+	return 0;
+}
+
+static void
+rts(Uart*, int)	/* PhysUart rts hook: empty body, both arguments are ignored */
+{
+}
+
+static void
+dobreak(Uart*, int)	/* PhysUart dobreak hook: empty body, no BREAK is generated */
+{
+}
+
+/*
+ * status: format the port's status text and return the part of it
+ * selected by offset and n through readstr().
+ * The text holds the baud rate followed by the dev, type, framing,
+ * overruns, berr and serr values kept in *uart.
+ * A READSTR-byte buffer is allocated for the text and freed before
+ * returning; when malloc returns nil, error(Enomem) is called.
+ */
+static long
+status(Uart *uart, void *buf, long n, long offset)
+{
+	char *s;
+
+	if((s = malloc(READSTR)) == nil)
+		error(Enomem);
+	snprint(s, READSTR,
+		"b%d\n"
+		"dev(%d) type(%d) framing(%d) overruns(%d) "
+		"berr(%d) serr(%d)\n",
+		uart->baud, uart->dev, uart->type,
+		uart->ferr, uart->oerr, uart->berr, uart->serr);
+	/* n becomes whatever readstr() reports for this offset */
+	n = readstr(offset, buf, n, s);
+	free(s);
+	return n;
+}
+
+/* interrupt: pass every character URXD reports ready to uartrecv(), then call uartkick(). */
+static void
+interrupt(Ureg*, void *arg)
+{
+	Uart *u = arg;
+	u32int *r = (u32int*)u->regs, w;
+
+	for(w = r[URXD]; (w & RX_CHARRDY) != 0; w = r[URXD])
+		uartrecv(u, w & RX_DATA);
+	uartkick(u);
+}
+
+static void
+disable(Uart *u)	/* switch the port off by clearing every bit of UCR1, CR1_UARTEN included */
+{
+	u32int *regs = u->regs;
+	regs[UCR1] = 0;
+}
+
+static void
+enable(Uart *u, int ie)	/* (re)start the port; a nonzero ie also registers the interrupt handler */
+{
+	disable(u);	/* clear UCR1 before anything else is touched */
+	if(ie) intrenable(IRQuart1, interrupt, u, BUSUNKNOWN, u->name);	/* interrupt() receives u as its argument */
+	config(u);	/* program the registers; its last write sets CR1_UARTEN again */
+}
+
+static void
+donothing(Uart*, int)	/* empty hook used for the modemctl, dtr and fifo entries of imxphysuart */
+{
+}
+
+/* putc: polled output; spin until USR1 shows TRDY, then write the low 8 bits of c to UTXD. */
+static void
+putc(Uart *u, int c)
+{
+	u32int *r = u->regs;
+
+	while(!(r[USR1] & SR1_TRDY)){}
+	r[UTXD] = c & TX_DATA;
+}
+
+/* getc: polled input; reread URXD until a value has RX_CHARRDY set, */
+/* then return that value's low 8 data bits. */
+static int
+getc(Uart *u)
+{
+	u32int w, *r = (u32int*)u->regs;
+
+	while(((w = r[URXD]) & RX_CHARRDY) == 0){}
+	return w & RX_DATA;
+}
+
+void
+uartconsinit(void)	/* make uart1 the console uart and write the buffered kernel messages to it */
+{
+	consuart = &uart1;
+	consuart->console = 1;
+	uartctl(consuart, "l8 pn s1");	/* presumably 8 data bits, no parity, 1 stop bit -- confirm against uartctl */
+	uartputs(kmesg.buf, kmesg.n);	/* output the kmesg.n bytes held in kmesg.buf */
+}
+
+PhysUart imxphysuart = {	/* operations table for this driver; uart1.phys points here */
+	.name		= "imx",
+	.pnp		= pnp,
+	.enable		= enable,
+	.disable	= disable,
+	.kick		= kick,
+	.dobreak	= dobreak,	/* empty stub */
+	.baud		= baud,
+	.bits		= bits,
+	.stop		= stop,
+	.parity		= parity,
+	.modemctl	= donothing,	/* empty stub */
+	.rts		= rts,	/* empty stub */
+	.dtr		= donothing,	/* empty stub */
+	.status		= status,
+	.fifo		= donothing,	/* empty stub */
+	.getc		= getc,	/* polled input */
+	.putc		= putc,	/* polled output */
+};
+