/*
 * git: 9front
 * ref: f177d6657a3b119be124b27f73f34a3ba7ccdbe8
 * dir: /sys/src/9/teg2/arm.s/
 */
/*
 * nvidia tegra 2 machine assist, definitions
 * dual-core cortex-a9 processor
 *
 * R9 and R10 are used for `extern register' variables.
 * R11 is used by the loader as a temporary, so avoid it.
 */

#include "mem.h"
#include "arm.h"

/*
 * drop any B macro inherited from the headers included above.
 * NOTE(review): presumably so that B assembles as the branch mnemonic
 * in files that include this one -- confirm against mem.h and arm.h.
 */
#undef B					/* B is for 'botch' */

/*
 * address-space conversions: clear the KSEGM bits of the argument,
 * then install the base of the target space in their place.
 */
#define KADDR(pa)	(((pa) & ~KSEGM) | KZERO)	/* pa -> address based at KZERO */
#define PADDR(va)	(((va) & ~KSEGM) | PHYSDRAM)	/* va -> address based at PHYSDRAM */

/*
 * va>>20 is va's megabyte number, masked to 12 bits; <<2 scales that
 * index by 4, yielding a byte offset into a table of 4-byte entries.
 */
#define L1X(va)		((((va) >> 20) & 0x0fff) << 2)

/* start of the MACHSIZE bytes that end at L1 */
#define MACHADDR	(L1-MACHSIZE)		/* only room for cpu0's */

/*
 * L1 pte values.
 * both are Section entries in Dom0 with L1AP(Krw) access permissions;
 * PTEDRAM differs from PTEIO only by the addition of L1ptedramattrs.
 */
#define PTEDRAM	(Section|Dom0|L1AP(Krw)|L1ptedramattrs)
#define PTEIO	(Section|Dom0|L1AP(Krw))

/* a count of megabytes, not bytes: 512 MB is 0x20000000 bytes */
#define DOUBLEMAPMBS	 512	/* megabytes of low dram to double-map */

/*
 * busy-wait: count R0 down from mloops million to zero in a
 * two-instruction loop.
 * label is defined by the expansion, so it must be distinct at each use.
 * steps on R0 and on the condition codes (SUB.S).
 * the count is decremented before it is tested, so an mloops of 0
 * wraps around and loops 2^32 times rather than not at all.
 */
#define DELAY(label, mloops) \
	MOVW	$((mloops)*1000000), R0; \
label: \
	SUB.S	$1, R0; \
	BNE	label

/*
 * print a byte on the serial console; clobbers R0 & R6; needs R12 (SB) set.
 * c is stored as a full word at PHYSCONS, with BARRIERS issued both
 * before and after the store.
 * nothing here waits for the device to become ready before the store.
 */
#define PUTC(c) \
	BARRIERS; \
	MOVW	$(c), R0; \
	MOVW	$PHYSCONS, R6; \
	MOVW	R0, (R6); \
	BARRIERS

/*
 * new instructions
 */

/* secure monitor call, emitted as a raw word; as written the call # is 0 */
#define SMC	WORD	$0xe1600070	/* low 4-bits are call # (trustzone) */
/*
 * flush branch-target cache.
 * MTCP takes a source register and PC is the one named here.
 * NOTE(review): presumably the value written is ignored by this
 * operation, which is why no register needs to be set up -- confirm.
 */
#define FLBTC  MTCP CpSC, 0, PC, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtc
/*
 * flush one entry of the branch-target cache, va in R0 (cortex).
 * the caller must load R0 first; R0 is read, not modified.
 */
#define FLBTSE MTCP CpSC, 0, R0, C(CpCACHE), C(CpCACHEinvi), CpCACHEflushbtse

/*
 * arm v7 arch defines these.
 * they are emitted as raw words.  the three encodings differ only in
 * bits 7-4, which select the barrier (4 = DSB, 5 = DMB, 6 = ISB);
 * bits 3-0 are the option field, 0xf (SY) in all three.
 */
#define DSB	WORD	$0xf57ff04f	/* data synch. barrier; last f = SY */
#define DMB	WORD	$0xf57ff05f	/* data mem. barrier; last f = SY */
#define ISB	WORD	$0xf57ff06f	/* instr. sync. barrier; last f = SY */

/* hints, emitted as raw words; the two differ only in the low byte */
#define WFI	WORD	$0xe320f003	/* wait for interrupt */
#define NOOP	WORD	$0xe320f000	/* hint 0: the architectural no-op */

/*
 * count leading 0s of register s into register d.
 * s and d are plain register numbers (they are used in arithmetic):
 * d is placed in bits 15-12 of the instruction, s in bits 3-0.
 */
#define CLZ(s, d) WORD	$(0xe16f0f10 | ((d) << 12) | (s))

/* set data endianness; o is placed in bit 9 of the instruction */
#define SETEND(o) WORD	$(0xf1010000 | ((o) << 9))	/* o==0, little-endian */

/*
 * change-processor-state instructions, emitted as raw words.
 * beware the asymmetry between the first two: CPSID sets both the
 * I and F bits, but CPSIE zeroes only I, so CPSID followed by CPSIE
 * leaves F set.
 * CPSMODE's m is or'ed into the low bits of the instruction unmasked,
 * so it must be a bare mode number.
 */
#define CPSIE	WORD	$0xf1080080	/* intr enable: zeroes I bit */
#define CPSID	WORD	$0xf10c00c0	/* intr disable: sets I,F bits */
#define CPSAE	WORD	$0xf1080100	/* async abt enable: zeroes A bit */
#define CPSMODE(m) WORD $(0xf1020000 | (m)) /* switch to mode m (PsrM*) */

/* clear exclusive: drops this cpu's outstanding exclusive-access reservation */
#define	CLREX	WORD	$0xf57ff01f

/*
 * floating point: move between an FP system register and an arm register.
 * both arguments are plain register numbers; whichever macro is used,
 * fp is placed in bits 19-16 and cpu in bits 15-12 of the instruction.
 * the first argument is always the source and the second the destination.
 */
#define VMRS(fp, cpu) WORD $(0xeef00a10 | ((fp) << 16) | ((cpu) << 12))	/* FP -> arm */
#define VMSR(cpu, fp) WORD $(0xeee00a10 | ((fp) << 16) | ((cpu) << 12))	/* arm -> FP */

/*
 * a popular code sequence used to write a pte for va is:
 *
 *	MOVW	R(n), TTB[LnX(va)]
 *	// clean the cache line
 *	DSB
 *	// invalidate tlb entry for va
 *	FLBTC
 *	DSB
 *	PFF (now ISB)
 *
 * BARRIERS packages the last three steps: flush the branch-target
 * cache, then a data synchronisation barrier, then an instruction
 * synchronisation barrier.  the expansion names no general register
 * as a destination.
 */
#define	BARRIERS	FLBTC; DSB; ISB

/*
 * invoked with PTE bits in R2, pa in R3, PTE pointed to by R4.
 * fill PTE pointed to by R4 and increment R4 past it.
 * increment R3 by a MB.  clobbers R1.
 * R2 is left untouched and no instruction sets the condition codes,
 * so invocations can be strung together to fill consecutive PTEs
 * for consecutive megabytes.
 *
 * the last line of the body deliberately has no trailing backslash:
 * a continuation there would splice whatever follows the macro into it.
 */
#define FILLPTE() \
	ORR	R3, R2, R1;			/* pte bits in R2, pa in R3 */ \
	MOVW	R1, (R4); \
	ADD	$4, R4;				/* bump PTE address */ \
	ADD	$MiB, R3;			/* bump pa */

/*
 * zero PTE pointed to by R4 and increment R4 past it. assumes R0 is 0.
 * whatever R0 holds is what gets stored, so the caller must clear it
 * first; R0 itself is not modified, and the condition codes are untouched.
 */
#define ZEROPTE() \
	MOVW	R0, (R4); \
	ADD	$4, R4;				/* bump PTE address */

/*
 * set kernel SB for zero segment (instead of usual KZERO segment).
 * NB: the next line puts rubbish in R12:
 *	MOVW	$setR12-KZERO(SB), R12
 * so the adjustment is made with separate instructions instead,
 * leaving R12 = setR12 - KZERO + PHYSDRAM.
 * steps on R12 only; the condition codes are untouched.
 */
#define SETZSB \
	MOVW	$setR12(SB), R12;		/* load kernel's SB */ \
	SUB	$KZERO, R12; \
	ADD	$PHYSDRAM, R12

/*
 * note that 5a's RFE is not the v6/7 arch. instruction (0xf8900a00),
 * which loads CPSR from the word after the PC at (R13), but rather
 * the pre-v6 simulation `MOVM.IA.S.W (R13), [R15]' (0xe8fd8000 since
 * MOVM is LDM in this case), which loads CPSR not from memory but
 * from SPSR due to `.S'.
 */
/*
 * the v7 RFE proper, emitted as raw words.
 * r is the plain number of the base register, placed in bits 19-16;
 * 0x00200000 is the writeback (.W) bit.
 */
#define RFEV7(r)    WORD $(0xf8900a00 | ((r) << 16))
#define RFEV7W(r)   WORD $(0xf8900a00 | 0x00200000 | ((r) << 16))	/* RFE.W */
#define RFEV7DB(r)  WORD $(0xf9100a00 | ((r) << 16))			/* RFE.DB */
#define RFEV7DBW(r) WORD $(0xf9100a00 | 0x00200000 | ((r) << 16))	/* RFE.DB.W */

/*
 * both of these expand to nothing, so a use assembles to no
 * instructions and its arguments are discarded.
 * NOTE(review): going by the names, these look like psr sanity checks
 * that have been stubbed out -- confirm before relying on them.
 */
#define CKPSR(psr, tmp, bad)
#define CKCPSR(psrtmp, tmp, bad)

/*
 * return with cpu id in r and condition codes set from "r == 0".
 * reads the system-control coprocessor register selected by
 * CpID/CpIDidct/CpIDmpid into r, then keeps only the bits covered by
 * MAXMACH-1.  that mask is a contiguous run of low bits only when
 * MAXMACH is a power of 2.
 * steps on r and the condition codes (AND.S); a BEQ directly after
 * the macro is taken when the masked id is 0.
 */
#define CPUID(r) \
	MFCP	CpSC, 0, r, C(CpID), C(CpIDidct), CpIDmpid; \
	AND.S	$(MAXMACH-1), r			/* mask out non-cpu-id bits */