git: 9front

ref: f596580cba7c5b7bae84ee7cd83d2b2a438a1e9b
dir: /sys/src/9/teg2/archtegra.c/

View raw version
/*
 * nvidia tegra 2 architecture-specific stuff
 */

#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
#include "io.h"
#include "arm.h"

#include "../port/netif.h"
#include "../port/etherif.h"
#include "../port/flashif.h"

enum {
	/* non-zero enables l1diag and the extra boot-time chatter */
	Debug		= 0,

	/*
	 * hardware limits imposed by register contents or layouts:
	 * the flow controller struct only has registers for cpu0 and cpu1,
	 * while the clock & reset cpu bits are shifted by cpu number.
	 */
	Maxflowcpus	= 2,
	Maxcpus		= 4,
};

/* short names for the register-layout and diagnostic structs in this file */
typedef struct Clkrst Clkrst;
typedef struct Diag Diag;
typedef struct Flow Flow;
typedef struct Power Power;
typedef struct Scu Scu;

/*
 * clock & reset controller registers, overlaid on soc.clkrst.
 * the padding arrays are sized as (next offset - current offset) so
 * that each named register lands at its hardware offset; every padding
 * member has a distinct name.
 */
struct Clkrst {
	ulong	rstsrc;			/* 0x00 */
	ulong	rstdevl;
	ulong	rstdevh;
	ulong	rstdevu;

	ulong	clkoutl;		/* 0x10 */
	ulong	clkouth;
	ulong	clkoutu;

	uchar	_pad0[0x24-0x1c];
	ulong	supcclkdiv;		/* 0x24: super cclk divider */
	ulong	_pad1;
	ulong	supsclkdiv;		/* 0x2c: super sclk divider */

	uchar	_pad4[0x4c-0x30];
	ulong	clkcpu;			/* 0x4c */

	uchar	_pad3[0xe0-0x50];
	ulong	pllxbase;		/* 0xe0: pllx controls CPU clock speed */
	ulong	pllxmisc;
	ulong	pllebase;		/* plle is dedicated to pcie */
	ulong	pllemisc;

	uchar	_pad2[0x340-0xf0];
	ulong	cpuset;			/* 0x340: written by stopcpu to assert resets */
	ulong	cpuclr;			/* 0x344: written by unfreeze to release them */
};

enum {
	/* rstsrc bits */
	Wdcpurst	= 0x01,
	Wdcoprst	= 0x02,
	Wdsysrst	= 0x04,
	Wdsel		= 0x10,		/* tmr1 or tmr2? */
	Wdena		= 0x20,

	/* devl bits */
	Sysreset	= 0x04,

	/* clkcpu bits */
	Cpu0stop	= 0x100,
	Cpu1stop	= 0x200,

	/* cpu* bits; the Cpu0 forms get shifted left by the cpu number */
	Cpu0reset	= 0x0001,
	Cpu1reset	= 0x0002,
	Cpu0dereset	= 0x0010,
	Cpu1dereset	= 0x0020,
	Cpu0wdreset	= 0x0100,
	Cpu1wdreset	= 0x0200,
	Cpu0dbgreset	= 0x1000,
	Cpu1dbgreset	= 0x2000,
};

/*
 * power management registers, overlaid on soc.power.
 * the members are consecutive 32-bit words with no padding, so their
 * order fixes each register's offset; do not reorder them.
 * within this file only gatests is read (by cpustart).
 */
struct Power {
	ulong	ctl;			/* mainly for rtc clock signals */
	ulong	secregdis;
	ulong	swrst;

	ulong	wakevmask;
	ulong	waklvl;
	ulong	waksts;
	ulong	swwaksts;

	ulong	dpdpadsovr;		/* deep power down pads override */
	ulong	dpdsample;
	ulong	dpden;

	ulong	gatetimroff;
	ulong	gatetimron;
	ulong	toggle;
	ulong	unclamp;
	ulong	gatests;		/* ro */

	ulong	goodtmr;
	ulong	blinktmr;

	ulong	noiopwr;
	ulong	detect;
	ulong	detlatch;

	ulong	scratch[24];
	ulong	secscratch[6];

	ulong	cpupwrgoodtmr;
	ulong	cpupwrofftmr;

	ulong	pgmask[2];

	ulong	autowaklvl;
	ulong	autowaklvlmask;
	ulong	wakdelay;

	ulong	detval;
	ulong	ddr;
	ulong	usbdebdel;	/* usb de-bounce delay */
	ulong	usbao;
	ulong	cryptoop;
	ulong	pllpwb0ovr;
	ulong	scratch24[42-24+1];	/* scratch registers 24 through 42 */
	ulong	boundoutmirr[3];
	ulong	sys33ven;
	ulong	boundoutmirracc;
	ulong	gate;
};

enum {
	/* partition ids */
	Partpcie	= 3,
	Partl2		= 4,

	/* toggle bits */
	Start		= 0x100,
};

/*
 * snoop control unit registers, overlaid on soc.scu.
 * padding arrays place filtstart at 0x40 and accctl at 0x50.
 * getncpus and dumpscustate take the cpu count from the low 2 bits of
 * cfg (plus one); dumpscustate prints bits 4-7 as the smp cpu bit map.
 */
struct Scu {
	ulong	ctl;
	ulong	cfg;			/* ro */
	ulong	cpupwrsts;
	ulong	inval;			/* scuon writes MASK(16) here before enabling */

	uchar	_pad0[0x40-0x10];
	ulong	filtstart;
	ulong	filtend;

	uchar	_pad1[0x50-0x48];
	ulong	accctl;			/* initially 0 */
	ulong	nsaccctl;
};

enum {
	/* ctl bits */
	Scuenable	= 0x01,
	Filter		= 0x02,
	Scuparity	= 0x04,
	Specfill	= 0x08,		/* only for PL310 */
	Allport0	= 0x10,
	Standby		= 0x20,
	Icstandby	= 0x40,
};

/*
 * flow controller registers, overlaid on soc.flow.
 * there are halt and control words only for cpu0, cpu1 and the
 * coprocessor (cop); stopcpu and unfreeze select the cpu0 or cpu1
 * pair by cpu number.
 */
struct Flow {
	ulong	haltcpu0;
	ulong	haltcop;
	ulong	cpu0;
	ulong	cop;
	ulong	xrq;
	ulong	haltcpu1;
	ulong	cpu1;
};

enum {
	/* haltcpu* bits */
	Stop			= 0x40000000,	/* 2<<29 */

	/* cpu* bits, lowest bit first */
	Cpuenable		= 0x0001,
	Eventenable		= 0x0002,
	Waitwfebitsshift	= 4,
	Waitwfebitsmask		= MASK(2),
	Event			= 0x4000,	/* w1c */
};

/*
 * state shared by all cpus during the l1diag coherence test.
 * c0 and c1 presumably keep the lock and counters off cache lines
 * shared with neighbouring data -- confirm against Cacheline's definition.
 */
struct Diag {
	Cacheline c0;
	Lock;			/* anonymous: a Diag* is passed directly to ilock/iunlock */
	Ref	cnt;		/* bumped up and down by every cpu in pass1; must end at 0 */
	Ref	sync;		/* barrier counter used by synccpus */
	Cacheline c1;
};

/* defined elsewhere; its address is handed to memdiag by cpustart */
extern ulong testmem;

/*
 * number of cpus available.  contrast with conf.nmach, which is number
 * of running cpus.  stays zero until getncpus first computes it.
 */
int navailcpus;
/*
 * secondary cpus poll l1ptstable.word in cpustart and only go on to
 * mmuinit once it is non-zero or a timeout expires.  it is not set
 * anywhere in this file.
 */
Isolated l1ptstable;

/*
 * addresses of the soc's device register blocks.  the functions in
 * this file cast these to the matching register structs (Clkrst,
 * Power, Scu, Flow) or probe them in chkmissing.  entries wrapped in
 * P2VAHB are presumably converted from physical to virtual addresses
 * for the AHB devices -- confirm against its definition.
 */
Soc soc = {
	.clkrst	= 0x60006000,		/* clock & reset signals */
	.power	= 0x7000e400,
	.exceptvec = PHYSEVP,		/* undocumented magic */
	.sema	= 0x60001000,
	.l2cache= PHYSL2BAG,		/* pl310 bag on the side */
	.flow	= 0x60007000,

	/* 4 non-gic controllers */
//	.intr	= { 0x60004000, 0x60004100, 0x60004200, 0x60004300, },

	/* private memory region */
	.scu	= 0x50040000,
	/* we got this address from the `cortex-a series programmer's guide'. */
	.intr	= 0x50040100,		/* per-cpu interface */
	.glbtmr	= 0x50040200,
	.loctmr	= 0x50040600,
	.intrdist=0x50041000,

	.uart	= { 0x70006000, 0x70006040,
		    0x70006200, 0x70006300, 0x70006400, },

	.rtc	= 0x7000e000,
	.tmr	= { 0x60005000, 0x60005008, 0x60005050, 0x60005058, },
	.µs	= 0x60005010,

	.pci	= 0x80000000,
	.ether	= 0xa0024000,

	.nand	= 0x70008000,
	.nor	= 0x70009000,		/* also VIRTNOR */

	.ehci	= P2VAHB(0xc5000000),	/* 1st of 3 */
	.ide	= P2VAHB(0xc3000000),

	.gpio	= { 0x6000d000, 0x6000d080, 0x6000d100, 0x6000d180,
			    0x6000d200, 0x6000d280, 0x6000d300, },
	.spi	= { 0x7000d400, 0x7000d600, 0x7000d800, 0x7000da00, },
 	.twsi	= 0x7000c000,
	.mmc	= { P2VAHB(0xc8000000), P2VAHB(0xc8000200),
		    P2VAHB(0xc8000400), P2VAHB(0xc8000600), },
};

/* shared state for the l1diag coherence test */
static volatile Diag diag;
/* number of device addresses that failed to probe; bumped by missing */
static int missed;

/*
 * print the pll and super-clock divider registers of the clock &
 * reset controller.  this only reads and prints; nothing is changed.
 */
void
dumpcpuclks(void)
{
	Clkrst *cr;

	cr = (Clkrst *)soc.clkrst;
	iprint("pllx base %#lux misc %#lux\n", cr->pllxbase, cr->pllxmisc);
	iprint("plle base %#lux misc %#lux\n", cr->pllebase, cr->pllemisc);
	iprint("super cclk divider %#lux\n", cr->supcclkdiv);
	iprint("super sclk divider %#lux\n", cr->supsclkdiv);
}

/* return a fixed description of the cpu; the argument is ignored */
static char *
devidstr(ulong)
{
	return "ARM Cortex-A9";
}

/*
 * intentionally empty.  presumably referenced from the kernel
 * configuration's link table so that this file gets linked in -- confirm.
 */
void
archtegralink(void)
{
}

/*
 * format this cpu's name and revision, taken from the main id
 * register, into buf (of size bytes) and return buf.
 * asserts that the register's top byte is 'A'.
 */
char *
cputype2name(char *buf, int size)
{
	ulong id, major, minor;

	id = cpidget();			/* main id register */
	assert((id >> 24) == 'A');
	major = (id >> 20) & MASK(4);
	minor = id & MASK(4);
	seprint(buf, buf + size, "Cortex-A9 r%ldp%ld", major, minor);
	return buf;
}

/*
 * apply cortex-a9 errata workarounds appropriate to this cpu's
 * revision (rMAJORpMINOR from the main id register) by setting bits
 * in an undocumented `diagnostic register' that linux knows.
 */
static void
errata(void)
{
	int r2early;
	ulong id, major, minor, dreg;

	id = cpidget();			/* main id register */
	assert((id >> 24) == 'A');
	minor = id & MASK(4);		/* minor revision */
	major = (id >> 20) & MASK(4);	/* major revision */
	r2early = (major == 2 && minor <= 2);	/* r2p0 through r2p2 */

	dreg = cprdsc(0, CpDTLB, 0, 1);
	if (major < 2 || r2early)
		dreg |= 1<<4;			/* 742230 */
	if (r2early)
		dreg |= 1<<6 | 1<<12 | 1<<22;	/* 743622, 2×742231 */
	if (major < 3)
		dreg |= 1<<11;			/* 751472 */
	cpwrsc(0, CpDTLB, 0, 1, dreg);
}

void
archconfinit(void)
{
	char *p;
	ulong hz;

	assert(m != nil);
	m->cpuhz = 1000 * Mhz;			/* trimslice speed */
	p = getconf("*cpumhz");
	if (p) {
		hz = atoi(p) * Mhz;
		if (hz >= 100*Mhz && hz <= 3600UL*Mhz)
			m->cpuhz = hz;
	}
	m->cyclefreq = m->cpuhz;
	m->delayloop = m->cpuhz/2000;		/* initial estimate */
	errata();
}

/*
 * fill in *ether for ethernet controller ctlrno.
 * only controller 0 exists: returns 1 after configuring it,
 * and -1 (leaving *ether untouched) for any other number.
 */
int
archether(unsigned ctlrno, Ether *ether)
{
	if (ctlrno != 0)
		return -1;

	ether->type = "rtl8169";		/* pci-e ether */
	ether->ctlrno = ctlrno;
	ether->irq = Pcieirq;			/* non-msi pci-e intr */
	ether->nopt = 0;
	ether->mbps = 1000;
	return 1;
}

void
dumpscustate(void)
{
	Scu *scu = (Scu *)soc.scu;

	print("cpu%d scu: accctl %#lux\n", m->machno, scu->accctl);
	print("cpu%d scu: smp cpu bit map %#lo for %ld cpus; ", m->machno,
		(scu->cfg >> 4) & MASK(4), (scu->cfg & MASK(2)) + 1);
	print("cpus' power %#lux\n", scu->cpupwrsts);
}

/*
 * turn on the snoop control unit unless it is already enabled:
 * write MASK(16) to its inval register, then enable it with
 * parity and speculative fills.
 */
void
scuon(void)
{
	Scu *scu;

	scu = (Scu *)soc.scu;
	if (!(scu->ctl & Scuenable)) {
		scu->inval = MASK(16);
		coherence();
		scu->ctl = Scuenable | Scuparity | Specfill;
		coherence();
	}
}

int
getncpus(void)
{
	int n;
	char *p;
	Scu *scu;

	if (navailcpus == 0) {
		scu = (Scu *)soc.scu;
		navailcpus = (scu->cfg & MASK(2)) + 1;
		if (navailcpus > MAXMACH)
			navailcpus = MAXMACH;

		p = getconf("*ncpu");
		if (p && *p) {
			n = atoi(p);
			if (n > 0 && n < navailcpus)
				navailcpus = n;
		}
	}
	return navailcpus;
}

/* print this cpu's number, clock rate, type and byte order */
void
cpuidprint(void)
{
	char cpuname[64];
	char *endian;

	cputype2name(cpuname, sizeof cpuname);
	delay(50);				/* let uart catch up */
	endian = getpsr() & PsrBigend? "big": "little";
	iprint("cpu%d: %lldMHz ARM %s %s-endian\n",
		m->machno, m->cpuhz / Mhz, cpuname, endian);
}

/*
 * take every device out of reset, enable every clock output, then
 * program the reset-source register with the watchdog cpu, coprocessor
 * and system reset bits plus the watchdog enable bit.
 * each group of register writes is followed by coherence().
 */
static void
clockson(void)
{
	Clkrst *clk = (Clkrst *)soc.clkrst;

	/* enable all by clearing resets */
	clk->rstdevl = clk->rstdevh = clk->rstdevu = 0;
	coherence();
	clk->clkoutl = clk->clkouth = clk->clkoutu = ~0; /* enable all clocks */
	coherence();

	clk->rstsrc = Wdcpurst | Wdcoprst | Wdsysrst | Wdena;
	coherence();
}

/*
 * stop cpu `cpu' and hold it in reset.  cpu0 may not be stopped.
 * we could be shutting down ourself (if cpu == m->machno), so take care.
 *
 * the cpu is marked off and stopped, the l1 cache is written back,
 * the avp coprocessor is halted, then the target cpu is halted via
 * the flow controller and finally put into reset via the clock &
 * reset controller.
 */
void
stopcpu(uint cpu)
{
	Flow *flow = (Flow *)soc.flow;
	Clkrst *clk = (Clkrst *)soc.clkrst;

	if (cpu == 0) {
		iprint("stopcpu: may not stop cpu0\n");
		return;
	}

	machoff(cpu);
	lock(&active);
	active.stopped |= 1 << cpu;
	unlock(&active);
	l1cache->wb();

	/* shut down arm7 avp coproc so it can't cause mischief. */
	/* could try watchdog without stopping avp. */
	flow->haltcop = Stop;
	coherence();
	flow->cop = 0;					/* no Cpuenable */
	coherence();
	delay(10);

	/*
	 * cpu 0 was rejected above, so the selections below always
	 * pick the cpu1 registers.
	 */
	assert(cpu < Maxflowcpus);
	*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = Stop;
	coherence();
	*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;	/* no Cpuenable */
	coherence();
	delay(10);

	/* cold reset */
	assert(cpu < Maxcpus);
	/* the Cpu0 bits shifted by cpu give that cpu's reset bits */
	clk->cpuset = (Cpu0reset | Cpu0dbgreset | Cpu0dereset) << cpu;
	coherence();
	delay(1);

	l1cache->wb();
}

/*
 * barrier: add ourselves to *cntp, then spin until the count has
 * reached at least n.
 */
static void
synccpus(Ref *cntp, int n)
{
	incref(cntp);
	for (;;)
		if (cntp->ref >= n)
			break;
	/* all cpus should now be here */
}

/*
 * one pass of the l1 coherence test.  every cpu repeatedly bumps the
 * shared counter dp->cnt up and straight back down; once all cpus have
 * finished, the counter must be back at zero, otherwise an update was
 * lost and we panic.  cpu0 prints the pass number.
 *
 * dp->sync serves as a two-stage barrier: it is raised to navailcpus
 * before the check and to 2*navailcpus after it, then each cpu drops
 * the two references it added.
 */
static void
pass1(int pass, volatile Diag *dp)
{
	int i;

	if(m->machno == 0)
		iprint(" %d", pass);
	for (i = 1000*1000; --i > 0; ) {
		/* balanced pair: the net effect on cnt must be nil */
		incref(&dp->cnt);
		decref(&dp->cnt);
	}

	synccpus(&dp->sync, navailcpus);
	/* all cpus are now here */

	ilock(dp);
	if(dp->cnt.ref != 0)
		panic("cpu%d: diag: failed w count %ld", m->machno, dp->cnt.ref);
	iunlock(dp);

	synccpus(&dp->sync, 2 * navailcpus);
	/* all cpus are now here */
	decref(&dp->sync);
	decref(&dp->sync);
}

/*
 * try to confirm coherence of l1 caches.
 * assume that all available cpus will be started.
 *
 * does nothing unless Debug is non-zero.  otherwise every cpu must
 * call this: the cpus meet at barriers built on dp->sync, run three
 * contention passes (pass1), and panic if the shared counters end up
 * with unexpected values.  printing is done under the Diag's lock.
 */
void
l1diag(void)
{
	int pass;
	volatile Diag *dp;

	if (!Debug)
		return;

	l1cache->wb();

	/*
	 * synchronise and print
	 */
	dp = &diag;
	ilock(dp);
	if (m->machno == 0)
		iprint("l1: waiting for %d cpus... ", navailcpus);
	iunlock(dp);

	synccpus(&dp->sync, navailcpus);

	ilock(dp);
	if (m->machno == 0)
		iprint("cache coherency pass");
	iunlock(dp);

	/* second barrier stage, then drop both references we added */
	synccpus(&dp->sync, 2 * navailcpus);
	decref(&dp->sync);
	decref(&dp->sync);

	/*
	 * cpus contend
	 */
	for (pass = 0; pass < 3; pass++)
		pass1(pass, dp);

	/*
	 * synchronise and check sanity
	 */
	synccpus(&dp->sync, navailcpus);

	if(dp->sync.ref < navailcpus || dp->sync.ref >= 2 * navailcpus)
		panic("cpu%d: diag: failed w dp->sync %ld", m->machno,
			dp->sync.ref);
	if(dp->cnt.ref != 0)
		panic("cpu%d: diag: failed w dp->cnt %ld", m->machno,
			dp->cnt.ref);

	ilock(dp);
	iprint(" cpu%d ok", m->machno);
	iunlock(dp);

	synccpus(&dp->sync, 2 * navailcpus);
	decref(&dp->sync);
	decref(&dp->sync);
	l1cache->wb();

	/*
	 * all done, print
	 */
	ilock(dp);
	if (m->machno == 0)
		iprint("\n");
	iunlock(dp);
}

/*
 * let cpu `cpu' run: clear its stop bit in clkcpu, release its resets
 * via cpuclr, then zero its flow-controller control and halt registers.
 * cpu must be below Maxflowcpus, since the flow controller only has
 * registers for cpu0 and cpu1.
 */
static void
unfreeze(uint cpu)
{
	Clkrst *clk = (Clkrst *)soc.clkrst;
	Flow *flow = (Flow *)soc.flow;

	assert(cpu < Maxcpus);

	/* the Cpu0 bits shifted by cpu give that cpu's bits */
	clk->clkcpu &= ~(Cpu0stop << cpu);
	coherence();
	/* out of reset */
	clk->cpuclr = (Cpu0reset | Cpu0wdreset | Cpu0dbgreset | Cpu0dereset) <<
		cpu;
	coherence();

	assert(cpu < Maxflowcpus);
	*(cpu == 0? &flow->cpu0: &flow->cpu1) = 0;
	coherence();
	*(cpu == 0? &flow->haltcpu0: &flow->haltcpu1) = 0; /* normal operat'n */
	coherence();
}

/*
 * this is all a bit magic.  the soc.exceptvec register is effectively
 * undocumented.  we had to look at linux and experiment, alas.  this is the
 * sort of thing that should be standardised as part of the cortex mpcore spec.
 * even intel document their equivalent procedure.
 *
 * start cpu `cpu' executing at _vrst.  returns 0 once the new cpu has
 * reported in by storing its number through evp (see cpustart), or -1
 * if cpu is out of range, is the calling cpu, or fails to report
 * within about 2 seconds (in which case it is stopped again).
 * the previous contents of *evp are restored before returning.
 */
int
startcpu(uint cpu)
{
	int i, r;
	ulong oldvec, rstaddr;
	ulong *evp = (ulong *)soc.exceptvec;	/* magic */

	r = 0;
	if (getncpus() < 2 || cpu == m->machno ||
	    cpu >= MAXMACH || cpu >= navailcpus)
		return -1;

	oldvec = *evp;
	l1cache->wb();			/* start next cpu w same view of ram */
	*evp = rstaddr = PADDR(_vrst);	/* will start cpu executing at _vrst */
	coherence();
	l1cache->wb();
	unfreeze(cpu);

	/* wait up to 2000 one-millisecond delays for *evp to change */
	for (i = 2000; i > 0 && *evp == rstaddr; i--)
		delay(1);
	/* timed out, or something other than the cpu's number was stored */
	if (i <= 0 || *evp != cpu) {
		iprint("cpu%d: didn't start!\n", cpu);
		stopcpu(cpu);		/* make sure it's stopped */
		r = -1;
	}
	*evp = oldvec;
	return r;
}

/*
 * panic if bit 0 of the value from getscr is set (we would be running
 * non-secure), and report a non-zero debug enable register.
 */
static void
cksecure(void)
{
	ulong dbg;
	extern ulong getdebug(void);

	if ((getscr() & 1) != 0)
		panic("cpu%d: running non-secure", m->machno);

	dbg = getdebug();
	if (dbg != 0)
		iprint("cpu%d: debug enable reg %#lux\n", m->machno, dbg);
}

/*
 * cortex-a9 model-specific configuration: set the smp and
 * maintenance-broadcast bits in the auxiliary control register.
 * returns the register's previous value.
 */
ulong
smpon(void)
{
	ulong old;

	old = getauxctl();
	putauxctl(old | CpACsmp | CpACmaintbcast);
	return old;
}

void
cortexa9cachecfg(void)
{
	/* cortex-a9 model-specific configuration */
	putauxctl(getauxctl() | CpACparity | CpAClwr0line | CpACl2pref);
}

/*
 * called on a cpu other than 0 from cpureset in l.s,
 * from _vrst in lexception.s.
 * mmu and l1 (and system-wide l2) caches and coherency (smpon) are on,
 * but interrupts are disabled.
 * our mmu is using an exact copy of cpu0's l1 page table
 * as it was after userinit ran.
 *
 * initialises this cpu, tells cpu0 we are up, waits for l1ptstable.word
 * to become non-zero (or about 5 seconds), and then enters the
 * scheduler.  does not return.
 */
void
cpustart(void)
{
	int ms;
	ulong *evp;
	Power *pwr;

	up = nil;
	/* our bit in active.machs is already set: we must have reset */
	if (active.machs[m->machno]) {
		serialputc('?');
		serialputc('r');
		panic("cpu%d: resetting after start", m->machno);
	}
	assert(m->machno != 0);

	errata();
	cortexa9cachecfg();
	memdiag(&testmem);

	machinit();			/* bumps nmach, adds bit to machs */
	machoff(m->machno);		/* not ready to go yet */

	/* clock signals and scu are system-wide and already on */
	clockshutdown();		/* kill any watch-dog timer */

	trapinit();
	clockinit();			/* sets loop delay */
	timersinit();
	cpuidprint();

	/*
	 * notify cpu0 that we're up so it can proceed to l1diag.
	 * startcpu is polling *evp for our machine number.
	 */
	evp = (ulong *)soc.exceptvec;	/* magic */
	*evp = m->machno;
	coherence();

	l1diag();		/* contend with other cpus to verify sanity */

	/*
	 * pwr->noiopwr == 0
	 * pwr->detect == 0x1ff (default, all disabled)
	 */
	pwr = (Power *)soc.power;
	assert(pwr->gatests == MASK(7)); /* everything has power */

	/*
	 * 8169 has to initialise before we get past this, thus cpu0
	 * has to schedule processes first.
	 */
	if (Debug)
		iprint("cpu%d: waiting for 8169\n", m->machno);
	/* poll every 10ms for up to 5s, invalidating our cached copy each time */
	for (ms = 0; !l1ptstable.word && ms < 5000; ms += 10) {
		delay(10);
		cachedinvse(&l1ptstable.word, sizeof l1ptstable.word);
	}
	if (!l1ptstable.word)
		iprint("cpu%d: 8169 unreasonably slow; proceeding\n", m->machno);
	/* now safe to copy cpu0's l1 pt in mmuinit */

	mmuinit();			/* update our l1 pt from cpu0's */
	fpon();
	machon(m->machno);		/* now ready to go and be scheduled */

	if (Debug)
		iprint("cpu%d: scheding\n", m->machno);
	schedinit();
	panic("cpu%d: schedinit returned", m->machno);
}

/*
 * handler for the per-cpu interrupts enabled in archreset;
 * mainly used to break out of wfi.
 */
void
sgintr(Ureg *ureg, void *)
{
	iprint("cpu%d: got sgi\n", m->machno);
	if (m->machno == 0)
		return;
	/* try to prod cpu1 into life when it gets stuck */
	clockprod(ureg);
}

/*
 * one-time architecture initialisation; later calls return at once.
 * sets provisional clock values, prints the cache configuration,
 * turns on clocks, runs archconfinit, enables floating point and
 * installs sgintr for the cpu0 and cpu1 interrupts.
 */
void
archreset(void)
{
	static int beenhere;

	if (beenhere)
		return;
	beenhere = 1;

	/* conservative temporary values until archconfinit runs */
	m->cpuhz = 1000 * Mhz;			/* trimslice speed */
	m->delayloop = m->cpuhz/2000;		/* initial estimate */

	prcachecfg();

	clockson();
	/* all partitions were powered up by u-boot, so needn't do anything */
	archconfinit();
//	resetusb();
	fpon();

	if (irqtooearly)
		panic("archreset: too early for irqenable");
	irqenable(Cpu0irq, sgintr, nil, "cpu0");
	irqenable(Cpu1irq, sgintr, nil, "cpu1");
	/* ... */
}

/*
 * reset the whole system by setting Sysreset in the clock & reset
 * controller's rstdevl register.  must be called on cpu0.
 * if the reset has not taken effect after half a second, print a dot
 * every second forever.
 */
void
archreboot(void)
{
	Clkrst *clk = (Clkrst *)soc.clkrst;

	assert(m->machno == 0);
	iprint("archreboot: reset!\n");
	delay(20);

	clk->rstdevl |= Sysreset;
	coherence();
	delay(500);

	/* shouldn't get here */
	splhi();
	iprint("awaiting reset");
	for(;;) {
		delay(1000);
		print(".");
	}
}

/* no keyboard setup is done here; intentionally empty */
void
kbdinit(void)
{
}

/*
 * report device `name' if its register address addr is zero or does
 * not respond to probeaddr.  unresponsive devices are counted in
 * `missed' and printed as a comma-separated list headed by "missing:".
 */
static void
missing(ulong addr, char *name)
{
	static int firstmiss = 1;

	if (addr == 0) {
		iprint("address zero for %s\n", name);
		return;
	}
	if (probeaddr(addr) < 0) {
		missed++;
		/* heading before the first entry, separator before the rest */
		iprint(firstmiss? "missing:": ",\n\t");
		firstmiss = 0;
		iprint(" %s at %#lux", name, addr);
	}
}

/*
 * verify that all the necessary device registers are accessible.
 * each address is checked with missing(), which prints the ones that
 * do not respond; a final newline ends the list if there were any.
 */
void
chkmissing(void)
{
	delay(10);
	missing(KZERO, "dram");
	missing(soc.intr, "intr ctlr");
	missing(soc.intrdist, "intr distrib");
	missing(soc.tmr[0], "tegra timer1");
	missing(soc.uart[0], "console uart");
	missing(soc.pci, "pcie");
	missing(soc.ether, "ether8169");
	missing(soc.µs, "µs counter");
	if (missed)
		iprint("\n");
	delay(10);
}

/*
 * presumably the flash write-protect hook expected by the flash
 * driver (confirm against ../port/flashif.h); does nothing here.
 */
void
archflashwp(Flash*, int)
{
}

/*
 * for ../port/devflash.c:/^flashreset
 * retrieve flash type, virtual base and length and return 0;
 * return -1 on error (no flash)
 *
 * only bank 0 is recognised, and for it this currently panics: the
 * setup below is a leftover for another board and is reached only if
 * panic returns.
 */
int
archflashreset(int bank, Flash *f)
{
	if(bank != 0)
		return -1;
panic("archflashreset: rewrite for nor & nand flash on ts");
	/*
	 * this is set up for the igepv2 board.
	 */
	f->type = "onenand";
	f->addr = (void*)VIRTNOR;		/* mapped here by archreset */
	f->size = 0;				/* done by probe */
	f->width = 1;
	f->interleave = 0;
	return 0;
}