git: 9front

ref: c83d6627b1ecdabdd1864e6c9a524aa90bfd1a9d
dir: /sys/src/9/teg2/cache-l2-pl310.c/

View raw version
/*
 * PL310 level 2 cache (non-architectural bag on the side)
 *
 * guaranteed to work incorrectly with default settings; must set Sharovr.
 *
 * clean & invalidate (wbinv) is buggy, so we work around erratum 588369
 * by disabling write-back and cache line-fill before, and restoring after.
 */
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "../port/error.h"
#include "arm.h"

/* associativity chosen by auxctl's Assoc16way bit: 16 ways if set, otherwise 8 */
#define NWAYS(l2p)	(((l2p)->auxctl & Assoc16way) ? 16 : 8)
/* the controller's register block, reached through soc.l2cache */
#define L2P		((L2pl310 *)soc.l2cache)

/*
 * cache geometry constants: l2pl310init divides L2size by the number
 * of ways to get the way size, and both l2pl310init and l2pl310info
 * express way sizes in multiples of Wayszgran.
 */
enum {
	L2size		= 1024 * 1024,	/* according to the tegra 2 manual */
	Wayszgran	= 16 * KiB,	/* granularity of way sizes */
};

typedef struct L2pl310 L2pl310;
typedef struct Pl310op Pl310op;

/*
 * one group of four consecutive maintenance-operation registers.
 * L2pl310 embeds three of these: inv, clean and cleaninv.
 */
struct Pl310op {
	ulong	pa;		/* written with PADDR of a cache line by applyrange */
	ulong	_pad;
	ulong	indexway;	/* not referenced by the code in this file */
	ulong	way;		/* written with waysmask, then polled until those bits clear */
};

/*
 * memory-mapped register layout of the PL310, overlaid on soc.l2cache
 * via the L2P macro.  the _padN arrays place each named register at
 * its byte offset from the controller base; each pad is sized as
 * (offset of next register) - (offset just past the previous one).
 * offsets in the comments assume a 4-byte ulong, so sizeof(Pl310op)
 * is 0x10.
 */
struct L2pl310 {
	ulong	id;			/* 0x000 */
	ulong	type;			/* 0x004 */
	uchar	_pad0[0x100 - 0x8];
	ulong	ctl;			/* 0x100 */
	ulong	auxctl;			/* 0x104 */

	uchar	_pad1[0x730 - 0x108];	/* boring regs */
	ulong	sync;			/* 0x730 */
	uchar	_pad2[0x740 - 0x734];
	ulong	r3p0sync;		/* 0x740; workaround for r3p0 bug */
	uchar	_pad3[0x770 - 0x744];
	Pl310op	inv;			/* 0x770; inv.indexway doesn't exist */
	uchar	_pad4[0x7b0 - 0x780];
	Pl310op	clean;			/* 0x7b0 */
	uchar	_pad5[0x7f0 - 0x7c0];
	Pl310op	cleaninv;		/* 0x7f0; ends at 0x800 */
	/*
	 * cleaninv ends at 0x800, not 0x7d0: sizing this pad from 0x7d0
	 * pushed filtstart, filtend and debug 0x30 bytes past their
	 * real offsets.
	 */
	uchar	_pad6[0xc00 - 0x800];
	ulong	filtstart;		/* 0xc00 */
	ulong	filtend;		/* 0xc04 */
	uchar	_pad7[0xf40 - 0xc08];	/* was a second member named _pad6 */
	ulong	debug;			/* 0xf40 */
	/* ... */
};

/*
 * bit definitions for the ctl, auxctl and debug registers of L2pl310.
 * Basecfg is not referenced elsewhere in the visible source.
 */
enum {
	/* ctl bits */
	L2enable = 1,

	/* auxctl bits */
	Ipref	= 1<<29,		/* prefetch enables */
	Dpref	= 1<<28,
	Mbo	= 1<<25,
	Sharovr	= 1<<22, /* shared attribute override (i.e., work right!) */
	Parity	= 1<<21,
	Waycfgshift= 17,		/* way-size field: Waycfgmask << Waycfgshift */
	Waycfgmask = (1<<3) - 1,
	Assoc16way = 1<<16,		/* tested by NWAYS: set means 16 ways, clear means 8 */
	/*
	 * optim'n to 0 cache lines; must be enabled in a9(?!).
	 * set CpAClwr0line on all cpus 1st.
	 */
	Fullline0= 1<<0,

	/* debug bits */
	Wt	= 1<<1,			/* write-through, not write-back */
	Nolinefill= 1<<0,

	Basecfg = Wt | Nolinefill,
};

static Lock l2lock;			/* taken via getlock() around maintenance operations */
static int disallowed;			/* by user: *l2off= in plan9.ini */
static int l2ison;			/* set by l2pl310on, cleared by l2pl310off */
/*
 * non-zero while a by-way operation started by l2pl310inv, l2pl310wb
 * or l2pl310wbinv is being polled to completion; awaitbgop spins on it.
 * NOTE(review): not declared volatile, yet it is polled in a loop and
 * written by other callers -- confirm the compiler reloads it each pass.
 */
static int bg_op_running;
static ulong waysmask;			/* MASK(nways), assigned in l2pl310init */

static Cacheimpl l2cacheimpl;		/* forward declaration; defined below */

/* busy-wait until bg_op_running reads as zero */
static void
awaitbgop(void)
{
	for (;;)
		if (!bg_op_running)
			return;
}

/*
 * acquire l2lock (with ilock) once no by-way operation is marked as
 * running.  the by-way routines may drop l2lock while their operation
 * is still in progress, so bg_op_running is checked again after the
 * lock is taken.  the caller releases the lock with iunlock(&l2lock).
 */
static void
getlock(void)
{
	awaitbgop();		/* wait at normal PL first */
	ilock(&l2lock);
	awaitbgop();		/* wait under lock */
}

static void
l2pl310sync(void)
{
	L2P->sync = 0;
	coherence();
}

/*
 * call this first to set sets/ways configuration.
 *
 * does nothing beyond setting `disallowed' if *l2off is configured, and
 * nothing at all if the cache is already on or already configured.
 * otherwise installs l2cacheimpl as l2cache, asks for 16-way
 * associativity if not already set, programs the way-size field of
 * auxctl for L2size/nways, and reports if the value did not stick.
 */
void
l2pl310init(void)
{
	int waysz, nways;
	ulong new;
	L2pl310 *l2p = L2P;
	static int configed;		/* non-zero once configuration has been done */

	if (getconf("*l2off") != nil) {
//		iprint("l2 cache (pl310) disabled\n");
		disallowed = 1;
		return;
	}
	if (l2ison || configed)
		return;
	l2cache = &l2cacheimpl;
	cachedwb();

	/*
	 * default config is:
	 * l2: ext unified, 8 ways 512 sets 32 bytes/line => 128KB
	 * but the tegra 2 manual says there's 1MB available.
	 * ways or way-size may be fixed by hardware; the only way to tell
	 * is to try to change the setting and read it back.
	 */
	l2pl310sync();
	/*
	 * NOTE(review): waysmask is not assigned until below, so on the
	 * first call l2pl310inv writes 0 to inv.way here -- confirm
	 * whether this invalidate is meant to have any effect.
	 */
	l2cache->inv();

	/* figure out number of ways */
	l2pl310sync();
	nways = NWAYS(l2p);
	if (!(l2p->auxctl & Assoc16way)) {
		/* request 16 ways, then read back what was actually accepted */
		l2p->auxctl |= Assoc16way;
		coherence();
		l2pl310sync();
		nways = NWAYS(l2p);
//		iprint("\nl2: was set for 8 ways, asked for 16, got %d\n", nways);
	}
	waysmask = MASK(nways);

	/* figure out way size (and thus number of sets) */
	waysz = L2size / nways;
	/* replace the way-size field with log2(waysz/Wayszgran)+1, keep other bits */
	new = l2p->auxctl & ~(Waycfgmask << Waycfgshift) |
		(log2(waysz / Wayszgran) + 1) << Waycfgshift;
	l2p->auxctl = new;
	coherence();
	l2pl310sync();
	l2cache->inv();

//	iprint("\nl2: configed %d ways, %d sets (way size %d)\n", nways,
//		waysz / CACHELINESZ, waysz);
	/* read back to detect a hardware-fixed configuration */
	if (l2p->auxctl != new)
		iprint("l2 config %#8.8lux didn't stick; is now %#8.8lux\n",
			new, l2p->auxctl);
	configed++;
}

/*
 * describe the l2 cache in *cp.  *cp is always zeroed first and is
 * left zeroed when the cache is not on.  the number of ways and the
 * way size are decoded from the current auxctl contents.
 */
void
l2pl310info(Memcache *cp)
{
	int waycfg;
	ulong waybytes;
	L2pl310 *l2p = L2P;

	memset(cp, 0, sizeof *cp);
	if (l2ison) {
		l2pl310init();
		assert((l2p->id >> 24) == 'A');

		cp->level = 2;
		cp->type = Unified;
		cp->external = Extcache;
		cp->setsways = Cara | Cawa | Cawt | Cawb;
		cp->l1ip = 3<<14;		/* PIPT */
		/* bag on the side: no set/way shifts */
		cp->waysh = 0;
		cp->setsh = 0;

		cp->linelen = CACHELINESZ;
		cp->log2linelen = log2(CACHELINESZ);

		cp->nways = NWAYS(l2p);

		/* way size is Wayszgran << (field - 1); a field of 0 counts as 1 */
		waycfg = (l2p->auxctl >> Waycfgshift) & Waycfgmask;
		waybytes = Wayszgran;
		if (waycfg > 1)
			waybytes = (1 << (waycfg - 1)) * Wayszgran;
		cp->nsets = waybytes / CACHELINESZ;
	}
}

/*
 * configure and enable the l2 cache.
 * does nothing beyond setting `disallowed' if *l2off is configured,
 * and nothing if the cache is already on.  otherwise: configures ways
 * via l2pl310init, invalidates l2, drains l1, clears the filter and
 * debug registers, rewrites auxctl keeping only the ways/way-size
 * fields plus the wanted feature bits, then sets L2enable in ctl.
 */
void
l2pl310on(void)
{
	ulong ctl;
	L2pl310 *l2p = L2P;

	if (getconf("*l2off") != nil) {
//		iprint("l2 cache (pl310) disabled\n");
		disallowed = 1;
		return;
	}
	if (l2ison)
		return;

	l2pl310init();
	l2cache->inv();

	/*
	 * drain l1.  can't turn it off (which would make locks not work)
	 * because doing so makes references below to the l2 registers wedge
	 * the system.
	 */
	cacheuwbinv();
	cacheiinv();

	/*
	 * this is only called once, on cpu0 at startup,
	 * so we don't need locks here.
	 * must do all configuration before enabling l2 cache.
	 */
	l2p->filtend = 0;
	coherence();
	l2p->filtstart = 0;		/* no enable bit */
	l2p->debug = 0;			/* write-back, line fills allowed */
	coherence();

	ctl = l2p->auxctl;		/* despite the name, this holds the auxctl value */
	/* don't change number of sets & ways, but reset all else. */
	ctl &= Waycfgmask << Waycfgshift | Assoc16way;
	ctl |= Sharovr;		/* actually work correctly for a change */
	ctl |= Mbo | Ipref | Dpref | Parity | Fullline0;
	l2p->auxctl = ctl;
	coherence();

	l2p->ctl |= L2enable;
	coherence();

	l2ison = 1;

//	iprint("l2 cache (pl310) now on\n");
}

void
l2pl310off(void)
{
	if (!l2ison)
		return;
	l2cache->wbinv();
	getlock();
	L2P->ctl &= ~L2enable;
	coherence();
	l2ison = 0;
	iunlock(&l2lock);
}


/*
 * write to *reg the physical address of every cache line overlapping
 * [ava, ava+len), starting from ava rounded down to a line boundary,
 * then sync the controller.  a no-op when the cache is disallowed or
 * off; panics on a negative len.  callers in this file hold l2lock.
 */
static void
applyrange(ulong *reg, void *ava, int len)
{
	uintptr line, limit;

	if (disallowed || !l2ison)
		return;
	if (len < 0)
		panic("l2cache*se called with negative length");

	limit = (uintptr)ava + len;
	line = (uintptr)ava & ~(CACHELINESZ-1);
	while (line < limit) {
		*reg = PADDR(line);
		line += CACHELINESZ;
	}
	l2pl310sync();
}

/*
 * invalidate the l2 lines covering [va, va+bytes).
 * a line only partly inside the range (unaligned start or end) is
 * cleaned first, so that it is written back before being invalidated.
 */
void
l2pl310invse(void *va, int bytes)
{
	uintptr first, past;
	L2pl310 *l2p = L2P;

	first = (uintptr)va;
	past = first + bytes;

	getlock();
	/* write back the partial line at the front, if any */
	if ((first & (CACHELINESZ-1)) != 0)
		applyrange(&l2p->clean.pa, va, 1);
	/* write back the partial line at the tail, if any */
	if ((past & (CACHELINESZ-1)) != 0)
		applyrange(&l2p->clean.pa, (char *)va + bytes, 1);

	applyrange(&l2p->inv.pa, va, bytes);
	iunlock(&l2lock);
}

void
l2pl310wbse(void *va, int bytes)
{
	getlock();
	applyrange(&L2P->clean.pa, va, bytes);
	iunlock(&l2lock);
}

/*
 * write back and invalidate the l2 lines covering [va, va+bytes).
 *
 * assume that ldrex/strex (thus locks) won't work when Wt is in effect,
 * so don't manipulate locks between setting and clearing Wt.
 */
void
l2pl310wbinvse(void *va, int bytes)
{
	int odb;
	L2pl310 *l2p = L2P;

	if (!l2ison)
		return;
	getlock();
	applyrange(&l2p->clean.pa, va, bytes);	/* paranoia */

	/* save the debug register so it can be restored afterwards */
	odb = l2p->debug;
	l2p->debug |= Wt | Nolinefill;		/* erratum workaround */
	coherence();

	applyrange(&l2p->cleaninv.pa, va, bytes);

	l2p->debug = odb;			/* restored before the lock is released */
	iunlock(&l2lock);
}


/*
 * we want to wait for completion at normal PL.
 * if waiting is interrupted, interrupt code that calls
 * these ops could deadlock on a uniprocessor, so we only
 * give up l2lock before waiting on multiprocessors.
 * in this port, only cpu 0 gets interrupts other than local timer ones.
 */

/*
 * invalidate the whole l2 cache by way.
 * only `disallowed' is tested, not l2ison: l2pl310init and l2pl310on
 * call this (through l2cache->inv) before the cache is enabled.
 */
void
l2pl310inv(void)
{
	L2pl310 *l2p = L2P;

	if (disallowed)
		return;

	getlock();
	bg_op_running = 1;		/* makes other getlock() callers spin in awaitbgop */
	l2p->inv.way = waysmask;	/* start the operation on all configured ways */
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);	/* multiprocessor: wait without holding the lock */

	/* poll until the way bits read back as zero */
	while (l2p->inv.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	bg_op_running = 0;
	iunlock(&l2lock);
}

/*
 * write back (clean) the whole l2 cache by way.
 * a no-op when the cache is disallowed or off.
 * maximum time seen is 2542µs, typical is 625µs.
 */
void
l2pl310wb(void)
{
	L2pl310 *l2p = L2P;

	if (disallowed || !l2ison)
		return;

	getlock();
	bg_op_running = 1;		/* makes other getlock() callers spin in awaitbgop */
	l2p->clean.way = waysmask;	/* start the operation on all configured ways */
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);	/* multiprocessor: wait without holding the lock */

	/* poll until the way bits read back as zero */
	while (l2p->clean.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	bg_op_running = 0;
	iunlock(&l2lock);
}

/*
 * write back and invalidate the whole l2 cache by way.
 * a no-op when the cache is disallowed or off.  a full write-back is
 * done first, then the clean-and-invalidate runs with Wt and
 * Nolinefill set in the debug register (erratum workaround); the
 * previous debug value is restored afterwards.
 *
 * NOTE(review): when conf.nmach > 1, l2lock is released and re-taken
 * while Wt is still set, which is at odds with the assumption stated
 * above l2pl310wbinvse that locks must not be manipulated between
 * setting and clearing Wt -- confirm this is safe.
 */
void
l2pl310wbinv(void)
{
	int odb;
	L2pl310 *l2p = L2P;

	if (disallowed || !l2ison)
		return;

	l2pl310wb();			/* paranoia */

	getlock();
	bg_op_running = 1;		/* makes other getlock() callers spin in awaitbgop */
	odb = l2p->debug;		/* saved for restoration below */
	l2p->debug |= Wt | Nolinefill;	/* erratum workaround */
	coherence();

	l2p->cleaninv.way = waysmask;	/* start the operation on all configured ways */
	coherence();
	if (conf.nmach > 1)
		iunlock(&l2lock);	/* multiprocessor: wait without holding the lock */

	/* poll until the way bits read back as zero */
	while (l2p->cleaninv.way & waysmask)
		;

	if (conf.nmach > 1)
		ilock(&l2lock);
	l2pl310sync();
	l2p->debug = odb;
	bg_op_running = 0;
	iunlock(&l2lock);
}

/*
 * operations table for this cache; l2pl310init points the global
 * l2cache at it.
 */
static Cacheimpl l2cacheimpl = {
	.info	= l2pl310info,
	.on	= l2pl310on,
	.off	= l2pl310off,

	/* whole-cache operations */
	.inv	= l2pl310inv,
	.wb	= l2pl310wb,
	.wbinv	= l2pl310wbinv,

	/* operations on an address range (va, bytes) */
	.invse	= l2pl310invse,
	.wbse	= l2pl310wbse,
	.wbinvse= l2pl310wbinvse,
};