ref: c3cfd06564ab868d1e78bf5fe8fe6fac07202de9
dir: /sys/src/9/kw/mmu.c/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "arm.h"
/*
 * Page-table index helpers.  FEXT is defined elsewhere; presumably
 * FEXT(v, o, w) extracts the w-bit field of v that starts at bit o
 * (TODO confirm).  Under that reading L1X(va) is the index of va's
 * 1MiB slot in the level 1 table (12 bits) and L2X(va) is the index
 * of va's page within a 256-entry level 2 table (8 bits).
 */
#define L1X(va)		FEXT((va), 20, 12)
#define L2X(va)		FEXT((va), 12, 8)
/*
 * Bounds, in level 1 slots, of the user part of the address space
 * (UZERO up to USTKTOP rounded up to a megabyte).  mmul1empty clears
 * exactly this slice and mmuswitch/mmurelease write it back to memory.
 */
enum {
	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
};
/* an all-zero level 1 entry maps nothing; mmudump treats it as a gap */
#define ISHOLE(pte)	((pte) == 0)
/*
 * Debugging aid: summarise the 4096-entry level 1 page table at
 * virtual address l1 on the console via iprint.
 *
 * Consecutive non-empty entries are reported as a single range, one
 * line per range.  Entries are merged purely on being non-empty; the
 * physical address and descriptor type printed for a range are those
 * of its first entry, so a range may in fact mix types or be
 * physically discontiguous.
 */
void
mmudump(PTE *l1)
{
	int slot, kind;
	uintptr firstva, firstpa;
	uvlong va, limit;
	PTE ent;

	iprint("\n");
	firstva = firstpa = 0;
	kind = 0;
	limit = 0;			/* 0 means no range is open */

	/* walk the first-level ptes; each covers one megabyte */
	for (slot = 0, va = 0; slot < 4096; slot++, va += MB) {
		ent = l1[slot];
		if (!ISHOLE(ent)) {
			if (limit == 0) {	/* first entry of a new range */
				firstva = va;
				firstpa = ent & ~(MB - 1);
				kind = ent & (Fine|Section|Coarse);
			}
			limit = va + MB;	/* range now ends after this entry */
			continue;
		}
		if (limit == 0)
			continue;		/* hole, and nothing to close */
		/* a hole terminates the open range: report it */
		iprint("l1 maps va (%#lux-%#llux) -> pa %#lux type %#ux\n",
			firstva, limit-1, firstpa, kind);
		limit = 0;
	}
	/* the table may end while a range is still open */
	if (limit != 0)
		iprint("l1 maps va (%#lux-%#llux) -> pa %#lux type %#ux\n",
			firstva, limit-1, firstpa, kind);
}
/*
 * Scratch dram for the crypto experiments, defined elsewhere.
 * When CRYPTOSANDBOX is set, mmuinit maps 16 pages of it with
 * user read/write access.
 */
#ifdef CRYPTOSANDBOX
extern uchar sandbox[64*1024+BY2PG];
#endif
/*
 * Identity map `mbs' megabytes starting at physical address phys.
 * Each megabyte gets a level 1 Section entry (domain 0, kernel
 * read/write) whose virtual address equals its physical address,
 * written into the table that ttbget() currently reports.
 * The TLB and both cache levels are flushed afterwards.
 */
void
mmuidmap(uintptr phys, int mbs)
{
	PTE *top;
	uintptr ttb, addr;

	ttb = ttbget();
	top = KADDR(ttb);

	addr = phys;
	while (mbs-- > 0) {
		top[L1X(addr)] = addr|Dom0|L1AP(Krw)|Section;
		addr += MiB;
	}

	coherence();
	mmuinvalidate();
	cacheuwbinv();
	l2cacheuwbinv();
}
/*
 * Complete the kernel's page tables and record the level 1 table
 * in m->mmul1.
 *
 * Starting from the level 1 table reported by ttbget(), three
 * 1024-byte (256-entry) level 2 tables are carved out of the memory
 * below it, the first at MACHSIZE+2KiB below the level 1 table and
 * each of the others 1KiB lower than the previous one:
 *	1. maps the page at HVECTORS to PHYSDRAM;
 *	2. maps the page at virtual 0 to PHYSDRAM as well;
 *	3. maps 256 pages at VIRTIO to the i/o registers at PHYSIO.
 * Each is hooked into the level 1 table with a Coarse entry.
 * None of the level 2 entries written here set Cached or Buffered.
 */
void
mmuinit(void)
{
	PTE *l1, *l2;
	uintptr pa, i;
	pa = ttbget();
	l1 = KADDR(pa);
	/*
	 * map high vectors to start of dram, but only 4K, not 1MB.
	 */
	pa -= MACHSIZE+2*1024;
	l2 = KADDR(pa);
	memset(l2, 0, 1024);		/* every other page in this MB faults */
	/* vectors step on u-boot, but so do page tables */
	l2[L2X(HVECTORS)] = PHYSDRAM|L2AP(Krw)|Small;
	l1[L1X(HVECTORS)] = pa|Dom0|Coarse;	/* vectors -> ttb-machsize-2k */
	/* double map vectors at virtual 0 so reset will see them */
	pa -= 1024;			/* next level 2 table, 1KiB lower */
	l2 = KADDR(pa);
	memset(l2, 0, 1024);
	l2[L2X(0)] = PHYSDRAM|L2AP(Krw)|Small;
	l1[L1X(0)] = pa|Dom0|Coarse;
	/*
	 * set up L2 ptes for PHYSIO (i/o registers), with smaller pages to
	 * enable user-mode access to a few devices.
	 */
	pa -= 1024;			/* third level 2 table */
	l2 = KADDR(pa);
	/*
	 * identity map by default.
	 * no memset needed: the loop writes all 1024/4 = 256 entries.
	 */
	for (i = 0; i < 1024/4; i++)
		l2[L2X(VIRTIO + i*BY2PG)] = (PHYSIO + i*BY2PG)|L2AP(Krw)|Small;
#ifdef CRYPTOSANDBOX
	/*
	 * rest is to let rae experiment with the crypto hardware
	 */
	/* access to cycle counter */
	l2[L2X(soc.clock)] = soc.clock | L2AP(Urw)|Small;
	/* cesa registers; also visible in user space */
	for (i = 0; i < 16; i++)
		l2[L2X(soc.cesa + i*BY2PG)] = (soc.cesa + i*BY2PG) |
			L2AP(Urw)|Small;
	/* crypto sram; remapped to unused space and visible in user space */
	l2[L2X(PHYSIO + 0xa0000)] = PHYSCESASRAM | L2AP(Urw)|Small;
	/*
	 * 64k of scratch dram.
	 * NOTE(review): sandbox's address is rounded *down* to a page
	 * boundary, so if sandbox is not page-aligned the first mapped
	 * page also exposes whatever precedes it; the extra BY2PG in its
	 * declaration suggests rounding up was intended - confirm.
	 */
	for (i = 0; i < 16; i++)
		l2[L2X(PHYSIO + 0xb0000 + i*BY2PG)] =
			(PADDR((uintptr)sandbox & ~(BY2PG-1)) + i*BY2PG) |
			 L2AP(Urw) | Small;
#endif
	/* hook the i/o level 2 table into the level 1 table */
	l1[L1X(VIRTIO)] = pa|Dom0|Coarse;
	/* flush the TLB and both cache levels now that the tables changed */
	coherence();
	mmuinvalidate();
	cacheuwbinv();
	l2cacheuwbinv();
	m->mmul1 = l1;
//	mmudump(l1);			/* DEBUG.  too early to print */
}
/*
 * Retire all of proc's active level 2 page-table pages.
 * For each page on proc->mmul2 the level 1 slot it occupied
 * (page->daddr) is set to Fault and, if clear is non-zero, the page
 * itself is zeroed through its kernel address (page->va).
 * The whole list is then pushed onto the front of proc->mmul2cache
 * for reuse, leaving proc->mmul2 empty.
 */
static void
mmul2empty(Proc* proc, int clear)
{
	PTE *top;
	Page **tailp, *pg;

	top = m->mmul1;

	/* walk the list, leaving tailp at the final `next' link */
	tailp = &proc->mmul2;
	while((pg = *tailp) != nil){
		if(clear)
			memset((void*)pg->va, 0, BY2PG);
		top[pg->daddr] = Fault;
		tailp = &pg->next;
	}

	/* splice: active list, then old cache, becomes the new cache */
	*tailp = proc->mmul2cache;
	proc->mmul2cache = proc->mmul2;
	proc->mmul2 = nil;
}
/*
 * Remove every user mapping from this cpu's level 1 table by zeroing
 * the slots from L1lo up to (not including) L1hi.
 *
 * The alternative below, which would clear only the slots recorded
 * as in use by m->mmul1lo and m->mmul1hi, is compiled out because it
 * is known to be buggy.
 */
static void
mmul1empty(void)
{
#ifndef notdef
	/* brute force: wipe the whole user slice */
	memset(m->mmul1 + L1lo, 0, (L1hi - L1lo)*sizeof(PTE));
#else				/* there's a bug in here */
	PTE *l1;
	/* clean out any user mappings still in l1 */
	if(m->mmul1lo > L1lo){
		if(m->mmul1lo == 1)
			m->mmul1[L1lo] = Fault;
		else
			memset(&m->mmul1[L1lo], 0, m->mmul1lo*sizeof(PTE));
		m->mmul1lo = L1lo;
	}
	if(m->mmul1hi < L1hi){
		l1 = &m->mmul1[m->mmul1hi];
		if((L1hi - m->mmul1hi) == 1)
			*l1 = Fault;
		else
			memset(l1, 0, (L1hi - m->mmul1hi)*sizeof(PTE));
		m->mmul1hi = L1hi;
	}
#endif /* !notdef */
}
/*
 * Make proc's user address space the current one on this cpu.
 *
 * Nothing is done if proc's map is already loaded (m->mmupid matches)
 * and proc has not asked for a flush (proc->newtlb).  Otherwise the
 * caches are written back and invalidated, proc's level 2 pages are
 * discarded if a flush was requested, the user slice of the level 1
 * table is emptied and then refilled with a Coarse entry for each of
 * proc's level 2 pages, the slice is forced out to memory, and the
 * TLB is invalidated.
 */
void
mmuswitch(Proc* proc)
{
	int slot;
	PTE *top;
	Page *pg;

	/* do kprocs get here and if so, do they need to? */
	if(!proc->newtlb && m->mmupid == proc->pid)
		return;
	m->mmupid = proc->pid;

	/* write back dirty and invalidate l1 caches */
	cacheuwbinv();

	if(proc->newtlb){
		mmul2empty(proc, 1);
		proc->newtlb = 0;
	}

	mmul1empty();

	/* move in new map */
	top = m->mmul1;
	pg = proc->mmul2;
	while(pg != nil){
		slot = pg->daddr;
		top[slot] = PPN(pg->pa)|Dom0|Coarse;
		/*
		 * know here that L1lo < slot < L1hi;
		 * pull in whichever bound is nearer to slot
		 */
		if(slot+1 - m->mmul1lo < m->mmul1hi - slot)
			m->mmul1lo = slot+1;
		else
			m->mmul1hi = slot;
		pg = pg->next;
	}

	/* make sure map is in memory */
	/* could be smarter about how much? */
	cachedwbse(&top[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));
	l2cacheuwbse(&top[L1X(UZERO)], (L1hi - L1lo)*sizeof(PTE));

	/* lose any possible stale tlb entries */
	mmuinvalidate();
}
/*
 * Discard and rebuild the current process's user mappings:
 * mark up as needing a flush and rerun mmuswitch on it, with
 * the processor priority raised by splhi for the duration.
 */
void
flushmmu(void)
{
	int pri;

	pri = splhi();
	up->newtlb = 1;
	mmuswitch(up);
	splx(pri);
}
/*
 * Give back all mmu resources held by proc.
 * Its active level 2 pages are retired (without zeroing them) onto
 * proc->mmul2cache, the whole cache is handed to freepages, and the
 * user slice of this cpu's level 1 table is emptied, written back to
 * memory and flushed from the TLB.
 */
void
mmurelease(Proc* proc)
{
	PTE *user;

	/* write back dirty and invalidate l1 caches */
	cacheuwbinv();

	mmul2empty(proc, 0);
	freepages(proc->mmul2cache, nil, 0);
	proc->mmul2cache = nil;

	mmul1empty();

	/* make sure map is in memory */
	/* could be smarter about how much? */
	user = &m->mmul1[L1X(UZERO)];
	cachedwbse(user, (L1hi - L1lo)*sizeof(PTE));
	l2cacheuwbse(user, (L1hi - L1lo)*sizeof(PTE));

	/* lose any possible stale tlb entries */
	mmuinvalidate();
}
/*
 * Install a user mapping of virtual address va for the current
 * process (up).
 *
 * va	user virtual address to map.
 * pa	physical page address with protection flags or'ed in:
 *	PTEWRITE selects user read/write rather than read-only, and
 *	PTEUNCACHED suppresses the Cached|Buffered attributes.
 * page	the Page being mapped; only its txtflush flag is used here.
 *
 * If va's megabyte has no level 2 table yet, one is taken from
 * up->mmul2cache (or freshly allocated), linked onto up->mmul2 and
 * hooked into the level 1 table.  Each table entry written is pushed
 * out of both cache levels before the TLB entry for va is invalidated.
 */
void
putmmu(uintptr va, uintptr pa, Page* page)
{
	int x;
	Page *pg;
	PTE *l1, *pte;
	x = L1X(va);			/* level 1 slot for va */
	l1 = &m->mmul1[x];
	//print("putmmu(%#p, %#p, %#p) ", va, pa, page->pa);
	//print("mmul1 %#p l1 %#p *l1 %#ux x %d pid %d\n",
	//	m->mmul1, l1, *l1, x, up->pid);
	if(*l1 == Fault){
		/* no level 2 table for this megabyte yet: provide one */
		/* wasteful - l2 pages only have 256 entries - fix */
		if(up->mmul2cache == nil){
			/* auxpg since we don't need much? memset if so */
			pg = newpage(1, 0, 0);
			pg->va = VA(kmap(pg));
		}
		else{
			/* reuse a retired page; it may hold old entries */
			pg = up->mmul2cache;
			up->mmul2cache = pg->next;
			memset((void*)pg->va, 0, BY2PG);
		}
		pg->daddr = x;		/* remember which level 1 slot it fills */
		pg->next = up->mmul2;
		up->mmul2 = pg;
		/* force l2 page to memory */
		cachedwbse((void *)pg->va, BY2PG);
		l2cacheuwbse((void *)pg->va, BY2PG);
		*l1 = PPN(pg->pa)|Dom0|Coarse;
		cachedwbse(l1, sizeof *l1);
		l2cacheuwbse(l1, sizeof *l1);
		//print("l1 %#p *l1 %#ux x %d pid %d\n", l1, *l1, x, up->pid);
		/* pull in whichever of the in-use bounds is nearer to x */
		if(x >= m->mmul1lo && x < m->mmul1hi){
			if(x+1 - m->mmul1lo < m->mmul1hi - x)
				m->mmul1lo = x+1;
			else
				m->mmul1hi = x;
		}
	}
	/* kernel address of the level 2 table the level 1 entry points at */
	pte = (void*)KADDR(PPN(*l1));
	//print("pte %#p index %ld %#ux\n", pte, L2X(va), *(pte+L2X(va)));
	/* protection bits are
	 *	PTERONLY|PTEVALID;
	 *	PTEWRITE|PTEVALID;
	 *	PTEWRITE|PTEUNCACHED|PTEVALID;
	 */
	/* from here on x holds the level 2 descriptor's attribute bits */
	x = Small;
	if(!(pa & PTEUNCACHED))
		x |= Cached|Buffered;
	if(pa & PTEWRITE)
		x |= L2AP(Urw);
	else
		x |= L2AP(Uro);
	pte[L2X(va)] = PPN(pa)|x;
	cachedwbse(&pte[L2X(va)], sizeof pte[0]);
	l2cacheuwbse(&pte[L2X(va)], sizeof pte[0]);
	/* clear out the current entry */
	mmuinvalidateaddr(PPN(va));
	/*
	 *  write back dirty entries - we need this because pio() in
	 *  fault.c is writing via a different virt addr and won't clean
	 *  its changes out of the dcache.  Page coloring doesn't work
	 *  on this mmu because the l1 virtual cache is set associative
	 *  rather than direct mapped.
	 */
	cachedwbinv();
	/* the page was flagged as needing an instruction-cache flush */
	if(page->txtflush){
		cacheiinv();
		page->txtflush = 0;
	}
	//print("putmmu %#p %#p %#p\n", va, pa, PPN(pa)|x);
}
/*
 * Simple helper for ucalloc().
 * Uncache a Section, must already be valid in the MMU.
 *
 * v must be 1MiB-aligned and size must be exactly 1MiB (asserted).
 * Clears the Cached and Buffered attributes of the level 1 Section
 * entry mapping v and returns v; returns nil, changing nothing, if
 * the entry is not a Section.
 */
void*
mmuuncache(void* v, usize size)
{
	PTE *pte;
	uintptr va;

	va = (uintptr)v;
	assert(!(va & (1*MiB-1)) && size == 1*MiB);

	pte = &m->mmul1[L1X(va)];
	if((*pte & (Fine|Section|Coarse)) != Section)
		return nil;
	*pte &= ~(Cached|Buffered);

	/*
	 * Push the new descriptor out to memory before dropping the
	 * TLB entry, as putmmu does: invalidating first would let a
	 * table walk in between reload the stale, cached descriptor.
	 */
	cachedwbse(pte, sizeof *pte);
	l2cacheuwbse(pte, sizeof *pte);
	mmuinvalidateaddr(va);

	return v;
}
/*
 * Stub.
 * Map the megabyte of physical memory at pa to kernel virtual
 * address va with a level 1 Section entry (domain 0, kernel
 * read/write, neither Cached nor Buffered).
 *
 * va and pa must be 1MiB-aligned and size exactly 1MiB (asserted).
 * Returns va on success, or 0 without changing anything if va's
 * level 1 slot is already in use.
 */
uintptr
mmukmap(uintptr va, uintptr pa, usize size)
{
	PTE *pte;

	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);

	pte = &m->mmul1[L1X(va)];
	if(*pte != Fault)
		return 0;
	*pte = pa|Dom0|L1AP(Krw)|Section;

	/*
	 * Descriptor to memory first, then the TLB, matching the
	 * order used by putmmu and mmuswitch.
	 */
	cachedwbse(pte, sizeof *pte);
	l2cacheuwbse(pte, sizeof *pte);
	mmuinvalidateaddr(va);

	return va;
}
/*
 * Stub.
 * Undo a mapping made by mmukmap(va, pa, size).
 *
 * va and pa must be 1MiB-aligned and size exactly 1MiB (asserted).
 * Returns va on success, or 0 without changing anything if va's
 * level 1 entry is not exactly the Section entry mmukmap would have
 * written for pa.
 */
uintptr
mmukunmap(uintptr va, uintptr pa, usize size)
{
	PTE *pte;

	assert(!(va & (1*MiB-1)) && !(pa & (1*MiB-1)) && size == 1*MiB);

	pte = &m->mmul1[L1X(va)];
	if(*pte != (pa|Dom0|L1AP(Krw)|Section))
		return 0;
	*pte = Fault;

	/*
	 * Push the Fault entry out to memory before dropping the TLB
	 * entry, as putmmu does: invalidating first would let a table
	 * walk in between reload the old, still-valid descriptor and
	 * keep the mapping alive after the unmap.
	 */
	cachedwbse(pte, sizeof *pte);
	l2cacheuwbse(pte, sizeof *pte);
	mmuinvalidateaddr(va);

	return va;
}
/*
 * Report how many bytes starting at physical address pa can be
 * reached through KADDR(pa): the distance from pa to the end of the
 * 512MiB window that begins at PHYSDRAM.
 * Returns 0 if pa lies at or beyond the end of that window, i.e. is
 * not a valid argument to KADDR.
 */
uintptr
cankaddr(uintptr pa)
{
	uintptr top;

	top = PHYSDRAM + 512*MiB;	/* assumes PHYSDRAM is 0 */
	if(pa >= top)
		return 0;
	return top - pa;
}
/*
 * from 386
 *
 * Return a kernel virtual address through which size bytes of
 * physical address space starting at pa can be accessed; panics if
 * the mapping cannot be made.
 *
 * Requests ending below 4MiB are answered directly with kseg0|pa.
 * Anything else is page-aligned and handed to mmukmap, which in this
 * port asserts that the request is exactly one 1MiB-aligned megabyte.
 */
void*
vmap(uintptr pa, usize size)
{
	uintptr mapped, va;
	usize o, osize;

	/*
	 * XXX - replace with new vm stuff.
	 * Crock after crock - the first 4MB is mapped with 2MB pages
	 * so catch that and return good values because the current mmukmap
	 * will fail.
	 * (Comment inherited from the 386 code.)
	 */
	if(pa+size < 4*MiB)
		return (void*)(kseg0|pa);

	/* widen the request to whole pages, remembering pa's page offset */
	osize = size;
	o = pa & (BY2PG-1);
	pa -= o;
	size += o;
	size = PGROUND(size);

	va = kseg0|pa;
	mapped = mmukmap(va, pa, size);
	/*
	 * mmukmap here returns va on success and 0 on failure.  The
	 * 386 test this was copied from (pae-size != pa) expects the
	 * end physical address instead, and so panicked on every
	 * successful mapping.
	 */
	if(mapped == 0 || mapped != va)
		panic("vmap(%#p, %ld) called from %#p: mmukmap fails %#p",
			pa+o, osize, getcallerpc(&pa), mapped);
	return (void*)(va+o);
}
/*
 * from 386
 *
 * Release a mapping obtained from vmap.  Currently does nothing:
 * both arguments are deliberately ignored.
 */
void
vunmap(void* v, usize size)
{
	/*
	 * XXX - replace with new vm stuff.
	 * Can't do this until do real vmap for all space that
	 * might be used, e.g. stuff below 1MB which is currently
	 * mapped automagically at boot but that isn't used (or
	 * at least shouldn't be used) by the kernel.
	upafree(PADDR(v), size);
	 */
	USED(v);
	USED(size);
}
/*
 * Notes.
 * Everything is in domain 0;
 * domain 0 access bits in the DAC register are set
 * to Client, which means access is controlled by the
 * permission values set in the PTE.
 *
 * L1 access control for the kernel is set to 1 (RW,
 * no user mode access);
 * L2 access control for the kernel is set to 1 (ditto)
 * for all 4 AP sets;
 * L1 user mode access is never set;
 * L2 access control for user mode is set to either
 * 2 (RO) or 3 (RW) depending on whether text or data,
 * for all 4 AP sets.
 * (To get kernel RO set AP to 0 and S bit in control
 * register c1).
 * User L1 entries are Coarse descriptors. The L2 table each one
 * points to has 256 entries and so needs only 1024 bytes.
 * L2 entries describe Small (4KiB) pages. Each user L2 table is
 * given a whole 4096-byte page (room for 1024 entries) regardless.
 *
 * 4KiB. That's the size of 1) a page, 2) the
 * size allocated for an L2 page-table page (note only 1KiB
 * is needed per L2 page - to be dealt with later) and
 * 3) the size of the area in L1 needed to hold the PTEs
 * to map 1GiB of user space (0 -> 0x3fffffff, 1024 entries).
 */