ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /os/pc/mmu.c/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#define DATASEGM(p) { 0xFFFF, SEGG|SEGB|(0xF<<16)|SEGP|SEGPL(p)|SEGDATA|SEGW }
#define EXECSEGM(p) { 0xFFFF, SEGG|SEGD|(0xF<<16)|SEGP|SEGPL(p)|SEGEXEC|SEGR }
#define TSSSEGM(b,p) { ((b)<<16)|sizeof(Tss),\
((b)&0xFF000000)|(((b)>>16)&0xFF)|SEGTSS|SEGPL(p)|SEGP }
Segdesc gdt[NGDT] =
{
[NULLSEG] { 0, 0}, /* null descriptor */
[KDSEG] DATASEGM(0), /* kernel data/stack */
[KESEG] EXECSEGM(0), /* kernel code */
[UDSEG] DATASEGM(3), /* user data/stack */
[UESEG] EXECSEGM(3), /* user code */
[TSSSEG] TSSSEGM(0,0), /* tss segment */
};
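
/*
 * Record the kernel stack to be loaded on traps from user mode in
 * the TSS, and switch to the page table rooted at pdb.
 */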
static void
taskswitch(ulong pdb, ulong stack)
{
Tss *tss;
tss = m->tss;
tss->ss0 = KDSEL;
tss->esp0 = stack;
tss->ss1 = KDSEL;
tss->esp1 = stack;
tss->ss2 = KDSEL;
tss->esp2 = stack;
tss->cr3 = pdb;
putcr3(pdb);
}
/*
* On processors that support it, we set the PTEGLOBAL bit in
* page table and page directory entries that map kernel memory.
* Doing this tells the processor not to bother flushing them
* from the TLB when doing the TLB flush associated with a
* context switch (write to CR3). Since kernel memory mappings
* are never removed, this is safe. (If we ever remove kernel memory
* mappings, we can do a full flush by turning off the PGE bit in CR4,
* writing to CR3, and then turning the PGE bit back on.)
*
* See also mmukmap below.
*
* Processor support for the PTEGLOBAL bit is enabled in devarch.c.
*/
static void
memglobal(void)
{
int i, j;
ulong *pde, *pte;
/* only need to do this once, on bootstrap processor */
if(m->machno != 0)
return;
if(!m->havepge)
return;
pde = m->pdb;
for(i=512; i<1024; i++){ /* 512: start at entry for virtual 0x80000000 */
if(pde[i] & PTEVALID){
pde[i] |= PTEGLOBAL;
if(!(pde[i] & PTESIZE)){
pte = KADDR(pde[i]&~(BY2PG-1));
for(j=0; j<1024; j++)
if(pte[j] & PTEVALID)
pte[j] |= PTEGLOBAL;
}
}
}
}
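
/*
 * A minimal sketch (not in the original file) of the full flush
 * described above. It assumes a putcr4 primitive alongside the
 * getcr4 used in mmukmap: clearing CR4.PGE (bit 7) makes global
 * entries flushable, the CR3 reload flushes the whole TLB, and
 * restoring CR4 re-enables global pages.
 */
static void
mmufullflush(void)
{
	ulong cr4;

	cr4 = getcr4();
	putcr4(cr4 & ~0x80);	/* clear PGE; globals become flushable */
	putcr3(PADDR(m->pdb));	/* reload CR3: flush the entire TLB */
	putcr4(cr4);		/* restore PGE */
}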
void
mmuinit(void)
{
ulong x, *p;
ushort ptr[3];
memglobal();
m->tss = malloc(sizeof(Tss));
memset(m->tss, 0, sizeof(Tss));
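	/*
	 * The I/O map base address (the high 16 bits of iomap) lies
	 * beyond the TSS segment limit, so there is no usable I/O
	 * permission bitmap and user-mode port I/O always traps.
	 */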
m->tss->iomap = 0xDFFF<<16;
	/*
	 * We used to keep the GDT in the Mach structure, but it
	 * turns out that slows down access to the rest of the page.
	 * Since the Mach structure is accessed quite often, separating
	 * the two pays off by a factor of 1.25 to 2 on real hardware
	 * (AMDs are more sensitive than Intels in this regard), and by
	 * a factor of about 10 to 100 under VMware.
	 */
memmove(m->gdt, gdt, sizeof gdt);
x = (ulong)m->tss;
m->gdt[TSSSEG].d0 = (x<<16)|sizeof(Tss);
m->gdt[TSSSEG].d1 = (x&0xFF000000)|((x>>16)&0xFF)|SEGTSS|SEGPL(0)|SEGP;
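	/*
	 * lgdt and lidt take a 6-byte pseudo-descriptor: a 16-bit
	 * table limit followed by a 32-bit linear base address.
	 */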
ptr[0] = sizeof(gdt)-1;
x = (ulong)m->gdt;
ptr[1] = x & 0xFFFF;
ptr[2] = (x>>16) & 0xFFFF;
lgdt(ptr);
ptr[0] = sizeof(Segdesc)*256-1;
x = IDTADDR;
ptr[1] = x & 0xFFFF;
ptr[2] = (x>>16) & 0xFFFF;
lidt(ptr);
/* make kernel text unwritable */
for(x = KTZERO; x < (ulong)etext; x += BY2PG){
p = mmuwalk(m->pdb, x, 2, 0);
if(p == nil)
panic("mmuinit");
*p &= ~PTEWRITE;
}
taskswitch(PADDR(m->pdb), (ulong)m + BY2PG);
ltr(TSSSEL);
}
ulong*
mmuwalk(ulong* pdb, ulong va, int level, int create)
{
ulong pa, *table;
/*
* Walk the page-table pointed to by pdb and return a pointer
* to the entry for virtual address va at the requested level.
* If the entry is invalid and create isn't requested then bail
* out early. Otherwise, for the 2nd level walk, allocate a new
* page-table page and register it in the 1st level.
*/
table = &pdb[PDX(va)];
if(!(*table & PTEVALID) && create == 0)
return 0;
switch(level){
default:
return 0;
case 1:
return table;
case 2:
if(*table & PTESIZE)
panic("mmuwalk2: va %luX entry %luX\n", va, *table);
if(!(*table & PTEVALID)){
pa = PADDR(xspanalloc(BY2PG, BY2PG, 0));
*table = pa|PTEWRITE|PTEVALID;
}
table = KADDR(PPN(*table));
return &table[PTX(va)];
}
}
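
/*
 * A hypothetical helper (not part of the original file) showing a
 * typical mmuwalk call: translate a kernel virtual address to a
 * physical address. It assumes va is mapped with 4KB pages; mmuwalk
 * panics at level 2 if the entry is a 4MB page.
 */
static ulong
kva2pa(ulong va)
{
	ulong *pte;

	pte = mmuwalk(m->pdb, va, 2, 0);
	if(pte == nil || !(*pte & PTEVALID))
		return 0;
	return PPN(*pte) | (va & (BY2PG-1));
}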
static Lock mmukmaplock;
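
/*
 * Bring this processor's page-directory entry for va into line with
 * the boot processor's, where mmukmap installs new mappings, and
 * flush the TLB. Returns 0 if va is not mapped in the boot
 * processor's table, 1 otherwise.
 */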
int
mmukmapsync(ulong va)
{
Mach *mach0;
ulong entry, *pte;
mach0 = MACHP(0);
ilock(&mmukmaplock);
if((pte = mmuwalk(mach0->pdb, va, 1, 0)) == nil){
iunlock(&mmukmaplock);
return 0;
}
if(!(*pte & PTESIZE) && mmuwalk(mach0->pdb, va, 2, 0) == nil){
iunlock(&mmukmaplock);
return 0;
}
entry = *pte;
if(!(m->pdb[PDX(va)] & PTEVALID))
m->pdb[PDX(va)] = entry;
// if(up && up->mmupdb){
// ((ulong*)up->mmupdb->va)[PDX(va)] = entry;
// mmuflushtlb(up->mmupdb->pa);
// }
// else
mmuflushtlb(PADDR(m->pdb));
iunlock(&mmukmaplock);
return 1;
}
ulong
mmukmap(ulong pa, ulong va, int size)
{
Mach *mach0;
ulong ova, pae, *table, pgsz, *pte, x;
int pse, sync;
mach0 = MACHP(0);
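	/*
	 * 4MB pages can be used if the processor advertises PSE in
	 * CPUID EDX (bit 3) and page-size extensions are enabled in
	 * CR4 (bit 4).
	 */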
if((mach0->cpuiddx & 0x08) && (getcr4() & 0x10))
pse = 1;
else
pse = 0;
sync = 0;
pa = PPN(pa);
if(va == 0)
va = (ulong)KADDR(pa);
else
va = PPN(va);
ova = va;
pae = pa + size;
ilock(&mmukmaplock);
while(pa < pae){
table = &mach0->pdb[PDX(va)];
/*
* Possibly already mapped.
*/
if(*table & PTEVALID){
if(*table & PTESIZE){
				/*
				 * Big page. Does the remaining region fit within it?
				 * If it does, set pa to the end of the region
				 * and get out.
				 * If not, step pa and va up to the next 4MB
				 * boundary and continue.
				 */
x = PPN(*table);
if(x != pa)
panic("mmukmap1: pa %luX entry %luX\n",
pa, *table);
x += 4*MB;
if(pae <= x){
pa = pae;
break;
}
pgsz = x - pa;
pa += pgsz;
va += pgsz;
continue;
}
else{
				/*
				 * Little page. Walk to the entry.
				 * If the entry is valid, advance pa and va, note that
				 * a sync is needed and continue.
				 * If not, fall through to create the mapping below.
				 */
pte = mmuwalk(mach0->pdb, va, 2, 0);
if(pte && *pte & PTEVALID){
x = PPN(*pte);
if(x != pa)
panic("mmukmap2: pa %luX entry %luX\n",
pa, *pte);
pgsz = BY2PG;
pa += pgsz;
va += pgsz;
sync++;
continue;
}
}
}
/*
* Not mapped. Check if it can be mapped using a big page -
* starts on a 4MB boundary, size >= 4MB and processor can do it.
* If not a big page, walk the walk, talk the talk.
* Sync is set.
*
* If we're creating a kernel mapping, we know that it will never
* expire and thus we can set the PTEGLOBAL bit to make the entry
* persist in the TLB across flushes. If we do add support later for
* unmapping kernel addresses, see devarch.c for instructions on
* how to do a full TLB flush.
*/
if(pse && (pa % (4*MB)) == 0 && (pae >= pa+4*MB)){
*table = pa|PTESIZE|PTEWRITE|PTEUNCACHED|PTEVALID;
if((va&KZERO) && m->havepge)
*table |= PTEGLOBAL;
pgsz = 4*MB;
}
else{
pte = mmuwalk(mach0->pdb, va, 2, 1);
*pte = pa|PTEWRITE|PTEUNCACHED|PTEVALID;
if((va&KZERO) && m->havepge)
*pte |= PTEGLOBAL;
pgsz = BY2PG;
}
pa += pgsz;
va += pgsz;
sync++;
}
iunlock(&mmukmaplock);
	/*
	 * If anything was added, bring this processor's
	 * page directory up to date.
	 */
if(sync)
mmukmapsync(ova);
return pa;
}
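
/*
 * Typical use (the physical address is hypothetical, for
 * illustration only): map a 4MB device aperture uncached into the
 * kernel, letting va default to KADDR(pa):
 *
 *	mmukmap(0xF0000000, 0, 4*MB);
 *
 * The return value is the physical address just past the last byte
 * mapped, so the caller can check that the whole range was covered.
 */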
void*
vmap(ulong pa, int size)
{
pa = upamalloc(pa, size, 0);
if(pa == 0)
return nil;
return KADDR(pa);
}
void
vunmap(void *va, int size)
{
if(va != nil)
upafree(PADDR(va), size);
}
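
/*
 * Typical use of vmap/vunmap in a driver (the register address is
 * hypothetical): map a page of memory-mapped device registers and
 * release the mapping when finished.
 *
 *	ulong *regs;
 *
 *	regs = vmap(0xFEE00000, BY2PG);
 *	if(regs == nil)
 *		error(Enomem);
 *	...
 *	vunmap(regs, BY2PG);
 */

/*
 * The PC keeps instruction and data caches coherent in hardware,
 * so segflush has nothing to do.
 */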
int
segflush(void*, ulong)
{
return 0;
}