ref: 0196d80a85932d6630b0daa58ec77f54d8481b35
dir: /sys/src/9/xen/xensystem.c/
/*
* xensystem.c
*
* TODO: we could handle mmu updates more efficiently by
* using a multicall.
* XXX perhaps we should check return values and panic on failure?
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "io.h"
#include "ureg.h"
#define LOG(a)
/*
* These functions replace all the inlines that are used on Linux systems
*/
/*
 * Hypercall entry stubs, in xen.s.
 * xencallN takes the hypercall number op followed by N-1 ulong arguments;
 * every wrapper below funnels through one of these.
 */
int xencall1(int op);
int xencall2(int op, ulong arg1);
int xencall3(int op, ulong arg1, ulong arg2);
int xencall4(int op, ulong arg1, ulong arg2, ulong arg3);
int xencall5(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4);
int xencall6(int op, ulong arg1, ulong arg2, ulong arg3, ulong arg4, ulong arg5);
/*
 * Ask the hypervisor to install the 64-bit pte value newval for
 * virtual address va.  The value is handed over as two 32-bit
 * halves, low word first.  A negative result is fatal; any other
 * result is returned to the caller.
 */
int
HYPERVISOR_update_va_mapping(ulong va, uvlong newval, ulong flags)
{
	ulong lo, hi;
	int r;

	lo = newval;
	hi = newval>>32;
	r = xencall5(__HYPERVISOR_update_va_mapping, va, lo, hi, flags);
	if(r < 0)
		panic("update_va_mapping failed");
	return r;
}
/*
 * Issue the set_timer_op hypercall.  The 64-bit timeout is split
 * into two 32-bit arguments, low word first.
 */
long
HYPERVISOR_set_timer_op(uvlong timeout)
{
	return xencall3(__HYPERVISOR_set_timer_op, (ulong)timeout, (ulong)(timeout>>32));
}
/*
 * Issue the set_trap_table hypercall, passing the address of table.
 */
int
HYPERVISOR_set_trap_table(trap_info_t *table)
{
	ulong addr;

	addr = (ulong)table;
	return xencall2(__HYPERVISOR_set_trap_table, addr);
}
/*
 * Issue the mmu_update hypercall for count requests starting at req.
 * success_count and domid are passed through unchanged; the
 * hypercall's result is returned unchecked.
 */
int
HYPERVISOR_mmu_update(mmu_update_t *req, int count,
	int *success_count, domid_t domid)
{
	int rc;

	rc = xencall5(__HYPERVISOR_mmu_update,
		(ulong)req, count, (ulong)success_count, domid);
	return rc;
}
/*
 * Issue the mmuext_op hypercall for count operations starting at op.
 * scount and domid are passed through unchanged; the hypercall's
 * result is returned unchecked.
 */
int
HYPERVISOR_mmuext_op(struct mmuext_op *op, int count, int *scount, domid_t domid)
{
	int rc;

	rc = xencall5(__HYPERVISOR_mmuext_op,
		(ulong)op, count, (ulong)scount, domid);
	return rc;
}
int
HYPERVISOR_set_gdt(unsigned long *frame_list, int entries)
{
return xencall3(__HYPERVISOR_set_gdt, (ulong)frame_list, entries);
}
/*
 * Issue the stack_switch hypercall with stack segment ss and
 * stack pointer esp.
 */
int
HYPERVISOR_stack_switch(ulong ss, ulong esp)
{
	int rc;

	rc = xencall3(__HYPERVISOR_stack_switch, ss, esp);
	return rc;
}
/*
 * Issue the set_callbacks hypercall with two (segment, function)
 * pairs: evss/evfunc and fsss/fsfunc.
 * XXX match evfunc and fsfunc prototypes?
 */
int
HYPERVISOR_set_callbacks(ulong evss, ulong evfunc, ulong fsss, ulong fsfunc)
{
	int rc;

	rc = xencall5(__HYPERVISOR_set_callbacks, evss, evfunc, fsss, fsfunc);
	return rc;
}
/*
 * Issue the argument-less fpu_taskswitch hypercall.
 */
int
HYPERVISOR_fpu_taskswitch(void)
{
	int rc;

	rc = xencall1(__HYPERVISOR_fpu_taskswitch);
	return rc;
}
/*
 * sched_op hypercall, SCHEDOP_yield command, with a zero argument.
 */
int
HYPERVISOR_yield(void)
{
	int rc;

	rc = xencall3(__HYPERVISOR_sched_op, SCHEDOP_yield, 0);
	return rc;
}
/*
 * sched_op hypercall, SCHEDOP_block command, with a zero argument.
 */
int
HYPERVISOR_block(void)
{
	int rc;

	rc = xencall3(__HYPERVISOR_sched_op, SCHEDOP_block, 0);
	return rc;
}
/*
 * sched_op hypercall, SCHEDOP_shutdown command.  The reason is
 * SHUTDOWN_reboot when reboot is non-zero, SHUTDOWN_poweroff otherwise.
 */
int
HYPERVISOR_shutdown(int reboot)
{
	sched_shutdown_t arg;

	if(reboot)
		arg.reason = SHUTDOWN_reboot;
	else
		arg.reason = SHUTDOWN_poweroff;
	return xencall3(__HYPERVISOR_sched_op, SCHEDOP_shutdown, (ulong)&arg);
}
int
HYPERVISOR_multicall(void *call_list, int nr_calls)
{
return xencall3(__HYPERVISOR_multicall, (ulong)call_list, nr_calls);
}
int
HYPERVISOR_event_channel_op(void *op)
{
return xencall2(__HYPERVISOR_event_channel_op, (ulong)op);
}
/*
 * Issue the xen_version hypercall with command cmd and argument arg.
 */
int
HYPERVISOR_xen_version(int cmd, void *arg)
{
	int rc;

	rc = xencall3(__HYPERVISOR_xen_version, cmd, (ulong)arg);
	return rc;
}
/*
 * Issue the console_io hypercall: command cmd, byte count count,
 * buffer str.
 */
int
HYPERVISOR_console_io(int cmd, int count, char *str)
{
	int rc;

	rc = xencall4(__HYPERVISOR_console_io, cmd, count, (ulong)str);
	return rc;
}
/*
 * Issue the grant_table_op hypercall: command cmd, request
 * structure setup, and count.
 */
int
HYPERVISOR_grant_table_op(int cmd, gnttab_setup_table_t *setup, int count)
{
	int rc;

	rc = xencall4(__HYPERVISOR_grant_table_op, cmd, (ulong)setup, count);
	return rc;
}
/*
 * Issue the memory_op hypercall with command cmd and reservation arg.
 */
int
HYPERVISOR_memory_op(int cmd, struct xen_memory_reservation *arg)
{
	int rc;

	rc = xencall3(__HYPERVISOR_memory_op, cmd, (ulong)arg);
	return rc;
}
/*
* XXX this comment is leftover from old code. revisit and update.
*
* The use of 'barrier' in the following reflects their use as local-lock
* operations. Reentrancy must be prevented (e.g., __cli()) /before/ following
* critical operations are executed. All critical operatiosn must complete
* /before/ reentrancy is permitted (e.g., __sti()). Alpha architecture also
* includes these barriers, for example.
*/
/*
* conversions to machine page numbers, pages and addresses
*/
/* patomfn entry for the page containing pa; usable as an lvalue */
#define MFN(pa) (patomfn[(pa)>>PGSHIFT])
/* that frame number shifted up to an address, widened to uvlong first */
#define MFNPG(pa) ((uvlong)MFN(pa)<<PGSHIFT)
/* machine address for pa: frame address or'ed with PGOFF(pa) */
#define PA2MA(pa) (MFNPG(pa) | PGOFF(pa))
/* same conversions starting from a virtual address, via PADDR() */
#define VA2MA(va) PA2MA(PADDR(va))
#define VA2MFN(va) MFN(PADDR(va))
/* the next three are not referenced by the code visible in this part of the file */
ulong hypervisor_virt_start;
ulong xentop;
start_info_t *xenstart;
/* shared page: vcpu_info[], evtchn_pending[] and evtchn_mask[] are used below */
shared_info_t *HYPERVISOR_shared_info;
/* indexed by pa>>PGSHIFT, yields the machine frame number (see MFN()) */
ulong *patomfn;
/* presumably the reverse table (machine frame -> pa frame); not used here — TODO confirm */
ulong *matopfn;
/*
 * Pin the page mapped at va as an L3 table (the page directory
 * pointer table in PAE mode).
 *
 * The page is first remapped read-only, then MMUEXT_PIN_L3_TABLE is
 * issued for its machine frame.  Returns 1 if the pin succeeded.
 * Otherwise the mapping is made writable again and 0 is returned.
 *
 * The unconditional console print that used to sit here duplicated
 * the LOG() line and had no counterpart in xenpgdpin/xenptpin; it
 * has been dropped.
 */
int
xenpdptpin(ulong va)
{
	struct mmuext_op op;
	ulong mfn;

	mfn = MFN(PADDR(va));
	LOG(dprint("pdptpin %lux %lux\n", va, mfn);)
	/* mark page readonly first */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID, UVMF_INVLPG|UVMF_LOCAL);
	/* L3 here refers to page directory pointer table (PAE mode) */
	op.cmd = MMUEXT_PIN_L3_TABLE;
	op.arg1.mfn = mfn;
	if(HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF) == 0)
		return 1;
	/* pin refused: give the page back its writable mapping */
	HYPERVISOR_update_va_mapping(va, ((uvlong)mfn<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
	return 0;
}
/*
 * Pin the page mapped at va as an L2 table (a page directory).
 * The page is remapped read-only before MMUEXT_PIN_L2_TABLE is
 * issued.  Returns 1 on success; on failure the writable mapping
 * is restored and 0 is returned.
 */
int
xenpgdpin(ulong va)
{
	struct mmuext_op pin;
	uvlong pte;
	ulong frame;

	frame = MFN(PADDR(va));
	LOG(dprint("pdpin %lux %lux\n", va, frame);)

	/* read-only first */
	pte = ((uvlong)frame<<PGSHIFT)|PTEVALID;
	HYPERVISOR_update_va_mapping(va, pte, UVMF_INVLPG|UVMF_LOCAL);

	/* Xen's "L2" is our page directory */
	pin.cmd = MMUEXT_PIN_L2_TABLE;
	pin.arg1.mfn = frame;
	if(HYPERVISOR_mmuext_op(&pin, 1, 0, DOMID_SELF) != 0){
		HYPERVISOR_update_va_mapping(va, pte|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
		return 0;
	}
	return 1;
}
/*
 * Pin the page mapped at va as an L1 table (a page table).
 * The page is remapped read-only before MMUEXT_PIN_L1_TABLE is
 * issued.  Returns 1 on success; on failure the writable mapping
 * is restored and 0 is returned.
 */
int
xenptpin(ulong va)
{
	struct mmuext_op pin;
	uvlong pte;
	ulong frame;

	frame = MFN(PADDR(va));
	LOG(dprint("pin %lux %lux\n", va, frame);)

	/* read-only first */
	pte = ((uvlong)frame<<PGSHIFT)|PTEVALID;
	HYPERVISOR_update_va_mapping(va, pte, UVMF_INVLPG|UVMF_LOCAL);

	/* Xen's "L1" is our page table */
	pin.cmd = MMUEXT_PIN_L1_TABLE;
	pin.arg1.mfn = frame;
	if(HYPERVISOR_mmuext_op(&pin, 1, 0, DOMID_SELF) != 0){
		HYPERVISOR_update_va_mapping(va, pte|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
		return 0;
	}
	return 1;
}
/*
 * Undo a pin: issue MMUEXT_UNPIN_TABLE for the frame behind va
 * (panicking if the hypervisor reports a negative result), then
 * remap the page read-write.
 */
void
xenptunpin(ulong va)
{
	struct mmuext_op unpin;
	ulong frame;
	int rc;

	frame = MFN(PADDR(va));
	LOG(dprint("unpin %lux %lux\n", va, frame);)

	unpin.cmd = MMUEXT_UNPIN_TABLE;
	unpin.arg1.mfn = frame;
	rc = HYPERVISOR_mmuext_op(&unpin, 1, 0, DOMID_SELF);
	if(rc < 0)
		panic("xenptunpin va=%lux called from %lux", va, getcallerpc(&va));

	/* writable again */
	HYPERVISOR_update_va_mapping(va, ((uvlong)frame<<PGSHIFT)|PTEVALID|PTEWRITE, UVMF_INVLPG|UVMF_LOCAL);
}
/*
 * Issue MMUEXT_NEW_BASEPTR with the machine frame of physical
 * address pa.  A negative result is fatal.
 */
void
xenptswitch(ulong pa)
{
	struct mmuext_op sw;
	int rc;

	sw.cmd = MMUEXT_NEW_BASEPTR;
	sw.arg1.mfn = MFN(pa);
	rc = HYPERVISOR_mmuext_op(&sw, 1, 0, DOMID_SELF);
	if(rc < 0)
		panic("xenptswitch");
}
void
xentlbflush(void)
{
struct mmuext_op op;
op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
HYPERVISOR_mmuext_op(&op, 1, 0, DOMID_SELF);
}
/*
 * Update a pte using a machine page frame number: val is written
 * as-is to the pte at virtual address ptr by way of an mmu_update
 * request.  A negative hypercall result is fatal.
 */
void
xenupdatema(ulong *ptr, uvlong val)
{
	mmu_update_t req;

	req.ptr = VA2MA(ptr);
	req.val = val;
	if(HYPERVISOR_mmu_update(&req, 1, 0, DOMID_SELF) >= 0)
		return;
	panic("xenupdatema - pte %lux value %llux (was %llux) called from %lux", (ulong)ptr, val, *(uvlong*)ptr, getcallerpc(&ptr));
}
/*
 * Update a pte using a guest "physical" page number: val is run
 * through PA2MA() before being written to the pte at virtual
 * address ptr.  A negative hypercall result is fatal.
 */
void
xenupdate(ulong *ptr, ulong val)
{
	mmu_update_t req;

	req.ptr = VA2MA(ptr);
	req.val = PA2MA(val);
	if(HYPERVISOR_mmu_update(&req, 1, 0, DOMID_SELF) >= 0)
		return;
	panic("xenupdate - pte %lux value %lux (%llux) called from %lux", (ulong)ptr, val, PA2MA(val), getcallerpc(&ptr));
}
/*
 * Complete grant ref and install the frame it yields at va:
 * xengrantend() supplies the machine frame number (0 is fatal),
 * which is recorded in patomfn and then mapped with mmumapframe().
 */
void
acceptframe(int ref, void *va)
{
	ulong frame;

	frame = xengrantend(ref);
	if(frame == 0)
		panic("can't accept page frame");
	LOG(dprint("acceptframe ref %d va %lux mfn %lux\n", ref, (ulong)va, frame);)
	VA2MFN(va) = frame;
	mmumapframe((ulong)va, frame);
}
/*
 * Give the machine frame behind va away to domain domid.
 *
 * A GTF_accept_transfer grant is created for the frame, the pte for
 * va is cleared, and the frame is handed back to the hypervisor with
 * XENMEM_decrease_reservation (which must report exactly one extent).
 * The patomfn entry for va is then set to ~0.
 *
 * Returns the grant reference from xengrant().
 */
int
donateframe(int domid, void *va)
{
	ulong mfn;
	int ref;
	ulong *pte;
	struct xen_memory_reservation mem;

	mfn = VA2MFN(va);
	ref = xengrant(domid, mfn, GTF_accept_transfer);
	LOG(dprint("grant transfer %lux (%lux) -> %d\n", (ulong)va, mfn, ref);)
	pte = mmuwalk(m->pdb, (ulong)va, 2, 0);
	/*
	 * The walk is done without creating tables, so it presumably can
	 * yield a null pointer for an unmapped va; catch that here rather
	 * than hand a bogus machine address to the hypervisor.
	 */
	if(pte == 0)
		panic("donateframe: no pte for va %lux", (ulong)va);
	xenupdatema(pte, 0);
	set_xen_guest_handle(mem.extent_start, &mfn);
	mem.nr_extents = 1;
	mem.extent_order = 0;
	mem.address_bits = 0;
	mem.domid = DOMID_SELF;
	if(HYPERVISOR_memory_op(XENMEM_decrease_reservation, &mem) != 1)
		panic("XENMEM_decrease_reservation");
	/* the frame is gone: invalidate its patomfn entry */
	VA2MFN(va) = ~0;
	return ref;
}
/*
 * Grant domain domid access to the machine frame behind va.
 * The grant carries GTF_permit_access, plus GTF_readonly when
 * write is zero.  Returns the grant reference from xengrant().
 */
int
shareframe(int domid, void *va, int write)
{
	ulong frame;
	int gref, gflags;

	frame = VA2MFN(va);
	if(write)
		gflags = GTF_permit_access;
	else
		gflags = GTF_permit_access|GTF_readonly;
	gref = xengrant(domid, frame, gflags);
	LOG(dprint("grant shared %lux (%lux) -> %d\n", (ulong)va, frame, gref);)
	return gref;
}
/*
 * Upcall from hypervisor, entered with evtchn_upcall_pending masked.
 *
 * Scans the two-level pending bitmap in the shared info page and calls
 * trap() once per pending port, with ureg->trap set to 100+port.
 * Before returning it unmasks upcalls and loops again if more events
 * became pending in the meantime.
 *
 * NOTE(review): only vcpu_info[0] is examined, whereas islo/spllo/splhi
 * index by m->machno.
 */
void
xenupcall(Ureg *ureg)
{
vcpu_info_t *vcpu;
shared_info_t *s;
ulong sel1, sel2, n1, n2, port;
ureg->ecode = 0;
s = HYPERVISOR_shared_info;
vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
for (;;) {
/* clear the flag before scanning, so an event arriving during the scan sets it again */
vcpu->evtchn_upcall_pending = 0;
/* take the selector word and zero it in one exchange */
sel1 = xchgl((uint*)&vcpu->evtchn_pending_sel, 0);
while(sel1) {
/* presumably ffs yields the 0-based index of a set bit, since 1<<n1 is used to clear it — confirm against the local ffs */
n1 = ffs(sel1);
sel1 &= ~(1<<n1);
/* bit n1 of the selector names word n1 of evtchn_pending */
sel2 = xchgl((uint*)&s->evtchn_pending[n1], 0);
while(sel2) {
n2 = ffs(sel2);
sel2 &= ~(1<<n2);
/* 32 ports per pending word */
port = (n1<<5) + n2;
/* ports are dispatched as trap numbers 100+port (see xenintrassign) */
ureg->trap = 100+port;
trap(ureg);
}
}
/* something arrived while we were scanning: go round again, still masked */
if (vcpu->evtchn_upcall_pending)
continue;
/* unmask, then look once more to catch an event that slipped in just before the unmask */
vcpu->evtchn_upcall_mask = 0;
if (vcpu->evtchn_upcall_pending == 0)
break;
/* lost the race: mask again and rescan */
vcpu->evtchn_upcall_mask = 1;
}
}
/*
 * Vctl enable hook: for an unshared vector, clear the port's bit in
 * the shared evtchn_mask (port is vno-100).  Always returns 0.
 */
static int
xenirqenable(Vctl *v, int shared)
{
	uint port;

	if(shared)
		return 0;
	port = v->vno-100;
	HYPERVISOR_shared_info->evtchn_mask[port/32] &= ~(1<<(port%32));
	return 0;
}
/*
 * Vctl disable hook: for an unshared vector, set the port's bit in
 * the shared evtchn_mask (port is vno-100).  Always returns 0.
 */
static int
xenirqdisable(Vctl *v, int shared)
{
	uint port;

	if(shared)
		return 0;
	port = v->vno-100;
	HYPERVISOR_shared_info->evtchn_mask[port/32] |= (1<<(port%32));
	return 0;
}
/*
* tbdf field is abused to distinguish virqs from channels:
*
* tbdf=BUSUNKNOWN -> irq is a channel number
* tbdf=anything else (e.g. 0) -> irq is a virq to be bound to a channel
*/
/*
 * Work out the vector number for v and install the enable/disable hooks.
 *
 * When v->tbdf is not BUSUNKNOWN, v->irq is taken as a virq and bound
 * to a fresh event channel on this cpu (m->machno); otherwise v->irq
 * is used directly as the event channel port.
 *
 * Returns 100+port, or -1 if the bind fails or port exceeds 155.
 */
int
xenintrassign(Vctl *v)
{
	evtchn_op_t op;
	uint port;

	if(v->tbdf != BUSUNKNOWN){
		op.cmd = EVTCHNOP_bind_virq;
		op.u.bind_virq.virq = v->irq;
		op.u.bind_virq.vcpu = m->machno;
		if(HYPERVISOR_event_channel_op(&op) != 0){
			/* name the right function and end the line */
			print("xenintrassign: bind virq %d failed\n", v->irq);
			return -1;
		}
		port = op.u.bind_virq.port;
	}else
		port = v->irq;
	/* presumably keeps the vector 100+port within 255 — TODO confirm */
	if(port > 155)
		return -1;
	v->enable = xenirqenable;
	v->disable = xenirqdisable;
	return 100+port;
}
/*
 * Map an irq to a vector number; here the mapping is the identity.
 */
int
xenintrvecno(int irq)
{
	int vecno;

	vecno = irq;
	return vecno;
}
/*
 * Report whether event upcalls are unmasked on this cpu:
 * 1 when evtchn_upcall_mask is zero, 0 otherwise.
 */
int
islo(void)
{
	vcpu_info_t *cpu;

	cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
	if(cpu->evtchn_upcall_mask != 0)
		return 0;
	return 1;
}
/*
* Note: Portable code expects spllo <= spl* <= spldone for
* accounting purposes. Let's hope the compiler doesn't reorder
* us.
*/
/*
 * Unmask event upcalls on this cpu.
 * Returns 0 if they were already unmasked.  Otherwise clears
 * m->splpc and the mask, pokes the hypervisor if an event is
 * already pending, and returns 1.
 */
int
spllo(void)
{
	vcpu_info_t *cpu;

	cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno];	// XXX m->shared
	if(cpu->evtchn_upcall_mask != 0){
		m->splpc = 0;
		cpu->evtchn_upcall_mask = 0;
		/*
		 * If an event arrived while masked off,
		 * use a dummy call to trigger delivery
		 */
		if(cpu->evtchn_upcall_pending)
			HYPERVISOR_xen_version(0, 0);
		return 1;
	}
	return 0;
}
/*
 * Mask event upcalls on this cpu.
 * The mask byte is exchanged with 1 and the value it held before is
 * returned (suitable for handing back to splx).  m->splpc is set from
 * the stack word just above the local `dummy'.
 */
int
splhi(void)
{
/* dummy exists only so its address can be used to reach the caller's pc; do not remove or reorder */
ulong dummy;
vcpu_info_t *cpu = &HYPERVISOR_shared_info->vcpu_info[m->machno]; // XXX m->shared
int oldmask;
oldmask = xchgb(&cpu->evtchn_upcall_mask, 1);
/* sanity check that the exchange really stored the 1 */
if (cpu->evtchn_upcall_mask != 1)
panic("xchgb");
/* XXX ad-hoc "getcallerpc" because we have no arguments */
m->splpc = (&dummy)[1];
return oldmask;
}
/*
 * Restore a saved level: non-zero x masks upcalls (splhi),
 * zero unmasks them (spllo).
 */
void
splx(int x)
{
	if(x){
		splhi();
		return;
	}
	spllo();
}
/*
 * Marker for profiling in portable code; deliberately does nothing.
 */
void
spldone(void)
{
	return;
}
/*
 * Allocate an event channel: an unbound port in our own domain
 * (DOMID_SELF) with dom as the remote end.  Failure is fatal.
 * Returns the new port.
 */
int
xenchanalloc(int dom)
{
	evtchn_op_t req;
	int rc;

	req.cmd = EVTCHNOP_alloc_unbound;
	req.u.alloc_unbound.dom = DOMID_SELF;
	req.u.alloc_unbound.remote_dom = dom;
	rc = HYPERVISOR_event_channel_op(&req);
	if(rc != 0)
		panic("xenchanalloc");
	return req.u.alloc_unbound.port;
}
/*
 * Notify over an event channel: issue EVTCHNOP_send for port.
 * The hypercall's result is ignored.
 */
void
xenchannotify(int port)
{
	evtchn_op_t req;

	req.cmd = EVTCHNOP_send;
	req.u.send.port = port;
	HYPERVISOR_event_channel_op(&req);
}
/*
 * Idle this cpu.  With upcalls masked, check nrdy: if it is
 * non-zero, unmask again and return; otherwise make the
 * SCHEDOP_block hypercall.
 */
void
halt(void)
{
	extern int nrdy;

	splhi();
	if(nrdy == 0){
		HYPERVISOR_block();
		return;
	}
	spllo();
}
/*
 * Memory barrier: simply calls coherence().
 */
void
mb(void)
{
	coherence();
	return;
}