git: 9front

ref: bab432c417fe1cc3f910ef1b87f9d4abdc6cd1ab
dir: /sys/src/cmd/vmx/virtio.c/

View raw version
#include <u.h>
#include <libc.h>
#include <thread.h>
#include "dat.h"
#include "fns.h"

#include <ip.h>		/* parseether() */
#include <libsec.h>	/* genrandom() */

/* Forward declarations of the virtio types defined later in this file. */
typedef struct VIODev VIODev;
typedef struct VIOQueue VIOQueue;
typedef struct VIOBuf VIOBuf;
typedef struct VIONetDev VIONetDev;
typedef struct VIOBlkDev VIOBlkDev;

enum {
	/* bits of VIOBuf.flags, copied from the 16-bit flags field of a descriptor */
	BUFCHAIN = 1,	/* viogetbuf() follows the descriptor's next index */
	BUFWR = 2,	/* segment is filled by vioqwrite(); otherwise it is consumed by vioqread() */
	
	/* bit in the first 16-bit word of the avail area; when set vioputbuf() raises no interrupt */
	USEDNOIRQ = 1,
	
	/* bit of VIODev.devstat; queues hand out buffers only while it is set */
	DRIVEROK = 4, /* devstat */
};

/*
 * One segment of a descriptor chain obtained with viogetbuf().
 * The first segment of a chain also carries the read/write cursors
 * used by vioqread(), vioqwrite() and vioqrem().
 */
struct VIOBuf {
	u32int flags;	/* descriptor flags (BUFCHAIN, BUFWR) */
	VIOQueue *qu;	/* queue the chain was taken from */
	void *p;	/* host pointer for addr/len, as returned by gptr() */
	u64int addr;	/* address stored in the descriptor */
	u32int len;	/* length stored in the descriptor */
	u32int idx;	/* index of the descriptor in the queue's descriptor table */
	VIOBuf *next, *head;	/* next segment; first segment of the chain */
	u32int rptr, wptr;	/* bytes consumed by vioqread() / produced by vioqwrite() so far */
};

/*
 * One virtqueue of a device.  The embedded QLock is taken by
 * viogetbuf(), vioputbuf() and vioqaddrset() around ring accesses;
 * the embedded Rendez is where viogetbuf() sleeps waiting for work.
 */
struct VIOQueue {
	QLock;
	Rendez;
	VIODev *d;	/* owning device */
	u8int (*desc)[16], *avail, *used;	/* host pointers to the three ring areas; nil when no valid address is set */
	u16int size;	/* number of descriptors, fixed by mkvioqueue() */
	u32int addr;	/* value recorded by vioqaddrset(); returned on reads of port 0x8 */
	u16int availidx, usedidx;	/* next avail slot to consume; running count of used entries published */
	void (*notify)(VIOQueue*);	/* called on guest writes to port 0x10 and on non-zero status writes */
	int livebuf;	/* incremented by viogetbuf(), decremented by vioputbuf() */
	Rendez livebufrend;	/* viodevstatset() sleeps here until livebuf is no longer positive */
};

/* Network-device state kept in the union inside VIODev. */
struct VIONetDev {
	int readfd, writefd;	/* descriptors used by vionetrproc() / vionetwproc(); mkvionet() sets both to the same fd */
	u8int mac[6];	/* device address, exposed through vionetio() */
	enum {
		/* receive filter bits toggled by vionetcmd() and evaluated in viomacok() */
		VNETPROMISC = 1,
		VNETALLMULTI = 2,
		VNETALLUNI = 4,
		VNETNOMULTI = 8,
		VNETNOUNI = 16,
		VNETNOBCAST = 32,
		
		/* set by the "hdr!" prefix in mkvionet(); vionetwproc() then prepends a 10-byte length/timestamp header */
		VNETHEADER = 1<<31,
	} flags;
	u64int macbloom, multibloom;	/* 64-bit filters indexed by bloomhash(), filled by vionettables() */
};

/* Block-device state kept in the union inside VIODev. */
struct VIOBlkDev {
	int fd;	/* backing file opened by mkvioblk() */
	uvlong size;	/* size of the backing file in 512-byte units (mkvioblk() stores seek() >> 9) */
};

/* State of one virtio PCI device as seen through the I/O BAR handled by vioio(). */
struct VIODev {
	PCIDev *pci;	/* PCI function created by mkviodev() */
	u32int devfeat, guestfeat;	/* read at port 0x0; written/read at port 0x4 */
	u16int qsel;	/* queue selected through port 0xe */
	u8int devstat, isrstat;	/* status (port 0x12); interrupt status (read and cleared at port 0x13) */
	VIOQueue *qu;	/* array of queues */
	int nqu, allocqu;	/* queues created so far; capacity of qu */
	u32int (*io)(int, u16int, u32int, int, VIODev *);	/* optional handler for ports >= 20, called with port - 20 */
	void (*reset)(VIODev *);	/* optional hook run when the status is written as 0 */
	union {
		VIONetDev net;
		VIOBlkDev blk;
	};
};

static void
vioirq_(void *arg)
{
	VIODev *d;
	int val;
	
	d = ((void**)arg)[0];
	val = (uintptr)((void**)arg)[1];
	if(val != 0)
		d->isrstat |= val;
	else
		d->isrstat = 0;
	pciirq(d->pci, d->isrstat);
	free(arg);
}

/*
 * Queue an ISR status update for device d via sendnotif().
 * val != 0 sets those bits, val == 0 clears the status; the
 * argument pair is allocated here and released by vioirq_().
 */
static void
vioirq(VIODev *d, int val)
{
	void **args;

	assert(d != nil);
	args = emalloc(2 * sizeof(void*));
	args[1] = (void *) val;
	args[0] = d;
	sendnotif(vioirq_, args);
}

/*
 * Return a pointer to descriptor i of queue q, or nil (after
 * logging through vmerror()) when i is not below the queue size.
 */
static void *
checkdesc(VIOQueue *q, int i)
{
	if(i < q->size)
		return q->desc[i];
	vmerror("virtio device %#x: invalid next pointer %d in queue (size %d), ignoring descriptor", q->d->pci->bdf, i, q->size);
	return nil;
}

/*
 * Take the next available descriptor chain from queue q and return
 * it as a list of VIOBufs (linked through next, each pointing at
 * the first element through head).
 *
 * While the driver has not set DRIVEROK, the ring address is
 * invalid, or the avail ring is empty, the call returns nil if
 * wait is zero and otherwise sleeps on the queue's Rendez.
 *
 * Descriptors that are out of range or point at invalid guest
 * memory end the chain; an avail entry that yields no usable
 * descriptor at all is consumed and skipped.  The chain is also
 * cut once it holds more descriptors than the queue has entries,
 * which stops a guest-built cycle from allocating without bound.
 *
 * The caller must return the chain with vioputbuf().
 */
VIOBuf *
viogetbuf(VIOQueue *q, int wait)
{
	u16int gidx;
	VIOBuf *b, *rb, **bp;
	void *dp;
	int n;
	
	qlock(q);
waitloop:
	while((q->d->devstat & DRIVEROK) == 0 || q->desc == nil || (gidx = GET16(q->avail, 2), gidx == q->availidx)){
		if(!wait){
			qunlock(q);
			return nil;
		}
		rsleep(q);
	}
	rb = nil;
	bp = &rb;
	n = 0;
	/* checkdesc() yields nil for a bad head index; the loop must not touch it */
	dp = checkdesc(q, GET16(q->avail, 4 + 2 * (q->availidx % q->size)));
	while(dp != nil){
		if(++n > q->size){
			vmerror("virtio device %#x: descriptor chain longer than queue (size %d), truncating", q->d->pci->bdf, q->size);
			break;
		}
		b = emalloc(sizeof(VIOBuf));
		b->qu = q;
		b->idx = (u8int(*)[16])dp - q->desc;
		b->addr = GET64(dp, 0);
		b->len = GET32(dp, 8);
		b->flags = GET16(dp, 12);
		b->p = gptr(b->addr, b->len);
		if(b->p == nil){
			vmerror("virtio device %#x: invalid buffer pointer %#p in queue, ignoring descriptor", q->d->pci->bdf, (void*)b->addr);
			free(b);
			break;
		}
		*bp = b;
		b->head = rb;
		bp = &b->next;
		if((b->flags & BUFCHAIN) == 0) break;
		dp = checkdesc(q, GET16(dp, 14));
	}
	q->availidx++;
	/* nothing usable in this entry: look at the next one */
	if(rb == nil) goto waitloop;
	q->livebuf++;
	qunlock(q);
	return rb;
}

/*
 * Return a chain obtained from viogetbuf() to the guest and free it.
 *
 * If the driver is still in the DRIVEROK state and the ring address
 * is valid, a used-ring entry holding the chain's head index and
 * the number of bytes written (wptr) is published, and an interrupt
 * is raised unless the guest set USEDNOIRQ in the avail flags.
 *
 * The live-buffer count is dropped on every path, including the one
 * where DRIVEROK has been cleared in the meantime: viodevstatset()
 * sleeps on livebufrend until the count reaches zero, so skipping
 * the decrement would leave a device reset waiting forever.
 *
 * b may be nil, in which case nothing happens.
 */
void
vioputbuf(VIOBuf *b)
{
	VIOBuf *bn;
	VIOQueue *q;
	u8int *p;
	int irq;
	
	if(b == nil) return;
	q = b->qu;
	irq = 0;
	qlock(q);
	if((q->d->devstat & DRIVEROK) != 0){
		if(q->used == nil)
			vmerror("virtio device %#x: address was set to an invalid value while holding buffer", q->d->pci->bdf);
		else{
			p = q->used + 4 + 8 * (q->usedidx % q->size);
			PUT32(p, 4, b->wptr);
			PUT32(p, 0, b->idx);
			PUT16(q->used, 2, ++q->usedidx);
		}
		/* sample the guest's flags while the ring pointers are still protected by the lock */
		irq = q->avail != nil && (GET16(q->avail, 0) & USEDNOIRQ) == 0;
	}
	if(--q->livebuf <= 0)
		rwakeup(&q->livebufrend);
	qunlock(q);
	if(irq)
		vioirq(q->d, 1);
	while(b != nil){
		bn = b->next;
		free(b);
		b = bn;
	}
}

/*
 * Copy up to n bytes from the readable (non-BUFWR) segments of
 * chain b into v, continuing at the chain's read cursor b->rptr.
 * Returns the number of bytes copied, which is less than n when
 * the readable part of the chain is exhausted.
 */
ulong
vioqread(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *seg;
	u8int *dst;
	u32int off;
	ulong chunk;
	int done;

	dst = v;
	seg = b;
	off = b->rptr;
	done = 0;
	while(done < n){
		/* advance to the readable segment that contains offset off */
		while(seg != nil && ((seg->flags & BUFWR) != 0 || off >= seg->len)){
			if((seg->flags & BUFWR) == 0)
				off -= seg->len;
			seg = seg->next;
		}
		if(seg == nil)
			break;
		chunk = seg->len - off;
		if(chunk > n - done)
			chunk = n - done;
		memmove(dst, (u8int*)seg->p + off, chunk);
		dst += chunk;
		off += chunk;
		done += chunk;
		b->rptr += chunk;
	}
	return done;
}

/*
 * Copy up to n bytes from v into the writable (BUFWR) segments of
 * chain b, continuing at the chain's write cursor b->wptr.
 * Returns the number of bytes copied, which is less than n when
 * the writable part of the chain is full.
 */
ulong
vioqwrite(VIOBuf *b, void *v, ulong n)
{
	VIOBuf *seg;
	u8int *src;
	u32int off;
	ulong chunk;
	int done;

	src = v;
	seg = b;
	off = b->wptr;
	done = 0;
	while(done < n){
		/* advance to the writable segment that contains offset off */
		while(seg != nil && ((seg->flags & BUFWR) == 0 || off >= seg->len)){
			if((seg->flags & BUFWR) != 0)
				off -= seg->len;
			seg = seg->next;
		}
		if(seg == nil)
			break;
		chunk = seg->len - off;
		if(chunk > n - done)
			chunk = n - done;
		memmove((u8int*)seg->p + off, src, chunk);
		src += chunk;
		off += chunk;
		done += chunk;
		b->wptr += chunk;
	}
	return done;
}

/*
 * Return how many bytes remain past the cursor in chain b:
 * the writable part (after b->wptr) when wr is 1, the readable
 * part (after b->rptr) when wr is 0.  Returns 0 if the cursor is
 * already at or beyond the end.
 */
ulong
vioqrem(VIOBuf *b, int wr)
{
	VIOBuf *seg;
	u32int off;
	ulong left;
	int found;

	off = wr ? b->wptr : b->rptr;
	left = 0;
	found = 0;
	for(seg = b; seg != nil; seg = seg->next){
		if(((seg->flags & BUFWR) != 0) != wr)
			continue;
		if(found)
			left += seg->len;
		else if(off < seg->len){
			/* segment holding the cursor: only its tail counts */
			left = seg->len - off;
			found = 1;
		}else
			off -= seg->len;
	}
	return left;
}

/*
 * Handle a guest write to the queue address register.  addr is the
 * page frame number written by the guest; the ring lives at
 * addr << 12.
 *
 * sz1 is the size of the descriptor table plus avail area rounded
 * up to 4096 bytes (the used area starts there); sz adds the used
 * area, rounded up likewise.  If gptr() rejects the range the
 * queue's ring pointers are cleared, otherwise they are set and any
 * thread sleeping in viogetbuf() is woken.
 *
 * q->addr records the page frame number exactly as written, so a
 * read of the register returns the guest's own value instead of the
 * shifted byte address truncated to 32 bits.
 */
static void
vioqaddrset(VIOQueue *q, u64int addr)
{
	void *p;
	int sz1, sz;
	u32int pfn;

	pfn = addr;
	addr <<= 12;
	sz1 = -(-(18 * q->size + 4) & -4096);
	sz = sz1 + (-(-(8 * q->size + 6) & -4096));
	p = gptr(addr, sz);
	if(p == nil)
		vmerror("virtio device %#x: attempt to set queue to invalid address %#p", q->d->pci->bdf, (void *) addr);
	qlock(q);
	q->addr = pfn;
	if(p == nil){
		q->desc = nil;
		q->avail = nil;
		q->used = nil;
	}else{
		q->desc = p;
		q->avail = (u8int*)p + 16 * q->size;
		q->used = (u8int*)p + sz1;
		rwakeupall(q);
	}
	qunlock(q);
}

/*
 * Forget the ring location and rewind both ring indices of q.
 * Size, owner and notify callback are left alone.
 */
static void
vioqreset(VIOQueue *q)
{
	q->availidx = q->usedidx = 0;
	q->addr = 0;
	q->desc = nil;
	q->avail = q->used = nil;
}

/*
 * Handle a guest write of val to the device status register.
 *
 * A non-zero value is stored and every queue's notify callback is
 * invoked.  Zero resets the device: the optional reset hook runs,
 * negotiated features and the interrupt status are cleared, and
 * each queue is reset once its live-buffer count has dropped to
 * zero.
 */
static void
viodevstatset(VIODev *v, u32int val)
{
	VIOQueue *q;
	int i;

	v->devstat = val;
	if(val != 0){
		for(i = 0; i < v->nqu; i++){
			q = &v->qu[i];
			q->notify(q);
		}
		return;
	}
	if(v->reset != nil)
		v->reset(v);
	v->guestfeat = 0;
	vioirq(v, 0);
	for(i = 0; i < v->nqu; i++){
		q = &v->qu[i];
		qlock(q);
		/* wait for outstanding chains to come back through vioputbuf() */
		while(q->livebuf > 0)
			rsleep(&q->livebufrend);
		vioqreset(q);
		qunlock(q);
	}
}

/*
 * I/O BAR handler registered by mkviodev(); vp is the VIODev.
 * The switch key combines direction and port: isin << 16 | port.
 *
 * Ports handled here: 0x0 device features, 0x4 guest features,
 * 0x8 queue address, 0xc queue size, 0xe queue select, 0x10 queue
 * notify, 0x12 device status, 0x13 interrupt status (cleared by
 * the read).  Any other access at port 20 or above goes to the
 * device's io hook with the port rebased to 0; everything else is
 * reported through iowhine().
 */
u32int
vioio(int isin, u16int port, u32int val, int sz, void *vp)
{
	static char whinebuf[32];
	VIODev *v;
	VIOQueue *sel;
	int rc;

	v = vp;
	/* currently selected queue, or nil if the selector is out of range */
	sel = nil;
	if(v->qsel < v->nqu)
		sel = &v->qu[v->qsel];
	switch(isin << 16 | port){
	case 0x4:
		v->guestfeat = val;
		return 0;
	case 0x8:
		if(sel != nil)
			vioqaddrset(sel, val);
		return 0;
	case 0xe:
		v->qsel = val;
		return 0;
	case 0x10:
		if(val < v->nqu)
			v->qu[val].notify(&v->qu[val]);
		return 0;
	case 0x12:
		viodevstatset(v, val);
		return 0;
	case 0x10000:
		return v->devfeat;
	case 0x10004:
		return v->guestfeat;
	case 0x10008:
		return sel != nil ? sel->addr : 0;
	case 0x1000c:
		return sel != nil ? sel->size : 0;
	case 0x1000e:
		return v->qsel;
	case 0x10010:
		return 0;
	case 0x10012:
		return v->devstat;
	case 0x10013:
		rc = v->isrstat;
		vioirq(v, 0);
		return rc;
	}
	if(port >= 20 && v->io != nil)
		return v->io(isin, port - 20, val, sz, v);
	snprint(whinebuf, sizeof(whinebuf), "virtio device %6x", v->pci->bdf);
	return iowhine(isin, port, val, sz, whinebuf);
}

/*
 * Allocate a virtio device with room for `queues` queues and
 * register it on the PCI bus: vendor 0x1AF4 with the given device
 * id, class and subsystem id, plus a 256-byte I/O BAR served by
 * vioio().  Queues are added afterwards with mkvioqueue().
 */
VIODev *
mkviodev(u16int devid, u32int pciclass, u32int subid, int queues)
{
	VIODev *dev;
	PCIDev *pci;

	dev = emalloc(sizeof(*dev));
	pci = mkpcidev(allocbdf(), devid << 16 | 0x1AF4, pciclass << 8, 1);
	dev->pci = pci;
	pci->subid = subid << 16;
	mkpcibar(pci, BARIO, 0, 256, vioio, dev);
	dev->allocqu = queues;
	dev->qu = emalloc(queues * sizeof(*dev->qu));
	return dev;
}

/*
 * Notify callback for queues served by a dedicated proc: wakes
 * every thread sleeping on the queue's Rendez (see viogetbuf()).
 */
static void
viowakeup(VIOQueue *q)
{
	qlock(q);
	rwakeupall(q);
	qunlock(q);
}

/*
 * Initialise the next unused queue slot of device d with sz
 * descriptors and notify callback fn, and return it.  sz must be a
 * power of two no larger than 32768, fn must not be nil and the
 * device must still have a free slot; all of this is asserted.
 */
VIOQueue *
mkvioqueue(VIODev *d, int sz, void (*fn)(VIOQueue*))
{
	VIOQueue *nq;

	assert(sz > 0 && sz <= 32768 && (sz & sz - 1) == 0 && fn != nil && d->nqu < d->allocqu);
	nq = &d->qu[d->nqu];
	d->nqu++;
	nq->d = d;
	nq->size = sz;
	nq->notify = fn;
	/* both rendezvous points are guarded by the queue's own lock */
	nq->livebufrend.l = nq;
	nq->Rendez.l = nq;
	vioqreset(nq);
	return nq;
}

/*
 * Fold a 6-byte MAC address into a 6-bit value (0..63) used as the
 * bit index into the 64-bit address filters.  Both 3-byte halves
 * are mixed with the same shift pattern.
 */
int
bloomhash(u8int *mac)
{
	int h, i;

	h = 0;
	for(i = 0; i < 6; i += 3){
		h ^= mac[i];
		h ^= mac[i] >> 6 ^ mac[i+1] << 2;
		h ^= mac[i+1] >> 4 ^ mac[i+2] << 4;
		h ^= mac[i+2] >> 2;
	}
	return h & 63;
}

int
viomacok(VIODev *d, u8int *mac)
{
	static u8int bcast[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

	if((d->net.flags & VNETPROMISC) != 0) return 1;
	if((mac[0] & 1) == 0){
		if((d->net.flags & (VNETNOUNI|VNETALLUNI)) != 0)
			return (d->net.flags & VNETNOUNI) == 0;
		if(memcmp(mac, d->net.mac, 6) == 0) return 1;
		if(d->net.macbloom == 0) return 0;
		return d->net.macbloom >> bloomhash(mac) & 1;
	}else if(memcmp(mac, bcast, 6) == 0)
		return (d->net.flags & VNETNOBCAST) == 0;
	else{
		if((d->net.flags & (VNETNOMULTI|VNETALLMULTI)) != 0)
			return (d->net.flags & VNETNOMULTI) == 0;
		if(d->net.multibloom == 0) return 0;
		return d->net.multibloom >> bloomhash(mac) & 1;
	}
}

/*
 * Receive proc of a network device (vp is the VIODev).  Reads
 * frames from net.readfd, drops those shorter than 14 bytes or
 * rejected by viomacok(), and hands the rest to the guest through
 * queue 0, each preceded by a zeroed 10-byte header.  The proc
 * exits on end of file or on a read error.
 */
void
vionetrproc(void *vp)
{
	VIODev *dev;
	VIOQueue *rxq;
	VIOBuf *chain;
	uchar hdr[10];
	uchar frame[1600];
	int n;

	threadsetname("vionetrproc");
	dev = vp;
	rxq = &dev->qu[0];
	memset(hdr, 0, sizeof(hdr));
	for(;;){
		n = read(dev->net.readfd, frame, sizeof(frame));
		if(n <= 0){
			if(n == 0){
				vmerror("read(vionetrproc): eof");
				threadexits("read: eof");
			}
			vmerror("read(vionetrproc): %r");
			threadexits("read: %r");
		}
		if(n < 14){
			vmerror("vionetrproc: short packet received (len=%d)", n);
			continue;
		}
		if(!viomacok(dev, frame))
			continue;
		chain = viogetbuf(rxq, 1);
		if(chain == nil){
			vmerror("viogetbuf: %r");
			continue;
		}
		vioqwrite(chain, hdr, sizeof(hdr));
		vioqwrite(chain, frame, n);
		vioputbuf(chain);
	}
}

/*
 * Transmit proc of a network device (vp is the VIODev).  Takes
 * chains from queue 1, strips the 10-byte virtio header and writes
 * the frame to net.writefd.
 *
 * Frames that fill the whole buffer are treated as too long and
 * dropped; frames shorter than 14 bytes are dropped (silently when
 * empty); frames shorter than 60 bytes are zero-padded to 60.
 * With VNETHEADER set, the frame is preceded by a 10-byte header:
 * 2 bytes of big-endian length and 8 bytes of big-endian
 * nanosec() timestamp.
 *
 * The short-write check compares against the number of bytes
 * actually handed to write(), which includes that header.
 */
void
vionetwproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *vb;
	uchar txhead[10];
	uchar txbuf[1610];
	int rc, len, wlen;
	uvlong ns;
	
	threadsetname("vionetwproc");
	v = vp;
	q = &v->qu[1];
	for(;;){
		vb = viogetbuf(q, 1);
		if(vb == nil){
			vmerror("viogetbuf: %r");
			threadexits("viogetbuf: %r");
		}
		vioqread(vb, txhead, sizeof(txhead));
		/* the first 10 bytes of txbuf are reserved for the optional header */
		len = vioqread(vb, txbuf+10, sizeof(txbuf)-10);
		if(len == sizeof(txbuf)-10){
			vmerror("virtio net: ignoring excessively long packet");
			vioputbuf(vb);
			continue;
		}
		if(len < 14){
			/* openbsd ends up sending lots of zero length packets sometimes */
			if(len != 0)
				vmerror("virtio net: ignoring short packet (length=%d)", len);
			vioputbuf(vb);
			continue;
		}else if(len < 60){ /* openbsd doesn't seem to know about ethernet minimum packet lengths either */
			memset(txbuf + 10 + len, 0, 60 - len);
			len = 60;
		}
		if((v->net.flags & VNETHEADER) != 0){
			txbuf[0] = len  >> 8;
			txbuf[1] = len;
			ns = nanosec();
			txbuf[2] = ns >> 56;
			txbuf[3] = ns >> 48;
			txbuf[4] = ns >> 40;
			txbuf[5] = ns >> 32;
			txbuf[6] = ns >> 24;
			txbuf[7] = ns >> 16;
			txbuf[8] = ns >> 8;
			txbuf[9] = ns;
			wlen = len + 10;
			rc = write(v->net.writefd, txbuf, wlen);
		}else{
			wlen = len;
			rc = write(v->net.writefd, txbuf + 10, wlen);
		}
		vioputbuf(vb);
		if(rc < 0){
			vmerror("write(vionetwproc): %r");
			continue;
		}
		if(rc < wlen){
			vmerror("write(vionetwproc): incomplete write (%d < %d)", rc, wlen);
			continue;
		}
	}
}

/*
 * Device-specific register handler for the network device (port
 * is already rebased by vioio()).  Reads of bytes 0-3 return the
 * first four MAC bytes, reads of bytes 4-7 return the last two MAC
 * bytes with bit 16 set above them; in both cases the 32-bit word
 * is shifted so the addressed byte comes first.  Anything else is
 * reported through iowhine().
 */
u32int
vionetio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	int shift;

	shift = (port & 3) * 8;
	switch(isin << 16 | port){
	case 0x10000:
	case 0x10001:
	case 0x10002:
	case 0x10003:
		return GET32(v->net.mac, 0) >> shift;
	case 0x10004:
	case 0x10005:
	case 0x10006:
	case 0x10007:
		return (GET16(v->net.mac, 4) | 1 << 16) >> shift;
	}
	return iowhine(isin, port, val, sz, "virtio net");
}

/*
 * Read two address tables from chain b, each a 32-bit count
 * followed by that many 6-byte addresses, and build one 64-bit
 * filter per table with bloomhash().  On success the first becomes
 * the unicast filter and the second the multicast filter of d and
 * 0 is returned.  If the chain runs out of data, 1 is returned and
 * the device's filters are left untouched.
 */
int
vionettables(VIODev *d, VIOBuf *b)
{
	u64int filt[2];
	u8int cnt[4];
	u8int addr[6];
	int t, left;

	for(t = 0; t < 2; t++){
		filt[t] = 0;
		if(vioqread(b, cnt, 4) < 4)
			return 1;
		for(left = GET32(cnt, 0); left != 0; left--){
			if(vioqread(b, addr, 6) < 6)
				return 1;
			filt[t] |= 1ULL<<bloomhash(addr);
		}
	}
	d->net.macbloom = filt[0];
	d->net.multibloom = filt[1];
	return 0;
}

/*
 * Notify callback of the network control queue: processes every
 * pending chain without blocking.  Each chain starts with a class
 * byte and a command byte; a one-byte ack (0 ok, 1 error) is
 * written back before the chain is returned.
 *
 * Class 0, commands 0-5 switch one receive-filter flag on (argument
 * byte 1) or off (argument byte 0).  Class 1 command 0 loads the
 * address tables, class 1 command 1 sets the MAC address.  Anything
 * else, or a truncated request, is answered with an error.
 */
void
vionetcmd(VIOQueue *q)
{
	static int rxflag[6] = {
		VNETPROMISC, VNETALLMULTI, VNETALLUNI,
		VNETNOMULTI, VNETNOUNI, VNETNOBCAST,
	};
	VIODev *d;
	VIOBuf *b;
	u8int cmd[2], buf[6];
	u8int ack;
	int fl;

	d = q->d;
	while((b = viogetbuf(q, 0)) != nil){
		ack = 0;
		if(vioqread(b, cmd, 2) < 2)
			ack = 1;
		else if(cmd[0] == 0 && cmd[1] < 6){
			fl = rxflag[cmd[1]];
			if(vioqread(b, buf, 1) < 1)
				ack = 1;
			else if(buf[0] == 1)
				d->net.flags |= fl;
			else if(buf[0] == 0)
				d->net.flags &= ~fl;
			else
				ack = 1;
		}else if(cmd[0] == 1 && cmd[1] == 0){
			/* MAC_TABLE_SET */
			ack = vionettables(d, b);
		}else if(cmd[0] == 1 && cmd[1] == 1){
			/* MAC_ADDR_SET */
			if(vioqread(b, buf, 6) < 6)
				ack = 1;
			else
				memmove(d->net.mac, buf, 6);
		}else
			ack = 1;
		vioqwrite(b, &ack, 1);
		vioputbuf(b);
	}
}

/*
 * Reset hook of the network device: clears every receive-filter
 * flag (only VNETHEADER survives) and empties both address filters.
 */
void
vionetreset(VIODev *d)
{
	d->net.flags &= VNETHEADER;
	d->net.macbloom = 0;
	d->net.multibloom = 0;
}

/*
 * Create a virtio network device backed by `net`.
 *
 * net may start with any number of these prefixes:
 *   hdr!      prepend a length/timestamp header to transmitted frames
 *   file!     open the rest as a file instead of dialing it
 *   ea:ADDR!  use ADDR as the MAC address
 * Without file! the remainder is dialed (default service "-1") and,
 * if a control fd was obtained, "promiscuous" and "bridge" are
 * written to it.  If no address was given or it does not parse, a
 * random locally administered unicast address is generated.
 *
 * Returns 0 on success, -1 if the argument is malformed or the
 * backend cannot be opened.
 */
int
mkvionet(char *net)
{
	enum { VNETFILE = 1 };
	VIODev *d;
	char *ea;
	int fd, cfd, flags, more;

	ea = nil;
	flags = 0;
	more = 1;
	while(more){
		if(strncmp(net, "hdr!", 4) == 0){
			flags |= VNETHEADER;
			net += 4;
		}else if(strncmp(net, "file!", 5) == 0){
			flags |= VNETFILE;
			net += 5;
		}else if(strncmp(net, "ea:", 3) == 0){
			ea = net + 3;
			net = strchr(ea, '!');
			if(net == nil){
				werrstr("missing: !");
				return -1;
			}
			net++;
		}else
			more = 0;
	}
	if((flags & VNETFILE) == 0){
		fd = dial(netmkaddr("-1", net, nil), nil, nil, &cfd);
		if(fd < 0)
			return -1;
		if(cfd >= 0){
			write(cfd, "promiscuous", 11);
			write(cfd, "bridge", 6);
		}
	}else{
		/* VNETFILE is local to this function and must not leak into net.flags */
		flags &= ~VNETFILE;
		fd = open(net, ORDWR);
		if(fd < 0)
			return -1;
	}

	d = mkviodev(0x1000, 0x020000, 1, 3);
	mkvioqueue(d, 1024, viowakeup);
	mkvioqueue(d, 1024, viowakeup);
	mkvioqueue(d, 32, vionetcmd);
	if(ea == nil || parseether(d->net.mac, ea)){
		genrandom(d->net.mac, 6);
		/* clear the multicast bit, set the locally-administered bit */
		d->net.mac[0] = d->net.mac[0] & ~1 | 2;
	}
	d->net.readfd = d->net.writefd = fd;
	d->net.flags = flags;
	d->devfeat = 1<<5|1<<16|1<<17|1<<18|1<<20;
	d->reset = vionetreset;
	d->io = vionetio;
	proccreate(vionetrproc, d, 8192);
	proccreate(vionetwproc, d, 8192);
	return 0;
}

/*
 * Device-specific register handler for the block device (port is
 * already rebased by vioio()).  Reads of bytes 0-7 return the
 * 64-bit capacity blk.size, one 32-bit half at a time, shifted so
 * the addressed byte comes first.  Anything else is reported
 * through iowhine().
 */
u32int
vioblkio(int isin, u16int port, u32int val, int sz, VIODev *v)
{
	u32int half;

	switch(isin << 16 | port){
	case 0x10000:
	case 0x10001:
	case 0x10002:
	case 0x10003:
		half = v->blk.size;
		break;
	case 0x10004:
	case 0x10005:
	case 0x10006:
	case 0x10007:
		half = v->blk.size >> 32;
		break;
	default:
		return iowhine(isin, port, val, sz, "virtio blk");
	}
	return half >> (port & 3) * 8;
}

void
vioblkproc(void *vp)
{
	VIODev *v;
	VIOQueue *q;
	VIOBuf *b;
	u8int cmd[16];
	u8int ack;
	char buf[8192];
	uvlong addr;
	int rc, n, m;
	
	threadsetname("vioblkproc");
	v = vp;
	q = &v->qu[0];
	for(;;){
		b = viogetbuf(q, 1);
		if(b == nil){
			vmerror("vioblkproc: viogetbuf: %r");
			threadexits("vioblkproc: viogetbuf: %r");
		}
		ack = 0;
		if(vioqread(b, cmd, sizeof(cmd)) < sizeof(cmd)) goto nope;
		addr = GET64(cmd, 8);
		switch(GET32(cmd, 0)){
		case 0:
			n = vioqrem(b, 1) - 1;
			if(n < 0 || addr * 512 + n > v->blk.size * 512){
				ack = 1;
				break;
			}
			seek(v->blk.fd, addr << 9, 0);
			for(; n > 0; n -= rc){
				rc = sizeof(buf);
				if(n < rc) rc = n;
				rc = read(v->blk.fd, buf, rc);
				if(rc < 0) vmerror("read(vioblkproc): %r");
				if(rc <= 0){
					ack = 1;
					break;
				}
				vioqwrite(b, buf, rc);
			}
			break;
		case 1:
			n = vioqrem(b, 0);
			if(addr * 512 + n > v->blk.size * 512){
				ack = 1;
				break;
			}
			seek(v->blk.fd, addr << 9, 0);
			for(; n > 0; n -= rc){
				m = vioqread(b, buf, sizeof(buf));
				rc = write(v->blk.fd, buf, m);
				if(rc < 0) vmerror("write(vioblkproc): %r");
				if(rc < m){
					ack = 1;
					break;
				}
			}
			break;
		default:
		nope:
			ack = 2;
		}
		vioqwrite(b, &ack, 1);
		vioputbuf(b);
	}
}

int
mkvioblk(char *fn)
{
	int fd;
	VIODev *d;
	
	fd = open(fn, ORDWR);
	if(fd < 0) return -1;
	d = mkviodev(0x1000, 0x018000, 2, 1);
	mkvioqueue(d, 32, viowakeup);
	d->io = vioblkio;
	d->blk.fd = fd;
	d->blk.size = seek(fd, 0, 2) >> 9;
	proccreate(vioblkproc, d, 16384);
	return 0;
}