ref: 86b470d877870dd1c48f57d98586b48278cd9b5c
parent: 83f489cf5d6e3740ae33e8b400099ada12254670
author: Keegan Saunders <keegan@undefinedbehaviour.org>
date: Fri Sep 15 18:29:01 EDT 2023
kernel: move virtio10 drivers to port. These drivers are also known to work on arm64, so make them available.
--- a/sys/src/9/pc/ethervirtio10.c
+++ /dev/null
@@ -1,793 +1,0 @@
-/*
- * virtio 1.0 ethernet driver
- * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
- *
- * In contrast to ethervirtio.c, this driver handles the non-legacy
- * interface for virtio ethernet which uses mmio for all register accesses
- * and requires a laborate pci capability structure dance to get working.
- *
- * It is kind of pointless as it is most likely slower than
- * port i/o (harder to emulate on the pc platform).
- *
- * The reason why this driver is needed it is that vultr set the
- * disable-legacy=on option in the -device parameter for qemu
- * on their hypervisor.
- */
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "io.h"
-#include "../port/pci.h"
-#include "../port/error.h"
-#include "../port/netif.h"
-#include "../port/etherif.h"
-
-typedef struct Vconfig Vconfig;
-typedef struct Vnetcfg Vnetcfg;
-
-typedef struct Vring Vring;
-typedef struct Vdesc Vdesc;
-typedef struct Vused Vused;
-typedef struct Vheader Vheader;
-typedef struct Vqueue Vqueue;
-
-typedef struct Ctlr Ctlr;
-
-enum {
- /* §2.1 Device Status Field */
- Sacknowledge = 1,
- Sdriver = 2,
- Sdriverok = 4,
- Sfeaturesok = 8,
- Sfailed = 128,
-
- /* flags in Qnetstatus */
- Nlinkup = (1<<0),
- Nannounce = (1<<1),
-
- /* feat[0] bits */
- Fmac = 1<<5,
- Fstatus = 1<<16,
- Fctrlvq = 1<<17,
- Fctrlrx = 1<<18,
-
- /* feat[1] bits */
- Fversion1 = 1<<(32-32),
-
- /* vring used flags */
- Unonotify = 1,
- /* vring avail flags */
- Rnointerrupt = 1,
-
- /* descriptor flags */
- Dnext = 1,
- Dwrite = 2,
- Dindirect = 4,
-
- /* struct sizes */
- VringSize = 4,
- VdescSize = 16,
- VusedSize = 8,
- VheaderSize = 12,
-
- Vrxq = 0,
- Vtxq = 1,
- Vctlq = 2,
-
- /* class/cmd for Vctlq */
- CtrlRx = 0x00,
- CmdPromisc = 0x00,
- CmdAllmulti = 0x01,
- CtrlMac = 0x01,
- CmdMacTableSet = 0x00,
- CtrlVlan= 0x02,
- CmdVlanAdd = 0x00,
- CmdVlanDel = 0x01,
-};
-
-struct Vconfig {
- u32int devfeatsel;
- u32int devfeat;
- u32int drvfeatsel;
- u32int drvfeat;
-
- u16int msixcfg;
- u16int nqueues;
-
- u8int status;
- u8int cfggen;
- u16int queuesel;
-
- u16int queuesize;
- u16int queuemsixvect;
-
- u16int queueenable;
- u16int queuenotifyoff;
-
- u64int queuedesc;
- u64int queueavail;
- u64int queueused;
-};
-
-struct Vnetcfg
-{
- u16int mac0;
- u16int mac1;
- u16int mac2;
- u16int status;
- u16int maxqueuepairs;
- u16int mtu;
-};
-
-struct Vring
-{
- u16int flags;
- u16int idx;
-};
-
-struct Vdesc
-{
- u64int addr;
- u32int len;
- u16int flags;
- u16int next;
-};
-
-struct Vused
-{
- u32int id;
- u32int len;
-};
-
-struct Vheader
-{
- u8int flags;
- u8int segtype;
- u16int hlen;
- u16int seglen;
- u16int csumstart;
- u16int csumend;
-};
-
-struct Vqueue
-{
- Rendez;
-
- uint qsize;
- uint qmask;
-
- Vdesc *desc;
-
- Vring *avail;
- u16int *availent;
- u16int *availevent;
-
- Vring *used;
- Vused *usedent;
- u16int *usedevent;
- u16int lastused;
-
- uint nintr;
- uint nnote;
-
- /* notify register */
- void *notify;
-};
-
-struct Ctlr {
- Lock;
-
- QLock ctllock;
-
- int attached;
-
- /* registers */
- Vconfig *cfg;
- Vnetcfg *dev;
- u8int *isr;
- u8int *notify;
- u32int notifyoffmult;
-
- uvlong port;
- Pcidev *pcidev;
- Ctlr *next;
- int active;
- ulong feat[2];
- int nqueue;
-
- /* virtioether has 3 queues: rx, tx and ctl */
- Vqueue queue[3];
-};
-
-static Ctlr *ctlrhead;
-
-static int
-vhasroom(void *v)
-{
- Vqueue *q = v;
- return q->lastused != q->used->idx;
-}
-
-static void
-vqnotify(Ctlr *ctlr, int x)
-{
- Vqueue *q;
-
- coherence();
- q = &ctlr->queue[x];
- if(q->used->flags & Unonotify)
- return;
- q->nnote++;
- *((u16int*)q->notify) = x;
-}
-
-static void
-txproc(void *v)
-{
- Vheader *header;
- Block **blocks;
- Ether *edev;
- Ctlr *ctlr;
- Vqueue *q;
- Vused *u;
- Block *b;
- int i, j;
-
- edev = v;
- ctlr = edev->ctlr;
- q = &ctlr->queue[Vtxq];
-
- header = smalloc(VheaderSize);
- blocks = smalloc(sizeof(Block*) * (q->qsize/2));
-
- for(i = 0; i < q->qsize/2; i++){
- j = i << 1;
- q->desc[j].addr = PADDR(header);
- q->desc[j].len = VheaderSize;
- q->desc[j].next = j | 1;
- q->desc[j].flags = Dnext;
-
- q->availent[i] = q->availent[i + q->qsize/2] = j;
-
- j |= 1;
- q->desc[j].next = 0;
- q->desc[j].flags = 0;
- }
-
- q->avail->flags &= ~Rnointerrupt;
-
- while(waserror())
- ;
-
- while((b = qbread(edev->oq, 1000000)) != nil){
- for(;;){
- /* retire completed packets */
- while((i = q->lastused) != q->used->idx){
- u = &q->usedent[i & q->qmask];
- i = (u->id & q->qmask) >> 1;
- if(blocks[i] == nil)
- break;
- freeb(blocks[i]);
- blocks[i] = nil;
- q->lastused++;
- }
-
- /* have free slot? */
- i = q->avail->idx & (q->qmask >> 1);
- if(blocks[i] == nil)
- break;
-
- /* ring full, wait and retry */
- if(!vhasroom(q))
- sleep(q, vhasroom, q);
- }
-
- /* slot is free, fill in descriptor */
- blocks[i] = b;
- j = (i << 1) | 1;
- q->desc[j].addr = PADDR(b->rp);
- q->desc[j].len = BLEN(b);
- coherence();
- q->avail->idx++;
- vqnotify(ctlr, Vtxq);
- }
-
- pexit("ether out queue closed", 1);
-}
-
-static void
-rxproc(void *v)
-{
- Vheader *header;
- Block **blocks;
- Ether *edev;
- Ctlr *ctlr;
- Vqueue *q;
- Vused *u;
- Block *b;
- int i, j;
-
- edev = v;
- ctlr = edev->ctlr;
- q = &ctlr->queue[Vrxq];
-
- header = smalloc(VheaderSize);
- blocks = smalloc(sizeof(Block*) * (q->qsize/2));
-
- for(i = 0; i < q->qsize/2; i++){
- j = i << 1;
- q->desc[j].addr = PADDR(header);
- q->desc[j].len = VheaderSize;
- q->desc[j].next = j | 1;
- q->desc[j].flags = Dwrite|Dnext;
-
- q->availent[i] = q->availent[i + q->qsize/2] = j;
-
- j |= 1;
- q->desc[j].next = 0;
- q->desc[j].flags = Dwrite;
- }
-
- q->avail->flags &= ~Rnointerrupt;
-
- while(waserror())
- ;
-
- for(;;){
- /* replenish receive ring */
- do {
- i = q->avail->idx & (q->qmask >> 1);
- if(blocks[i] != nil)
- break;
- if((b = iallocb(ETHERMAXTU)) == nil)
- break;
- blocks[i] = b;
- j = (i << 1) | 1;
- q->desc[j].addr = PADDR(b->rp);
- q->desc[j].len = BALLOC(b);
- coherence();
- q->avail->idx++;
- } while(q->avail->idx != q->used->idx);
- vqnotify(ctlr, Vrxq);
-
- /* wait for any packets to complete */
- if(!vhasroom(q))
- sleep(q, vhasroom, q);
-
- /* retire completed packets */
- while((i = q->lastused) != q->used->idx) {
- u = &q->usedent[i & q->qmask];
- i = (u->id & q->qmask) >> 1;
- if((b = blocks[i]) == nil)
- break;
-
- blocks[i] = nil;
- b->wp = b->rp + u->len - VheaderSize;
- etheriq(edev, b);
- q->lastused++;
- }
- }
-}
-
-static int
-vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata)
-{
- uchar hdr[2], ack[1];
- Ctlr *ctlr;
- Vqueue *q;
- Vdesc *d;
- int i;
-
- ctlr = edev->ctlr;
- q = &ctlr->queue[Vctlq];
- if(q->qsize < 3)
- return -1;
-
- qlock(&ctlr->ctllock);
- while(waserror())
- ;
-
- ack[0] = 0x55;
- hdr[0] = class;
- hdr[1] = cmd;
-
- d = &q->desc[0];
- d->addr = PADDR(hdr);
- d->len = sizeof(hdr);
- d->next = 1;
- d->flags = Dnext;
- d++;
- d->addr = PADDR(data);
- d->len = ndata;
- d->next = 2;
- d->flags = Dnext;
- d++;
- d->addr = PADDR(ack);
- d->len = sizeof(ack);
- d->next = 0;
- d->flags = Dwrite;
-
- i = q->avail->idx & q->qmask;
- q->availent[i] = 0;
- coherence();
-
- q->avail->flags &= ~Rnointerrupt;
- q->avail->idx++;
- vqnotify(ctlr, Vctlq);
- while(!vhasroom(q))
- sleep(q, vhasroom, q);
- q->lastused = q->used->idx;
- q->avail->flags |= Rnointerrupt;
-
- qunlock(&ctlr->ctllock);
- poperror();
-
- if(ack[0] != 0)
- print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
-
- return ack[0];
-}
-
-static void
-interrupt(Ureg*, void* arg)
-{
- Ether *edev;
- Ctlr *ctlr;
- Vqueue *q;
- int i;
-
- edev = arg;
- ctlr = edev->ctlr;
- if(*ctlr->isr & 1){
- for(i = 0; i < ctlr->nqueue; i++){
- q = &ctlr->queue[i];
- if(vhasroom(q)){
- q->nintr++;
- wakeup(q);
- }
- }
- }
-}
-
-static void
-attach(Ether* edev)
-{
- char name[KNAMELEN];
- Ctlr* ctlr;
- int i;
-
- ctlr = edev->ctlr;
- ilock(ctlr);
- if(ctlr->attached){
- iunlock(ctlr);
- return;
- }
- ctlr->attached = 1;
-
- /* enable the queues */
- for(i = 0; i < ctlr->nqueue; i++){
- ctlr->cfg->queuesel = i;
- ctlr->cfg->queueenable = 1;
- }
-
- /* driver is ready */
- ctlr->cfg->status |= Sdriverok;
-
- iunlock(ctlr);
-
- /* start kprocs */
- snprint(name, sizeof name, "#l%drx", edev->ctlrno);
- kproc(name, rxproc, edev);
- snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
- kproc(name, txproc, edev);
-}
-
-static long
-ifstat(Ether *edev, void *a, long n, ulong offset)
-{
- int i, l;
- char *p;
- Ctlr *ctlr;
- Vqueue *q;
-
- ctlr = edev->ctlr;
-
- p = smalloc(READSTR);
-
- l = snprint(p, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]);
- l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", ctlr->cfg->status);
-
- for(i = 0; i < ctlr->nqueue; i++){
- q = &ctlr->queue[i];
- l += snprint(p+l, READSTR-l,
- "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
- i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
- }
-
- n = readstr(offset, a, n, p);
- free(p);
-
- return n;
-}
-
-static void
-shutdown(Ether* edev)
-{
- Ctlr *ctlr = edev->ctlr;
-
- coherence();
- ctlr->cfg->status = 0;
- coherence();
-
- pciclrbme(ctlr->pcidev);
-}
-
-static void
-promiscuous(void *arg, int on)
-{
- Ether *edev = arg;
- uchar b[1];
-
- b[0] = on != 0;
- vctlcmd(edev, CtrlRx, CmdPromisc, b, sizeof(b));
-}
-
-static void
-multicast(void *arg, uchar*, int)
-{
- Ether *edev = arg;
- uchar b[1];
-
- b[0] = edev->nmaddr > 0;
- vctlcmd(edev, CtrlRx, CmdAllmulti, b, sizeof(b));
-}
-
-static int
-initqueue(Vqueue *q, int size)
-{
- uchar *p;
-
- q->desc = mallocalign(VdescSize*size, 16, 0, 0);
- if(q->desc == nil)
- return -1;
- p = mallocalign(VringSize + 2*size + 2, 2, 0, 0);
- if(p == nil){
-FreeDesc:
- free(q->desc);
- q->desc = nil;
- return -1;
- }
- q->avail = (void*)p;
- p += VringSize;
- q->availent = (void*)p;
- p += sizeof(u16int)*size;
- q->availevent = (void*)p;
- p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0);
- if(p == nil){
- free(q->avail);
- q->avail = nil;
- goto FreeDesc;
- }
- q->used = (void*)p;
- p += VringSize;
- q->usedent = (void*)p;
- p += VusedSize*size;
- q->usedevent = (void*)p;
-
- q->qsize = size;
- q->qmask = q->qsize - 1;
-
- q->lastused = q->avail->idx = q->used->idx = 0;
-
- q->avail->flags |= Rnointerrupt;
-
- return 0;
-}
-
-static int
-matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
-{
- int bar;
-
- if(cap != 9 || pcicfgr8(p, off+3) != typ)
- return 1;
-
- /* skip invalid or non memory bars */
- bar = pcicfgr8(p, off+4);
- if(bar < 0 || bar >= nelem(p->mem)
- || p->mem[bar].size == 0
- || (p->mem[bar].bar & 3) != 0)
- return 1;
-
- return 0;
-}
-
-static int
-virtiocap(Pcidev *p, int typ)
-{
- return pcienumcaps(p, matchvirtiocfgcap, typ);
-}
-
-static void*
-virtiomapregs(Pcidev *p, int cap, int size)
-{
- int bar, len;
- uvlong addr;
-
- if(cap < 0)
- return nil;
- bar = pcicfgr8(p, cap+4) % nelem(p->mem);
- addr = pcicfgr32(p, cap+8);
- len = pcicfgr32(p, cap+12);
- if(size <= 0)
- size = len;
- else if(len < size)
- return nil;
- if(addr+len > p->mem[bar].size)
- return nil;
- addr += p->mem[bar].bar & ~0xFULL;
- return vmap(addr, size);
-}
-
-static Ctlr*
-pciprobe(void)
-{
- Ctlr *c, *h, *t;
- Pcidev *p;
- Vconfig *cfg;
- int bar, cap, n, i;
-
- h = t = nil;
-
- /* §4.1.2 PCI Device Discovery */
- for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
- /* non-transitional devices will have a revision > 0 */
- if(p->rid == 0)
- continue;
- if((cap = virtiocap(p, 1)) < 0)
- continue;
- bar = pcicfgr8(p, cap+4) % nelem(p->mem);
- cfg = virtiomapregs(p, cap, sizeof(Vconfig));
- if(cfg == nil)
- continue;
- if((c = mallocz(sizeof(Ctlr), 1)) == nil){
- print("ethervirtio: no memory for Ctlr\n");
- break;
- }
- c->cfg = cfg;
- c->pcidev = p;
- c->port = p->mem[bar].bar & ~0xFULL;
-
- pcienable(p);
- c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg));
- if(c->dev == nil)
- goto Baddev;
- c->isr = virtiomapregs(p, virtiocap(p, 3), 0);
- if(c->isr == nil)
- goto Baddev;
- cap = virtiocap(p, 2);
- c->notify = virtiomapregs(p, cap, 0);
- if(c->notify == nil)
- goto Baddev;
- c->notifyoffmult = pcicfgr32(p, cap+16);
-
- /* device reset */
- coherence();
- cfg->status = 0;
- while(cfg->status != 0)
- delay(1);
- cfg->status = Sacknowledge|Sdriver;
-
- /* negotiate feature bits */
- cfg->devfeatsel = 1;
- c->feat[1] = cfg->devfeat;
-
- cfg->devfeatsel = 0;
- c->feat[0] = cfg->devfeat;
-
- cfg->drvfeatsel = 1;
- cfg->drvfeat = c->feat[1] & Fversion1;
-
- cfg->drvfeatsel = 0;
- cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
-
- cfg->status |= Sfeaturesok;
-
- for(i=0; i<nelem(c->queue); i++){
- cfg->queuesel = i;
- n = cfg->queuesize;
- if(n == 0 || (n & (n-1)) != 0){
- if(i < 2)
- print("ethervirtio: queue %d has invalid size %d\n", i, n);
- break;
- }
- if(initqueue(&c->queue[i], n) < 0)
- break;
- c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
- coherence();
- cfg->queuedesc = PADDR(c->queue[i].desc);
- cfg->queueavail = PADDR(c->queue[i].avail);
- cfg->queueused = PADDR(c->queue[i].used);
- }
- if(i < 2){
- print("ethervirtio: no queues\n");
-Baddev:
- pcidisable(p);
- /* TODO, vunmap */
- free(c);
- continue;
- }
- c->nqueue = i;
-
- if(h == nil)
- h = c;
- else
- t->next = c;
- t = c;
- }
-
- return h;
-}
-
-
-static int
-reset(Ether* edev)
-{
- static uchar zeros[Eaddrlen];
- Ctlr *ctlr;
- int i;
-
- if(ctlrhead == nil)
- ctlrhead = pciprobe();
-
- for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
- if(ctlr->active)
- continue;
- if(edev->port == 0 || edev->port == ctlr->port){
- ctlr->active = 1;
- break;
- }
- }
-
- if(ctlr == nil)
- return -1;
-
- edev->ctlr = ctlr;
- edev->port = ctlr->port;
- edev->irq = ctlr->pcidev->intl;
- edev->tbdf = ctlr->pcidev->tbdf;
- edev->mbps = 1000;
- edev->link = 1;
-
- if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){
- for(i = 0; i < Eaddrlen; i++)
- edev->ea[i] = ((uchar*)ctlr->dev)[i];
- } else {
- for(i = 0; i < Eaddrlen; i++)
- ((uchar*)ctlr->dev)[i] = edev->ea[i];
- }
-
- edev->arg = edev;
-
- edev->attach = attach;
- edev->shutdown = shutdown;
- edev->ifstat = ifstat;
-
- if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){
- edev->multicast = multicast;
- edev->promiscuous = promiscuous;
- }
-
- pcisetbme(ctlr->pcidev);
- intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
-
- return 0;
-}
-
-void
-ethervirtio10link(void)
-{
- addethercard("virtio10", reset);
-}
--- a/sys/src/9/pc/sdvirtio10.c
+++ /dev/null
@@ -1,810 +1,0 @@
-/*
- * virtio 1.0 disk driver
- * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
- *
- * In contrast to sdvirtio.c, this driver handles the non-legacy
- * interface for virtio disk which uses mmio for all register accesses
- * and requires a laborate pci capability structure dance to get working.
- *
- * It is kind of pointless as it is most likely slower than
- * port i/o (harder to emulate on the pc platform).
- *
- * The reason why this driver is needed it is that vultr set the
- * disable-legacy=on option in the -device parameter for qemu
- * on their hypervisor.
- */
-#include "u.h"
-#include "../port/lib.h"
-#include "mem.h"
-#include "dat.h"
-#include "fns.h"
-#include "io.h"
-#include "../port/pci.h"
-#include "ureg.h"
-#include "../port/error.h"
-
-#include "../port/sd.h"
-
-typedef struct Vscsidev Vscsidev;
-typedef struct Vblkdev Vblkdev;
-
-typedef struct Vconfig Vconfig;
-typedef struct Vring Vring;
-typedef struct Vdesc Vdesc;
-typedef struct Vused Vused;
-typedef struct Vqueue Vqueue;
-typedef struct Vdev Vdev;
-
-
-/* device types */
-enum {
- TypBlk = 2,
- TypSCSI = 8,
-};
-
-/* status flags */
-enum {
- Acknowledge = 1,
- Driver = 2,
- FeaturesOk = 8,
- DriverOk = 4,
- Failed = 0x80,
-};
-
-/* descriptor flags */
-enum {
- Next = 1,
- Write = 2,
- Indirect = 4,
-};
-
-/* struct sizes */
-enum {
- VringSize = 4,
-};
-
-enum {
- CDBSIZE = 32,
- SENSESIZE = 96,
-};
-
-
-struct Vscsidev
-{
- u32int num_queues;
- u32int seg_max;
- u32int max_sectors;
- u32int cmd_per_lun;
- u32int event_info_size;
- u32int sense_size;
- u32int cdb_size;
- u16int max_channel;
- u16int max_target;
- u32int max_lun;
-};
-
-struct Vblkdev
-{
- u64int capacity;
-};
-
-struct Vconfig {
- u32int devfeatsel;
- u32int devfeat;
- u32int drvfeatsel;
- u32int drvfeat;
-
- u16int msixcfg;
- u16int nqueues;
-
- u8int status;
- u8int cfggen;
- u16int queuesel;
-
- u16int queuesize;
- u16int queuemsixvect;
-
- u16int queueenable;
- u16int queuenotifyoff;
-
- u64int queuedesc;
- u64int queueavail;
- u64int queueused;
-};
-
-struct Vring
-{
- u16int flags;
- u16int idx;
-};
-
-struct Vdesc
-{
- u64int addr;
- u32int len;
- u16int flags;
- u16int next;
-};
-
-struct Vused
-{
- u32int id;
- u32int len;
-};
-
-struct Vqueue
-{
- Lock;
-
- Vdev *dev;
- void *notify;
- int idx;
-
- int size;
-
- int free;
- int nfree;
-
- Vdesc *desc;
-
- Vring *avail;
- u16int *availent;
- u16int *availevent;
-
- Vring *used;
- Vused *usedent;
- u16int *usedevent;
- u16int lastused;
-
- void *rock[];
-};
-
-struct Vdev
-{
- int typ;
-
- Pcidev *pci;
-
- uvlong port;
- ulong feat[2];
-
- int nqueue;
- Vqueue *queue[16];
-
- void *dev; /* device specific config (for scsi) */
-
- /* registers */
- Vconfig *cfg;
- u8int *isr;
- u8int *notify;
- u32int notifyoffmult;
-
- Vdev *next;
-};
-
-static Vqueue*
-mkvqueue(int size)
-{
- Vqueue *q;
- uchar *p;
- int i;
-
- q = malloc(sizeof(*q) + sizeof(void*)*size);
- p = mallocalign(
- PGROUND(sizeof(Vdesc)*size +
- VringSize +
- sizeof(u16int)*size +
- sizeof(u16int)) +
- PGROUND(VringSize +
- sizeof(Vused)*size +
- sizeof(u16int)),
- BY2PG, 0, 0);
- if(p == nil || q == nil){
- print("virtio: no memory for Vqueue\n");
- free(p);
- free(q);
- return nil;
- }
-
- q->desc = (void*)p;
- p += sizeof(Vdesc)*size;
- q->avail = (void*)p;
- p += VringSize;
- q->availent = (void*)p;
- p += sizeof(u16int)*size;
- q->availevent = (void*)p;
- p += sizeof(u16int);
-
- p = (uchar*)PGROUND((uintptr)p);
- q->used = (void*)p;
- p += VringSize;
- q->usedent = (void*)p;
- p += sizeof(Vused)*size;
- q->usedevent = (void*)p;
-
- q->free = -1;
- q->nfree = q->size = size;
- for(i=0; i<size; i++){
- q->desc[i].next = q->free;
- q->free = i;
- }
-
- return q;
-}
-
-static int
-matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ)
-{
- int bar;
-
- if(cap != 9 || pcicfgr8(p, off+3) != typ)
- return 1;
-
- /* skip invalid or non memory bars */
- bar = pcicfgr8(p, off+4);
- if(bar < 0 || bar >= nelem(p->mem)
- || p->mem[bar].size == 0
- || (p->mem[bar].bar & 3) != 0)
- return 1;
-
- return 0;
-}
-
-static int
-virtiocap(Pcidev *p, int typ)
-{
- return pcienumcaps(p, matchvirtiocfgcap, typ);
-}
-
-static void*
-virtiomapregs(Pcidev *p, int cap, int size)
-{
- int bar, len;
- uvlong addr;
-
- if(cap < 0)
- return nil;
- bar = pcicfgr8(p, cap+4) % nelem(p->mem);
- addr = pcicfgr32(p, cap+8);
- len = pcicfgr32(p, cap+12);
- if(size <= 0)
- size = len;
- else if(len < size)
- return nil;
- if(addr+len > p->mem[bar].size)
- return nil;
- addr += p->mem[bar].bar & ~0xFULL;
- return vmap(addr, size);
-}
-
-static Vdev*
-viopnpdevs(int typ)
-{
- Vdev *vd, *h, *t;
- Vconfig *cfg;
- Vqueue *q;
- Pcidev *p;
- int cap, bar;
- int n, i;
-
- h = t = nil;
- for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
- if(p->rid == 0)
- continue;
- if((cap = virtiocap(p, 1)) < 0)
- continue;
- bar = pcicfgr8(p, cap+4) % nelem(p->mem);
- cfg = virtiomapregs(p, cap, sizeof(Vconfig));
- if(cfg == nil)
- continue;
- if((vd = malloc(sizeof(*vd))) == nil){
- print("virtio: no memory for Vdev\n");
- break;
- }
- vd->port = p->mem[bar].bar & ~0xFULL;
- vd->typ = typ;
- vd->pci = p;
- vd->cfg = cfg;
- pcienable(p);
-
- vd->isr = virtiomapregs(p, virtiocap(p, 3), 0);
- if(vd->isr == nil){
-Baddev:
- pcidisable(p);
- /* TODO: vunmap */
- free(vd);
- continue;
- }
- cap = virtiocap(p, 2);
- vd->notify = virtiomapregs(p, cap, 0);
- if(vd->notify == nil)
- goto Baddev;
- vd->notifyoffmult = pcicfgr32(p, cap+16);
-
- /* reset */
- cfg->status = 0;
- while(cfg->status != 0)
- delay(1);
- cfg->status = Acknowledge|Driver;
-
- /* negotiate feature bits */
- cfg->devfeatsel = 1;
- vd->feat[1] = cfg->devfeat;
- cfg->devfeatsel = 0;
- vd->feat[0] = cfg->devfeat;
- cfg->drvfeatsel = 1;
- cfg->drvfeat = vd->feat[1] & 1;
- cfg->drvfeatsel = 0;
- cfg->drvfeat = 0;
- cfg->status |= FeaturesOk;
-
- for(i=0; i<nelem(vd->queue); i++){
- cfg->queuesel = i;
- n = cfg->queuesize;
- if(n == 0 || (n & (n-1)) != 0)
- break;
- if((q = mkvqueue(n)) == nil)
- break;
- q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
- q->dev = vd;
- q->idx = i;
- vd->queue[i] = q;
- coherence();
- cfg->queuedesc = PADDR(q->desc);
- cfg->queueavail = PADDR(q->avail);
- cfg->queueused = PADDR(q->used);
- }
- vd->nqueue = i;
-
- if(h == nil)
- h = vd;
- else
- t->next = vd;
- t = vd;
- }
-
- return h;
-}
-
-struct Rock {
- int done;
- Rendez *sleep;
-};
-
-static void
-vqinterrupt(Vqueue *q)
-{
- int id, free, m;
- struct Rock *r;
- Rendez *z;
-
- m = q->size-1;
-
- ilock(q);
- while((q->lastused ^ q->used->idx) & m){
- id = q->usedent[q->lastused++ & m].id;
- if(r = q->rock[id]){
- q->rock[id] = nil;
- z = r->sleep;
- r->done = 1; /* hands off */
- if(z != nil)
- wakeup(z);
- }
- do {
- free = id;
- id = q->desc[free].next;
- q->desc[free].next = q->free;
- q->free = free;
- q->nfree++;
- } while(q->desc[free].flags & Next);
- }
- iunlock(q);
-}
-
-static void
-viointerrupt(Ureg *, void *arg)
-{
- Vdev *vd = arg;
-
- if(vd->isr[0] & 1)
- vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
-}
-
-static int
-viodone(void *arg)
-{
- return ((struct Rock*)arg)->done;
-}
-
-static void
-vqio(Vqueue *q, int head)
-{
- struct Rock rock;
-
- rock.done = 0;
- rock.sleep = &up->sleep;
- q->rock[head] = &rock;
- q->availent[q->avail->idx & (q->size-1)] = head;
- coherence();
- q->avail->idx++;
- iunlock(q);
- if((q->used->flags & 1) == 0)
- *((u16int*)q->notify) = q->idx;
- while(!rock.done){
- while(waserror())
- ;
- tsleep(rock.sleep, viodone, &rock, 1000);
- poperror();
-
- if(!rock.done)
- vqinterrupt(q);
- }
-}
-
-static int
-vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
-{
- int need, free, head;
- Vqueue *q;
- Vdesc *d;
-
- u8int status;
- struct Vioblkreqhdr {
- u32int typ;
- u32int prio;
- u64int lba;
- } req;
-
- need = 2;
- if(a != nil)
- need = 3;
-
- status = -1;
- req.typ = typ;
- req.prio = 0;
- req.lba = lba;
-
- q = vd->queue[0];
- ilock(q);
- while(q->nfree < need){
- iunlock(q);
-
- if(!waserror())
- tsleep(&up->sleep, return0, 0, 500);
- poperror();
-
- ilock(q);
- }
-
- head = free = q->free;
-
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(&req);
- d->len = sizeof(req);
- d->flags = Next;
-
- if(a != nil){
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(a);
- d->len = secsize*count;
- d->flags = typ ? Next : (Write|Next);
- }
-
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(&status);
- d->len = sizeof(status);
- d->flags = Write;
-
- q->free = free;
- q->nfree -= need;
-
- /* queue io, unlock and wait for completion */
- vqio(q, head);
-
- return status;
-}
-
-static int
-vioscsireq(SDreq *r)
-{
- u8int resp[4+4+2+2+SENSESIZE];
- u8int req[8+8+3+CDBSIZE];
- int free, head;
- u32int len;
- Vqueue *q;
- Vdesc *d;
- Vdev *vd;
- SDunit *u;
- Vscsidev *scsi;
-
- u = r->unit;
- vd = u->dev->ctlr;
- scsi = vd->dev;
-
- memset(resp, 0, sizeof(resp));
- memset(req, 0, sizeof(req));
- req[0] = 1;
- req[1] = u->subno;
- req[2] = r->lun>>8;
- req[3] = r->lun&0xFF;
- *(u64int*)(&req[8]) = (uintptr)r;
-
- memmove(&req[8+8+3], r->cmd, r->clen);
-
- q = vd->queue[2];
- ilock(q);
- while(q->nfree < 3){
- iunlock(q);
-
- if(!waserror())
- tsleep(&up->sleep, return0, 0, 500);
- poperror();
-
- ilock(q);
- }
-
- head = free = q->free;
-
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(req);
- d->len = 8+8+3+scsi->cdb_size;
- d->flags = Next;
-
- if(r->write && r->dlen > 0){
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(r->data);
- d->len = r->dlen;
- d->flags = Next;
- }
-
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(resp);
- d->len = 4+4+2+2+scsi->sense_size;
- d->flags = Write;
-
- if(!r->write && r->dlen > 0){
- d->flags |= Next;
-
- d = &q->desc[free]; free = d->next;
- d->addr = PADDR(r->data);
- d->len = r->dlen;
- d->flags = Write;
- }
-
- q->free = free;
- q->nfree -= 2 + (r->dlen > 0);
-
- /* queue io, unlock and wait for completion */
- vqio(q, head);
-
- /* response+status */
- r->status = resp[10];
- if(resp[11] != 0)
- r->status = SDcheck;
-
- /* sense_len */
- len = *((u32int*)&resp[0]);
- if(len > 0){
- if(len > sizeof(r->sense))
- len = sizeof(r->sense);
- memmove(r->sense, &resp[4+4+2+2], len);
- r->flags |= SDvalidsense;
- }
-
- /* data residue */
- len = *((u32int*)&resp[4]);
- if(len > r->dlen)
- r->rlen = 0;
- else
- r->rlen = r->dlen - len;
-
- return r->status;
-
-}
-
-static long
-viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba)
-{
- long ss, cc, max, ret;
- Vdev *vd;
-
- vd = u->dev->ctlr;
- if(vd->typ == TypSCSI)
- return scsibio(u, lun, write, a, count, lba);
-
- max = 32;
- ss = u->secsize;
- ret = 0;
- while(count > 0){
- if((cc = count) > max)
- cc = max;
- if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0)
- error(Eio);
- ret += cc*ss;
- count -= cc;
- lba += cc;
- }
- return ret;
-}
-
-static int
-viorio(SDreq *r)
-{
- int i, count, rw;
- uvlong lba;
- SDunit *u;
- Vdev *vd;
-
- u = r->unit;
- vd = u->dev->ctlr;
- if(vd->typ == TypSCSI)
- return vioscsireq(r);
- if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){
- if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
- return sdsetsense(r, SDcheck, 3, 0xc, 2);
- return sdsetsense(r, SDok, 0, 0, 0);
- }
- if((i = sdfakescsi(r)) != SDnostatus)
- return r->status = i;
- if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
- return i;
- r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
- return r->status = SDok;
-}
-
-static int
-vioonline(SDunit *u)
-{
- Vdev *vd;
- Vblkdev *blk;
- uvlong cap;
-
- vd = u->dev->ctlr;
- if(vd->typ == TypSCSI)
- return scsionline(u);
-
- blk = vd->dev;
- cap = blk->capacity;
- if(u->sectors != cap){
- u->sectors = cap;
- u->secsize = 512;
- return 2;
- }
- return 1;
-}
-
-static int
-vioverify(SDunit *u)
-{
- Vdev *vd;
-
- vd = u->dev->ctlr;
- if(vd->typ == TypSCSI)
- return scsiverify(u);
-
- return 1;
-}
-
-SDifc sdvirtio10ifc;
-
-static int
-vioenable(SDev *sd)
-{
- char name[32];
- Vdev *vd;
- int i;
-
- vd = sd->ctlr;
- pcisetbme(vd->pci);
- snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
- intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
- coherence();
-
- for(i = 0; i < vd->nqueue; i++){
- vd->cfg->queuesel = i;
- vd->cfg->queueenable = 1;
- }
- vd->cfg->status |= DriverOk;
-
- return 1;
-}
-
-static int
-viodisable(SDev *sd)
-{
- char name[32];
- Vdev *vd;
-
- vd = sd->ctlr;
- snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
- intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
- pciclrbme(vd->pci);
- return 1;
-}
-
-static SDev*
-viopnp(void)
-{
- SDev *s, *h, *t;
- Vdev *vd;
- int id;
-
- h = t = nil;
-
- id = 'F';
- for(vd = viopnpdevs(TypBlk); vd; vd = vd->next){
- if(vd->nqueue == 0)
- continue;
-
- if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
- break;
- if((s = malloc(sizeof(*s))) == nil)
- break;
- s->ctlr = vd;
- s->idno = id++;
- s->ifc = &sdvirtio10ifc;
- s->nunit = 1;
- if(h)
- t->next = s;
- else
- h = s;
- t = s;
- }
-
- id = '0';
- for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
- Vscsidev *scsi;
-
- if(vd->nqueue < 3)
- continue;
-
- if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
- break;
- if(scsi->max_target == 0){
- vunmap(scsi, sizeof(Vscsidev));
- continue;
- }
- if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){
- print("sdvirtio: cdb %ud or sense size %ud too big\n",
- scsi->cdb_size, scsi->sense_size);
- vunmap(scsi, sizeof(Vscsidev));
- continue;
- }
- vd->dev = scsi;
-
- if((s = malloc(sizeof(*s))) == nil)
- break;
- s->ctlr = vd;
- s->idno = id++;
- s->ifc = &sdvirtio10ifc;
- s->nunit = scsi->max_target;
-
- if(h)
- t->next = s;
- else
- h = s;
- t = s;
- }
- return h;
-}
-
-SDifc sdvirtio10ifc = {
- "virtio10", /* name */
-
- viopnp, /* pnp */
- nil, /* legacy */
- vioenable, /* enable */
- viodisable, /* disable */
-
- vioverify, /* verify */
- vioonline, /* online */
- viorio, /* rio */
- nil, /* rctl */
- nil, /* wctl */
-
- viobio, /* bio */
- nil, /* probe */
- nil, /* clear */
- nil, /* rtopctl */
- nil, /* wtopctl */
-};
--- /dev/null
+++ b/sys/src/9/port/ethervirtio10.c
@@ -1,0 +1,793 @@
+/*
+ * virtio 1.0 ethernet driver
+ * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
+ *
+ * In contrast to ethervirtio.c, this driver handles the non-legacy
+ * interface for virtio ethernet which uses mmio for all register accesses
+ * and requires an elaborate pci capability structure dance to get working.
+ *
+ * It is kind of pointless as it is most likely slower than
+ * port i/o (harder to emulate on the pc platform).
+ *
+ * The reason why this driver is needed is that vultr set the
+ * disable-legacy=on option in the -device parameter for qemu
+ * on their hypervisor.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "../port/error.h"
+#include "../port/netif.h"
+#include "../port/etherif.h"
+
+typedef struct Vconfig Vconfig;
+typedef struct Vnetcfg Vnetcfg;
+
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vheader Vheader;
+typedef struct Vqueue Vqueue;
+
+typedef struct Ctlr Ctlr;
+
+enum { /* driver-wide constants: status bits, feature bits, ring flags, sizes, queue and command numbers */
+ /* §2.1 Device Status Field */
+ Sacknowledge = 1,
+ Sdriver = 2,
+ Sdriverok = 4,
+ Sfeaturesok = 8,
+ Sfailed = 128,
+
+ /* flags in Qnetstatus */
+ Nlinkup = (1<<0),
+ Nannounce = (1<<1),
+
+ /* feat[0] bits */
+ Fmac = 1<<5,
+ Fstatus = 1<<16,
+ Fctrlvq = 1<<17,
+ Fctrlrx = 1<<18,
+
+ /* feat[1] bits */
+ Fversion1 = 1<<(32-32), /* overall feature bit 32, i.e. bit 0 of the second 32-bit feature word */
+
+ /* vring used flags */
+ Unonotify = 1,
+ /* vring avail flags */
+ Rnointerrupt = 1,
+
+ /* descriptor flags */
+ Dnext = 1,
+ Dwrite = 2,
+ Dindirect = 4,
+
+ /* struct sizes */
+ VringSize = 4,
+ VdescSize = 16,
+ VusedSize = 8,
+ VheaderSize = 12, /* NOTE(review): struct Vheader declares only 10 bytes of fields; header buffers are sized with this constant */
+
+ Vrxq = 0, /* indices into Ctlr.queue[] */
+ Vtxq = 1,
+ Vctlq = 2,
+
+ /* class/cmd for Vctlq */
+ CtrlRx = 0x00,
+ CmdPromisc = 0x00,
+ CmdAllmulti = 0x01,
+ CtrlMac = 0x01,
+ CmdMacTableSet = 0x00,
+ CtrlVlan= 0x02,
+ CmdVlanAdd = 0x00,
+ CmdVlanDel = 0x01,
+};
+
+struct Vconfig { /* register layout of the common configuration area; pciprobe maps it from the type-1 virtio capability */
+ u32int devfeatsel; /* selects which 32-bit feature word devfeat shows */
+ u32int devfeat;
+ u32int drvfeatsel; /* selects which 32-bit feature word drvfeat writes */
+ u32int drvfeat;
+
+ u16int msixcfg;
+ u16int nqueues;
+
+ u8int status; /* device status, S* bits */
+ u8int cfggen;
+ u16int queuesel; /* selects the queue that the queue* fields below refer to */
+
+ u16int queuesize;
+ u16int queuemsixvect;
+
+ u16int queueenable;
+ u16int queuenotifyoff; /* multiplied by notifyoffmult to locate the queue's notify register */
+
+ u64int queuedesc; /* physical addresses of the three ring areas */
+ u64int queueavail;
+ u64int queueused;
+};
+
+struct Vnetcfg /* device-specific configuration area; pciprobe maps it from the type-4 virtio capability */
+{
+ u16int mac0; /* reset() accesses the first Eaddrlen bytes of this struct as the MAC address */
+ u16int mac1;
+ u16int mac2;
+ u16int status;
+ u16int maxqueuepairs;
+ u16int mtu;
+};
+
+struct Vring /* header common to the avail and used rings (Vqueue.avail, Vqueue.used); VringSize bytes */
+{
+ u16int flags;
+ u16int idx; /* only ever incremented; masked with qmask when used as a ring position */
+};
+
+struct Vdesc /* one descriptor table entry; VdescSize bytes */
+{
+ u64int addr; /* physical address of the buffer (filled from PADDR()) */
+ u32int len;
+ u16int flags; /* Dnext, Dwrite, Dindirect */
+ u16int next; /* index of the following descriptor when Dnext is set */
+};
+
+struct Vused /* one used-ring entry; VusedSize bytes */
+{
+ u32int id; /* descriptor index of the completed chain; txproc/rxproc shift it right by one to get the slot */
+ u32int len; /* byte count from the device; rxproc subtracts VheaderSize to get the frame length */
+};
+
+struct Vheader /* type of the header buffer that txproc/rxproc put in the first descriptor of every pair */
+{
+ u8int flags; /* none of these fields is accessed by name in this file */
+ u8int segtype;
+ u16int hlen;
+ u16int seglen;
+ u16int csumstart;
+ u16int csumend;
+};
+
+struct Vqueue /* driver-side state of one virtqueue */
+{
+ Rendez; /* the queue itself is the rendezvous passed to sleep()/wakeup() */
+
+ uint qsize; /* number of descriptors; pciprobe only accepts powers of two */
+ uint qmask; /* qsize-1 */
+
+ Vdesc *desc;
+
+ Vring *avail;
+ u16int *availent; /* qsize entries following the avail header */
+ u16int *availevent; /* single u16int after the avail entries */
+
+ Vring *used;
+ Vused *usedent; /* qsize entries following the used header */
+ u16int *usedevent; /* single u16int after the used entries */
+ u16int lastused; /* value of used->idx up to which completions have been processed */
+
+ uint nintr; /* statistics printed by ifstat */
+ uint nnote;
+
+ /* notify register */
+ void *notify;
+};
+
+struct Ctlr { /* per-device controller state, created by pciprobe */
+ Lock; /* taken by attach() around the one-time enable sequence */
+
+ QLock ctllock; /* serializes vctlcmd() users of the control queue */
+
+ int attached; /* set once by attach() */
+
+ /* registers */
+ Vconfig *cfg;
+ Vnetcfg *dev;
+ u8int *isr;
+ u8int *notify;
+ u32int notifyoffmult;
+
+ uvlong port; /* base address of the BAR holding the common config; matched against edev->port in reset() */
+ Pcidev *pcidev;
+ Ctlr *next;
+ int active; /* claimed by a reset() call */
+ ulong feat[2]; /* device feature words 0 and 1 as read in pciprobe */
+ int nqueue; /* number of queues successfully set up (2 or 3) */
+
+ /* virtioether has 3 queues: rx, tx and ctl */
+ Vqueue queue[3];
+};
+
+static Ctlr *ctlrhead; /* list of probed controllers; filled by pciprobe() on the first reset() call */
+
+static int
+vhasroom(void *v)
+{
+	Vqueue *vq = v;	/* sleep()/wakeup() condition: nonzero once used->idx has moved past lastused */
+	return vq->used->idx != vq->lastused;
+}
+
+static void
+vqnotify(Ctlr *ctlr, int x) /* write queue index x to that queue's notify register, unless the device set Unonotify */
+{
+ Vqueue *q;
+
+ coherence(); /* presumably a memory barrier so earlier ring writes are visible before notifying - confirm */
+ q = &ctlr->queue[x];
+ if(q->used->flags & Unonotify)
+ return;
+ q->nnote++; /* statistic printed by ifstat */
+ *((u16int*)q->notify) = x;
+}
+
+static void
+txproc(void *v) /* kproc started by attach(): moves blocks from edev->oq onto the tx virtqueue until qbread returns nil */
+{
+ Vheader *header;
+ Block **blocks;
+ Ether *edev;
+ Ctlr *ctlr;
+ Vqueue *q;
+ Vused *u;
+ Block *b;
+ int i, j;
+
+ edev = v;
+ ctlr = edev->ctlr;
+ q = &ctlr->queue[Vtxq];
+
+ header = smalloc(VheaderSize); /* one header buffer shared by every slot; never written here */
+ blocks = smalloc(sizeof(Block*) * (q->qsize/2)); /* block in flight per slot, nil when the slot is free */
+
+ for(i = 0; i < q->qsize/2; i++){ /* slot i owns descriptors 2i (header) and 2i+1 (packet data) */
+ j = i << 1;
+ q->desc[j].addr = PADDR(header);
+ q->desc[j].len = VheaderSize;
+ q->desc[j].next = j | 1;
+ q->desc[j].flags = Dnext;
+
+ q->availent[i] = q->availent[i + q->qsize/2] = j; /* avail ring is filled once; both halves name the same slots */
+
+ j |= 1;
+ q->desc[j].next = 0;
+ q->desc[j].flags = 0;
+ }
+
+ q->avail->flags &= ~Rnointerrupt; /* initqueue set this flag; clear it now that a consumer exists */
+
+ while(waserror()) /* presumably re-arms the error label so an error() below resumes at the loop - confirm */
+ ;
+
+ while((b = qbread(edev->oq, 1000000)) != nil){
+ for(;;){
+ /* retire completed packets */
+ while((i = q->lastused) != q->used->idx){
+ u = &q->usedent[i & q->qmask];
+ i = (u->id & q->qmask) >> 1; /* descriptor index -> slot index */
+ if(blocks[i] == nil)
+ break;
+ freeb(blocks[i]);
+ blocks[i] = nil;
+ q->lastused++;
+ }
+
+ /* have free slot? */
+ i = q->avail->idx & (q->qmask >> 1); /* next slot, modulo qsize/2 */
+ if(blocks[i] == nil)
+ break;
+
+ /* ring full, wait and retry */
+ if(!vhasroom(q))
+ sleep(q, vhasroom, q); /* woken by interrupt() */
+ }
+
+ /* slot is free, fill in descriptor */
+ blocks[i] = b;
+ j = (i << 1) | 1;
+ q->desc[j].addr = PADDR(b->rp);
+ q->desc[j].len = BLEN(b);
+ coherence(); /* descriptor contents must be written before avail->idx advances */
+ q->avail->idx++;
+ vqnotify(ctlr, Vtxq);
+ }
+
+ pexit("ether out queue closed", 1);
+}
+
+static void
+rxproc(void *v) /* kproc started by attach(): keeps the rx virtqueue stocked with buffers and hands received frames to etheriq(); never returns */
+{
+ Vheader *header;
+ Block **blocks;
+ Ether *edev;
+ Ctlr *ctlr;
+ Vqueue *q;
+ Vused *u;
+ Block *b;
+ int i, j;
+
+ edev = v;
+ ctlr = edev->ctlr;
+ q = &ctlr->queue[Vrxq];
+
+ header = smalloc(VheaderSize); /* one header buffer shared by every slot; its contents are never read here */
+ blocks = smalloc(sizeof(Block*) * (q->qsize/2)); /* block posted per slot, nil when the slot is empty */
+
+ for(i = 0; i < q->qsize/2; i++){ /* slot i owns descriptors 2i (header) and 2i+1 (frame data), both device-writable */
+ j = i << 1;
+ q->desc[j].addr = PADDR(header);
+ q->desc[j].len = VheaderSize;
+ q->desc[j].next = j | 1;
+ q->desc[j].flags = Dwrite|Dnext;
+
+ q->availent[i] = q->availent[i + q->qsize/2] = j; /* avail ring is filled once; both halves name the same slots */
+
+ j |= 1;
+ q->desc[j].next = 0;
+ q->desc[j].flags = Dwrite;
+ }
+
+ q->avail->flags &= ~Rnointerrupt; /* initqueue set this flag; clear it now that a consumer exists */
+
+ while(waserror()) /* presumably re-arms the error label so an error() below resumes at the loop - confirm */
+ ;
+
+ for(;;){
+ /* replenish receive ring */
+ do {
+ i = q->avail->idx & (q->qmask >> 1); /* next slot, modulo qsize/2 */
+ if(blocks[i] != nil)
+ break; /* slot still holds an unretired buffer */
+ if((b = iallocb(ETHERMAXTU)) == nil)
+ break; /* no memory now; retried on the next pass of the outer loop */
+ blocks[i] = b;
+ j = (i << 1) | 1;
+ q->desc[j].addr = PADDR(b->rp);
+ q->desc[j].len = BALLOC(b);
+ coherence(); /* descriptor contents must be written before avail->idx advances */
+ q->avail->idx++;
+ } while(q->avail->idx != q->used->idx);
+ vqnotify(ctlr, Vrxq);
+
+ /* wait for any packets to complete */
+ if(!vhasroom(q))
+ sleep(q, vhasroom, q); /* woken by interrupt() */
+
+ /* retire completed packets */
+ while((i = q->lastused) != q->used->idx) {
+ u = &q->usedent[i & q->qmask];
+ i = (u->id & q->qmask) >> 1; /* descriptor index -> slot index */
+ if((b = blocks[i]) == nil)
+ break;
+
+ blocks[i] = nil;
+ b->wp = b->rp + u->len - VheaderSize; /* u->len includes the header descriptor's bytes */
+ etheriq(edev, b);
+ q->lastused++;
+ }
+ }
+}
+
+static int
+vctlcmd(Ether *edev, uchar class, uchar cmd, uchar *data, int ndata) /* send one class/cmd + data on the control queue and wait; returns the ack byte, or -1 if there is no usable control queue */
+{
+ uchar hdr[2], ack[1];
+ Ctlr *ctlr;
+ Vqueue *q;
+ Vdesc *d;
+ int i;
+
+ ctlr = edev->ctlr;
+ q = &ctlr->queue[Vctlq];
+ if(q->qsize < 3) /* also true when the queue was never initialized (qsize 0) */
+ return -1;
+
+ qlock(&ctlr->ctllock); /* descriptors 0..2 are reused by every command */
+ while(waserror()) /* presumably re-arms the error label so the command is retried after an error() - confirm */
+ ;
+
+ ack[0] = 0x55; /* nonzero placeholder; the ack descriptor below is device-writable */
+ hdr[0] = class;
+ hdr[1] = cmd;
+
+ d = &q->desc[0]; /* chain: hdr -> data -> ack */
+ d->addr = PADDR(hdr);
+ d->len = sizeof(hdr);
+ d->next = 1;
+ d->flags = Dnext;
+ d++;
+ d->addr = PADDR(data);
+ d->len = ndata;
+ d->next = 2;
+ d->flags = Dnext;
+ d++;
+ d->addr = PADDR(ack);
+ d->len = sizeof(ack);
+ d->next = 0;
+ d->flags = Dwrite;
+
+ i = q->avail->idx & q->qmask;
+ q->availent[i] = 0; /* chain head is always descriptor 0 */
+ coherence();
+
+ q->avail->flags &= ~Rnointerrupt; /* want an interrupt for this completion */
+ q->avail->idx++;
+ vqnotify(ctlr, Vctlq);
+ while(!vhasroom(q))
+ sleep(q, vhasroom, q); /* woken by interrupt() */
+ q->lastused = q->used->idx;
+ q->avail->flags |= Rnointerrupt;
+
+ qunlock(&ctlr->ctllock);
+ poperror();
+
+ if(ack[0] != 0)
+ print("#l%d: vctlcmd: %ux.%ux -> %ux\n", edev->ctlrno, class, cmd, ack[0]);
+
+ return ack[0];
+}
+
+static void
+interrupt(Ureg*, void* arg) /* interrupt handler registered in reset(): wakes the sleepers of every queue with unprocessed used entries */
+{
+ Ether *edev;
+ Ctlr *ctlr;
+ Vqueue *q;
+ int i;
+
+ edev = arg;
+ ctlr = edev->ctlr;
+ if(*ctlr->isr & 1){ /* bit 0 of the ISR register; NOTE(review): the read presumably also acknowledges the interrupt - confirm */
+ for(i = 0; i < ctlr->nqueue; i++){
+ q = &ctlr->queue[i];
+ if(vhasroom(q)){
+ q->nintr++; /* statistic printed by ifstat */
+ wakeup(q);
+ }
+ }
+ }
+}
+
+static void
+attach(Ether* edev) /* first call enables all queues, sets Sdriverok and starts the rx/tx kprocs; later calls do nothing */
+{
+ char name[KNAMELEN];
+ Ctlr* ctlr;
+ int i;
+
+ ctlr = edev->ctlr;
+ ilock(ctlr);
+ if(ctlr->attached){
+ iunlock(ctlr);
+ return;
+ }
+ ctlr->attached = 1;
+
+ /* enable the queues */
+ for(i = 0; i < ctlr->nqueue; i++){
+ ctlr->cfg->queuesel = i; /* queueenable applies to the selected queue */
+ ctlr->cfg->queueenable = 1;
+ }
+
+ /* driver is ready */
+ ctlr->cfg->status |= Sdriverok;
+
+ iunlock(ctlr);
+
+ /* start kprocs */
+ snprint(name, sizeof name, "#l%drx", edev->ctlrno);
+ kproc(name, rxproc, edev);
+ snprint(name, sizeof name, "#l%dtx", edev->ctlrno);
+ kproc(name, txproc, edev);
+}
+
+static long
+ifstat(Ether *edev, void *a, long n, ulong offset) /* format feature words, device status and per-queue counters; returns readstr()'s result */
+{
+ int i, l;
+ char *p;
+ Ctlr *ctlr;
+ Vqueue *q;
+
+ ctlr = edev->ctlr;
+
+ p = smalloc(READSTR); /* scratch buffer for the whole report, freed below */
+
+ l = snprint(p, READSTR, "devfeat %32.32lub %32.32lub\n", ctlr->feat[1], ctlr->feat[0]);
+ l += snprint(p+l, READSTR-l, "devstatus %8.8ub\n", ctlr->cfg->status);
+
+ for(i = 0; i < ctlr->nqueue; i++){
+ q = &ctlr->queue[i];
+ l += snprint(p+l, READSTR-l,
+ "vq%d %#p size %d avail->idx %d used->idx %d lastused %hud nintr %ud nnote %ud\n",
+ i, q, q->qsize, q->avail->idx, q->used->idx, q->lastused, q->nintr, q->nnote);
+ }
+
+ n = readstr(offset, a, n, p); /* copies the part of p selected by offset/n into a */
+ free(p);
+
+ return n;
+}
+
+static void
+shutdown(Ether* edev) /* write 0 to the status register (the same write pciprobe labels "device reset") and clear PCI bus mastering */
+{
+ Ctlr *ctlr = edev->ctlr;
+
+ coherence();
+ ctlr->cfg->status = 0;
+ coherence();
+
+ pciclrbme(ctlr->pcidev);
+}
+
+static void
+promiscuous(void *arg, int on)
+{
+	Ether *ether = arg;
+	uchar flag[1];	/* CtrlRx/CmdPromisc payload byte: 1 when on is nonzero, else 0 */
+
+	flag[0] = on ? 1 : 0;
+	vctlcmd(ether, CtrlRx, CmdPromisc, flag, sizeof flag);
+}
+
+static void
+multicast(void *arg, uchar*, int)
+{
+	Ether *ether = arg;
+	uchar allmulti[1];	/* CtrlRx/CmdAllmulti payload byte: 1 when edev->nmaddr > 0, else 0 */
+
+	allmulti[0] = ether->nmaddr > 0 ? 1 : 0;
+	vctlcmd(ether, CtrlRx, CmdAllmulti, allmulti, sizeof allmulti);
+}
+
+static int
+initqueue(Vqueue *q, int size) /* allocate descriptor table, avail ring and used ring for size entries; returns 0, or -1 with nothing left allocated */
+{
+ uchar *p;
+
+ q->desc = mallocalign(VdescSize*size, 16, 0, 0);
+ if(q->desc == nil)
+ return -1;
+ p = mallocalign(VringSize + 2*size + 2, 2, 0, 0); /* header + size u16int entries + availevent */
+ if(p == nil){
+FreeDesc:
+ free(q->desc);
+ q->desc = nil;
+ return -1;
+ }
+ q->avail = (void*)p;
+ p += VringSize;
+ q->availent = (void*)p;
+ p += sizeof(u16int)*size;
+ q->availevent = (void*)p;
+ p = mallocalign(VringSize + VusedSize*size + 2, 4, 0, 0); /* header + size Vused entries + usedevent */
+ if(p == nil){
+ free(q->avail);
+ q->avail = nil;
+ goto FreeDesc;
+ }
+ q->used = (void*)p;
+ p += VringSize;
+ q->usedent = (void*)p;
+ p += VusedSize*size;
+ q->usedevent = (void*)p;
+
+ q->qsize = size;
+ q->qmask = q->qsize - 1; /* valid as a mask because pciprobe rejects sizes that are not powers of two */
+
+ q->lastused = q->avail->idx = q->used->idx = 0;
+
+ q->avail->flags |= Rnointerrupt; /* cleared later by txproc/rxproc, and temporarily by vctlcmd */
+
+ return 0;
+}
+
+static int
+matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ) /* pcienumcaps callback: returns 0 for a capability with id 9, type byte typ and a usable memory BAR; 1 otherwise */
+{
+ int bar;
+
+ if(cap != 9 || pcicfgr8(p, off+3) != typ) /* byte at off+3 holds the virtio config type */
+ return 1;
+
+ /* skip invalid or non memory bars */
+ bar = pcicfgr8(p, off+4); /* byte at off+4 holds the BAR index */
+ if(bar < 0 || bar >= nelem(p->mem)
+ || p->mem[bar].size == 0
+ || (p->mem[bar].bar & 3) != 0)
+ return 1;
+
+ return 0;
+}
+
+static int
+virtiocap(Pcidev *p, int typ) /* result of pcienumcaps with matchvirtiocfgcap; callers use it as a config-space offset and treat < 0 as not found */
+{
+ return pcienumcaps(p, matchvirtiocfgcap, typ);
+}
+
+static void*
+virtiomapregs(Pcidev *p, int cap, int size) /* vmap the register window described by capability cap; size <= 0 maps its full length; nil on failure */
+{
+ int bar, len;
+ uvlong addr;
+
+ if(cap < 0) /* capability not found by virtiocap */
+ return nil;
+ bar = pcicfgr8(p, cap+4) % nelem(p->mem);
+ addr = pcicfgr32(p, cap+8); /* offset of the window within the BAR */
+ len = pcicfgr32(p, cap+12); /* length of the window */
+ if(size <= 0)
+ size = len;
+ else if(len < size) /* window too small for the structure the caller expects */
+ return nil;
+ if(addr+len > p->mem[bar].size) /* window would extend past the BAR */
+ return nil;
+ addr += p->mem[bar].bar & ~0xFULL; /* mask the low four bits of the BAR value to get its base address */
+ return vmap(addr, size);
+}
+
+static Ctlr*
+pciprobe(void) /* find every 1AF4:1041 device, map its registers, negotiate features and set up its queues; returns the list of usable controllers */
+{
+ Ctlr *c, *h, *t;
+ Pcidev *p;
+ Vconfig *cfg;
+ int bar, cap, n, i;
+
+ h = t = nil;
+
+ /* §4.1.2 PCI Device Discovery */
+ for(p = nil; p = pcimatch(p, 0x1AF4, 0x1041);){
+ /* non-transitional devices will have a revision > 0 */
+ if(p->rid == 0)
+ continue;
+ if((cap = virtiocap(p, 1)) < 0) /* type 1: common configuration */
+ continue;
+ bar = pcicfgr8(p, cap+4) % nelem(p->mem);
+ cfg = virtiomapregs(p, cap, sizeof(Vconfig));
+ if(cfg == nil)
+ continue;
+ if((c = mallocz(sizeof(Ctlr), 1)) == nil){
+ print("ethervirtio: no memory for Ctlr\n");
+ break; /* NOTE(review): the cfg mapping is not released on this path */
+ }
+ c->cfg = cfg;
+ c->pcidev = p;
+ c->port = p->mem[bar].bar & ~0xFULL;
+
+ pcienable(p);
+ c->dev = virtiomapregs(p, virtiocap(p, 4), sizeof(Vnetcfg)); /* type 4: device-specific config */
+ if(c->dev == nil)
+ goto Baddev;
+ c->isr = virtiomapregs(p, virtiocap(p, 3), 0); /* type 3: ISR */
+ if(c->isr == nil)
+ goto Baddev;
+ cap = virtiocap(p, 2); /* type 2: notify area */
+ c->notify = virtiomapregs(p, cap, 0);
+ if(c->notify == nil)
+ goto Baddev;
+ c->notifyoffmult = pcicfgr32(p, cap+16); /* multiplier stored right after the notify capability */
+
+ /* device reset */
+ coherence();
+ cfg->status = 0;
+ while(cfg->status != 0)
+ delay(1);
+ cfg->status = Sacknowledge|Sdriver;
+
+ /* negotiate feature bits */
+ cfg->devfeatsel = 1;
+ c->feat[1] = cfg->devfeat;
+
+ cfg->devfeatsel = 0;
+ c->feat[0] = cfg->devfeat;
+
+ cfg->drvfeatsel = 1;
+ cfg->drvfeat = c->feat[1] & Fversion1;
+
+ cfg->drvfeatsel = 0;
+ cfg->drvfeat = c->feat[0] & (Fmac|Fctrlvq|Fctrlrx);
+
+ cfg->status |= Sfeaturesok; /* NOTE(review): status is not read back to check that the device kept Sfeaturesok */
+
+ for(i=0; i<nelem(c->queue); i++){
+ cfg->queuesel = i;
+ n = cfg->queuesize;
+ if(n == 0 || (n & (n-1)) != 0){ /* absent queue or size not a power of two */
+ if(i < 2)
+ print("ethervirtio: queue %d has invalid size %d\n", i, n);
+ break;
+ }
+ if(initqueue(&c->queue[i], n) < 0)
+ break;
+ c->queue[i].notify = c->notify + c->notifyoffmult * cfg->queuenotifyoff;
+ coherence();
+ cfg->queuedesc = PADDR(c->queue[i].desc);
+ cfg->queueavail = PADDR(c->queue[i].avail);
+ cfg->queueused = PADDR(c->queue[i].used);
+ }
+ if(i < 2){ /* rx and tx are mandatory; the control queue is optional */
+ print("ethervirtio: no queues\n");
+Baddev:
+ pcidisable(p);
+ /* TODO, vunmap */
+ free(c); /* NOTE(review): rings already allocated by initqueue are not freed here */
+ continue;
+ }
+ c->nqueue = i;
+
+ if(h == nil)
+ h = c;
+ else
+ t->next = c;
+ t = c;
+ }
+
+ return h;
+}
+
+
+static int
+reset(Ether* edev) /* addethercard callback: claim the next unclaimed controller (optionally matching edev->port) and fill in edev; 0 on success, -1 if none */
+{
+ static uchar zeros[Eaddrlen];
+ Ctlr *ctlr;
+ int i;
+
+ if(ctlrhead == nil) /* probe on first use; runs again on later calls if nothing was found */
+ ctlrhead = pciprobe();
+
+ for(ctlr = ctlrhead; ctlr != nil; ctlr = ctlr->next){
+ if(ctlr->active)
+ continue;
+ if(edev->port == 0 || edev->port == ctlr->port){
+ ctlr->active = 1;
+ break;
+ }
+ }
+
+ if(ctlr == nil)
+ return -1;
+
+ edev->ctlr = ctlr;
+ edev->port = ctlr->port;
+ edev->irq = ctlr->pcidev->intl;
+ edev->tbdf = ctlr->pcidev->tbdf;
+ edev->mbps = 1000;
+ edev->link = 1;
+
+ if((ctlr->feat[0] & Fmac) != 0 && memcmp(edev->ea, zeros, Eaddrlen) == 0){ /* device offers a MAC and none is configured: take the device's */
+ for(i = 0; i < Eaddrlen; i++)
+ edev->ea[i] = ((uchar*)ctlr->dev)[i];
+ } else { /* otherwise write edev->ea into the device config area */
+ for(i = 0; i < Eaddrlen; i++)
+ ((uchar*)ctlr->dev)[i] = edev->ea[i];
+ }
+
+ edev->arg = edev;
+
+ edev->attach = attach;
+ edev->shutdown = shutdown;
+ edev->ifstat = ifstat;
+
+ if((ctlr->feat[0] & (Fctrlvq|Fctrlrx)) == (Fctrlvq|Fctrlrx)){ /* rx-mode commands need both the control queue and CtrlRx features */
+ edev->multicast = multicast;
+ edev->promiscuous = promiscuous;
+ }
+
+ pcisetbme(ctlr->pcidev);
+ intrenable(edev->irq, interrupt, edev, edev->tbdf, edev->name);
+
+ return 0;
+}
+
+void
+ethervirtio10link(void) /* register this driver's reset() under the card type name "virtio10" */
+{
+ addethercard("virtio10", reset);
+}
--- /dev/null
+++ b/sys/src/9/port/sdvirtio10.c
@@ -1,0 +1,810 @@
+/*
+ * virtio 1.0 disk driver
+ * http://docs.oasis-open.org/virtio/virtio/v1.0/virtio-v1.0.html
+ *
+ * In contrast to sdvirtio.c, this driver handles the non-legacy
+ * interface for virtio disk which uses mmio for all register accesses
+ * and requires an elaborate pci capability structure dance to get working.
+ *
+ * It is kind of pointless as it is most likely slower than
+ * port i/o (harder to emulate on the pc platform).
+ *
+ * The reason why this driver is needed is that vultr set the
+ * disable-legacy=on option in the -device parameter for qemu
+ * on their hypervisor.
+ */
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "../port/pci.h"
+#include "ureg.h"
+#include "../port/error.h"
+
+#include "../port/sd.h"
+
+typedef struct Vscsidev Vscsidev;
+typedef struct Vblkdev Vblkdev;
+
+typedef struct Vconfig Vconfig;
+typedef struct Vring Vring;
+typedef struct Vdesc Vdesc;
+typedef struct Vused Vused;
+typedef struct Vqueue Vqueue;
+typedef struct Vdev Vdev;
+
+
+/* device types */
+enum { /* added to 0x1040 to form the PCI device id searched for in viopnpdevs */
+ TypBlk = 2,
+ TypSCSI = 8,
+};
+
+/* status flags */
+enum {
+ Acknowledge = 1,
+ Driver = 2,
+ FeaturesOk = 8,
+ DriverOk = 4,
+ Failed = 0x80,
+};
+
+/* descriptor flags */
+enum {
+ Next = 1,
+ Write = 2, /* buffer is written by the device */
+ Indirect = 4,
+};
+
+/* struct sizes */
+enum {
+ VringSize = 4,
+};
+
+enum { /* sizes of the request/response areas in vioscsireq; viopnp rejects devices configured with larger values */
+ CDBSIZE = 32,
+ SENSESIZE = 96,
+};
+
+
+struct Vscsidev /* device-specific configuration of a SCSI host; viopnp maps it from the type-4 capability */
+{
+ u32int num_queues;
+ u32int seg_max;
+ u32int max_sectors;
+ u32int cmd_per_lun;
+ u32int event_info_size;
+ u32int sense_size; /* must not exceed SENSESIZE; used for the response descriptor length */
+ u32int cdb_size; /* must not exceed CDBSIZE; used for the request descriptor length */
+ u16int max_channel;
+ u16int max_target; /* becomes the SDev's nunit; 0 makes viopnp skip the device */
+ u32int max_lun;
+};
+
+struct Vblkdev /* device-specific configuration of a block device; viopnp maps it from the type-4 capability */
+{
+ u64int capacity; /* vioonline stores this as the sector count, with secsize 512 */
+};
+
+struct Vconfig { /* register layout of the common configuration area; viopnpdevs maps it from the type-1 virtio capability */
+ u32int devfeatsel; /* selects which 32-bit feature word devfeat shows */
+ u32int devfeat;
+ u32int drvfeatsel; /* selects which 32-bit feature word drvfeat writes */
+ u32int drvfeat;
+
+ u16int msixcfg;
+ u16int nqueues;
+
+ u8int status; /* device status, see the status flags enum */
+ u8int cfggen;
+ u16int queuesel; /* selects the queue that the queue* fields below refer to */
+
+ u16int queuesize;
+ u16int queuemsixvect;
+
+ u16int queueenable;
+ u16int queuenotifyoff; /* multiplied by notifyoffmult to locate the queue's notify register */
+
+ u64int queuedesc; /* physical addresses of the three ring areas */
+ u64int queueavail;
+ u64int queueused;
+};
+
+struct Vring /* header common to the avail and used rings (Vqueue.avail, Vqueue.used); VringSize bytes */
+{
+ u16int flags;
+ u16int idx; /* only ever incremented; masked with size-1 when used as a ring position */
+};
+
+struct Vdesc /* one descriptor table entry */
+{
+ u64int addr; /* physical address of the buffer (filled from PADDR()) */
+ u32int len;
+ u16int flags; /* Next, Write, Indirect */
+ u16int next; /* next descriptor in a chain; also links unused descriptors into the free list */
+};
+
+struct Vused /* one used-ring entry */
+{
+ u32int id; /* head descriptor index of the completed chain; vqinterrupt uses it to index rock[] */
+ u32int len;
+};
+
+struct Vqueue /* driver-side state of one virtqueue, allocated by mkvqueue */
+{
+ Lock; /* ilock'ed around free-list and ring manipulation */
+
+ Vdev *dev;
+ void *notify; /* this queue's notify register */
+ int idx; /* queue number, the value written to notify */
+
+ int size; /* number of descriptors; viopnpdevs only accepts powers of two */
+
+ int free; /* head of the free descriptor list */
+ int nfree; /* number of descriptors on the free list */
+
+ Vdesc *desc;
+
+ Vring *avail;
+ u16int *availent;
+ u16int *availevent;
+
+ Vring *used;
+ Vused *usedent;
+ u16int *usedevent;
+ u16int lastused; /* used->idx value up to which vqinterrupt has processed completions */
+
+ void *rock[]; /* size entries: waiter (struct Rock*) per in-flight chain, indexed by head descriptor */
+};
+
+struct Vdev /* one probed virtio block or SCSI device, created by viopnpdevs */
+{
+ int typ; /* TypBlk or TypSCSI */
+
+ Pcidev *pci;
+
+ uvlong port; /* base address of the BAR holding the common config */
+ ulong feat[2]; /* device feature words 0 and 1 */
+
+ int nqueue; /* number of entries of queue[] that were set up */
+ Vqueue *queue[16];
+
+ void *dev; /* device specific config (for scsi) */
+
+ /* registers */
+ Vconfig *cfg;
+ u8int *isr;
+ u8int *notify;
+ u32int notifyoffmult;
+
+ Vdev *next;
+};
+
+static Vqueue*
+mkvqueue(int size) /* allocate a Vqueue with size descriptors plus its rings and build the free list; nil if out of memory */
+{
+ Vqueue *q;
+ uchar *p;
+ int i;
+
+ q = malloc(sizeof(*q) + sizeof(void*)*size); /* struct plus the rock[] array */
+ p = mallocalign( /* one page-aligned area: descriptors + avail ring, then the used ring on its own page boundary */
+ PGROUND(sizeof(Vdesc)*size +
+ VringSize +
+ sizeof(u16int)*size +
+ sizeof(u16int)) +
+ PGROUND(VringSize +
+ sizeof(Vused)*size +
+ sizeof(u16int)),
+ BY2PG, 0, 0);
+ if(p == nil || q == nil){
+ print("virtio: no memory for Vqueue\n");
+ free(p);
+ free(q);
+ return nil;
+ }
+
+ q->desc = (void*)p;
+ p += sizeof(Vdesc)*size;
+ q->avail = (void*)p;
+ p += VringSize;
+ q->availent = (void*)p;
+ p += sizeof(u16int)*size;
+ q->availevent = (void*)p;
+ p += sizeof(u16int);
+
+ p = (uchar*)PGROUND((uintptr)p); /* used ring starts on the next page boundary */
+ q->used = (void*)p;
+ p += VringSize;
+ q->usedent = (void*)p;
+ p += sizeof(Vused)*size;
+ q->usedevent = (void*)p;
+
+ q->free = -1;
+ q->nfree = q->size = size;
+ for(i=0; i<size; i++){ /* push every descriptor onto the free list; the list ends up headed by size-1 */
+ q->desc[i].next = q->free;
+ q->free = i;
+ }
+
+ return q;
+}
+
+static int
+matchvirtiocfgcap(Pcidev *p, int cap, int off, int typ) /* pcienumcaps callback: returns 0 for a capability with id 9, type byte typ and a usable memory BAR; 1 otherwise */
+{
+ int bar;
+
+ if(cap != 9 || pcicfgr8(p, off+3) != typ) /* byte at off+3 holds the virtio config type */
+ return 1;
+
+ /* skip invalid or non memory bars */
+ bar = pcicfgr8(p, off+4); /* byte at off+4 holds the BAR index */
+ if(bar < 0 || bar >= nelem(p->mem)
+ || p->mem[bar].size == 0
+ || (p->mem[bar].bar & 3) != 0)
+ return 1;
+
+ return 0;
+}
+
+static int
+virtiocap(Pcidev *p, int typ) /* result of pcienumcaps with matchvirtiocfgcap; callers use it as a config-space offset and treat < 0 as not found */
+{
+ return pcienumcaps(p, matchvirtiocfgcap, typ);
+}
+
+static void*
+virtiomapregs(Pcidev *p, int cap, int size) /* vmap the register window described by capability cap; size <= 0 maps its full length; nil on failure */
+{
+ int bar, len;
+ uvlong addr;
+
+ if(cap < 0) /* capability not found by virtiocap */
+ return nil;
+ bar = pcicfgr8(p, cap+4) % nelem(p->mem);
+ addr = pcicfgr32(p, cap+8); /* offset of the window within the BAR */
+ len = pcicfgr32(p, cap+12); /* length of the window */
+ if(size <= 0)
+ size = len;
+ else if(len < size) /* window too small for the structure the caller expects */
+ return nil;
+ if(addr+len > p->mem[bar].size) /* window would extend past the BAR */
+ return nil;
+ addr += p->mem[bar].bar & ~0xFULL; /* mask the low four bits of the BAR value to get its base address */
+ return vmap(addr, size);
+}
+
+static Vdev*
+viopnpdevs(int typ) /* find every 1AF4:(1040+typ) device, map its registers, reset it and set up its queues; returns the list of Vdevs */
+{
+ Vdev *vd, *h, *t;
+ Vconfig *cfg;
+ Vqueue *q;
+ Pcidev *p;
+ int cap, bar;
+ int n, i;
+
+ h = t = nil;
+ for(p = nil; p = pcimatch(p, 0x1AF4, 0x1040+typ);){
+ if(p->rid == 0)
+ continue;
+ if((cap = virtiocap(p, 1)) < 0) /* type 1: common configuration */
+ continue;
+ bar = pcicfgr8(p, cap+4) % nelem(p->mem);
+ cfg = virtiomapregs(p, cap, sizeof(Vconfig));
+ if(cfg == nil)
+ continue;
+ if((vd = malloc(sizeof(*vd))) == nil){
+ print("virtio: no memory for Vdev\n");
+ break; /* NOTE(review): the cfg mapping is not released on this path */
+ }
+ vd->port = p->mem[bar].bar & ~0xFULL;
+ vd->typ = typ;
+ vd->pci = p;
+ vd->cfg = cfg;
+ pcienable(p);
+
+ vd->isr = virtiomapregs(p, virtiocap(p, 3), 0); /* type 3: ISR */
+ if(vd->isr == nil){
+Baddev:
+ pcidisable(p);
+ /* TODO: vunmap */
+ free(vd);
+ continue;
+ }
+ cap = virtiocap(p, 2); /* type 2: notify area */
+ vd->notify = virtiomapregs(p, cap, 0);
+ if(vd->notify == nil)
+ goto Baddev;
+ vd->notifyoffmult = pcicfgr32(p, cap+16); /* multiplier stored right after the notify capability */
+
+ /* reset */
+ cfg->status = 0;
+ while(cfg->status != 0)
+ delay(1);
+ cfg->status = Acknowledge|Driver;
+
+ /* negotiate feature bits */
+ cfg->devfeatsel = 1;
+ vd->feat[1] = cfg->devfeat;
+ cfg->devfeatsel = 0;
+ vd->feat[0] = cfg->devfeat;
+ cfg->drvfeatsel = 1;
+ cfg->drvfeat = vd->feat[1] & 1; /* accept only bit 0 of the second feature word */
+ cfg->drvfeatsel = 0;
+ cfg->drvfeat = 0; /* no features from the first word */
+ cfg->status |= FeaturesOk; /* NOTE(review): status is not read back to check that the device kept FeaturesOk */
+
+ for(i=0; i<nelem(vd->queue); i++){
+ cfg->queuesel = i;
+ n = cfg->queuesize;
+ if(n == 0 || (n & (n-1)) != 0) /* absent queue or size not a power of two */
+ break;
+ if((q = mkvqueue(n)) == nil)
+ break;
+ q->notify = vd->notify + vd->notifyoffmult * cfg->queuenotifyoff;
+ q->dev = vd;
+ q->idx = i;
+ vd->queue[i] = q;
+ coherence();
+ cfg->queuedesc = PADDR(q->desc);
+ cfg->queueavail = PADDR(q->avail);
+ cfg->queueused = PADDR(q->used);
+ }
+ vd->nqueue = i; /* may be 0; viopnp skips devices with too few queues */
+
+ if(h == nil)
+ h = vd;
+ else
+ t->next = vd;
+ t = vd;
+ }
+
+ return h;
+}
+
+struct Rock { /* per-request completion record; lives on the stack of the process waiting in vqio */
+ int done; /* set to 1 by vqinterrupt when the request's chain shows up in the used ring */
+ Rendez *sleep; /* where the waiter sleeps; vqio sets it to &up->sleep */
+};
+
+static void
+vqinterrupt(Vqueue *q) /* process new used-ring entries: mark each waiter done, wake it, and return the chain's descriptors to the free list */
+{
+ int id, free, m;
+ struct Rock *r;
+ Rendez *z;
+
+ m = q->size-1;
+
+ ilock(q);
+ while((q->lastused ^ q->used->idx) & m){ /* indices differ within the ring mask */
+ id = q->usedent[q->lastused++ & m].id;
+ if(r = q->rock[id]){
+ q->rock[id] = nil;
+ z = r->sleep; /* read before setting done: once done is set the waiter may return and r becomes invalid */
+ r->done = 1; /* hands off */
+ if(z != nil)
+ wakeup(z);
+ }
+ do { /* walk the chain from its head, pushing each descriptor onto the free list */
+ free = id;
+ id = q->desc[free].next;
+ q->desc[free].next = q->free;
+ q->free = free;
+ q->nfree++;
+ } while(q->desc[free].flags & Next);
+ }
+ iunlock(q);
+}
+
+static void
+viointerrupt(Ureg *, void *arg) /* interrupt handler: if ISR bit 0 is set, process completions on the request queue (2 for SCSI, 0 for block) */
+{
+ Vdev *vd = arg;
+
+ if(vd->isr[0] & 1)
+ vqinterrupt(vd->queue[vd->typ == TypSCSI ? 2 : 0]);
+}
+
+static int
+viodone(void *arg) /* tsleep() condition used by vqio: nonzero once the Rock passed as arg is marked done */
+{
+ return ((struct Rock*)arg)->done;
+}
+
+static void
+vqio(Vqueue *q, int head) /* publish the chain starting at head, release q (callers enter with it ilock'ed) and wait until it completes */
+{
+ struct Rock rock;
+
+ rock.done = 0;
+ rock.sleep = &up->sleep;
+ q->rock[head] = &rock; /* vqinterrupt finds the waiter through the head descriptor index */
+ q->availent[q->avail->idx & (q->size-1)] = head;
+ coherence(); /* ring entry must be written before avail->idx advances */
+ q->avail->idx++;
+ iunlock(q);
+ if((q->used->flags & 1) == 0) /* bit 0 of used->flags set: skip the notify write */
+ *((u16int*)q->notify) = q->idx;
+ while(!rock.done){
+ while(waserror()) /* presumably re-arms the error label so an interrupted tsleep is simply retried - confirm */
+ ;
+ tsleep(rock.sleep, viodone, &rock, 1000);
+ poperror();
+
+ if(!rock.done) /* timed out: poll the used ring in case no interrupt arrived */
+ vqinterrupt(q);
+ }
+}
+
+static int
+vioblkreq(Vdev *vd, int typ, void *a, long count, long secsize, uvlong lba)
+{
+	int need, free, head;
+	Vqueue *q;
+	Vdesc *d;
+	/* submit one request on queue 0 and wait for it; returns the status byte the device wrote */
+	u8int status;
+	struct Vioblkreqhdr {
+		u32int typ;
+		u32int prio;
+		u64int lba;
+	} req;
+
+	need = 2;	/* request header + status byte */
+	if(a != nil)
+		need = 3;	/* plus one data descriptor */
+
+	status = -1;	/* stays nonzero unless the device overwrites it */
+	req.typ = typ;
+	req.prio = 0;
+	req.lba = lba;
+
+	q = vd->queue[0];
+	ilock(q);
+	while(q->nfree < need){
+		iunlock(q);
+		/* an error in tsleep already pops the label, so only pop it when tsleep returns */
+		if(!waserror()){
+			tsleep(&up->sleep, return0, 0, 500);
+			poperror();
+		}
+		ilock(q);
+	}
+
+	head = free = q->free;	/* take descriptors off the free list; d->next links the chain */
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(&req);
+	d->len = sizeof(req);
+	d->flags = Next;
+
+	if(a != nil){
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(a);
+		d->len = secsize*count;
+		d->flags = typ ? Next : (Write|Next);	/* only typ 0 marks the data buffer device-writable */
+	}
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(&status);
+	d->len = sizeof(status);
+	d->flags = Write;
+
+	q->free = free;
+	q->nfree -= need;
+
+	/* queue io, unlock and wait for completion */
+	vqio(q, head);
+
+	return status;
+}
+
+static int
+vioscsireq(SDreq *r)
+{
+	u8int resp[4+4+2+2+SENSESIZE];
+	u8int req[8+8+3+CDBSIZE];
+	int free, head;
+	u32int len;
+	Vqueue *q;
+	Vdesc *d;
+	Vdev *vd;
+	SDunit *u;
+	Vscsidev *scsi;
+	/* run one SCSI request on queue 2 and wait; fills r->status, r->sense, r->rlen and returns r->status */
+	u = r->unit;
+	vd = u->dev->ctlr;
+	scsi = vd->dev;
+
+	memset(resp, 0, sizeof(resp));
+	memset(req, 0, sizeof(req));
+	req[0] = 1;
+	req[1] = u->subno;
+	req[2] = r->lun>>8;
+	req[3] = r->lun&0xFF;
+	*(u64int*)(&req[8]) = (uintptr)r;	/* request pointer stored in the 8 bytes at offset 8 */
+
+	memmove(&req[8+8+3], r->cmd, r->clen);
+
+	q = vd->queue[2];
+	ilock(q);
+	while(q->nfree < 3){
+		iunlock(q);
+		/* an error in tsleep already pops the label, so only pop it when tsleep returns */
+		if(!waserror()){
+			tsleep(&up->sleep, return0, 0, 500);
+			poperror();
+		}
+		ilock(q);
+	}
+
+	head = free = q->free;	/* take descriptors off the free list; d->next links the chain */
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(req);
+	d->len = 8+8+3+scsi->cdb_size;
+	d->flags = Next;
+
+	if(r->write && r->dlen > 0){	/* outgoing data goes before the response */
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(r->data);
+		d->len = r->dlen;
+		d->flags = Next;
+	}
+
+	d = &q->desc[free]; free = d->next;
+	d->addr = PADDR(resp);
+	d->len = 4+4+2+2+scsi->sense_size;
+	d->flags = Write;
+
+	if(!r->write && r->dlen > 0){	/* incoming data follows the response */
+		d->flags |= Next;
+
+		d = &q->desc[free]; free = d->next;
+		d->addr = PADDR(r->data);
+		d->len = r->dlen;
+		d->flags = Write;
+	}
+
+	q->free = free;
+	q->nfree -= 2 + (r->dlen > 0);
+
+	/* queue io, unlock and wait for completion */
+	vqio(q, head);
+
+	/* response+status */
+	r->status = resp[10];
+	if(resp[11] != 0)
+		r->status = SDcheck;
+
+	/* sense_len */
+	len = *((u32int*)&resp[0]);
+	if(len > 0){
+		if(len > sizeof(r->sense))
+			len = sizeof(r->sense);
+		memmove(r->sense, &resp[4+4+2+2], len);
+		r->flags |= SDvalidsense;
+	}
+
+	/* data residue */
+	len = *((u32int*)&resp[4]);
+	if(len > r->dlen)
+		r->rlen = 0;
+	else
+		r->rlen = r->dlen - len;
+
+	return r->status;
+
+}
+
+static long
+viobio(SDunit *u, int lun, int write, void *a, long count, uvlong lba) /* transfer count sectors at lba; returns bytes moved, raises Eio on a nonzero device status; SCSI devices go through scsibio */
+{
+ long ss, cc, max, ret;
+ Vdev *vd;
+
+ vd = u->dev->ctlr;
+ if(vd->typ == TypSCSI)
+ return scsibio(u, lun, write, a, count, lba);
+
+ max = 32; /* at most 32 sectors per request */
+ ss = u->secsize;
+ ret = 0;
+ while(count > 0){
+ if((cc = count) > max)
+ cc = max;
+ if(vioblkreq(vd, write != 0, (uchar*)a + ret, cc, ss, lba) != 0) /* request type 1 for writes, 0 for reads */
+ error(Eio);
+ ret += cc*ss;
+ count -= cc;
+ lba += cc;
+ }
+ return ret;
+}
+
+static int
+viorio(SDreq *r) /* raw SCSI request entry point: forwarded to vioscsireq for SCSI devices, emulated on top of vioblkreq/viobio for block devices */
+{
+ int i, count, rw;
+ uvlong lba;
+ SDunit *u;
+ Vdev *vd;
+
+ u = r->unit;
+ vd = u->dev->ctlr;
+ if(vd->typ == TypSCSI)
+ return vioscsireq(r);
+ if(r->cmd[0] == 0x35 || r->cmd[0] == 0x91){ /* presumably the SCSI synchronize-cache opcodes, mapped to request type 4 with no data - confirm */
+ if(vioblkreq(vd, 4, nil, 0, 0, 0) != 0)
+ return sdsetsense(r, SDcheck, 3, 0xc, 2);
+ return sdsetsense(r, SDok, 0, 0, 0);
+ }
+ if((i = sdfakescsi(r)) != SDnostatus) /* command was answered by the generic emulation */
+ return r->status = i;
+ if((i = sdfakescsirw(r, &lba, &count, &rw)) != SDnostatus)
+ return i;
+ r->rlen = viobio(u, r->lun, rw == SDwrite, r->data, count, lba);
+ return r->status = SDok;
+}
+
+static int
+vioonline(SDunit *u) /* refresh the unit geometry: returns 2 when the capacity changed, 1 when unchanged; SCSI devices go through scsionline */
+{
+ Vdev *vd;
+ Vblkdev *blk;
+ uvlong cap;
+
+ vd = u->dev->ctlr;
+ if(vd->typ == TypSCSI)
+ return scsionline(u);
+
+ blk = vd->dev;
+ cap = blk->capacity; /* read from the mapped device config */
+ if(u->sectors != cap){
+ u->sectors = cap;
+ u->secsize = 512;
+ return 2;
+ }
+ return 1;
+}
+
+static int
+vioverify(SDunit *u)
+{
+	Vdev *dev = u->dev->ctlr;
+
+	/* non-SCSI units are always accepted; SCSI units are checked by scsiverify */
+	if(dev->typ != TypSCSI)
+		return 1;
+
+	return scsiverify(u);
+}
+
+SDifc sdvirtio10ifc; /* tentative declaration so viopnp can take its address; initialized at the end of the file */
+
+static int
+vioenable(SDev *sd) /* enable bus mastering and the interrupt handler, enable every queue and set DriverOk; always returns 1 */
+{
+ char name[32];
+ Vdev *vd;
+ int i;
+
+ vd = sd->ctlr;
+ pcisetbme(vd->pci);
+ snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name);
+ intrenable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
+ coherence();
+
+ for(i = 0; i < vd->nqueue; i++){
+ vd->cfg->queuesel = i; /* queueenable applies to the selected queue */
+ vd->cfg->queueenable = 1;
+ }
+ vd->cfg->status |= DriverOk;
+
+ return 1;
+}
+
+static int
+viodisable(SDev *sd) /* undo vioenable's interrupt and bus-master setup; the device status register is not touched; always returns 1 */
+{
+ char name[32];
+ Vdev *vd;
+
+ vd = sd->ctlr;
+ snprint(name, sizeof(name), "%s (%s)", sd->name, sd->ifc->name); /* same name string as built in vioenable */
+ intrdisable(vd->pci->intl, viointerrupt, vd, vd->pci->tbdf, name);
+ pciclrbme(vd->pci);
+ return 1;
+}
+
+static SDev*
+viopnp(void) /* build the SDev list: block devices get ids from 'F' with one unit each, SCSI hosts get ids from '0' with max_target units */
+{
+ SDev *s, *h, *t;
+ Vdev *vd;
+ int id;
+
+ h = t = nil;
+
+ id = 'F';
+ for(vd = viopnpdevs(TypBlk); vd; vd = vd->next){
+ if(vd->nqueue == 0) /* block devices need queue 0 */
+ continue;
+
+ if((vd->dev = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vblkdev))) == nil)
+ break; /* NOTE(review): one unmappable device also stops enumeration of the remaining ones */
+ if((s = malloc(sizeof(*s))) == nil)
+ break;
+ s->ctlr = vd;
+ s->idno = id++;
+ s->ifc = &sdvirtio10ifc;
+ s->nunit = 1;
+ if(h)
+ t->next = s;
+ else
+ h = s;
+ t = s;
+ }
+
+ id = '0';
+ for(vd = viopnpdevs(TypSCSI); vd; vd = vd->next){
+ Vscsidev *scsi;
+
+ if(vd->nqueue < 3) /* SCSI requests use queue 2 */
+ continue;
+
+ if((scsi = virtiomapregs(vd->pci, virtiocap(vd->pci, 4), sizeof(Vscsidev))) == nil)
+ break; /* NOTE(review): one unmappable device also stops enumeration of the remaining ones */
+ if(scsi->max_target == 0){
+ vunmap(scsi, sizeof(Vscsidev));
+ continue;
+ }
+ if((scsi->cdb_size > CDBSIZE) || (scsi->sense_size > SENSESIZE)){ /* vioscsireq's stack buffers are sized by these constants */
+ print("sdvirtio: cdb %ud or sense size %ud too big\n",
+ scsi->cdb_size, scsi->sense_size);
+ vunmap(scsi, sizeof(Vscsidev));
+ continue;
+ }
+ vd->dev = scsi;
+
+ if((s = malloc(sizeof(*s))) == nil)
+ break;
+ s->ctlr = vd;
+ s->idno = id++;
+ s->ifc = &sdvirtio10ifc;
+ s->nunit = scsi->max_target;
+
+ if(h)
+ t->next = s;
+ else
+ h = s;
+ t = s;
+ }
+ return h;
+}
+
+SDifc sdvirtio10ifc = { /* sd interface table for this driver; nil slots are operations it does not provide */
+ "virtio10", /* name */
+
+ viopnp, /* pnp */
+ nil, /* legacy */
+ vioenable, /* enable */
+ viodisable, /* disable */
+
+ vioverify, /* verify */
+ vioonline, /* online */
+ viorio, /* rio */
+ nil, /* rctl */
+ nil, /* wctl */
+
+ viobio, /* bio */
+ nil, /* probe */
+ nil, /* clear */
+ nil, /* rtopctl */
+ nil, /* wtopctl */
+};