ref: 56bd332994102b4e194819abb5e087a8895452f9
dir: /iobuf.c/
#include "all.h"

u64 nbuckets = 0;	/* number of hash buckets, -m changes it */
Hiob *hiob = nil;	/* array of nbuckets */
Extents frees = {0};	/* extents of free blocks on the disk */
extern u8 synchronouswrites;

/*
	extents of Blocksize units of memory used to store
	the disk block contents in memory for the buffer cache
	and write queue
 */
Extents memunits = {0};
u8 *memunitpool = nil;	/* first Blocksize aligned address inside the sbrk()'ed area */
u8 *memunitstart = nil;	/* address returned by sbrk() */

/*
	Reserve memory for nunits units of Blocksize bytes and hand all
	of them to the memunits extents as free.

	Using nunits + 1 for alignment: one extra unit is requested so
	that memunitpool can be moved up to the next Blocksize boundary.

	Panics when nunits is 0 or when sbrk() fails.
 */
void
initmemunitpool(u64 nunits)
{
	/* validate before growing the data segment */
	if(nunits == 0)
		panic("invalid nunits %llud\n", nunits);

	memunitstart = sbrk((nunits+1) * Blocksize);
	if(memunitstart == (void*)-1)
		panic("initmemunitpool: sbrk of %llud bytes failed\n",
			(u64)(nunits+1) * Blocksize);

	memunitpool = memunitstart+Blocksize- ((u64)memunitstart%Blocksize);
	initextents(&memunits);
	if(chatty9p > 4)
		dprint("initmemunitpool: memunitpool %p nunits*Blocksize %llud\n",
			memunitpool, (u64)nunits*Blocksize);
	bfree(&memunits, 0, nunits);
}

/*
	Allocate len contiguous units from memunits and return the
	address of the first one. The memory is zeroed.

	When balloc() fails, sync() is called to flush buffers and the
	allocation is tried again, for at most 10 attempts. If sync()
	reports that it did not skip any buffer there is nothing more to
	wait for, so a single last attempt is made.

	Panics when no units could be allocated.
 */
u8 *
allocmemunits(u16 len)
{
	u64 m;
	u8 *a;
	u8 repeat;
	s8 st;

	st = -1;
	for(repeat = 0; repeat < 10; repeat++){
		st = balloc(&memunits, len, &m);
		if(st >= 0)
			break;
		showextents(2, "out of memunits: ", &memunits);
		if(sync() == 0){
			/*
				every flushable buffer was flushed: retry once
				instead of giving up without looking at what
				the flush might have released.
			 */
			st = balloc(&memunits, len, &m);
			break;
		}
	}
	if(st < 0)
		panic("out of memory\n");
	if(chatty9p > 4)
		dprint("allocmemunit: memunitpool %p m %llud\n", memunitpool, m);
	a = memunitpool+(m*Blocksize);
	memset(a, 0, len*Blocksize);
	return a;
}

/*
	Give the len units starting at m back to memunits.
	m must be an address returned by allocmemunits().

	Panics when m is nil, lies below memunitpool or is not on a
	unit boundary.
 */
void
freememunits(u8 *m, u16 len)
{
	if(m == nil)
		panic("freememunit: m == 0\n");
	/* a pointer below the pool would turn into a huge unit number */
	if(m < memunitpool)
		panic("freememunit: m %p below memunitpool %p\n", m, memunitpool);
	if((m-memunitpool)%Blocksize)
		panic("freememunit: (m-memunitpool)%%Blocksize %llud\n",
			(u64)(m-memunitpool)%Blocksize);
	bfree(&memunits, (m-memunitpool)/Blocksize, len);
}

/*
	Walk every hash bucket and flush() the buffers that start with
	Tdentry and have a non-nil append.

	Returns the number of such buffers that were skipped because
	canwlock() failed.

	The bucket lock is dropped around flush() and taken again
	afterwards.
	NOTE(review): the list may change while the bucket is unlocked;
	confirm that the walk is still safe in that case.
 */
u64
sync(void)
{
	Iobuf *p, *s;
	Hiob *hp;
	u64 nlocked, i;

	nlocked = 0;
	for(i = 0; i < nbuckets; i++){
		hp = &hiob[i];
		qlock(hp);
		s = hp->link;
		if(s != nil){
			p = s;
			do{
				if(p->xiobuf[0] == Tdentry && p->append != nil){
					if(canwlock(p)){
						qunlock(hp);
						/* presumably flush() releases the wlock itself when it returns non-zero -- confirm */
						if(flush(p) == 0)
							wunlock(p);
						qlock(hp);
					}else{
						/* the contents cannot be trusted without a lock, count it anyway */
						nlocked++;
					}
				}
				p = p->fore;
			}while(p != s);
		}
		qunlock(hp);
	}
	return nlocked;
}

/*
	add an Iobuf to the collisions lru linked list
	hp must be locked

	The new Iobuf is linked in just behind hp->link (as hp->link->back)
	or becomes hp->link when the bucket is empty. It gets len units
	of zeroed memory and blkno 0.
 */
Iobuf *
newbuf(Hiob *hp, u16 len)
{
	Iobuf *p, *q;

	p = emalloc9p(sizeof(Iobuf));

	q = hp->link;
	if(q != nil){
		p->fore = q;
		p->back = q->back;
		q->back = p;
		p->back->fore = p;
	}else{
		hp->link = p;
		p->fore = p;
		p->back = p;
	}
	p->blkno = 0;
	p->len = len;
	p->xiobuf = allocmemunits(len);
	return p;
}

/*
	Get the Iobuf of the disk block at addr from the buffer cache
	for my use.

	All disk accesses go through the buffer cache. getbuf() selects
	the Iobuf for our use from the buffer cache. putbuf() returns the
	Iobuf back to the buffer cache.

	Any Iobuf access happens only between the getbuf() and putbuf() calls.

	The Iobuf's are grouped into a least-recently-used circular list
	of buffers. The most recently used Iobuf is pointed to by Hiob.link.
	Iobuf.fore is the next recently used buffer.
	Iobuf.back is the oldest recently used buffer.
	Hiob.link->back is the oldest buffer that will be reused first.

	blkno		disk block number
	len		size of the buffer in Blocksize units
	readonly	non-zero: returned rlock()'ed, zero: returned wlock()'ed
	freshalloc	non-zero: the contents are not read from the disk
 */
Iobuf *
getbuf(u64 blkno, u16 len, u8 readonly, u8 freshalloc)
{
	Hiob *hp;
	Iobuf *s, *p;
	u64 ncollisions;

	hp = &hiob[blkno%nbuckets];

	if(chatty9p > 4)
		dprint("getbuf blkno %llud blkno%%nbuckets %llud pc 0x%p"
				" hiob 0x%p hp 0x%p readonly %d\n",
			blkno, blkno%nbuckets, getcallerpc(&blkno),
			hiob, hp, readonly);
	qlock(hp);
	s = hp->link;
	if(s == nil)
		goto new;
	for(p=s, ncollisions = 0;;){
		ncollisions++;
		if(p->blkno == blkno){
			if(p != s){
				/* remove p from its current position in the lru circular buffer */
				p->back->fore = p->fore;
				p->fore->back = p->back;

				/* make p the hp->link and put it at the back of existing link */
				p->fore = s;
				p->back = s->back;
				s->back = p;
				p->back->fore = p;
				hp->link = p;
			}
			/* hold a reference while waiting for the lock without the bucket lock */
			incref(p);
			qunlock(hp);
			if(chatty9p > 4)
				dprint(" in cache, after qunlock(hp) hp 0x%p blkno %llud\n",
					hp, blkno);
			if(p->len != len){
				/* cached with another size: replace the memory and read it again */
				if(chatty9p > 4)
					dprint("getbuf refresh used blkno %llud, size in memory is %d"
							" and not %d, caller %#p\n",
						blkno, p->len, len, getcallerpc(&blkno));
				if(p->len == 0 || len == 0)
					panic("getbuf: p->len == 0 || len == 0 p->len %d len %d",
						p->len, len);
				wlock(p);
				if(chatty9p > 4)
					dprint(" after wlock() blkno %llud\n", blkno);
				freememunits(p->xiobuf, p->len);
				p->xiobuf = allocmemunits(len);
				p->len = len;
				p->freshalloc = freshalloc;
				if(freshalloc == 0)
					devread(blkno, p->xiobuf, len);
				if(readonly){
					/* switch from the wlock() to a rlock() */
					if(chkwunlock(p) == 0){
						showbuf(p);
						panic("getbuf chkwunlock(p) == 0 called by %#p\n",
							getcallerpc(&blkno));
					}
					rlock(p);
				}
			}else if(readonly){
				/* the u16 lengths are widened to match the %llud verbs */
				if(chatty9p > 4)
					dprint(" in cache iobuf 0x%p has len %llud blkno %llud len %llud .."
							" rlock()\n",
						p, (u64)p->len, blkno, (u64)len);
				rlock(p);
			}else{
				wlock(p);
				if(chatty9p > 4)
					dprint(" after wlock() blkno %llud\n", blkno);
			}
			decref(p);
			return p;
		}
		p = p->fore;
		if(p == s)
			break;
	}

	/*
		maxed out our allowed number of collisions,
		try to steal an older Iobuf without any ref's and not in the
		write queue. Ncollisions is a soft limit.

		We are not moving the stolen buffer to the top of the
		circular linked list, but, setting this stolen buffer as the lru.
		I figure it should not matter much either way. If it does,
		there is a changelru() function to do so in the git history
		that can be reused.

		dirties is decremented without a wlock() on the buffer in dowrite().
		Using a wlock() in dowrite() deadlocks with putwrite().
		getbuf() guarantees that even a free'ed block cannot be
		stolen until the dirties == 0. This avoids dirty blocks
		being stolen by other block numbers.
		incref(dirties) only happens with a wlock() in putwrite().

		NOTE(review): the code below tests only p->ref and canwlock();
		no dirties counter is looked at here -- confirm whether the
		paragraph above is still accurate.
	 */
	if(ncollisions >= Ncollisions){
Another:
		do{
			p = s->back;
			if(p->ref == 0 && canwlock(p)){
				/* somebody took a reference while we were locking */
				if(p->ref > 0){
					wunlock(p);
					goto Another;
				}
				/* allocate on flush as many as possible */
				if(p->xiobuf[0] == Tdentry && p->append != nil){
					/* presumably flush() releases the wlock when it returns non-zero -- confirm */
					if(flush(p))
						goto Another;
				}
				if(p->len != len){
					freememunits(p->xiobuf, p->len);
					p->xiobuf = allocmemunits(len);
					p->len = len;
				}else
					memset(p->xiobuf, 0, p->len*Blocksize);
				hp->link = p;
				/* arguments follow the order of the verbs: iobuf, blkno, len */
				if(chatty9p > 4)
					dprint(" stealing iobuf 0x%p for blkno %llud len %llud\n",
						p, blkno, (u64)p->len);
				goto found;	/* p is wlock() */
			}
			s = p;
		}while(p != hp->link);
	}

	/* no unlocked blocks available; add a new one */
new:
	if(chatty9p > 4)
		dprint(" adding new Iobuf for blkno %llud\n", blkno);
	p = newbuf(hp, len);
	if(chatty9p > 4)
		dprint(" .. wlock() blkno %llud\n", blkno);
	wlock(p);

found:
	p->blkno = blkno;
	qunlock(hp);
	if(chatty9p > 4)
		dprint(" after qunlock(hp) hp 0x%p blkno %llud\n", hp, blkno);
	p->freshalloc = freshalloc;
	if(freshalloc == 0)
		devread(blkno, p->xiobuf, len);
	if(readonly){
		if(chatty9p > 4)
			dprint("new buffer: switching from wlock() to rlock() blkno %llud\n",
				blkno);
		/* keep a reference across the gap between wunlock() and rlock() */
		incref(p);
		wunlock(p);
		rlock(p);
		decref(p);
	}
	return p;
}

/*
	getbuf() of a block that is already in use (Bused) followed by a
	check of its size, tag and qpath with checktag().

	For anything other than Tdata the current and new metadata
	copies are picked with recentmetadata(). For a writable buffer
	the current copy is duplicated into the new one and the verd of
	the new copy is incremented.

	Returns nil when getbuf() returns nil. Panics when checktag()
	fails or when b->io is nil.
 */
Iobuf *
getbufchk(u64 blkno, u16 len, u8 readonly, int tag, u64 qpath)
{
	Iobuf *b;

	if(chatty9p > 4)
		dprint("getbufchk caller pc 0x%p\n", getcallerpc(&blkno));
	b = getbuf(blkno, len, readonly, Bused);
	/* checktag() dereferences b, do not go any further without a buffer */
	if(b == nil)
		return nil;
	if(tag != Tdata){
		recentmetadata(b->m, &b->cur, &b->new);
		if(readonly == 0){ /* writable */
			memcpy(b->new, b->cur, Blocksize);
			b->new->verd++;
		}
	}
	if(checktag(b, len, tag, qpath) == 0){
		putbuf(b, 0);
		panic("checktag on %llud failed %s\n", blkno, errstring[Ephase]);
	}
	if(b->io == nil)
		panic("b->io == nil blkno %llud readonly %d tag %d"
				" qpath %llud b->blkno %llud caller %#p\n",
			blkno, readonly, tag, qpath, b->blkno, getcallerpc(&blkno));
	return b;
}

/* getbufchk() for a buffer of Metadataunits units */
Iobuf *
getmetachk(u64 blkno, u8 readonly, int tag, u64 qpath)
{
	return getbufchk(blkno, Metadataunits, readonly, tag, qpath);
}

/*
	getbuf() for a buffer of Metadataunits units without any tag
	check. The current and new copies are set up the same way as in
	getbufchk(). Returns nil when getbuf() returns nil.
 */
Iobuf *
getmeta(u64 blkno, u8 readonly, u8 freshalloc)
{
	Iobuf *b;

	b = getbuf(blkno, Metadataunits, readonly, freshalloc);
	if(b == nil)
		return nil;
	recentmetadata(b->m, &b->cur, &b->new);
	if(readonly == 0){ /* writable */
		memcpy(b->new, b->cur, Blocksize);
		b->new->verd++;
	}
	return b;
}

/*
	Copy Ddatasize bytes of contents, taken from block srcbno, into
	the Tdentry block at bno that belongs to qpath and write it out.

	A bno of 0 is reported with dprint() and ignored.
	Panics when the buffer for bno cannot be obtained.
 */
void
bkp(u64 srcbno, u8 *contents, u64 bno, u64 qpath)
{
	Iobuf *buf;

	if(bno == 0){
		dprint("bkp %llud: invalid backup location %llud, qpath %llud\n",
			srcbno, bno, qpath);
		return;
	}
	buf = getmetachk(bno, Bwritable, Tdentry, qpath);
	if(buf == nil){
		panic("bkp: buf == nil\n");
	}
	memcpy(buf->new->buf, contents, Ddatasize);
	buf->new->mtime = nsec();
//	if(qpath == Qproot0 || qpath == Qproot1){
//		buf->d->mode &= ~DMDIR; /* to avoid recursive du -a */
//	}
	putbuf(buf, 1);
}

/*
	put the Iobuf of the disk block at addr back into the buffer cache
	for others to use. writes to disk if changed.

	When the buffer has readers only the read lock is released.
	Otherwise, when dowrite is set, the buffer is written with
	devwrite() before the write lock is released:
		Tdata		all p->len units of p->xiobuf
		freshalloc	Metadataunits units of p->xiobuf
		otherwise	the one unit at p->new
	When the block is one of config.config.srcbno,
	config.super.srcbno or config.root.srcbno its contents are also
	copied to the matching dest[0] block with bkp(), after the lock
	has been released.

	old debugging statement:
	if(chatty9p > 4)
	dprint("putbuf p->blkno 0x%d t->c->type %d devtab[t->c->type]->dc %c\n"
			"	p 0x%p p->readonly %d\n"
			"	p->xiobuf 0x%p",
			p->blkno, t->c->type, devtab[t->c->type]->dc, p, p->readonly, p->xiobuf);
 */
void
putbuf(Iobuf *p, u8 dowrite)
{
	u8 buf[Ddatasize];
	u64 srcbno;
	u8 backup;

	if(p == nil){
		panic("putbuf p == nil called by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}
	if(p->io == nil){
		showbuf(p);
		panic("putbuf p->io == nil by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}

	if(chatty9p > 4)
		dprint("putbuf p->blkno %llud\n", p->blkno);
	if(p->readers){
		chkrunlock(p);
		if(chatty9p > 4)
			dprint(" .. runlock()'ed\n");
		return;
	}

	srcbno = p->blkno;
	backup = 0;	/* set only once buf[] holds the contents to back up */
	if(dowrite){
		if(p->xiobuf[0] == Tdata){
			devwrite(p->blkno, p->xiobuf, p->len);
		}else{
			if(p->blkno == config.config.srcbno ||
				p->blkno == config.super.srcbno ||
				p->blkno == config.root.srcbno){
				/* copy now, p must not be used after it is unlocked */
				memcpy(buf, p->new->buf, Ddatasize);
				backup = 1;
			}
			if(p->freshalloc)
				devwrite(p->blkno, p->xiobuf, Metadataunits);
			else
				devwrite(p->blkno+(p->new>p->cur?1:0), p->new, 1);
		}
	}

	if(chkwunlock(p) == 0){
		showbuf(p);
		panic("putbuf: chkwunlock(p) == 0 called by %#p\n", getcallerpc(&p));
	}

	/* never hand an unfilled buf[] to bkp() */
	if(dowrite && backup){
		if(srcbno == config.config.srcbno){
			bkp(srcbno, buf, config.config.dest[0], Qpconfig0);
		}else if(srcbno == config.super.srcbno){
			bkp(srcbno, buf, config.super.dest[0], Qpsuper0);
		}else if(srcbno == config.root.srcbno){
			bkp(srcbno, buf, config.root.dest[0], Qproot0);
		}
	}
}

/*
	only caller is freeblockbuf().
	These blocks do not need to be written to the disk.
	Hence, avoiding putwrite().

	Releases the write lock on p without writing anything.
	Panics when p has readers or when p turns out not to be locked.
 */
void
putbuffree(Iobuf *p)
{
	if(p == nil){
		panic("putbuffree p == nil called by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}
	if(p->io == nil){
		showbuf(p);
		panic("putbuffree p->io == nil by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}

	if(chatty9p > 4)
		dprint("putbuffree p->blkno %llud\n", p->blkno);
	if(p->readers){
		chkrunlock(p);
	//	if(chatty9p > 4)
		panic(" .. runlock()'ed\n");
	}else{
		/* being able to take the lock here means that the caller did not hold it */
		if(canwlock(p)){
			panic("putbuffree: buffer not locked %llud\n", p->blkno);
		}
		if(chkwunlock(p) == 0){
			showbuf(p);
			panic("putbuffree chkwunlock(p) == 0 called by %#p\n", getcallerpc(&p));
		}
	}
}

/*
	Check that the buffer p has len units and carries the given tag
	and, unless qpath is Qpnone, the given qpath.

	For Tdata the tag and path are read from p->io, for all other
	tags from the Dentry at p->cur.

	Returns 1 when everything matches. Returns 0 after printing the
	differences otherwise.
 */
int
checktag(Iobuf *p, u16 len, u8 tag, u64 qpath)
{
	uintptr pc;
	u16 ptag;
	u64 pqpath;

	if(tag == Tdata){
		ptag = p->io->tag;
		pqpath = p->io->path;
	}else{
		ptag = ((Dentry*)p->cur)->tag;
		pqpath = ((Dentry*)p->cur)->path;
	}
	if(len == p->len && tag == ptag &&
		(qpath == Qpnone || qpath == pqpath))
		return 1;

	pc = getcallerpc(&p);

	dprint(" tag = %G; expected %G; blkno = %llud\n",
		(uint)ptag, (uint)tag, p->blkno);
	if(qpath == Qpnone){
		/* the u16 lengths are widened to match the %llud verbs */
		dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
				" expected %s len %llud p->len %llud\n",
			pc, devfile, (u64)p->blkno,
			tagnames[ptag], (u64)pqpath,
			tagnames[tag], (u64)len, (u64)p->len);
	}else{
		dprint(" tag/path = %G/%llux; expected %G/%llux\n",
			(uint)ptag, pqpath, (uint)tag, qpath);
		dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
				" expected %s/%llud\n",
			pc, devfile, (u64)p->blkno,
			tagnames[ptag], (u64)pqpath,
			tagnames[tag], (u64)qpath);
	}
	return 0;
}

/*
	Set the tag and qpath of the buffer p.

	For Tdata they are stored in p->io, for all other tags in the
	Dentry at p->new, where both path and qpath are set to qpath.

	Panics when p has readers or when p->io is nil.
 */
void
settag(Iobuf *p, u8 tag, u64 qpath)
{
	Dentry *d;

	if(p->readers)
		panic("settag %s(%llux) tag/path=%s/%llud: not Bwritable\n",
			devfile, (u64)p->blkno, tagnames[tag], qpath);
	if(p->io == nil)
		panic("settag %s(%llux) tag/path=%s/%llud: p->io == nil\n",
			devfile, (u64)p->blkno, tagnames[tag], qpath);
	if(tag == Tdata){
		p->io->tag = Tdata;
		p->io->path = qpath;
		return;
	}
	d = (Dentry*)p->new;
	d->tag = tag;
	d->path = d->qpath = qpath;
}

/*
	Allocate n zeroed bytes aligned to sizeof(u64).
	The allocation is tagged with the pc of the caller.
	Calls sysfatal() when the allocation fails.
 */
void *
amalloc(u64 n)
{
	void *p;

	p = mallocalign(n, sizeof(u64), 0, 0);
	if(p == nil)
		sysfatal("malloc: %r");
	else
		memset(p, 0, n);
	setmalloctag(p, getcallerpc(&n));
	return p;
}

/*
 * Prepare nbuckets of hash buckets. Each bucket will point to a
 * linked list of collisions. The collisions are ordered into a
 * least-recently-used (lru) linked list.
 *
 * nbuckets is first raised until prime() accepts it.
 */
void
iobufinit(void)
{
	u64 i;
	Hiob *hp;

	while(prime(nbuckets) == 0)
		nbuckets++;
	if(chatty9p)
		dprint("mafs: iobufinit %llud hash buckets\n", nbuckets);
	hiob = amalloc(nbuckets*sizeof(Hiob));
	if(chatty9p > 4)
		dprint("iobufinit: hiob 0x%p\n", hiob);
	/* take and release the lock of every bucket once */
	for(i = 0, hp = hiob; i < nbuckets; i++, hp++){
		qlock(hp);
		qunlock(hp);
	}
}

/*
	Print the bookkeeping fields of p with dprint() and, when p->io
	is set, the block itself with showblock().
	A nil p is reported together with the pc of the caller.
 */
void
showbuf(Iobuf *p)
{
	if(p == nil){
		/* a single verb takes a single argument: the caller pc */
		dprint("showbuf p == nil called by %#p\n", getcallerpc(&p));
		return;
	}
	dprint("showbuf p 0x%p ref %d readers %d writer %d"
			" blkno %llud len %d"
			" fore 0x%p back 0x%p"
			" xiobuf 0x%p"
			" caller %#p\n",
		p, p->ref, p->readers, p->writer,
		p->blkno, p->len,
		p->fore, p->back,
		p->xiobuf,
		getcallerpc(&p));
	if(p->io != nil)
		showblock(2, (u8*)p->io);
}