code: mafs

ref: 5c21eca829380c28769f79beabb7cc51f1787f97
dir: /iobuf.c/

View raw version
#include	"all.h"

#define	DEBUG	0	/* NOTE(review): not referenced in the code visible here */

u32  nbuckets = 0;	/* number of hash buckets; derived from -m or Nbuckets, raised by iobufinit() until prime(nbuckets) is non-zero */
Hiob *hiob = nil;	/* array of nbuckets buckets, allocated by iobufinit(); getbuf() picks hiob[blkno%nbuckets] */
Extents frees = {0};	/* NOTE(review): not used by the code visible here; presumably the free-block extents, confirm */

/*
	Allocate a fresh Iobuf of len blocks and link it into the
	circular collision list of the hash bucket hp.

	hp must be locked by the caller.

	If the bucket is empty the new Iobuf becomes hp->link and forms
	a ring of one. Otherwise it is spliced in at hp->link->back and
	hp->link is left unchanged.

	The returned Iobuf has blkno 0, the given len and a data area of
	len*Rawblocksize bytes; it is not locked.
	Panics when len is 0.
 */
Iobuf *
newbuf(Hiob *hp, u16 len)
{
	Iobuf *nb, *head, *tail;

	if(len == 0)
		panic("newbuf len == 0 caller %#p\n", getcallerpc(&hp));

	nb = emalloc9p(sizeof(Iobuf));
	head = hp->link;
	if(head == nil){
		/* first buffer of this bucket: a ring of one */
		nb->fore = nb;
		nb->back = nb;
		hp->link = nb;
	}else{
		/* splice between the current tail and the head */
		tail = head->back;
		nb->fore = head;
		nb->back = tail;
		head->back = nb;
		tail->fore = nb;
	}
	nb->blkno = 0;
	nb->len = len;
	nb->xiobuf = emalloc9p(len*Rawblocksize);
	return nb;
}

/*
	Get the Iobuf of the disk block at blkno from the buffer cache
	for my use.

	All disk accesses go through the buffer cache. getbuf() selects
	the Iobuf for our use from the buffer cache. putbuf() returns the
	Iobuf back to the buffer cache.

	Any Iobuf access happens only between the getbuf() and putbuf() calls.

	The Iobuf's of a hash bucket (hiob[blkno%nbuckets]) are kept in a
	least-recently-used circular list. The most recently used Iobuf is
	pointed to by Hiob.link. Following Iobuf.fore from there walks
	towards older buffers. Hiob.link->back is the oldest buffer, which
	is the first candidate for reuse.

	blkno: disk block number.
	len:   size in units of Rawblocksize; must not be 0 (panics).
	flags: with Bmod the Iobuf is returned write locked,
	       otherwise it is returned read locked.

	On a cache hit with a matching len nothing is read from disk.
	On a hit with a different len the data area is reallocated and
	re-read with devread(). On a miss an unreferenced, unlocked Iobuf
	is stolen once the chain holds at least Ncollisions entries,
	else a new Iobuf is added; either way the block is read with
	devread().

	p->ref is raised around every lock acquisition that happens after
	the bucket has been unlocked, so that a concurrent getbuf() does
	not steal the Iobuf in between (stealing requires ref == 0).
 */
Iobuf *
getbuf(u64 blkno, u16 len, int flags)
{
	Hiob *hp;
	Iobuf *s, *p;
	/*
	 * int, not s8: Ncollisions is a soft limit, so the chain can grow
	 * past 127 entries and a narrow signed counter would wrap negative,
	 * making the steal test below fail forever.
	 */
	int ncollisions;

	if(len == 0){
		panic("getbuf len == 0 blkno %llud flags %d caller %#p\n",
				blkno, flags, getcallerpc(&blkno));
	}
	hp = &hiob[blkno%nbuckets];

	if(chatty9p > 4)
		dprint("getbuf blkno %llud len %d blkno%%nbuckets %llud pc 0x%p"
				" hiob 0x%p hp 0x%p flags 0x%ux %d\n",
			blkno, len, blkno%nbuckets, getcallerpc(&blkno),
			hiob, hp, flags, flags);
	qlock(hp);
	s = hp->link;
	if(s == nil)
		goto new;
	/* walk the whole ring once, looking for blkno */
	for(p=s, ncollisions = 0;;){
		ncollisions++;
		if(p->blkno == blkno){
			if(p != s){
				/* remove p from its current position in the lru circular buffer */
				p->back->fore = p->fore;
				p->fore->back = p->back;

				/* make p the hp->link and put it at the back of existing link */
				p->fore = s;
				p->back = s->back;
				s->back = p;
				p->back->fore = p;
				hp->link = p;
			}
			/* hold a ref so p cannot be stolen once hp is unlocked */
			incref(p);
			qunlock(hp);
			if(chatty9p > 4)
				dprint("	in cache, after qunlock(hp) hp 0x%p blkno %llud\n",
						hp, blkno);
			if(p->len != len){
				if(chatty9p > 4)
					dprint("getbuf refresh used blkno %llud, size in memory is %d"
							" and not %d, caller %#p\n",
							blkno, p->len, len, getcallerpc(&blkno));
				if(p->len == 0 || len == 0)
					panic("getbuf: p->len == 0 || len == 0 p->len %d len %d",
							p->len, len);
				/* resize under the write lock and re-read the block */
				wlock(p);
				if(chatty9p > 4)
					dprint("	after wlock() blkno %llud\n", blkno);
				free(p->xiobuf);
				p->xiobuf = emalloc9p(len*Rawblocksize);
				p->len = len;
				devread(blkno, p->xiobuf, len);
				if((flags & Bmod) == 0){
					/* the caller only wants to read: downgrade the lock */
					chkwunlock(p);
					rlock(p);
				}
			}else if(flags & Bmod){
				wlock(p);
				if(chatty9p > 4)
					dprint("	after wlock() blkno %llud\n", blkno);
			}else{
				/* p->len and len are u16: print them with %d, not %llud */
				if(chatty9p > 4)
					dprint("	in cache iobuf 0x%p has len %d blkno %llud len %d .."
							" rlock()\n", p, p->len, blkno, len);
				rlock(p);
			}
			decref(p);
			p->flags |= flags;
			return p;
		}
		p = p->fore;
		if(p == s)
			break;
	}
	/* TODO Is this still needed? would be helpful for read ahead */
	/*if(flags == Bprobe){
		qunlock(hp);
		return nil;
	}*/

	/* maxed out our allowed number of collisions,
		try to steal an older Iobuf without any ref's.
		Ncollisions is a soft limit.
	 */
	if(ncollisions >= Ncollisions){
		/* walk backwards from the oldest buffer to hp->link */
		do{
			p = s->back;
			if(p->ref == 0 && canwlock(p)){
				if(p->len != len){
					free(p->xiobuf);
					p->xiobuf = emalloc9p(len*Rawblocksize);
					p->len = len;
				}
				hp->link = p;
				/* p->len and len are u16: print them with %d, not %llud */
				if(chatty9p > 4)
					dprint("	stealing iobuf 0x%p of %d blocks"
							" for blkno %llud len %d\n",
							p, p->len, blkno, len);
				goto found;	/* p is wlock() */
			}
			s = p;
		}while(p != hp->link);
	}

	/* no unlocked blocks available; add a new one */
new:
	if(chatty9p > 4)
		dprint("	adding new Iobuf for blkno %llud\n", blkno);
	p = newbuf(hp, len);
	if(chatty9p > 4)
		dprint(" .. wlock() blkno %llud\n", blkno);
	wlock(p);

found:
	/* p is write locked and hp is still locked here */
	p->blkno = blkno;
	qunlock(hp);
	if(chatty9p > 4)
		dprint("	after qunlock(hp) hp 0x%p blkno %llud\n",
				hp, blkno);
	devread(blkno, p->xiobuf, len);
	if((flags & Bmod) == 0){
		if(chatty9p > 4)
			dprint("new buffer: switching from wlock() to rlock() blkno %llud\n", blkno);
		/* keep a ref while no lock is held so p is not stolen */
		incref(p);
		wunlock(p);
		rlock(p);
		decref(p);
	}
	p->flags = flags;
	return p;
}

/*
	getbuf() followed by sanity checks on the returned Iobuf.

	blkno, len, flags: passed to getbuf() unchanged; len must not be 0.
	tag, qpath:        expected tag type and path, verified by checktag().

	Returns the locked Iobuf. Panics when len is 0, when getbuf()
	returns nil, when the Iobuf has no contents (io == nil), or when
	checktag() fails; in the last case the Iobuf is released with
	putbuf() first.

	The nil checks run before checktag() because checktag()
	dereferences b->io.
 */
Iobuf *
getbufchk(u64 blkno, u16 len, int flags, int tag, u64 qpath)
{
	Iobuf *b;

	if(chatty9p > 4)
		dprint("getbufchk caller pc 0x%p\n", getcallerpc(&blkno));
	if(len == 0){
		panic("getbufchk len == 0 blkno %llud flags %d tag %d"
				" qpath %llud caller %#p\n",
				blkno, flags, tag, qpath, getcallerpc(&blkno));
	}
	b = getbuf(blkno, len, flags);
	if(b == nil)
		panic("getbufchk: getbuf returned nil blkno %llud flags %d tag %d"
				" qpath %llud caller %#p\n",
				blkno, flags, tag, qpath, getcallerpc(&blkno));
	if(b->io == nil)
		panic("b->io == nil blkno %llud flags %d tag %d"
				" qpath %llud b->blkno %llud %d caller %#p\n",
				blkno, flags, tag, qpath, b->blkno, b->len,
				getcallerpc(&blkno));
	if(checktag(b, tag, qpath) == 0){
		putbuf(b);
		panic("checktag on %llud failed %s\n", blkno, errstring[Ephase]);
	}
	return b;
}

/*
	Description of putbuf(), which is defined below, after bkp():

	put the Iobuf of a disk block back into
		the buffer cache for others to use.
	writes the block to disk if it was flagged as modified (Bmod).

	Disabled debug print, kept for reference:

	if(chatty9p > 4)
		dprint("putbuf p->blkno 0x%d t->c->type %d devtab[t->c->type]->dc %c\n"
				"	p 0x%p p->flags 0x%ux %d\n"
				"	p->xiobuf 0x%p",
				p->blkno, t->c->type, devtab[t->c->type]->dc,
				p, p->flags, p->flags, p->xiobuf);

 */
/*
	Copy the contents of Iobuf p into the backup block bno.

	p:     source Iobuf; nothing is done (besides a dprint) when nil.
	bno:   block number of the backup location; 0 is treated as
	       invalid and nothing is done (besides a dprint).
	qpath: path the backup block is expected to carry; it is checked
	       by getbufchk() together with the Tdata tag.

	The backup block is obtained with Bmod and released with putbuf().
	For Qproot0 and Qproot1 the DMDIR bit is cleared in the copy.

	Block numbers are 64 bit and are printed with %llud; a shorter
	verb would also misalign the qpath argument that follows.
 */
void
bkp(Iobuf *p, u64 bno, u64 qpath)
{
	Iobuf *buf;

	if(p == nil){
		dprint("bkp: p is nil invalid backup location %llud, qpath %llud\n",
				bno, qpath);
		return;
	}
	if(bno == 0){
		dprint("bkp %llud: invalid backup location %llud, qpath %llud\n",
				p->blkno, bno, qpath);
		return;
	}

	buf = getbufchk(bno, 1, Bmod, Tdata, qpath);
	if(buf == nil){
		panic("bkp: buf == nil\n");
	}
	memcpy(buf->io->buf, p->io->buf, Blocksize);
	if(qpath == Qproot0 || qpath == Qproot1){
		buf->io->d.mode &= ~DMDIR; /* to avoid recursive du -a */
	}
	putbuf(buf);
}

/*
	Release an Iobuf obtained from getbuf().

	Without Bmod in p->flags the flags are cleared and the read lock
	is dropped.

	With Bmod the block is written with devwrite() (panic on a short
	write), its dirty state is cleared, and if the block is one of the
	config, super or root source blocks it is copied to each of the
	Nbkp backup locations with bkp(). Then the flags are cleared and
	the write lock is dropped.

	Panics when p is nil, p->io is nil or p->len is 0.
	NOTE(review): the dprint/return after those panics only run if
	panic() returns; confirm whether it ever does.
 */
void
putbuf(Iobuf *p)
{
	u32 nwritten;
	s8 k;

	if(p == nil){
		panic("putbuf p == nil called by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}
	if(p->io == nil){
		showbuf(p);
		panic("putbuf p->io == nil by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}
	if(p->len == 0){
		showbuf(p);
		panic("putbuf p->len == 0 by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}

	if(chatty9p > 4)
		dprint("putbuf p->blkno %llud\n", p->blkno);

	/* read-only use: nothing to write, just drop the read lock */
	if((p->flags & Bmod) == 0){
		p->flags = 0;
		chkrunlock(p);
		if(chatty9p > 4)
			dprint(" .. runlock()'ed\n");
		return;
	}

	/* a modified buffer must already be locked by the caller */
	if(canwlock(p))
		panic("putbuf: buffer not locked %llud\n", p->blkno);

	p->io->dirty = 1;
	nwritten = devwrite(p->blkno, p->io, p->len);
	if(nwritten != p->len*Rawblocksize){
		dprint("%s\n", errstring[Esystem]);
		panic("error writing block %llud: %d bytes: %r\n",
				p->blkno, nwritten);
	}
	p->io->dirty = 0;
	devwritedirtyclear(p->blkno);

	/* keep the backup copies of the special blocks in step */
	if(p->blkno == config.config.srcbno){
		for(k = 0; k < Nbkp; k++)
			bkp(p, config.config.dest[k], Qpconfig0+k*3);
	}else if(p->blkno == config.super.srcbno){
		for(k = 0; k < Nbkp; k++)
			bkp(p, config.super.dest[k], Qpsuper0+k*3);
	}else if(p->blkno == config.root.srcbno){
		for(k = 0; k < Nbkp; k++)
			bkp(p, config.root.dest[k], Qproot0+k*3);
	}

	p->flags = 0;
	chkwunlock(p);
	if(chatty9p > 4)
		dprint(" .. wunlock()'ed\n");
}

/*
	Verify that the contents of p carry the expected tag and path.

	tag:   expected value of p->io->type.
	qpath: expected value of p->io->path; Qpnone skips the path check.

	Returns 1 when everything matches. Otherwise prints what was found
	and what was expected, together with the caller's pc, and returns 0.
	p->io must not be nil.
 */
int
checktag(Iobuf *p, u16 tag, u64 qpath)
{
	uintptr callerpc;

	if(tag == p->io->type && (qpath == Qpnone || qpath == p->io->path))
		return 1;

	callerpc = getcallerpc(&p);
	dprint("	tag = %G; expected %G; blkno = %llud\n",
			(uint)p->io->type, (uint)tag, p->blkno);
	if(qpath != Qpnone){
		dprint("	tag/path = %G/%llux; expected %G/%llux\n",
				(uint)p->io->type, p->io->path, tag, qpath);
		dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
				" expected %s/%llud\n",
				callerpc, devfile, (u64)p->blkno,
				tagnames[p->io->type], (u64)p->io->path,
				tagnames[tag], (u64)qpath);
	}else{
		dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
				" expected %s\n",
				callerpc, devfile, (u64)p->blkno,
				tagnames[p->io->type], (u64)p->io->path,
				tagnames[tag]);
	}
	return 0;
}

/*
	Stamp the contents of p with its length, tag type and path.

	p must have been obtained with Bmod and must have contents
	(p->io != nil); either violation panics.
 */
void
settag(Iobuf *p, int tagtype, u64 qpath)
{
	if(!(p->flags & Bmod)){
		panic("settag %s(%llux) tag/path=%s/%llud: not Bmod\n",
				devfile, (u64)p->blkno, tagnames[tagtype], qpath);
	}
	if(p->io == nil){
		panic("settag %s(%llux) tag/path=%s/%llud: p->io == nil\n",
				devfile, (u64)p->blkno, tagnames[tagtype], qpath);
	}

	p->io->path = qpath;
	p->io->type = tagtype;
	p->io->len = p->len;
}

/*
	Allocate n zeroed bytes aligned to sizeof(u64).
	Exits through sysfatal() when the allocation fails.
	The block is tagged with the pc of amalloc's caller.
 */
void *amalloc(u64 n){
	void *mem;

	mem = mallocalign(n, sizeof(u64), 0, 0);
	if(mem == nil)
		sysfatal("malloc: %r");
	memset(mem, 0, n);
	setmalloctag(mem, getcallerpc(&n));
	return mem;
}

/*
 * Set up the hash buckets of the buffer cache.
 *
 * nbuckets is increased until prime(nbuckets) is non-zero, then
 * hiob is allocated (zeroed) with that many buckets. Each bucket
 * will point to a least-recently-used (lru) circular list of the
 * Iobufs that hash to it; those lists start out empty.
 * Every bucket is locked and unlocked once.
 */
void
iobufinit(void)
{
	int b;

	for(; prime(nbuckets) == 0; nbuckets++)
		;
	if(chatty9p)
		dprint("mafs: iobufinit %ud hash buckets\n", nbuckets);

	hiob = amalloc(nbuckets*sizeof(Hiob));
	if(chatty9p > 4)
		dprint("iobufinit: hiob 0x%p\n", hiob);

	for(b = 0; b < nbuckets; b++){
		qlock(&hiob[b]);
		qunlock(&hiob[b]);
	}
}

/*
	Debug aid: print the fields of an Iobuf and, when it has
	contents (p->io != nil), the block itself via showblock().

	A nil p only reports the caller's pc. The format has a single
	%#p verb, so only the pc is passed.
 */
void
showbuf(Iobuf *p)
{
	if(p == nil){
		dprint("showbuf p == nil called by %#p\n",
				getcallerpc(&p));
		return;
	}
	dprint("showbuf p 0x%p blkno %llud len %d\n"
			"	fore 0x%p back 0x%p\n"
			"	xiobuf 0x%p flags 0x%x\n"
			"	caller %#p\n",
			p, p->blkno, p->len,
			p->fore, p->back,
			p->xiobuf, p->flags,
			getcallerpc(&p));
	if(p->io != nil)
		showblock((u8*)p->io);
}