code: mafs

ref: e5fae21d7c28103cbd4d5414538f18a448a60766
dir: /iobuf.c/

View raw version
#include "all.h"

u64  nbuckets = 0;	/* number of hash buckets, -m changes it */
Hiob *hiob = nil;	/* array of nbuckets hash buckets, allocated by iobufinit() */
Extents frees = {0};/* extents of free blocks on the disk */

/*
	extents of Blocksize units of memory used to store
	the disk block contents in memory for the buffer cache
	and write queue.

	memunits tracks which units are free, counted in units of
	Blocksize relative to memunitpool.
 */
Extents memunits = {0};
/* Blocksize aligned base of the pool, computed by initmemunitpool() */
u8 *memunitpool = nil;
/* raw address returned by sbrk() in initmemunitpool(), before alignment */
u8 *memunitstart = nil;

/*
	Reserve the memory that backs the buffer cache and write queue.

	nunits is the number of Blocksize units wanted; it must be > 0.
	nunits + 1 units are requested from sbrk() so that the start of
	the pool (memunitpool) can be rounded up to a Blocksize boundary
	and still leave nunits whole units available.

	Panics when nunits is 0 or when sbrk() cannot provide the memory.
 */
void
initmemunitpool(u64 nunits)
{
	/* reject a useless pool before taking any memory */
	if(nunits == 0)
		panic("invalid nunits %llud\n", nunits);

	memunitstart = sbrk((nunits+1) * Blocksize);
	/* sbrk() reports failure with (void*)-1, not nil */
	if(memunitstart == (void*)-1)
		panic("initmemunitpool: sbrk failed for %llud units\n", nunits);

	/* round up to the next Blocksize boundary */
	memunitpool = memunitstart+Blocksize- ((u64)memunitstart%Blocksize);
	initextents(&memunits, "memunits", fsflush);
	if(chatty9p > 4)
		dprint("initmemunitpool: memunitpool %p nunits*Blocksize %p\n",
				memunitpool, nunits*Blocksize);

	/* hand all the units to the extents allocator as free */
	bfree(&memunits, 0, nunits);
}

/*
	Take len contiguous Blocksize units out of the memory pool.
	Returns the address of the first unit; the memory is zeroed.
 */
u8 *
allocmemunits(u16 len)
{
	u64 unit;
	u8 *mem;

	/* balloc() answers with a unit index relative to memunitpool */
	unit = balloc(&memunits, len);
	if(chatty9p > 4)
		dprint("allocmemunit: memunitpool %p m %p\n",
				memunitpool, unit);

	mem = &memunitpool[unit*Blocksize];
	memset(mem, 0, len*Blocksize);
	return mem;
}

/*
	Give len units starting at m back to the memory pool.
	m must be an address handed out by allocmemunits(): panics
	when it is nil or is not on a Blocksize boundary of the pool.
 */
void
freememunits(u8 *m, u16 len)
{
	if(m == nil){
		panic("freememunit: m == 0\n");
	}
	if((m-memunitpool)%Blocksize != 0){
		panic("freememunit: (m-memunitpool)%%Blocksize %llud\n",
				(u64)(m-memunitpool)%Blocksize);
	}

	/* convert the address back into a unit index for the extents */
	bfree(&memunits, (m-memunitpool)/Blocksize, len);
}

/*
	Describe one Iobuf as a line of text in buf (at most nbuf bytes).
	Returns what snprint() returned, or 0 when p is nil.

	The lock state is probed with canwlock(); when the probe gets
	the lock it is released again immediately.
 */
s32
showhashbucket(Iobuf *p, s8 *buf, s32 nbuf)
{
	char *state, *name;
	u8 tag;

	if(p == nil)
		return 0;

	if(canwlock(p)){
		wunlock(p);
		state = "unlocked";
	}else{
		state = "locked";
	}

	/* data blocks: only the tag, block number and lock state */
	if(p->tag == Tdata)
		return snprint(buf, nbuf, "%s %llud %s\n",
					tagnames[p->tag], p->blkno, state);

	/* metadata: use copy 0 when its tag is valid, else copy 1 */
	if(p->m->d[0].tag > Tdata && p->m->d[0].tag < MAXTAG){
		tag = p->m->d[0].tag;
		name = p->m->d[0].name;
	}else{
		tag = p->m->d[1].tag;
		name = p->m->d[1].name;
	}

	if(tag != Tdentry)
		return snprint(buf, nbuf, "%s %llud %lluds %s\n",
				tagnames[tag], p->blkno, (nsec()-p->atime)/Nsec, state);

	/* directory entries also show the name and the append state */
	return snprint(buf, nbuf, "%s %llud %s %lluds %s %s %llud\n",
			tagnames[tag], p->blkno, name, (nsec()-p->atime)/Nsec,
			state, p->append!=nil?"data":"nil", p->appendsize);
}

/*
	Describe every non-empty hash bucket in buf (at most nbuf bytes).
	Each bucket is prefixed with "i <index> " and is followed by one
	line per Iobuf, starting from hp->link and following the fore
	pointers around the ring. Returns the sum of the snprint() results.
 */
s32
showhashbuckets(s8 *buf, s32 nbuf)
{
	Iobuf *head, *p;
	Hiob *hp;
	s32 n;
	u64 i;

	n = 0;
	for(i = 0; i < nbuckets; i++){
		hp = &hiob[i];
		qlock(hp);
		head = hp->link;
		if(head != nil){
			n += snprint(buf+n, nbuf-n, "i %llud ", i);
			/* walk the ring once, starting at the head */
			p = head;
			do{
				n += showhashbucket(p, buf+n, nbuf-n);
				p = p->fore;
			}while(p != head);
		}
		qunlock(hp);
	}
	return n;
}

/*
	flushold:
		flush old unflushed data appended to the Tdentry to the disk
		remove old Iobuf's until hp->n <= Ncollisions
	sync: flush all unflushed data appended to the Tdentry's,
		whatever their age

	"old" means not accessed within the last Nrefresh nanoseconds
	(atime <= nsec()-Nrefresh).

	Locking: a hash bucket is always qunlock()'ed before flush() is
	called on one of its buffers. As the ring can change while the
	bucket is unlocked, the scan is restarted with a goto afterwards.
	NOTE(review): the buffer is write locked by canwlock() before
	flush(p); flush() presumably releases that lock - confirm.
 */
void
flushold(void)
{
	Iobuf *p;
	Hiob *hp;
	u64 i;
//	char buf[1024]={'\0'};

	/* flush old unflushed data appended to the Tdentry */
/*	dprint("flushold start\n");
	showhashbuckets(buf, 1024);
	dprint(buf);*/
Again:
	for(i = 0; i < nbuckets; i++){
		hp=&hiob[i];
		qlock(hp);
		if(hp->link != nil){
			/* from the oldest (hp->link->back) towards the newest, head excluded */
			for(p = hp->link->back; p!=hp->link; p=p->back){
				/* all new data from here */
				if(p->atime > nsec()-Nrefresh)
					break;
				/* unreferenced Tdentry with pending append data that we could lock */
				if(p->ref == 0 &&
					p->tag == Tdentry &&
					p->append != nil &&
					canwlock(p)){
					qunlock(hp);
					flush(p);
					goto Again;
				}
			}
			/*
				the loop above never visits the head of the ring,
				check it here when the loop went all the way round.
				NOTE(review): this jumps to Oldbufs and so skips the
				buckets after i for this call, while sync() restarts
				the scan in the same situation - confirm it is intended.
			 */
			if(p == hp->link &&
				p->ref == 0 &&
				p->atime < nsec()-Nrefresh &&
				p->tag == Tdentry &&
				p->append != nil &&
				canwlock(p)){
				qunlock(hp);
				flush(p);
				goto Oldbufs;
			}
		}
		qunlock(hp);
	}

Oldbufs:
	/* remove old Iobuf's until hp->n <= Ncollisions */
	for(i = 0; i < nbuckets; i++){
		hp=&hiob[i];
		qlock(hp);
		if(hp->n > Ncollisions){
			if(hp->link != nil){
				/* oldest first; the head (hp->link) is never removed */
				for(p = hp->link->back;
					hp->n > Ncollisions && p!=hp->link;
					p=p->back){
					/* all new data from here */
					if(p->atime > nsec()-Nrefresh)
						break;
					if(p->ref == 0 &&
						canwlock(p)){
						incref(p); /* not needed */
						/* remove p from its current position in the lru circular buffer */
						p->back->fore = p->fore;
						p->fore->back = p->back;
						hp->n--;
						/* p is off the ring now, getbuf() cannot find it anymore */
						qunlock(hp);
						flush(p);
						/* give the block memory and the Iobuf itself back */
						freememunits(p->xiobuf, p->len);
						free(p);
						/* the bucket was unlocked, start over */
						goto Oldbufs;
					}
				}
			}
		}
		qunlock(hp);
	}
}

/*
	Flush the unflushed data appended to the Tdentry's of all the
	hash buckets, whatever their age.

	Every time a buffer is flushed the bucket is unlocked first and
	the whole scan is restarted from Resync, as the ring can change
	while the bucket is unlocked.

	Returns the number of Tdentry buffers with pending append data
	that could not be flushed on the last pass because they were
	referenced or their write lock was not available. Only the
	buffers other than the head of each ring are counted.
 */
u64
sync(void)
{
	Iobuf *p;
	Hiob *hp;
	u64 nlocked, i;

	/* flush any unflushed data appended to the Tdentry */
Resync:
	/* the count only reflects the last, complete pass */
	nlocked = 0;
	for(i = 0; i < nbuckets; i++){
		hp=&hiob[i];
		qlock(hp);
		if(hp->link != nil){
			/* from the oldest towards the newest, head excluded */
			for(p = hp->link->back; p!=hp->link; p=p->back){
				if(p->tag == Tdentry &&
					p->append != nil){
						if(p->ref == 0 && canwlock(p)){
							qunlock(hp);
							flush(p);
							goto Resync;
						}else
							nlocked++;
				}
			}
			/*
				the loop always ends with p == hp->link, handle the head.
				NOTE(review): a busy head is not added to nlocked,
				unlike the other buffers - confirm it is intended.
			 */
			if(p == hp->link &&	p->tag == Tdentry){
				if(p->ref == 0 &&
					p->append != nil &&
					canwlock(p)){
					qunlock(hp);
					flush(p);
					goto Resync;
				}
			}
		}
		qunlock(hp);
	}
	return nlocked;
}

/*
	Get the Iobuf of the disk block at blkno from the buffer cache
	for my use.

	All disk accesses go through the buffer cache. getbuf() selects
	the Iobuf for our use from the buffer cache. putbuf() returns the
	Iobuf back to the buffer cache.

	Any Iobuf access happens only between the getbuf() and putbuf() calls.

	The Iobuf's are grouped into a least-recently-used circular list
	of buffers. The most recently used Iobuf is pointed to by Hiob.link.
	Iobuf.fore is the next recently used buffer.
	Iobuf.back is the oldest recently used buffer.
	Hiob.link->back is the oldest buffer that will be reused first.

	blkno: disk block number, also selects the hash bucket.
	len: size of the block in Blocksize units, panics when it is
		more than Maxdatablockunits.
	readonly: when set the Iobuf is returned with rlock(),
		else with wlock().
	freshalloc: when set the contents are not read from the disk,
		the zeroed memory is used as is.

	Returns the locked Iobuf, which becomes the head of its lru ring.

	The Iobuf is incref()'ed while the hash bucket is locked and
	decref()'ed just before returning. flushold() and the stealing
	code below only pick buffers with ref == 0, so the Iobuf cannot
	be removed or stolen while we wait for its lock.
 */
Iobuf *
getbuf(u64 blkno, u16 len, u8 readonly, u8 freshalloc)
{
	Hiob *hp;
	Iobuf *s, *p;

	if(len > Maxdatablockunits)
		panic("getbuf(): invalid len %ud blkno %llud\n", len, blkno);
	hp = &hiob[blkno%nbuckets];
	if(chatty9p > 4)
		dprint("getbuf blkno %llud blkno%%nbuckets %llud pc 0x%p"
				" hiob 0x%p hp 0x%p readonly %d\n",
			blkno, blkno%nbuckets, getcallerpc(&blkno),
			hiob, hp, readonly);
	qlock(hp);
	s = hp->link;
	if(s == nil)
		goto new;
	/* search the ring for blkno, starting at the most recently used */
	for(p=s;;){
		if(p->blkno == blkno){
			if(p != s){
				/* remove p from its current position in the lru circular buffer */
				p->back->fore = p->fore;
				p->fore->back = p->back;

				/* make p the hb->link and put it at the back of existing link */
				p->fore = s;
				p->back = s->back;
				s->back = p;
				p->back->fore = p;
				hp->link = p;
			}
			incref(p);
			qunlock(hp);
			if(chatty9p > 4)
				dprint("	in cache, after qunlock(hp) hp 0x%p blkno %llud\n",
						hp, blkno);
			if(p->len != len){
				/*
					cached with a different size: replace the memory.
					NOTE(review): when the second check below finds
					that the size already matches, the Iobuf is
					returned wlock()'ed even for a readonly request -
					confirm it is intended.
				 */
				wlock(p);
				/* has someone done this change already? */
				if(p->len != len){
					freememunits(p->xiobuf, p->len);
					p->xiobuf = allocmemunits(len);
					p->len = len;
					p->freshalloc = freshalloc;
					if(freshalloc == 0)
						devread(blkno, p->xiobuf, len);
					if(readonly){
						/* swap the write lock for a read lock */
						if(chkwunlock(p) == 0){
							showbuf(p);
							panic("getbuf chkwunlock(p) == 0 called by %#p\n", getcallerpc(&blkno));
						}
						rlock(p);
					}
				}
			}else if(readonly){
				/* the len's are u16, widen them to match the %llud verbs */
				if(chatty9p > 4)
					dprint("	in cache iobuf 0x%p has len %llud blkno %llud len %llud .."
							" rlock()\n", p, (u64)p->len, blkno, (u64)len);
				rlock(p);
			}else{
				wlock(p);
				if(chatty9p > 4)
					dprint("	after wlock() blkno %llud\n", blkno);
			}
			decref(p);
			return p;
		}
		p = p->fore;
		if(p == s)
			break;
	}

	/* maxed out our allowed number of collisions,
		try to steal an older Iobuf without any ref's and not in the write queue.
		Ncollisions is a soft limit.
		We are not moving the stolen buffer to the top of the circular linked list,
		but, setting this stolen buffer as the lru. I figure it should not matter
		much either way. If it does, there is a changelru() function to do so in the
		git history that can be reused.

		incref(Iobuf) only happens with a qlock(hash bucket).

		NOTE(review): the outer test wants p->append != nil and the
		test after canwlock() gives up when p->append != nil, so as
		written no buffer is ever stolen and a new Iobuf is always
		added. The outer test presumably should be p->append == nil -
		confirm before changing it, the stealing path would then run
		for the first time.
	 */
	if(hp->n >= Ncollisions){
		for(p = hp->link->back; p != hp->link; p = p->back){
			if(p->ref == 0 && p->append != nil){
				if(canwlock(p)){
					/* p->ref cannot change without a lock on the hash bucket */
					if(p->append != nil){
						wunlock(p);
						continue;
					}
					incref(p);
					goto found;	/* p is wlock() */
				}
			}
		}
	}

	/* no unlocked blocks available; add a new one */
new:
	if(chatty9p > 4)
		dprint("	adding new Iobuf for blkno %llud\n", blkno);
	p = emalloc9p(sizeof(Iobuf));
	/* write locked until the contents are ready, see below */
	wlock(p);
	incref(p);
	hp->n++;

found:
	/* p is wlock()'ed and incref()'ed here, the hash bucket is still locked */
	p->blkno = blkno;
	s = hp->link;
	if(s != nil){
		/* for stolen Iobuf */
		if(p->fore != nil && p->back != nil){
			/* remove p from its current position in the lru circular buffer */
			p->back->fore = p->fore;
			p->fore->back = p->back;
		}
		/* make p the hb->link and put it at the back of existing link */
		p->fore = s;
		p->back = s->back;
		s->back = p;
		p->back->fore = p;
	}else{
		/* first Iobuf of this bucket: a ring of one */
		p->fore = p;
		p->back = p;
	}
	hp->link = p;
	qunlock(hp);
	if(chatty9p > 4)
		dprint("	after qunlock(hp) hp 0x%p blkno %llud\n",
				hp, blkno);
	if(p->len != len){
		/* p->len is only > 0 when the Iobuf already had memory */
		if(p->len > 0)
			freememunits(p->xiobuf, p->len);
		p->xiobuf = allocmemunits(len);
		p->len = len;
	}else
		memset(p->xiobuf, 0, p->len*Blocksize);
	p->freshalloc = freshalloc;
	if(freshalloc == 0)
		devread(blkno, p->xiobuf, len);
	if(readonly){
		/* the contents are ready, swap the write lock for a read lock */
		wunlock(p);
		rlock(p);
	}
	decref(p);
	return p;
}

/*
	getbuf() an existing (Bused) block and check that it is the
	block the caller expects.

	blkno, len, readonly: as for getbuf().
	tag: expected tag. For the metadata tags (Tdata < tag < MAXTAG)
		the current and new copies are selected with
		recentmetadata() and, when writable, the new copy is
		started as a copy of the current one with verd incremented.
	qpath: expected path, checked by checktag().

	Returns the locked Iobuf, or nil when getbuf() returned nil.
	Panics when the Iobuf has no contents or when checktag() fails.
 */
Iobuf *
getbufchk(u64 blkno, u16 len, u8 readonly, int tag, u64 qpath)
{
	Iobuf *b;

	if(chatty9p > 4)
		dprint("getbufchk caller pc 0x%p\n", getcallerpc(&blkno));
	b = getbuf(blkno, len, readonly, Bused);
	/* nothing to check; same convention as getmeta() */
	if(b == nil)
		return nil;

	/* checktag() reads through b->io, so verify it before any use */
	if(b->io == nil)
		panic("b->io == nil blkno %llud readonly %d tag %d"
				" qpath %llud b->blkno %llud caller %#p\n",
				blkno, readonly, tag, qpath, b->blkno,
				getcallerpc(&blkno));

	if(tag > Tdata && tag < MAXTAG){
		recentmetadata(b->m, &b->cur, &b->new);
		if(readonly == 0){ /* writable */
			memcpy(b->new, b->cur, Blocksize);
			b->new->verd++;
		}
	}
	if(checktag(b, len, tag, qpath) == 0){
		/* release the Iobuf without writing before giving up */
		putbuf(b, 0);
		panic("checktag on %llud failed %s\n", blkno, errstring[Ephase]);
	}
	return b;
}

/*
	getbufchk() for a metadata block: the length is always
	Metadataunits.
 */
Iobuf *
getmetachk(u64 blkno, u8 readonly, int tag, u64 qpath)
{
	Iobuf *b;

	b = getbufchk(blkno, Metadataunits, readonly, tag, qpath);
	return b;
}

/*
	getbuf() a metadata block (Metadataunits long) without
	checking its tag.

	The current and new copies are selected with recentmetadata().
	For a writable request the new copy starts as a copy of the
	current one with verd incremented.

	Returns the locked Iobuf, or nil when getbuf() returned nil.
 */
Iobuf *
getmeta(u64 blkno, u8 readonly, u8 freshalloc)
{
	Iobuf *b;

	b = getbuf(blkno, Metadataunits, readonly, freshalloc);
	if(b != nil){
		recentmetadata(b->m, &b->cur, &b->new);
		if(!readonly){
			/* writable */
			memcpy(b->new, b->cur, Blocksize);
			b->new->verd += 1;
		}
	}
	return b;
}

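/*
	putbuf(), defined after bkp():
	put the Iobuf of a disk block back into
		the buffer cache for others to use.
	writes it to the disk when dowrite is set.

	disabled debugging aid for putbuf():
	if(chatty9p > 4)
		dprint("putbuf p->blkno 0x%d t->c->type %d devtab[t->c->type]->dc %c\n"
				"	p 0x%p p->readonly %d\n"
				"	p->xiobuf 0x%p",
				p->blkno, t->c->type, devtab[t->c->type]->dc,
				p, p->readonly, p->xiobuf);

	bkp(), defined next, is used by putbuf() to write the backup
	copies of the config, super and root blocks.
 */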
/*
	Write a backup copy of a block.

	srcbno: block number of the original, only used in the message.
	contents: Ddatasize bytes to store in the backup.
	bno: block number of the backup. 0 is not a valid location: it
		is reported and nothing is written.
	qpath: path the backup block is expected to have.

	Panics when the backup block cannot be obtained.
 */
void
bkp(u64 srcbno, u8 *contents, u64 bno, u64 qpath)
{
	Iobuf *dst;

	if(bno == 0){
		dprint("bkp %llud: invalid backup location %llud, qpath %llud\n",
				srcbno, bno, qpath);
		return;
	}

	dst = getmetachk(bno, Bwritable, Tdentry, qpath);
	if(dst == nil)
		panic("bkp: buf == nil\n");

	memcpy(dst->new->buf, contents, Ddatasize);
	dst->new->mtime = nsec();
	/*
		disabled: clearing DMDIR of the backups of the root
		(Qproot0, Qproot1) to avoid a recursive du -a.
	 */
	putbuf(dst, 1);
}

/*
	Put the Iobuf back into the buffer cache for others to use.

	p: Iobuf obtained from getbuf(), panics when it is nil or has
		no contents.
	dowrite: for a write locked Iobuf, write the contents to the disk.
		Data blocks are written whole. Metadata blocks are written
		whole when freshly allocated, else only the new copy is
		written. When dowrite is 0 the new copy of a metadata block
		is given a version below the current one.

	A read locked Iobuf is only unlocked.

	The config, super and root blocks are also copied to their
	first backup location with bkp(), after the Iobuf is unlocked.
 */
void
putbuf(Iobuf *p, u8 dowrite)
{
	u8 saved[Ddatasize];
	u64 bno;

	if(p == nil){
		panic("putbuf p == nil called by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}
	if(p->io == nil){
		showbuf(p);
		panic("putbuf p->io == nil by %#p\n", getcallerpc(&p));
		dprint("%s\n", errstring[Ephase]);
		return;
	}

	if(chatty9p > 4)
		dprint("putbuf p->blkno %llud\n", p->blkno);
	p->atime = nsec();

	/* readers have nothing to write */
	if(p->readers){
		chkrunlock(p);
		if(chatty9p > 4)
			dprint(" .. runlock()'ed\n");
		return;
	}

	/* remember the block number, p is not ours after the unlock */
	bno = p->blkno;
	if(dowrite && p->tag == Tdata){
		devwrite(p->blkno, p->xiobuf, p->len);
	}else if(dowrite){
		/* keep the contents of the blocks that get a backup */
		if(p->blkno == config.config.srcbno ||
			p->blkno == config.super.srcbno ||
			p->blkno == config.root.srcbno)
			memcpy(saved, p->new->buf, Ddatasize);
		if(p->freshalloc)
			devwrite(p->blkno, p->xiobuf, Metadataunits);
		else
			devwrite(p->blkno+(p->new>p->cur?1:0), p->new, 1);
	}else if(p->tag > Tdata && p->tag < MAXTAG){
		/*
			make the new->ver below the cur->ver so the new gets
			overwritten on the next access instead of a memcpy() to
			copy the contents over.
		 */
		p->new->verd = p->cur->verd-1;
	}

	if(chkwunlock(p) == 0){
		showbuf(p);
		panic("putbuf: chkwunlock(p) == 0 called by %#p\n", getcallerpc(&p));
	}

	if(!dowrite)
		return;

	/* backups are written without holding the lock of the source */
	if(bno == config.config.srcbno)
		bkp(bno, saved, config.config.dest[0], Qpconfig0);
	else if(bno == config.super.srcbno)
		bkp(bno, saved, config.super.dest[0], Qpsuper0);
	else if(bno == config.root.srcbno)
		bkp(bno, saved, config.root.dest[0], Qproot0);
}

/*
	Check that the Iobuf holds the kind of block the caller expects.

	p: Iobuf to check.
	len: expected length in Blocksize units.
	tag: expected tag. For Tdata the tag is read from p->io and the
		path from the last u64 of the buffer. For the other tags
		both are read from the current metadata copy (p->cur).
	qpath: expected path, not compared when it is Qpnone.

	Returns 1 when everything matches. On a mismatch the details
	are printed and 0 is returned when qpath is Qpnone, else it
	panics.
 */
int
checktag(Iobuf *p, u16 len, u8 tag, u64 qpath)
{
	uintptr pc;
	u16 ptag;
	u64 pqpath;

	if(tag == Tdata){
		ptag = p->io->tag;
		pqpath = p->xiobuf64[p->len*Nu64perblock -1];
	}else{
		ptag = ((Dentry*)p->cur)->tag;
		pqpath = ((Dentry*)p->cur)->path;
	}

	if(len != p->len ||
		tag != ptag ||
		(qpath != Qpnone && pqpath != qpath)){
		pc = getcallerpc(&p);

		dprint("	tag = %G; expected %G; blkno = %llud\n",
				(uint)ptag, (uint)tag, p->blkno);
		if(qpath == Qpnone){
			/* the len's are u16, widen them to match the %llud verbs */
			dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
					" expected %s len %llud p->len %llud\n",
					pc, devfile, p->blkno,
					tagnames[ptag], pqpath,
					tagnames[tag],
					(u64)len, (u64)p->len);
		} else {
				dprint("	tag/path = %G/%llux; expected %G/%llux\n",
						(uint)ptag, pqpath, (uint)tag, qpath);
				dprint("checktag pc=%p disk %s(block %llud) tag/path=%s/%llud;"
						" expected %s/%llud\n",
						pc, devfile, p->blkno,
						tagnames[ptag], pqpath,
						tagnames[tag], qpath);
				panic("checktag failed\n");
		}
		return 0;
	}
	return 1;
}

/*
	Stamp the block held by the Iobuf with its tag and path.

	Data blocks keep the tag in p->io and the path in the last u64
	of the buffer. The other blocks keep both in the new metadata
	copy (p->new). p->tag is set in both cases.

	Panics when the Iobuf has readers or has no contents.
 */
void
settag(Iobuf *p, u8 tag, u64 qpath)
{
	Dentry *d;

	if(p->readers)
		panic("settag %s(%llux) tag/path=%s/%llud: not Bwritable\n",
				devfile, (u64)p->blkno, tagnames[tag], qpath);
	if(p->io == nil)
		panic("settag %s(%llux) tag/path=%s/%llud: p->io == nil\n",
				devfile, (u64)p->blkno, tagnames[tag], qpath);

	if(tag != Tdata){
		d = (Dentry*)p->new;
		d->tag = tag;
		d->path = qpath;
	}else{
		p->io->tag = Tdata;
		p->xiobuf64[p->len*Nu64perblock -1] = qpath;
	}
	p->tag = tag;
}

/*
	Allocate n bytes of zeroed memory aligned to sizeof(u64).
	Exits with sysfatal() when the memory is not available.
	The allocation is tagged with the pc of the caller.
 */
void *amalloc(u64 n){
	void *mem;

	mem = mallocalign(n, sizeof(u64), 0, 0);
	if(mem == nil)
		sysfatal("malloc: %r");
	else
		memset(mem, 0, n);
	setmalloctag(mem, getcallerpc(&n));
	return mem;
}

/*
 * Prepare nbuckets of hash buckets. Each bucket will point to a
 * linked list of collisions. The collisions are ordered into a
 * least-recently-used (lru) linked list.
 */
void
iobufinit(void)
{
	int i;
	Hiob *hp;

	while(prime(nbuckets) == 0)
		nbuckets++;
	if(chatty9p)
	dprint("mafs: iobufinit %ud hash buckets\n", nbuckets);
	hiob = amalloc(nbuckets*sizeof(Hiob));

	hp = hiob;
	if(chatty9p > 4)
		dprint("iobufinit: hiob 0x%p\n", hiob);
	for(i=0; i<nbuckets; i++){
		qlock(hp);
		qunlock(hp);
		hp++;
	}
}

/*
	Print the state of an Iobuf for debugging: the tag, reference
	and lock counts, block number, length, lru links, the address
	of the contents and the pc of the caller. The block itself is
	shown with showblock() when the Iobuf has contents.

	A nil p is reported with the pc of the caller.
 */
void
showbuf(Iobuf *p)
{
	if(p == nil){
		/* one verb, one argument: the pc of the caller */
		dprint("showbuf p == nil called by %#p\n",
				getcallerpc(&p));
		return;
	}
	dprint("showbuf p 0x%p %s ref %d readers %d writer %d"
			" blkno %llud len %d"
			" fore 0x%p back 0x%p"
			" xiobuf 0x%p"
			" caller %#p\n",
			p, tagnames[p->tag], p->ref, p->readers, p->writer,
			p->blkno, p->len,
			p->fore, p->back,
			p->xiobuf,
			getcallerpc(&p));
	if(p->io != nil)
		showblock(2, (u8*)p->io);
}