code: purgatorio

ref: d3d1b3b47ff80f451c6c9f8b7f1262fef6545f2b
dir: /liblogfs/write.c/

View raw version
#include "logfsos.h"
#include "logfs.h"
#include "local.h"

typedef struct AllocState AllocState;
struct AllocState {
	long oldblock;
	int markbad;
};

Pageset
logfsdatapagemask(int pages, int base)
{
	if(pages == BITSPERSET)
		return ~(Pageset)0;
	return (((Pageset)1 << pages) - 1) << (BITSPERSET - base - pages);
}

static Pageset
fastgap(Pageset w, uint n)
{
	Pageset s;
//print("fastgap(0x%.8ux, %d)\n", w, n);
	if(w == 0 || n < 1 || n > BITSPERSET)
		return 0;
/*
#	unroll the following loop 5 times:
#		while(n > 1){
#			s := n >> 1;
#			w &= w<<s;
#			n -= s;
#		}
*/
	s = n >> 1;
	w &= w << s;
	n -= s;
	s = n >> 1;
	w &= w << s;
	n -= s;
	s = n >> 1;
	w &= w << s;
	n -= s;
	s = n >> 1;
	w &= w << s;
	n -= s;
	s = n >> 1;
	if(BITSPERSET == 64){	/* extra time if 64 bits */
		w &= w << s;
		n -= s;
		s = n >> 1;
	}
	return w & (w << s);
}

static int
nlz(Pageset x)
{
	int n, c;

	if(x == 0)
		return BITSPERSET;
	if(x & PAGETOP)
		return 0;
	n = BITSPERSET;
	c = BITSPERSET/2;
	do {
		Pageset y;
		y = x >> c;
		if(y != 0) {
			n -= c;
			x = y;
		}
	} while((c >>= 1) != 0);
	return n - x;
}

static Pageset
findgap(Pageset w, uint n)
{
	Pageset m;

	do {
		m  = fastgap(w, n);
		if(m)
			break;
		n--;
	} while(n);
	if(n == 0)
		return 0;
	return logfsdatapagemask(n, nlz(m));
}

static int
bitcount(Pageset mask)
{
	Pageset m;
	int rv;

	rv = 0;
	for(m = PAGETOP; m != 0; m >>= 1)
		if(mask & m)
			rv++;
	return rv;
}

static char *
allocdatapages(LogfsServer *server, u32int count, int *countp, long *blockindexp, int *pagep, u32int *flashaddr, AllocState *state)
{
	LogfsLowLevel *ll = server->ll;
	long b, blockindex;
	DataBlock *db;
	int pagebase;
	u32int pages = (count + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
	u32int gapmask;
	long bestfreeblockindex;
	int bestfree;
	int pagesperblock = 1 << ll->l2pagesperblock;
	int apages;
	char *errmsg;
	int didsomething;

	state->oldblock = -1;
	state->markbad = 0;
	if(pages > pagesperblock)
		pages = pagesperblock;
	/*
	 * fill in gaps first
	 */
	bestfreeblockindex = -1;
	bestfree = 0;
	for(blockindex = 0; blockindex < server->ndatablocks; blockindex++) {
		db = server->datablock + blockindex;
		if(db->block < 0)
			continue;
		gapmask = findgap(db->free & ~db->dirty, pages);
//print("blockindex %ld free 0x%.8ux dirty 0x%.8ux gapmask %.8ux\n", blockindex, db->free, db->dirty, gapmask);
		if(gapmask != 0) {
			/*
			 * this is free and !dirty
			 */
			b = db->block;
			USED(b);
			goto done;
		}
		else {
			int free = bitcount(db->free & logfsdatapagemask(pagesperblock, 0));
			if(free > 0 && (bestfreeblockindex < 0 || free > bestfree)) {
				bestfreeblockindex = blockindex;
				bestfree = free;
			}
		}
	}
//print("out of space - need to clean up a data block\n");
	if(bestfreeblockindex >= 0) {
//print("best block index %ld (%ld) %d bits\n", bestfreeblockindex, server->datablock[bestfreeblockindex].block, bestfree);
		/*
		 * clean up data block
		 */
		b = logfsfindfreeblock(ll, AllocReasonTransfer);
		while(b >= 0) {
			char *errmsg;
			LogfsLowLevelReadResult llrr;
			long oldblock;
			int markedbad;

			db = server->datablock + bestfreeblockindex;
			oldblock = db->block;
			errmsg = logfsservercopyactivedata(server, b, bestfreeblockindex, 0, &llrr, &markedbad);
			if(errmsg) {
				if(!markedbad)
					return errmsg;
				b = logfsfindfreeblock(ll, AllocReasonTransfer);
			}
			else {
				Pageset available;
				/*
				 * if page0 is free, then we must ensure that we use it otherwise
				 * in tagged storage such as nand, the block tag is not written
				 * in all cases, it is safer to erase the block afterwards to
				 * preserve the data for as long as possible (we could choose
				 * to erase the old block now if page0 has already been copied)
				 */
				blockindex = bestfreeblockindex;
				state->oldblock = oldblock;
				state->markbad = llrr != LogfsLowLevelReadResultOk;
				available = db->free & ~db->dirty;
				if(available & PAGETOP)
					available = logfsdatapagemask(nlz(~available), 0);
				gapmask = findgap(available, pages);
				goto done;
			}
		}
	}
	/*
	 * use already erased blocks, so long as there are a few free
	 */
	b = logfsfindfreeblock(ll, AllocReasonDataExtend);
	if(b >= 0) {
useerased:
		for(blockindex = 0, db = server->datablock; blockindex < server->ndatablocks; blockindex++, db++)
			if(db->block < 0)
				break;
		if(blockindex == server->ndatablocks)
			server->ndatablocks++;
		db->path = mkdatapath(blockindex, 0);
		db->block = b;
		(*ll->setblocktag)(ll, b, LogfsTdata);
		(*ll->setblockpath)(ll, b, db->path);
		db->free = logfsdatapagemask(pagesperblock, 0);
		db->dirty = 0;
		gapmask = db->free;
		goto done;
	}
	/*
	 * last resort; try to steal from log
	 */
//print("last resort\n");
	errmsg = logfsserverlogsweep(server, 0, &didsomething);
	if(errmsg)
		return errmsg;
	if(didsomething) {
		/*
		 * this can only create whole free blocks, so...
		 */
//print("findfree after last resort\n");
		b = logfsfindfreeblock(ll, AllocReasonDataExtend);
		if(b >= 0) {
//print("*********************************************************\n");
			goto useerased;
		}
	}
	*countp = 0;
	return nil;
done:
	/*
	 * common finish - needs gapmask, blockindex, db
	 */
	apages = bitcount(gapmask);
	pagebase = nlz(gapmask);
	if(apages > pages)
		apages = pages;
	gapmask = logfsdatapagemask(apages, pagebase);
	if(server->trace > 1)
		print("allocdatapages: block %ld(%ld) pages %d mask 0x%.8ux pagebase %d apages %d\n",
			blockindex, db->block, pages, gapmask, pagebase, apages);
	db->free &= ~gapmask;
	db->dirty |= gapmask;
	*pagep = pagebase;
	*blockindexp = blockindex;
	*flashaddr = logfsspo2flashaddr(server, blockindex, pagebase, 0);
	*countp = apages << ll->l2pagesize;
	if(*countp > count)
		*countp = count;
	return nil;
}

typedef struct Page {
	u32int pageaddr;
	int ref;
} Page;

typedef struct DataStructure {
	LogfsServer *server;
	int nentries;
	int maxentries;
	Page *array;
} DataStructure;

static int
deltapage(DataStructure *ds, u32int pageaddr, int add, int delta)
{
	int i;
	for(i = 0; i < ds->nentries; i++)
		if(ds->array[i].pageaddr == pageaddr) {
			ds->array[i].ref += delta;
			return 1;
		}
	if(!add)
		return 1;
	if(ds->maxentries == 0) {
		ds->array = logfsrealloc(nil, sizeof(Page) * 100);
		if(ds->array == nil)
			return 0;
		ds->maxentries = 100;
	}
	else if(ds->nentries >= ds->maxentries) {
		void *a = logfsrealloc(ds->array, ds->maxentries * 2 * sizeof(Page));
		if(a == nil)
			return 0;
		ds->array = a;
		ds->maxentries *= 2;
	}
	ds->array[ds->nentries].pageaddr = pageaddr;
	ds->array[ds->nentries++].ref = delta;
	return 1;
}

/*
 * only called for data addresses
 */
static int
deltapages(DataStructure *ds, LogfsLowLevel *ll, u32int baseflashaddr, int range, int add, int delta)
{
	long seq;
	int page, offset;
	int pages;
	u32int pageaddr;
	int x;

//print("deltapages(%ud, %ud, %d, %d)\n", baseflashaddr, limitflashaddr, add, delta);
	logfsflashaddr2spo(ds->server, baseflashaddr, &seq, &page, &offset);
	pages = (offset + range + (1 << ll->l2pagesize) - 1) >> ll->l2pagesize;
	pageaddr = (seq << ll->l2pagesperblock) + page;
 	for(x = 0; x < pages; x++, pageaddr++)
		if(!deltapage(ds, pageaddr, add, delta))
			return 0;
	return 1;
}

static int
findpageset(void *magic, u32int baseoffset, u32int limitoffset, Extent *e, u32int extentoffset)
{
	DataStructure *ds = magic;
	LogfsLowLevel *ll;
	u32int flashaddr;
	u32int range;
	u32int residue;

	if(e == nil || (e->flashaddr & LogAddr) != 0)
		return 1;
	ll = ds->server->ll;
//print("baseoffset %ud limitoffset %ud min %ud max %ud\n", baseoffset, limitoffset, e->min, e->max);
	flashaddr = e->flashaddr;
	if(extentoffset)
		if(!deltapages(ds, ll, flashaddr, extentoffset, 1, 1))
			return -1;
	flashaddr += extentoffset;
	range = limitoffset - baseoffset;
	if(!deltapages(ds, ll, flashaddr, range, 1, -1))
		return -1;
	flashaddr += range;
	residue = e->max - e->min - (extentoffset + range);
	if(residue)
		if(!deltapages(ds, ll, flashaddr, residue, 1, 1))
			return -1;
	return 1;
}

static int
addpagereferences(void *magic, Extent *e, int hole)
{
	DataStructure *ds = magic;

	if(hole || (e->flashaddr & LogAddr) != 0)
		return 1;
	return deltapages(ds, ds->server->ll, e->flashaddr, e->max - e->min, 0, 1) ? 1 : -1;
}

static char *
zappages(LogfsServer *server, Entry *e, u32int min, u32int max)
{
	DataStructure ds;
	long seq;
	int x, rv, page;
	Page *p;

	if(min >= e->u.file.length)
		return nil;		/* no checks necessary */
	if(min == 0 && max >= e->u.file.length) {
		/* replacing entire file */
		logfsextentlistwalk(e->u.file.extent, logfsunconditionallymarkfreeanddirty, server);
		return nil;
	}
	/* hard after that - this will need to be improved */
	/*
	 * current algorithm
	 * build a list of all pages referenced by the extents being removed, and count the
 	 * number of references
	 * then subtract the number of references to each page in entire file
	 * any pages with a reference count == 0 can be removed
	 */
	ds.server = server;
	ds.nentries = 0;
	ds.maxentries = 0;
	ds.array = nil;
	rv = logfsextentlistwalkrange(e->u.file.extent, findpageset, &ds, min, max);
	if(rv < 0 || ds.nentries == 0)
		goto Out;
	if(server->trace > 1){
		print("pass 1\n");
		for(x = 0; x < ds.nentries; x++){
			p = &ds.array[x];
			seq = p->pageaddr >> server->ll->l2pagesperblock;
			page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
			print("block %lud page %ud ref %d\n", seq, page, p->ref);
		}
		print("pass 2\n");
	}
	rv = logfsextentlistwalk(e->u.file.extent, addpagereferences, &ds);
	if(rv >= 0){
		for(x = 0; x < ds.nentries; x++){
			p = &ds.array[x];
			seq = p->pageaddr >> server->ll->l2pagesperblock;
			page = p->pageaddr & ((1 << server->ll->l2pagesperblock) - 1);
			if(server->trace > 1)
				print("block %lud page %ud ref %d\n", seq, page, p->ref);
			if(p->ref == 0)
				logfsfreedatapages(server, seq, logfsdatapagemask(1, page));
		}
	}
Out:
	logfsfreemem(ds.array);
	return rv < 0 ? Enomem : nil;
}

static void
disposeofoldblock(LogfsServer *server, AllocState *state)
{
	if(state->oldblock >= 0) {
		if(server->testflags & LogfsTestDontFettleDataBlock) {
			/* take the block out of commission */
			(*server->ll->setblocktag)(server->ll, state->oldblock, LogfsTworse);
			server->testflags &= ~LogfsTestDontFettleDataBlock;
		}
		else {
			if(state->markbad)
				(*server->ll->markblockbad)(server->ll, state->oldblock);
			else
				logfsbootfettleblock(server->lb, state->oldblock, LogfsTnone, ~0, nil);
		}
		state->oldblock = -1;
	}
}

char *
logfsserverwrite(LogfsServer *server, u32int fid, u32int offset, u32int count, uchar *buf, u32int *rcount)
{
	Fid *f;
	Entry *e;
	u32int now;
	char *muid;
	int muidlen;
	LogfsLowLevel *ll = server->ll;

	if(server->trace > 1)
		print("logfsserverwrite(%ud, %ud, %ud)\n", fid, offset, count);
	f = logfsfidmapfindentry(server->fidmap, fid);
	if(f == nil)
		return logfsebadfid;
	if(f->openmode < 0)
		return logfsefidnotopen;
	if((f->openmode & 3) == OREAD)
		return logfseaccess;
	e = f->entry;
	if(e->deadandgone)
		return Eio;
	if(e->qid.type & QTDIR)
		return Eperm;
	if(e->perm & DMAPPEND)
		offset = e->u.file.length;
	now = logfsnow();
	*rcount = count;
	muid = logfsisfindidfromname(server->is, f->uname);
	muidlen = strlen(muid);
	while(count) {
		Extent extent;
		int thistime;
		char *errmsg;
		thistime = lognicesizeforwrite(server, 1, count, muidlen);
		if(thistime == 0) {
			int p;
			u32int n;
			long blockindex;
			int pagebase;
			AllocState state;
			int pagesize = 1 << ll->l2pagesize;
		reallocate:
			errmsg = allocdatapages(server, count, &thistime, &blockindex, &pagebase, &extent.flashaddr, &state);
			if(errmsg)
				return errmsg;
			if(thistime == 0)
				return logfselogfull;
			for(p = pagebase, n = 0; n < thistime; p++, n += pagesize) {
				u32int mask;
				DataBlock *db = server->datablock + blockindex;
				errmsg = (*ll->writepage)(ll, buf + n, db->block, p);
				if(errmsg) {
					if(strcmp(errmsg, Eio) != 0) {
						/*
						 * something horrid happened down below
						 * recover without writing any more than we have to
						 */
						if(p != 0) {
							/*
							 * page 0 was either written already, or has been written in this loop
							 * thus the block referenced is valid on the media. all we need to do
							 * is lose the old block, mark the written pages as free (so they can
							 * be scavenged), and don't bother with the log message
							 */
							disposeofoldblock(server, &state);
							mask = logfsdatapagemask(p - pagebase - 1, pagebase);
							db->free |= mask;
							db->dirty |= mask;
							return errmsg;
						}
						/*
						 * page 0 failed to write (so nothing written at all)
						 * this is either an entirely free block (no erased block in savestate),
						 * or a copy of a scavenged block (erased block in savestate)
						 */
						if(state.oldblock < 0) {
							/*
							 * newly selected erased block (blockindex == server->ndatablocks - 1)
							 * mark it bad, lose it from the datablock table
							 */
							(*ll->markblockbad)(ll, db->block);
							db->block = -1;
							if(blockindex == server->ndatablocks - 1)
								server->ndatablocks--;
							return errmsg;
						}
						/*
						 * page 0 of a data scavenge copy
						 * mark it bad, restore state (old block)
						 */
						(*ll->markblockbad)(ll, db->block);
						db->block = state.oldblock;
						return errmsg;
					}
					/*
					 * write error on target block
					 *
					 * if it is a replacement (state saved)
					 *	mark the new block bad, restore state and try again
					 *
					 * if it is not replaced (no state saved)
					 *	replace block, and try again
					 */
					if(state.oldblock >= 0) {
						(*ll->markblockbad)(ll, db->block);
						db->block = state.oldblock;
					}
					else {
						errmsg = logfsserverreplacedatablock(server, blockindex);
						if(errmsg)
							return errmsg;
					}
					goto reallocate;
				}
				mask = logfsdatapagemask(1, p);
				db->free &= ~mask;
				db->dirty |= mask;
			}
			/* well, we managed to write the data out */
			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
				muid, nil, &extent.flashaddr);
			/*
			 * now we can dispose of the original data block, if any
			 * this is regardless of whether we succeeded in writing a log message, as
			 * if this block is not erased, there will be a duplicate
			 */
			disposeofoldblock(server, &state);
		}
		else {
			if(thistime > count)
				thistime = count;
			errmsg = logfslogwrite(server, 1, e->qid.path, offset, thistime, now, e->u.file.cvers,
				muid, buf, &extent.flashaddr);
		}
		/*
		 * here if we failed to write the log message
		 */
		if(errmsg)
			return errmsg;
		if(server->trace > 1)
			print("logfsserverwrite: %d bytes at flashaddr 0x%.8ux\n", thistime, extent.flashaddr);
		extent.min = offset;
		extent.max = offset + thistime;
		errmsg = zappages(server, e, extent.min, extent.max);
		if(errmsg)
			return errmsg;
		errmsg = logfsextentlistinsert(e->u.file.extent, &extent, nil);
		if(errmsg)
			return errmsg;
		e->muid = muid;
		e->mtime = now;
		offset += thistime;	
		if(e->u.file.length < offset)
			e->u.file.length = offset;
		count -= thistime;
		buf += thistime;
		e->qid.vers++;
	}
	return nil;
}