code: mafs

Download patch

ref: 5f15f2246d0fdac0fa98a0a910d40827edfaf68f
parent: 0124e0661a92123df8184b654be6ceabf7482324
author: 9ferno <gophone2015@gmail.com>
date: Thu Nov 10 10:24:52 EST 2022

another documentation update

--- a/9p.c
+++ b/9p.c
@@ -1269,10 +1269,10 @@
 s32
 append(Dentry *d, u64 dblkno, char *wbuf, s32 wbufsize)
 {
-	Iobuf *buf, *bufs[(Iounit/Blocksize)+1];
+	Iobuf *buf, *bufs[Ntogether];
 	s32 howmuch;
 	u64 blkno, lastblksize, written, i, nblocks, startreli;
-	u64 blknos[(Iounit/Blocksize)+1];
+	u64 blknos[Ntogether];
 
 	if(d == nil || wbuf == nil || wbufsize == 0)
 		return 0;
--- a/all.h
+++ b/all.h
@@ -78,17 +78,17 @@
 	};
 
 	/*
-		only used by mafs.
+		This field is used by mafs to ensure that Iobufs are not reused
+		while there are pending writes.
 
-		Using Ref to avoid using a wlock() in dowrite.
-		wlock() in dowrite() causes a deadlock with putwrite()
-
-		dirties is decremented without a wlock() on the buffer in dowrite().
-		Using a wlock() in dowrite() deadlocks with putwrite().
+		dowrite() uses a Ref instead of a wlock() to mark Iobuf's
+		with pending writes.
+		Using a wlock() in dowrite() causes a deadlock with putwrite()
+		especially when the writer queue is full.
 		getbuf() guarantees that even a free'ed block cannot be
 		stolen until the dirties == 0. This avoids dirty blocks
-		being stolen by other block numbers.
-		incref(dirties) only happens with a wlock() in putwrite().
+		being stolen for other block numbers.
+		incref(dirties) only happens while holding a wlock() in putwrite().
 	 */
 	Ref	dirties;	/* number of versions of this block yet to be written by the writer */
 };
--- a/dat.h
+++ b/dat.h
@@ -166,12 +166,13 @@
  */
 enum {
 	Blocksize	= Rawblocksize - sizeof(Tag),
-	Namelen	= (Blocksize-sizeof(Dentry1)),		/* maximum size of the name of a file or directory */
+	Namelen	= (Blocksize-sizeof(Dentry1)),	/* maximum size of the name of a file or directory */
 
 	Iounit		= MAXRPC, /* in bytes */
+	Ntogether	= (Iounit/Blocksize)+1,		/* max probable size for jumbo writes */
 
-	Ndentryperblock	= 1, /* Blocksize / sizeof(Dentry), */
-	Nindperblock	= Blocksize / sizeof(u64), /* number of pointers per block */
+	Ndentryperblock	= 1, /* Blocksize/sizeof(Dentry), */
+	Nindperblock	= Blocksize/sizeof(u64),/* number of pointers per block */
 };
 
 #pragma pack on
--- a/docs/mafs.ms
+++ b/docs/mafs.ms
@@ -479,9 +479,9 @@
 .sp
 To find the actual block number where the first block (zero'th as zero indexed) of a file is stored:
 .nf
-tests/6.reli 0 # command, below is the output of this command
-reli 0
-dblock[0]
+	tests/6.reli 0 # command, below is the output of this command
+	reli 0
+	dblock[0]
 .fi
 .sp
 To find the actual block number where the second block of a file is stored:
@@ -509,7 +509,7 @@
 	reli 57731387017
 	iblock[5]
 	Tind5 reli 56800235583 is at [61]
-		Tind4 reli 916132831 is at [61]
+	Tind4 reli 916132831 is at [61]
 	Tind3 reli 14776335 is at [61]
 	Tind2 reli 238327 is at [61]
 	Tind1 reli 3843 is at [61]
@@ -705,9 +705,9 @@
 .sp
 An Iobuf is protected by a read-write lock (RWlock). This ensures synchronization across multiple processes updating the same file.
 .sp
-getbuf(), putbuf() and putbuffree() are used to manage Iobuf's. The contents of an Iobuf is not touched unless it is locked between getbuf(), putbuf() and putbuffree() calls. The Iobuf.dirties Ref is decremented by the writer's dowrite() without a lock(). This is to avoid deadlocks between putbuf() and the writer especially when the writer queue is full.
+getbuf(), putbuf(), putbufs() and putbuffree() are used to manage Iobuf's. The contents of an Iobuf are not touched unless it is locked by getbuf(). It is unlocked by putbuf(), putbufs() or putbuffree() calls. The Iobuf.dirties Ref is decremented by the mafs writer's dowrite() without a lock(). This is to avoid deadlocks between putbuf() and the writer especially when the writer queue is full.
 .sp
-allocblock() allocates a free block into an Iobuf.
+allocblock() allocates a free block into an Iobuf. allocblocks() allocates a bunch of free blocks with their own Iobuf's.
 .sp
 freeblock() erases the Iobuf and returns the block to the free block management routines.
 .sp
@@ -714,11 +714,21 @@
 Iobuf's are organized into a list of hash buckets to speed up access.
 .sp
 .nf
+Hiob *hiob = nil;	/* array of nbuckets */
 struct Hiob		/* Hash bucket */
 {
 	Iobuf* link;	/* least recently used Iobuf in the circular linked list */
 	QLock;		/* controls access to this hash bucket */
 };
+struct Content	/* used to unmarshall the disk contents */
+{
+	union{
+		u8 buf[Blocksize];
+		u64 bufa[Nindperblock];
+		Dentry d;
+	};
+	Tag;
+};
 struct Iobuf
 {
 	Ref;
@@ -731,17 +741,17 @@
 		Content *io;	/* cast'able to contents */
 	};
 	/*
-		only used by mafs.
+		This field is used by mafs to ensure that Iobufs are not reused
+		while there are pending writes.
 
-		Using Ref to avoid using a wlock() in dowrite.
-		wlock() in dowrite() causes a deadlock with putwrite()
-
-		dirties is decremented without a wlock() on the buffer in dowrite().
-		Using a wlock() in dowrite() deadlocks with putwrite().
+		dowrite() uses a Ref instead of a wlock() to mark Iobuf's
+		with pending writes.
+		Using a wlock() in dowrite() causes a deadlock with putwrite()
+		especially when the writer queue is full.
 		getbuf() guarantees that even a free'ed block cannot be
 		stolen until the dirties == 0. This avoids dirty blocks
-		being stolen by other block numbers.
-		incref(dirties) only happens with a wlock() in putwrite().
+		being stolen for other block numbers.
+		incref(dirties) only happens while holding a wlock() in putwrite().
 	 */
 	Ref	dirties;	/* number of versions of this block yet to be written by the writer */
 };
@@ -810,13 +820,14 @@
 Asynchronous writes of Mafs
 .ft R
 .sp
-The blocks to be written to a disk are stored to a linked list represented by:
+The blocks to be written to a disk are stored in a linked list represented by:
+.br
 .nf
 struct Dirties
 {
-	QLock lck;		/* controls access to this queue */
+	QLock lck;		/* controls access to this writer queue */
 	Wbuf *head, *tail;	/* linked list of dirty blocks yet to be written to the disk */
-	s32 n;
+	s32 n;			/* number of dirty blocks in this linked list */
 	Rendez isfull;		/* write throttling */
 	Rendez isempty; 	/* writer does not have to keep polling to find work */
 } drts = {0};
@@ -824,8 +835,8 @@
 struct Wbuf
 {
 	u64	blkno;	/* block number on the disk, primary key */
-	Wbuf *prev, *next;
-	Iobuf *iobuf;	/* pointer to the used Iobuf in the buffer cache */
+	Wbuf *prev, *next;	/* writer queue */
+	Iobuf *iobuf;		/* pointer to the used Iobuf in the buffer cache */
 	union{
 		u8	payload;	/* "real" contents */
 		Content io;	/* cast'able to contents */
@@ -833,17 +844,17 @@
 };
 .fi
 .sp
-A single writer process takes the blocks from the Dirties linked list on a FIFO (first-in-first-out) basis and writes them to the disk. putbuf() adds blocks to the end of this linked list.
+A single writer process takes the blocks from the Dirties linked list on a FIFO (first-in-first-out) basis and writes them to the disk. putbuf() and putbufs() add blocks to the end of this linked list, the writer queue.
 .sp
-The dirty blocks not yet written to the disk remain in the buffer cache and cannot be stolen when a need for new Iobuf arises.
+The dirty blocks not yet written to the disk remain in the buffer cache and cannot be stolen when a need for a new Iobuf arises.
 .sp
 Free'd blocks are not written to the disk to avoid writing blanks to a disk.
 .sp
 The writer throttles input when there are more than Npendingwrites waiting to be written. This can be adjusted with the -w parameter.
 .sp
-The alternative to having a single writer process is to have each worker write to the disk, as mfs does. Synchronous writes throttles writes to disk write speed. With asynchronous writes, memory is used to hold the data until written to the disk. This shows increased write throughput until we fill up memory. After filling up memory, writes happen at disk speed. Asynchronous writes have the side effect of a single disk write queue.
+The alternative to having a single writer process is to have each worker process write to the disk, as mfs does. Synchronous writes throttle writes to disk write speed. With asynchronous writes, memory is used to hold the data until written to the disk. This shows increased write throughput until we fill up memory. After filling up memory, writes happen at disk speed. Asynchronous writes have the side effect of a single disk write queue.
 .sp
-The ideal npendingwrites = ((ups time in seconds )/2) * (diskspeed in bytes/second) / Rawblocksize.
+The ideal npendingwrites = ((ups time in seconds)/2) * (diskspeed in bytes/second) / Rawblocksize.
 .sp
 .sp
 .ne 4
@@ -875,20 +886,17 @@
 	u64 start;				/* where this extent starts from */
 	u64 len; 				/* how many units in this extent */
 
-	/*
-		circular least recently used linked list
-		limited to Nlru items
-	 */
+	/* circular least recently used linked list limited to Nlru items */
 	struct Extent *prev, *next;
 };
 struct Extents {
-	Extent *head;		/* find the first block in a jiffy */
+	Extent *head;	/* find the first block in a jiffy */
 	QLock lck;
 	u32 n;			/* number of extents */
-	Rendez isempty; 	/* fully used, nothing available */
+	Rendez isempty;	/* fully used, nothing available */
 
-	u8 nlru;			/* number of items in the lru linked list */
-	Extent *lru;		/* least recently used extent in the circular lru linked list */
+	u8 nlru;		/* number of items in the lru linked list */
+	Extent *lru;	/* least recently used extent in the circular lru linked list */
 };
 .fi
 .sp
@@ -1167,7 +1175,7 @@
 disk/used	List the used blocks by traversing all directory entries.
 disk/block	Show the contents of a block.
 disk/unused	Given a list of used blocks, lists the unused blocks.
-disk/updatefrees	update the contents of /adm/frees.
+disk/updatefrees	Update the contents of /adm/frees.
 .TE
 .sp
 .TS
@@ -1222,6 +1230,7 @@
 	mount -c /srv/mafs_myservice /n/mafs_myservice
 .fi
 .sp
+.ne 7
 Ream and start mafs on a file. Also, mount thet filesystem at /n/mafs_myservice.
 .sp
 .nf
@@ -1245,7 +1254,7 @@
 	mount -c /srv/mafs_sdF1 /n/mafs_sdF1
 
 	# for using the mafs file system on the disk later on
-	disk/mafs  /dev/sdF1/fs sdF1	# no -r
+	disk/mafs  /dev/sdF1/fs	# no -r
 	mount -c /srv/mafs_sdF1 /n/mafs_sdF1
 .fi
 .sp
@@ -1253,7 +1262,7 @@
 .sp
 .nf
 	dd -if /dev/zero -of disk.file -bs 512 -count 4096;
-	mount -c <{disk/mafs -s -r mafs_disk.file -m 1 -n mafs_disk.file \\
+	mount -c <{disk/mafs -s -r mafs_disk.file \\
 		  <[0=1]} /n/mafs_disk.file
 .fi
 .sp
@@ -1279,7 +1288,7 @@
 .sp
 	disk/block  tests/test.0/disk 22
 .sp
-Traverse the directory heirarchy and write out all the used block numbers. disk/reconcile uses the output of this to reconcile the list of used blocks with the list of free blocks. Also, writes the invalid blocks to stderr. Starting from root, walk down each directory entry printing out the linked blocks with invalid tags. Why not just write out the list of dirty blocks too? instead of using a different command for it?
+Traverse the directory hierarchy and write out all the used block numbers. disk/reconcile uses the output of this to reconcile the list of used blocks with the list of free blocks. Also, writes the invalid blocks to stderr. Starting from root, walk down each directory entry printing out the linked blocks with invalid tags. (Why not just write out the list of dirty blocks too, instead of using a different command for it?)
 .sp
 	disk/used  tests/test.0/disk
 .sp
@@ -1310,8 +1319,8 @@
 Build the list of free blocks. This should match the contents of /adm/frees.
 .sp
 .nf
-	disk/unused <{disk/used /dev/sdF1/fs} 11721040049 # 11721040049 is the total number of disk blocks
-	disk/unused <{disk/used test.0/disk} 32 # 32 is the total number of disk blocks
+	disk/unused <{disk/used /dev/sdF1/fs} 11721040049 # 11721040049 = total number of disk blocks
+	disk/unused <{disk/used test.0/disk} 32 # 32 = total number of disk blocks
 .fi
 .sp
 .ne 5
@@ -1318,8 +1327,8 @@
 Change the contents of /adm/frees.
 .sp
 .nf
-	disk/updatefrees tests/test.0/disk <{disk/unused <{disk/used tests/test.0/disk} 32 | tr -d '	'}
-	disk/updatefrees /dev/sdF1/fs <{disk/unused <{disk/used /dev/sdF1/fs} 11721040049 | tr -d '	'}
+	disk/updatefrees tests/test.0/disk <{disk/unused <{disk/used tests/test.0/disk} 32}
+	disk/updatefrees /dev/sdF1/fs <{disk/unused <{disk/used /dev/sdF1/fs} 11721040049}
 .fi
 .sp
 .ne 5
@@ -1415,10 +1424,10 @@
 .in 3n
 .br
 .ti 0
-1. Initializes a disk for mafs.
+1. Initialize a disk for mafs.
 .br
 .ti 0
-2. Run mfs and mafs on that dsk.
+2. Run mfs or mafs on that disk.
 .br
 .ti 0
 3. Stop mfs or mafs.
@@ -1453,7 +1462,7 @@
 tests/test.b	duplicate of test.2 but seeded with random data
 _
 tests/test.d	seed with random data and do mkdir -p a/b/c/d/e/f/g/h
-tests/test.e	seed with random data and test that directory and file deletions
+tests/test.e	seed with random data and test directory and file deletions
 .TE
 .sp
 .TS
--- a/extents.h
+++ b/extents.h
@@ -16,20 +16,17 @@
 	u64 start;					/* where this extent starts from */
 	u64 len; 					/* how many units in this extent */
 
-	/*
-		circular least recently used linked list
-		limited to Nlru items
-	 */
+	/* circular least recently used linked list limited to Nlru items */
 	struct Extent *prev, *next;
 };
 struct Extents {
-	Extent *head;		/* find the first block in a jiffy */
+	Extent *head;	/* find the first block in a jiffy */
 	QLock lck;
-	u32 n;				/* number of extents */
-	Rendez isempty; 	/* fully used, nothing available */
+	u32 n;			/* number of extents */
+	Rendez isempty; /* fully used, nothing available */
 
-	u8 nlru;			/* number of items in the lru linked list */
-	Extent *lru;		/* least recently used extent in the circular lru linked list */
+	u8 nlru;		/* number of items in the lru linked list */
+	Extent *lru;	/* least recently used extent in the circular lru linked list */
 };
 
 extern int chatty9p;
--- a/mafs.c
+++ b/mafs.c
@@ -106,7 +106,8 @@
 
 	/* 2/3rds of the memory for the pending writes
 		and 1/3rd for the buffer cache
-		leaving 4*(Iounit/Blocksize) for jumbo writes
+		leaving 4*Ntogether for jumbo writes when the writer
+		queue is full
 	 */
 	if(nmemunits == 0)
 		nmemunits = size/Rawblocksize > 8*MiB ? 8*MiB : size/Rawblocksize;
@@ -113,9 +114,9 @@
 	if(nmemunits < KiB)
 		nmemunits = KiB;
 	if(npendingwrites == 0)
-		npendingwrites = 2*(nmemunits-(4*(Iounit/Blocksize)))/3;
+		npendingwrites = 2*(nmemunits-(4*Ntogether))/3;
 	if(nbuckets == 0)
-		nbuckets = (nmemunits-(4*(Iounit/Blocksize)))/(3*Ncollisions);
+		nbuckets = (nmemunits-(4*Ntogether))/(3*Ncollisions);
 
 	if(chatty9p){
 		dprint("\nPlan 9 %d-bit file server with %d-deep indirect blocks\n",
--- a/mfs.c
+++ b/mfs.c
@@ -103,10 +103,7 @@
 	if(size == 0)
 		panic("null size %s", devfile);
 
-	/* 2/3rds of the memory for the pending writes
-		and 1/3rd for the buffer cache
-		leaving 4*(Iounit/Blocksize) for jumbo writes
-	 */
+	/* All memory for the buffer cache */
 	if(nmemunits == 0)
 		nmemunits = size/Rawblocksize > 8*MiB ? 8*MiB : size/Rawblocksize;
 	if(nmemunits < KiB)
--- a/tests/regress.rc
+++ b/tests/regress.rc
@@ -26,9 +26,9 @@
 
 	# start service
 	if(~ $"debug '')
-		mount -c <{disk/^$cmd -s -r $test -h 10 $disk <[0=1]} /n/^$service
+		mount -c <{disk/^$cmd -s -r $test $disk <[0=1]} /n/^$service
 	if not # if auth negotiation breaks, something is writing to fd 1.
-		mount -c <{disk/^$cmd $"debug -s -r $test -h 10 $disk <[0=1]} /n/^$service
+		mount -c <{disk/^$cmd $"debug -s -r $test $disk <[0=1]} /n/^$service
 	echo service status: $status
 
 	sleep 1
--- a/writer.c
+++ b/writer.c
@@ -14,7 +14,7 @@
 {
 	QLock lck;			/* controls access to this queue */
 	Wbuf *head, *tail;	/* linked list of dirty blocks yet to be written to the disk */
-	s32 n;
+	s32 n;			/* number of dirty blocks in this linked list */
 	Rendez isfull;		/* write throttling */
 	Rendez isempty; 	/* writer does not have to keep polling to find work */
 } drts = {0};
@@ -22,7 +22,7 @@
 struct Wbuf
 {
 	u64	blkno;		/* block number on the disk, primary key */
-	Wbuf *prev, *next;
+	Wbuf *prev, *next;	/* writer queue */
 	Iobuf *iobuf;	/* pointer to the used Iobuf in the buffer cache */
 	union{
 		u8	*payload;	/* "real" contents */
@@ -122,7 +122,7 @@
 void
 putwrites(Iobuf **bs, u64 len)
 {
-	Wbuf *w, *ws[(Iounit/Blocksize)+1];
+	Wbuf *w, *ws[Ntogether];
 	u8 empty;
 	u64 i;
 	Iobuf *b;
@@ -174,7 +174,7 @@
 void
 dowrite(void)
 {
-	Wbuf *b, *blks[128];
+	Wbuf *b, *blks[Ntogether];
 	u64 prevblkno, startblkno, n, wn, i;
 	u8 full, *jumbo;
 
@@ -204,7 +204,7 @@
 		n = 1;
 		prevblkno = startblkno = drts.head->blkno;
 		for(b = drts.head->next;
-				n <= drts.n && b != nil && b->blkno == prevblkno+1 && n < 128;
+				n <= drts.n && b != nil && b->blkno == prevblkno+1 && n < Ntogether;
 				b = b->next){
 			prevblkno=b->blkno;
 			n++;