code: plan9front

Download patch

ref: c6ca07ad4e2124c2e072ad63b54da494ac799312
parent: 9e8a7578f063f4caa5ceae9b997ff72a88dbaa58
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Jun 3 18:16:10 EDT 2023

sdnvme: add smart file for full smart info

Instead of cramming everything into the ctl file,
provide a separate file with the SMART/health information
using the format that matches the NVMe specification closely.

Also, the command to read the health information log page
can be namespace specific as indicated by bit 0 of LPA in
controller identification information.

--- a/rc/bin/inst/mountdist
+++ b/rc/bin/inst/mountdist
@@ -73,7 +73,7 @@
 	echo Please wait... Scanning storage devices...
 
 	parts=`{ls /dev/sd*/* /srv/sd*/*.iso >[2]/dev/null |
-		grep -v '/(plan9.*|nvram|ctl|log|raw)$'}
+		grep -v '/(plan9.*|nvram|ctl|log|led|raw|smart)$'}
 	for (i in $parts) {
 		echo -n '	'^$i
 		t=`{fstype $i}
--- a/sys/src/9/boot/local.rc
+++ b/sys/src/9/boot/local.rc
@@ -22,7 +22,7 @@
 		for(i in `{ls -p $d}){
 			p=$d/$i
 			switch($i){
-			case ctl raw log led
+			case ctl raw log led smart
 				;
 			case plan9 nvram swap
 				echo $p
--- a/sys/src/9/port/sdnvme.c
+++ b/sys/src/9/port/sdnvme.c
@@ -294,11 +294,106 @@
 	return r->status = SDok;
 }
 
+/* get64 assembles the eight bytes at p into a little-endian 64-bit value. */
+static u64int
+get64(uchar *p)
+{
+	/* widen every byte first: p[3]<<24 computed as int goes negative and sign-extends */
+	return (u64int)p[0] | (u64int)p[1]<<8 | (u64int)p[2]<<16 | (u64int)p[3]<<24
+		| (u64int)p[4]<<32 | (u64int)p[5]<<40
+		| (u64int)p[6]<<48 | (u64int)p[7]<<56;
+}
+
+/* readsmart fetches the SMART / Health log page and returns it as text via readstr(off, a, n, buf). */
+static long
+readsmart(SDunit *u, Chan *, void *a, long n, vlong off)
+{
+	Ctlr *ctlr = u->dev->ctlr;
+	char *buf, *p, *e;
+	uchar *info;
+	u32int nsid, *q;
+	WS ws;
+
+	buf = smalloc(READSTR);
+	if(waserror()){	/* on error: release the text buffer and re-raise */
+		free(buf);
+		nexterror();
+	}
+	p = buf;
+	e = buf + READSTR;
+
+	info = mallocalign(0x1000, ctlr->mps, 0, 0);
+	if(info == nil)
+		error(Enomem);
+	if(waserror()){	/* on error: release the log page buffer and re-raise */
+		free(info);
+		nexterror();
+	}
+
+	/* Log Page Attributes (LPA) Bit0: If set to '1' then the controller
+	 * supports SMART / Health information log page on a per namespace basis;
+	 * when it is clear, nsid 0xffffffff is passed instead of the unit's nsid. */
+	nsid = (ctlr->ident[261] & 1) != 0 ? ctlr->nsid[u->subno] : 0xffffffff;
+	/* q[10]: log page 0x2 with 512/4 dwords in the upper half; NOTE(review): NVMe NUMD is zero-based - confirm count */
+	q = qcmd(&ws, ctlr, 1, 0x02, nsid, info, 0x1000);
+	q[10] = (512/4)<<16 | 0x2;
+	q[11] = 0;
+	q[12] = 0;
+	q[13] = 0;
+	q[14] = 0;
+	checkstatus(wcmd(&ws, q), "read SMART/health info");
+	dmaflush(0, info, 0x1000);
+
+	p = seprint(p, e, "Critical Warning:\t");
+	if(info[0]&(1<<0))
+		p = seprint(p, e, "Available Spare,");
+	if(info[0]&(1<<1))
+		p = seprint(p, e, "Temperature Exceeded,");
+	if(info[0]&(1<<2))
+		p = seprint(p, e, "Reliability Degraded,");
+	if(info[0]&(1<<3))
+		p = seprint(p, e, "Read only mode,");
+	if(info[0]&(1<<4))
+		p = seprint(p, e, "Backup failed,");
+	p[-1] = '\n';	/* replaces the trailing ',' (or the '\t' when no warning bit is set) */
+
+	p = seprint(p, e, "Temperature:\t%d\n", (info[2]<<8 | info[1]) - 273);	/* presumably Kelvin to Celsius */
+
+	p = seprint(p, e, "Available Spare:\t%d%%\n", info[3]);
+	p = seprint(p, e, "Available Spare Threshold:\t%d%%\n", info[4]);
+
+	p = seprint(p, e, "Percentage Used:\t%d%%\n", info[5]);
+	/* counters sit 16 bytes apart; only the low 8 bytes of each are read by get64 */
+	p = seprint(p, e, "Data Units Read:\t%llud\n", get64(info+32));
+	p = seprint(p, e, "Data Units Written:\t%llud\n", get64(info+48));
+	p = seprint(p, e, "Host Read Commands:\t%llud\n", get64(info+64));
+	p = seprint(p, e, "Host Write Commands:\t%llud\n", get64(info+80));
+	p = seprint(p, e, "Controller Busy Time:\t%llud:%.2d\n", get64(info+96)/60, (int)(get64(info+96)%60));
+	p = seprint(p, e, "Power Cycles:\t%llud\n", get64(info+112));
+	p = seprint(p, e, "Power On Hours:\t%llud\n", get64(info+128));
+	p = seprint(p, e, "Unsafe Shutdowns:\t%llud\n", get64(info+144));
+	p = seprint(p, e, "Media Errors:\t%llud\n", get64(info+160));
+	USED(p);
+
+	free(info);
+	poperror();	/* drops the handler that frees info */
+
+	n = readstr(off, a, n, buf);
+	free(buf);
+	poperror();	/* drops the handler that frees buf */
+
+	return n;
+}
+
 static int
 nvmeverify(SDunit *u)
 {
 	Ctlr *ctlr = u->dev->ctlr;
-	return u->subno < ctlr->nnsid;
+
+	if(u->subno >= ctlr->nnsid)	/* subno indexes ctlr->nsid[]; presumably nnsid is its entry count */
+		return 0;
+	sdaddfile(u, "smart", 0440, eve, readsmart, nil);	/* attach the "smart" file, read through readsmart */
+	return 1;
 }
 
 static int
@@ -323,12 +418,7 @@
 		return 0;
 	}
 	dmaflush(0, info, 0x1000);
-	p = info;
-	u->sectors = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24
-		| (u64int)p[4]<<32
-		| (u64int)p[5]<<40
-		| (u64int)p[6]<<48
-		| (u64int)p[7]<<56;
+	u->sectors = get64(info);
 	p = &info[128 + 4*(info[26]&15)];
 	lbaf = p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24;
 	u->secsize = 1<<((lbaf>>16)&0xFF);
@@ -348,10 +438,6 @@
 {
 	Ctlr *ctlr;
 	char *e, *s;
-	u8int *data;
-	u32int *q;
-	u64int n;
-	WS ws;
 
 	if((ctlr = u->dev->ctlr) == nil || ctlr->ident == nil)
 		return 0;
@@ -363,37 +449,6 @@
 	p = seprint(p, e, "serial\t%.20s\n", (char*)ctlr->ident+4);
 	p = seprint(p, e, "firm\t%.8s\n", (char*)ctlr->ident+64);
 	p = seprint(p, e, "geometry %llud %lud\n", u->sectors, u->secsize);
-
-	/* SMART/health */
-	if((data = mallocalign(0x1000, ctlr->mps, 0, 0)) != nil){
-		q = qcmd(&ws, ctlr, 1, 0x02, 0xffffffff, data, 0x1000);
-		q[10] = (512/4)<<16 | 0x2;
-		q[11] = 0;
-		q[12] = 0;
-		q[13] = 0;
-		q[14] = 0;
-		if(wcmd(&ws, q) == 0){
-			dmaflush(0, data, 0x1000);
-			p = seprint(p, e, "temperature\t%d\n", (data[2]<<8 | data[1]) - 273);
-			p = seprint(p, e, "spare\t%d%%\n", data[3]);
-			p = seprint(p, e, "used\t%d%%\n", data[5]);
-			/* 16 bytes long, ignore the upper half */
-			n = data[144]<<0 | data[145]<<8 | data[146]<<16 | data[147]<<24
-				| (u64int)data[148]<<32
-				| (u64int)data[149]<<40
-				| (u64int)data[150]<<48
-				| (u64int)data[151]<<56;
-			p = seprint(p, e, "unsafe shutdowns\t%llud\n", n);
-			/* 16 bytes long, ignore the upper half */
-			n = data[160]<<0 | data[161]<<8 | data[162]<<16 | data[163]<<24
-				| (u64int)data[164]<<32
-				| (u64int)data[165]<<40
-				| (u64int)data[166]<<48
-				| (u64int)data[167]<<56;
-			p = seprint(p, e, "integrity errors\t%llud\n", n);
-		}
-		free(data);
-	}
 
 	return p-s;
 }