git: 9front

Download patch

ref: 38ae7b4136f0fb2d2ea981ed18b7173c55927cfd
parent: c2efa4e3cfe20b4bdc5dacd6898c96675d4e3ea4
author: cinap_lenrek <cinap_lenrek@centraldogma>
date: Tue Oct 4 14:48:31 EDT 2011

use file(1) in page and mothra to detect file type

--- a/sys/src/cmd/file.c
+++ b/sys/src/cmd/file.c
@@ -589,7 +589,7 @@
 	0x43614c66,	0xFFFFFFFF,	"FLAC audio file\n",	OCTET,
 	0x30800CC0,	0xFFFFFFFF,	"inferno .dis executable\n", OCTET,
 	0x04034B50,	0xFFFFFFFF,	"zip archive\n", "application/zip",
-	070707,		0xFFFF,		"cpio archive\n", OCTET,
+	070707,		0xFFFF,		"cpio archive\n", "application/x-cpio",
 	0x2F7,		0xFFFF,		"tex dvi\n", "application/dvi",
 	0xfaff,		0xfeff,		"mp3 audio\n",	"audio/mpeg",
 	0xfeff0000,	0xffffffff,	"utf-32be\n",	"text/plain charset=utf-32be",
@@ -752,8 +752,7 @@
 	chksum = strtol(hdr->chksum, 0, 8);
 	if (hdr->name[0] != '\0' && checksum(hp) == chksum) {
 		if (strcmp(hdr->magic, "ustar") == 0)
-			print(mime? "application/x-ustar\n":
-				"posix tar archive\n");
+			print(mime? "application/x-ustar\n": "posix tar archive\n");
 		else
 			print(mime? "application/x-tar\n": "tar archive\n");
 		return 1;
@@ -772,6 +771,9 @@
 	char	*mime;
 } file_string[] =
 {
+	"\x1f\x9d",		"compressed",			2,	"application/x-compress",
+	"\x1f\x8b",		"gzip compressed",		2,	"application/x-gzip",
+	"BZh",			"bzip2 compressed",		3,	"application/x-bzip2",
 	"!<arch>\n__.SYMDEF",	"archive random library",	16,	"application/octet-stream",
 	"!<arch>\n",		"archive",			8,	"application/octet-stream",
 	"070707",		"cpio archive - ascii header",	6,	"application/octet-stream",
@@ -787,15 +789,19 @@
 	"GIF",			"GIF image", 			3,	"image/gif",
 	"\0PC Research, Inc\0",	"ghostscript fax file",		18,	"application/ghostscript",
 	"%PDF",			"PDF",				4,	"application/pdf",
-	"<html>\n",		"HTML file",			7,	"text/html",
-	"<HTML>\n",		"HTML file",			7,	"text/html",
+	"<!DOCTYPE",		"HTML file",			9,	"text/html",
+	"<!doctype",		"HTML file",			9,	"text/html",
+	"<!--",			"HTML file",			4,	"text/html",
+	"<html>",		"HTML file",			6,	"text/html",
+	"<HTML>",		"HTML file",			6,	"text/html",
+	"<?xml",		"HTML file",			5,	"text/html",
 	"\111\111\052\000",	"tiff",				4,	"image/tiff",
 	"\115\115\000\052",	"tiff",				4,	"image/tiff",
 	"\377\330\377\340",	"jpeg",				4,	"image/jpeg",
 	"\377\330\377\341",	"jpeg",				4,	"image/jpeg",
 	"\377\330\377\333",	"jpeg",				4,	"image/jpeg",
-	"BM",			"bmp",				2,	"image/bmp",
-	"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/octet-stream",
+	"BM",			"bmp",				2,	"image/bmp", 
+	"\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1",	"microsoft office document",	8,	"application/doc",
 	"<MakerFile ",		"FrameMaker file",		11,	"application/framemaker",
 	"\033E\033",	"HP PCL printer data",		3,	OCTET,
 	"\033%-12345X",	"HPJCL file",		9,	"application/hpjcl",
@@ -916,10 +922,21 @@
 
 char*	html_string[] =
 {
+	"?xml",
+	"!doctype",
+	"html",
+	"head",
 	"title",
+	"link",
+	"meta",
 	"body",
-	"head",
+	"script",
 	"strong",
+	"input",
+	"table",
+	"form",
+	"font",
+	"div",
 	"h1",
 	"h2",
 	"h3",
@@ -926,11 +943,20 @@
 	"h4",
 	"h5",
 	"h6",
+	"ol",
 	"ul",
 	"li",
 	"dl",
 	"br",
+	"hr",
 	"em",
+	"th",
+	"tr",
+	"td",
+	"p",
+	"b",
+	"i",
+	"a",
 	0,
 };
 
@@ -952,13 +978,13 @@
 		if(*p == '/')
 			p++;
 		q = p;
-		while(p < buf+nbuf && *p != '>')
+		while(p < buf+nbuf && (isalnum(*p) || (p == q && (*p == '?' || *p == '!'))))	/* tag name; a leading ? or ! is kept for ?xml and !doctype */
 			p++;
 		if (p >= buf+nbuf)
 			break;
 		for(i = 0; html_string[i]; i++) {
-			if(cistrncmp(html_string[i], (char*)q, p-q) == 0) {
+			if(p-q == strlen(html_string[i]) && cistrncmp(html_string[i], (char*)q, p-q) == 0) {	/* whole name only: no prefixes, no empty run */
-				if(count++ > 4) {
+				if(++count > 2) {
 					print(mime ? "text/html\n" : "HTML file\n");
 					return 1;
 				}
@@ -1145,13 +1171,13 @@
 	cs /= 8.;
 	if(cs <= 24.322) {
 		if(buf[0]==0x1f && buf[1]==0x9d)
-			print(mime ? OCTET : "compressed\n");
+			print(mime ? "application/x-compress\n" : "compressed\n");	/* MIME answer is newline-terminated like the description */
 		else
 		if(buf[0]==0x1f && buf[1]==0x8b)
-			print(mime ? OCTET : "gzip compressed\n");
+			print(mime ? "application/x-gzip\n" : "gzip compressed\n");
 		else
 		if(buf[0]=='B' && buf[1]=='Z' && buf[2]=='h')
-			print(mime ? OCTET : "bzip2 compressed\n");
+			print(mime ? "application/x-bzip2\n" : "bzip2 compressed\n");
 		else
 			print(mime ? OCTET : "encrypted\n");
 		return 1;
--- a/sys/src/cmd/mothra/mothra.h
+++ b/sys/src/cmd/mothra/mothra.h
@@ -44,11 +44,14 @@
 enum{
 	PLAIN,
 	HTML,
+
 	GIF,
 	JPEG,
 	PNG,
 	BMP,
+
 	GUNZIP,
+	COMPRESS,
 	PAGE,
 };
 
--- a/sys/src/cmd/mothra/snoop.c
+++ b/sys/src/cmd/mothra/snoop.c
@@ -7,60 +7,121 @@
 #include "mothra.h"
 
 int
-snooptype(int fd)
+filetype(int fd, char *typ, int ntyp)
 {
-	int pfd[2], typ, n;
-	char buf[1024];
+	int ifd[2], ofd[2], xfd[2], n;
+	char *argv[3], buf[4096];
 
-	typ = PLAIN;
-	if((n = readn(fd, buf, sizeof(buf)-1)) < 0)
-		return typ;
-	buf[n] = 0;
-	if(cistrstr(buf, "<?xml") ||
-		cistrstr(buf, "<!DOCTYPE") ||
-		cistrstr(buf, "<HTML") ||
-		cistrstr(buf, "<head"))
-		typ = HTML;
-	else if(memcmp(buf, "\x1F\x8B", 2) == 0)
-		typ = GUNZIP;
-	else if(memcmp(buf, "\377\330\377", 3) == 0)
-		typ = JPEG;
-	else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
-		typ = PNG;
-	else if(memcmp(buf, "GIF", 3) == 0)
-		typ = GIF;
-	else if(memcmp(buf, "BM", 2) == 0)
-		typ = BMP;
-	else if(memcmp(buf, "PK\x03\x04", 4) == 0)
-		typ = PAGE;
-	else if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
-		typ = PAGE;
-	else if(memcmp(buf, "x T ", 4) == 0)
-		typ = PAGE;
-	else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0)
-		typ = PAGE;
-	else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0)
-		typ = PAGE;
-	else if(memcmp(buf, "\111\111\052\000", 4) == 0) 
-		typ = PAGE;
-	else if(memcmp(buf, "\115\115\000\052", 4) == 0)
-		typ = PAGE;
-	if(pipe(pfd) >= 0){
-		switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
-		case -1:
-			break;
-		case 0:
-			close(pfd[0]);
-			do {
-				if(write(pfd[1], buf, n) != n)
-					break;
-			} while((n = read(fd, buf, sizeof(buf))) > 0);
-			exits(nil);
-		default:
-			dup(pfd[0], fd);
-		}
-		close(pfd[1]);
-		close(pfd[0]);
+	typ[0] = 0;
+	if((n = readn(fd, buf, sizeof(buf))) < 0)
+		return -1;
+	if(n == 0)
+		return 0;
+	if(pipe(ifd) < 0)
+		return -1;
+	if(pipe(ofd) < 0){
+Err1:
+		close(ifd[0]);
+		close(ifd[1]);
+		return -1;
 	}
-	return typ;
+	/* child 1: run file(1) with stdin on ifd[1] and stdout on ofd[1] */
+	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
+	case -1:
+		close(ofd[0]);
+		close(ofd[1]);
+		goto Err1;
+	case 0:
+		dup(ifd[1], 0);
+		dup(ofd[1], 1);
+		close(ifd[1]);
+		close(ifd[0]);
+		close(ofd[1]);
+		close(ofd[0]);
+		close(fd);
+		argv[0] = "file";
+		argv[1] = "-m";
+		argv[2] = 0;
+		exec("/bin/file", argv);
+		exits("exec");	/* exec failed: never fall out of the switch into the parent's code */
+	}
+	close(ifd[1]);
+	close(ofd[1]);
+	/* child 2: feed the sniffed bytes to file(1) through the other end of the pipe */
+	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
+		close(fd);
+		close(ofd[0]);
+		write(ifd[0], buf, n);
+		exits(nil);
+	}
+	close(ifd[0]);
+	/* child 3: replay the sniffed bytes, then the rest of fd, into a pipe that replaces fd */
+	if(pipe(xfd) < 0){
+		close(ofd[0]);
+		return -1;
+	}
+	switch(rfork(RFFDG|RFPROC|RFNOWAIT)){
+	case -1:
+		break;
+	case 0:
+		close(ofd[0]);
+		close(xfd[0]);
+		do {
+			if(write(xfd[1], buf, n) != n)
+				break;
+		} while((n = read(fd, buf, sizeof(buf))) > 0);
+		exits(nil);
+	default:
+		dup(xfd[0], fd);
+	}
+	close(xfd[0]);
+	close(xfd[1]);
+	/* collect file(1)'s answer and strip its trailing newlines */
+	if((n = readn(ofd[0], typ, ntyp-1)) < 0)
+		n = 0;
+	close(ofd[0]);
+	while(n > 0 && typ[n-1] == '\n')
+		n--;
+	typ[n] = 0;
+	return 0;
+}
+
+int
+snooptype(int fd)
+{
+	/* MIME prefixes as reported by filetype(), tried in order; the first match decides */
+	static struct {
+		char	*prefix;
+		int	kind;
+	} map[] = {
+	"text/plain",			PLAIN,
+	"text/html",			HTML,
+	/* specific image formats */
+	"image/jpeg",			JPEG,
+	"image/gif",			GIF,
+	"image/png",			PNG,
+	"image/bmp",			BMP,
+	/* compressed streams */
+	"application/x-gzip",		GUNZIP,
+	"application/x-compress",	COMPRESS,
+	/* documents and archives */
+	"application/pdf",		PAGE,
+	"application/postscript",	PAGE,
+	"application/ghostscript",	PAGE,
+	"application/troff",		PAGE,
+	"application/zip",		PAGE,
+	"application/x-tar",		PAGE,
+	"application/x-ustar",		PAGE,
+	/* catch-all prefixes: must stay after the exact entries they would shadow */
+	"image/",			PAGE,
+	"text/",			PLAIN,
+	};
+	char mime[128];
+	int i;
+	/* -1 both when filetype() fails and when its answer matches no table entry */
+	if(filetype(fd, mime, sizeof(mime)) >= 0)
+		for(i = 0; i < nelem(map); i++)
+			if(strncmp(mime, map[i].prefix, strlen(map[i].prefix)) == 0)
+				return map[i].kind;
+	return -1;
 }
--- a/sys/src/cmd/page.c
+++ b/sys/src/cmd/page.c
@@ -144,11 +144,11 @@
 }
 
 int
-createtmp(ulong id, char *pfx)
+createtmp(char *pfx)
 {
+	static ulong id = 1;	/* kept across calls and bumped below, so successive names differ */
 	char nam[64];
-
-	snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id);
+	snprint(nam, sizeof nam, "%s%s%.12d%.8lux", pagespool, pfx, getpid(), id++);	/* name = pagespool, pfx, pid, counter */
 	return create(nam, OEXCL|ORCLOSE|ORDWR, 0600);
 }
 
@@ -219,7 +219,7 @@
 popenfile(Page*);
 
 int
-popenconv(Page *p)
+popenimg(Page *p)
 {
 	char nam[NPATH];
 	int fd;
@@ -250,6 +250,18 @@
 }
 
 int
+popenfilter(Page *p)
+{
+	char *cmd = (char*)p->data;	/* filter command chosen by popenfile's table, or nil */
+	seek(p->fd, 0, 0);
+	if(cmd != nil)
+		pipeline(p->fd, "%s", cmd);
+	p->data = nil;
+	p->open = popenfile;	/* popenfile sniffs the type again (presumably of the filter's output) */
+	return popenfile(p);
+}
+
+int
 popentape(Page *p)
 {
 	char mnt[32], cmd[64], *argv[4];
@@ -529,7 +541,7 @@
 		while((n = read(pdat[0], buf, sizeof(buf))) >= 0){
 			if(ofd >= 0 && (n <= 0 || infernobithdr(buf, n))){
 				snprint(nam, sizeof nam, "%d", i);
-				addpage(p, nam, popenconv, nil, ofd);
+				addpage(p, nam, popenimg, nil, ofd);
 				ofd = -1;
 			}
 			if(n <= 0)
@@ -536,7 +548,7 @@
 				break;
 			if(ofd < 0){
 				snprint(nam, sizeof nam, "%.4d", ++i);
-				if((ofd = createtmp((ulong)p, nam)) < 0)
+				if((ofd = createtmp(nam)) < 0)
 					ofd = dup(nullfd, -1);
 			}
 			if(write(ofd, buf, n) != n)
@@ -553,6 +565,51 @@
 }
 
 int
+filetype(char *buf, int nbuf, char *typ, int ntyp)
+{
+	/* store the "file -m" verdict for buf[0..nbuf) in typ, newlines stripped; -1 only if pipe() fails */
+	int n, ifd[2], ofd[2];
+	char *argv[3];
+	typ[0] = 0;
+	if(pipe(ifd) < 0)
+		return -1;
+	if(pipe(ofd) < 0){
+		close(ifd[0]);
+		close(ifd[1]);
+		return -1;
+	}
+	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
+		/* child: file(1) gets ifd[1] as stdin and ofd[1] as stdout */
+		dup(ifd[1], 0);
+		dup(ofd[1], 1);
+		close(ifd[1]);
+		close(ifd[0]);
+		close(ofd[1]);
+		close(ofd[0]);
+		argv[0] = "file";
+		argv[1] = "-m";
+		argv[2] = 0;
+		exec("/bin/file", argv);
+		exits("exec");	/* exec failed: do not fall through into the parent's code */
+	}
+	close(ifd[1]);
+	close(ofd[1]);
+	if(rfork(RFFDG|RFPROC|RFNOWAIT) == 0){
+		close(ofd[0]);
+		write(ifd[0], buf, nbuf);
+		exits(nil);
+	}
+	close(ifd[0]);
+	if((n = readn(ofd[0], typ, ntyp-1)) < 0)
+		n = 0;
+	close(ofd[0]);
+	while(n > 0 && typ[n-1] == '\n')
+		n--;
+	typ[n] = 0;
+	return 0;
+}
+
+int
 dircmp(void *p1, void *p2)
 {
 	Dir *d1, *d2;
@@ -566,7 +623,33 @@
 int
 popenfile(Page *p)
 {
-	char buf[NBUF], *file;
+	static struct {
+		char	*typ;
+		void	*popen;
+		void	*data;
+	} tab[] = {
+	"application/pdf",		popengs,	nil,
+	"application/postscript",	popengs,	nil,
+	"application/troff",		popengs,	"lp -dstdout",
+	"text/plain",			popengs,	"lp -dstdout",
+	"text/html",			popengs,	"uhtml | html2ms | tbl | troff -ms | lp -dstdout",
+	"application/dvi",		popengs,	"dvips -Pps -r0 -q1 -f1",
+	"application/doc",		popengs,	"doc2ps",
+	"application/zip",		popentape,	"fs/zipfs",
+	"application/x-tar",		popentape,	"fs/tarfs",
+	"application/x-ustar",		popentape,	"fs/tarfs",
+	"application/x-compress",	popenfilter,	"uncompress",
+	"application/x-gzip",		popenfilter,	"gunzip",
+	"application/x-bzip2",		popenfilter,	"bunzip2",
+	"image/gif",			popenimg,	"gif -t9",
+	"image/jpeg",			popenimg,	"jpg -t9",
+	"image/png",			popenimg,	"png -t9",
+	"image/ppm",			popenimg,	"ppm -t9",
+	"image/bmp",			popenimg,	"bmp -t9",
+	"image/p9bit",			popenimg,	nil,
+	};
+
+	char buf[NBUF], typ[128], *file;
 	int i, n, fd, tfd;
 	Dir *d;
 
@@ -610,69 +693,23 @@
 	}
 	free(d);
 
-	memset(buf, 0, 32+1);
-	if((n = read(fd, buf, 32)) <= 0)
+	memset(buf, 0, NBUF/2);
+	if((n = readn(fd, buf, NBUF/2)) <= 0)
 		goto Err1;
-
-	p->fd = fd;
-	p->data = nil;
-	p->open = popenconv;
-	if(memcmp(buf, "%PDF-", 5) == 0 || strstr(buf, "%!"))
-		p->open = popengs;
-	else if(memcmp(buf, "x T ", 4) == 0){
-		p->data = "lp -dstdout";
-		p->open = popengs;
-	}
-	else if(cistrstr(buf, "<?xml") ||
-		cistrstr(buf, "<!DOCTYPE") ||
-		cistrstr(buf, "<HTML")){
-		p->data = "uhtml | html2ms | tbl | troff -ms | lp -dstdout";
-		p->open = popengs;
-	}
-	else if(memcmp(buf, "\xF7\x02\x01\x83\x92\xC0\x1C;", 8) == 0){
-		p->data = "dvips -Pps -r0 -q1 -f1";
-		p->open = popengs;
-	}
-	else if(memcmp(buf, "\x1F\x8B", 2) == 0){
-		p->data = "gunzip";
-		p->open = popengs;
-	}
-	else if(memcmp(buf, "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", 8) == 0){
-		p->data = "doc2ps";
-		p->open = popengs;
-	}
-	else if(memcmp(buf, "PK\x03\x04", 4) == 0){
-		p->data = "fs/zipfs";
-		p->open = popentape;
-	}else if(memcmp(buf, "GIF", 3) == 0)
-		p->data = "gif -t9";
-	else if(memcmp(buf, "\111\111\052\000", 4) == 0) 
-		p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
-	else if(memcmp(buf, "\115\115\000\052", 4) == 0)
-		p->data = "fb/tiff2pic | fb/3to1 rgbv | fb/pcp -tplan9";
-	else if(memcmp(buf, "\377\330\377", 3) == 0)
-		p->data = "jpg -t9";
-	else if(memcmp(buf, "\211PNG\r\n\032\n", 3) == 0)
-		p->data = "png -t9";
-	else if(memcmp(buf, "\0PC Research, Inc", 17) == 0)
-		p->data = "aux/g3p9bit -g";
-	else if(memcmp(buf, "TYPE=ccitt-g31", 14) == 0)
-		p->data = "aux/g3p9bit -g";
-	else if(memcmp(buf, "II*", 3) == 0)
-		p->data = "aux/g3p9bit -g";
-	else if(memcmp(buf, "TYPE=", 5) == 0)
-		p->data = "fb/3to1 rgbv |fb/pcp -tplan9";
-	else if(buf[0] == 'P' && '0' <= buf[1] && buf[1] <= '9')
-		p->data = "ppm -t9";
-	else if(memcmp(buf, "BM", 2) == 0)
-		p->data = "bmp -t9";
-	else if(infernobithdr(buf, n))
-		p->data = nil;
-	else {
-		werrstr("unknown image format");
+	if(infernobithdr(buf, n))
+		strcpy(typ, "image/p9bit");
+	else
+		filetype(buf, n, typ, sizeof(typ));
+	for(i=0; i<nelem(tab); i++)
+		if(strncmp(typ, tab[i].typ, strlen(tab[i].typ)) == 0)
+			break;
+	if(i == nelem(tab)){
+		werrstr("unknown image format: %s", typ);
 		goto Err1;
 	}
-
+	p->fd = fd;
+	p->data = tab[i].data;
+	p->open = tab[i].popen;
 	if(seek(fd, 0, 0) < 0)
 		goto Noseek;
 	if((i = read(fd, buf+n, n)) < 0)
@@ -680,7 +717,7 @@
 	if(i != n || memcmp(buf, buf+n, i)){
 		n += i;
 	Noseek:
-		if((tfd = createtmp((ulong)p, "file")) < 0)
+		if((tfd = createtmp("file")) < 0)
 			goto Err1;
 		while(n > 0){
 			if(write(tfd, buf, n) != n)
@@ -1365,9 +1402,7 @@
 				if(s && strcmp(s, "quit")==0)
 					exits(0);
 				if(s && strcmp(s, "showdata")==0){
-					static ulong plumbid;
-
-					if((fd = createtmp(plumbid++, "plumb")) < 0){
+					if((fd = createtmp("plumb")) < 0){
 						fprint(2, "plumb: createtmp: %r\n");
 						goto Plumbfree;
 					}
--