ref: 83daaf4ee43ec79d87dab45c3d14e55b2adb8425
dir: /sys/src/cmd/aux/searchfs.c/
#include <u.h>
#include <libc.h>
#include <auth.h>
#include <fcall.h>
/*
 * caveat:
 * this stuff is only meant to work for ascii databases
 */
typedef struct Fid Fid;
typedef struct Fs Fs;
typedef struct Quick Quick;
typedef struct Match Match;
typedef struct Search Search;
enum
{
	OPERM	= 0x3,		/* mask of all permission types in open mode */
	Nfidhash	= 32,
	/*
	 * qids
	 */
	Qroot	= 1,
	Qsearch	= 2,
	Qstats	= 3,
};
/*
 * boyer-moore quick string matching
 */
struct Quick
{
	char	*pat;
	char	*up;		/* match string for upper case of pat */
	int	len;		/* of pat (and up) -1; used for fast search */
	uchar 	jump[256];	/* jump index table */
	int	miss;		/* amount to jump if we falsely match the last char */
};
extern void	quickmk(Quick*, char*, int);
extern void	quickfree(Quick*);
extern char*	quicksearch(Quick*, char*, char*);
/*
 * exact matching of a search string
 */
struct Match
{
	Match	*next;
	char	*pat;				/* null-terminated search string */
	char	*up;				/* upper case of pat */
	int	len;				/* length of both pat and up */
	int	(*op)(Match*, char*, char*);		/* method for this partiticular search */
};
struct Search
{
	Quick		quick;		/* quick match */
	Match		*match;		/* exact matches */
	int		skip;		/* number of matches to skip */
};
extern char*	searchsearch(Search*, char*, char*, int*);
extern Search*	searchparse(char*, char*);
extern void	searchfree(Search*);
struct Fid
{
	Lock;
	Fid	*next;
	Fid	**last;
	uint	fid;
	int	ref;		/* number of fcalls using the fid */
	int	attached;		/* fid has beed attached or cloned and not clunked */
	int	open;
	Qid	qid;
	Search	*search;		/* search patterns */
	char	*where;		/* current location in the database */
	int	n;		/* number of bytes left in found item */
};
int			dostat(int, uchar*, int);
void*			emalloc(uint);
void			fatal(char*, ...);
Match*			mkmatch(Match*, int(*)(Match*, char*, char*), char*);
Match*			mkstrmatch(Match*, char*);
char*		nextsearch(char*, char*, char**, char**);
int			strlook(Match*, char*, char*);
char*			strndup(char*, int);
int			tolower(int);
int			toupper(int);
char*			urlunesc(char*, char*);
void			usage(void);
struct Fs
{
	Lock;			/* for fids */
	Fid	*hash[Nfidhash];
	uchar	statbuf[1024];	/* plenty big enough */
};
extern	void	fsrun(Fs*, int);
extern	Fid*	getfid(Fs*, uint);
extern	Fid*	mkfid(Fs*, uint);
extern	void	putfid(Fs*, Fid*);
extern	char*	fsversion(Fs*, Fcall*);
extern	char*	fsauth(Fs*, Fcall*);
extern	char*	fsattach(Fs*, Fcall*);
extern	char*	fswalk(Fs*, Fcall*);
extern	char*	fsopen(Fs*, Fcall*);
extern	char*	fscreate(Fs*, Fcall*);
extern	char*	fsread(Fs*, Fcall*);
extern	char*	fswrite(Fs*, Fcall*);
extern	char*	fsclunk(Fs*, Fcall*);
extern	char*	fsremove(Fs*, Fcall*);
extern	char*	fsstat(Fs*, Fcall*);
extern	char*	fswstat(Fs*, Fcall*);
char	*(*fcalls[])(Fs*, Fcall*) =
{
	[Tversion]		fsversion,
	[Tattach]	fsattach,
	[Tauth]	fsauth,
	[Twalk]		fswalk,
	[Topen]		fsopen,
	[Tcreate]	fscreate,
	[Tread]		fsread,
	[Twrite]	fswrite,
	[Tclunk]	fsclunk,
	[Tremove]	fsremove,
	[Tstat]		fsstat,
	[Twstat]	fswstat
};
char	Eperm[] =	"permission denied";
char	Enotdir[] =	"not a directory";
char	Enotexist[] =	"file does not exist";
char	Eisopen[] = 	"file already open for I/O";
char	Einuse[] =	"fid is already in use";
char	Enofid[] = 	"no such fid";
char	Enotopen[] =	"file is not open";
char	Ebadsearch[] =	"bad search string";
Fs	fs;
char	*database;
char	*edatabase;
int	messagesize = 8192+IOHDRSZ;
void
main(int argc, char **argv)
{
	Dir *d;
	char buf[12], *mnt, *srv;
	int fd, p[2], n;
	mnt = "/tmp";
	srv = nil;
	ARGBEGIN{
		case 's':
			srv = ARGF();
			mnt = nil;
			break;
		case 'm':
			mnt = ARGF();
			break;
	}ARGEND
	fmtinstall('F', fcallfmt);
	if(argc != 1)
		usage();
	d = nil;
	fd = open(argv[0], OREAD);
	if(fd < 0 || (d=dirfstat(fd)) == nil)
		fatal("can't open %s: %r", argv[0]);
	n = d->length;
	free(d);
	if(n == 0)
		fatal("zero length database %s", argv[0]);
	database = emalloc(n);
	if(read(fd, database, n) != n)
		fatal("can't read %s: %r", argv[0]);
	close(fd);
	edatabase = database + n;
	if(pipe(p) < 0)
		fatal("pipe failed");
	switch(rfork(RFPROC|RFMEM|RFNOTEG|RFNAMEG)){
	case 0:
		fsrun(&fs, p[0]);
		exits(nil);
	case -1:	
		fatal("fork failed");
	}
	if(mnt == nil){
		if(srv == nil)
			usage();
		fd = create(srv, OWRITE, 0666);
		if(fd < 0){
			remove(srv);
			fd = create(srv, OWRITE, 0666);
			if(fd < 0){
				close(p[1]);
				fatal("create of %s failed", srv);
			}
		}
		sprint(buf, "%d", p[1]);
		if(write(fd, buf, strlen(buf)) < 0){
			close(p[1]);
			fatal("writing %s", srv);
		}
		close(p[1]);
		exits(nil);
	}
	if(mount(p[1], -1, mnt, MREPL, "") == -1){
		close(p[1]);
		fatal("mount failed");
	}
	close(p[1]);
	exits(nil);
}
/*
 * execute the search
 * do a quick match,
 * isolate the line in which the occured,
 * and try all of the exact matches
 */
char*
searchsearch(Search *search, char *where, char *end, int *np)
{
	Match *m;
	char *s, *e;
	*np = 0;
	if(search == nil || where == nil)
		return nil;
	for(;;){
		s = quicksearch(&search->quick, where, end);
		if(s == nil)
			return nil;
		e = memchr(s, '\n', end - s);
		if(e == nil)
			e = end;
		else
			e++;
		while(s > where && s[-1] != '\n')
			s--;
		for(m = search->match; m != nil; m = m->next){
			if((*m->op)(m, s, e) == 0)
				break;
		}
		if(m == nil){
			if(search->skip > 0)
				search->skip--;
			else{
				*np = e - s;
				return s;
			}
		}
		where = e;
	}
}
/*
 * parse a search string of the form
 * tag=val&tag1=val1...
 */
Search*
searchparse(char *search, char *esearch)
{
	Search *s;
	Match *m, *next, **last;
	char *tag, *val, *p;
	int ok;
	s = emalloc(sizeof *s);
	s->match = nil;
	/*
	 * acording to the http spec,
	 * repeated search queries are ingored.
	 * the last search given is performed on the original object
	 */
	while((p = memchr(s, '?', esearch - search)) != nil){
		search = p + 1;
	}
	while(search < esearch){
		search = nextsearch(search, esearch, &tag, &val);
		if(tag == nil)
			continue;
		ok = 0;
		if(strcmp(tag, "skip") == 0){
			s->skip = strtoul(val, &p, 10);
			if(*p == 0)
				ok = 1;
		}else if(strcmp(tag, "search") == 0){
			s->match = mkstrmatch(s->match, val);
			ok = 1;
		}
		free(tag);
		free(val);
		if(!ok){
			searchfree(s);
			return nil;
		}
	}
	if(s->match == nil){
		free(s);
		return nil;
	}
	/*
	 * order the matches by probability of occurance
	 * first cut is just by length
	 */
	for(ok = 0; !ok; ){
		ok = 1;
		last = &s->match;
		for(m = *last; m && m->next; m = *last){
			if(m->next->len > m->len){
				next = m->next;
				m->next = next->next;
				next->next = m;
				*last = next;
				ok = 0;
			}
			last = &m->next;
		}
	}
	/*
	 * convert the best search into a fast lookup
	 */
	m = s->match;
	s->match = m->next;
	quickmk(&s->quick, m->pat, 1);
	free(m->pat);
	free(m->up);
	free(m);
	return s;
}
void
searchfree(Search *s)
{
	Match *m, *next;
	if(s == nil)
		return;
	for(m = s->match; m; m = next){
		next = m->next;
		free(m->pat);
		free(m->up);
		free(m);
	}
	quickfree(&s->quick);
	free(s);
}
char*
nextsearch(char *search, char *esearch, char **tagp, char **valp)
{
	char *tag, *val;
	*tagp = nil;
	*valp = nil;
	for(tag = search; search < esearch && *search != '='; search++)
		;
	if(search == esearch)
		return search;
	tag = urlunesc(tag, search);
	search++;
	for(val = search; search < esearch && *search != '&'; search++)
		;
	val = urlunesc(val, search);
	if(search != esearch)
		search++;
	*tagp = tag;
	*valp = val;
	return search;
}
Match*
mkstrmatch(Match *m, char *pat)
{
	char *s;
	for(s = pat; *s; s++){
		if(*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'){
			*s = 0;
			m = mkmatch(m, strlook, pat);
			pat = s + 1;
		}else
			*s = tolower(*s);
	}
	return mkmatch(m, strlook, pat);
}
Match*
mkmatch(Match *next, int (*op)(Match*, char*, char*), char *pat)
{
	Match *m;
	char *p;
	int n;
	n = strlen(pat);
	if(n == 0)
		return next;
	m = emalloc(sizeof *m);
	m->op = op;
	m->len = n;
	m->pat = strdup(pat);
	m->up = strdup(pat);
	for(p = m->up; *p; p++)
		*p = toupper(*p);
	for(p = m->pat; *p; p++)
		*p = tolower(*p);
	m->next = next;
	return m;
}
int
strlook(Match *m, char *str, char *e)
{
	char *pat, *up, *s;
	int c, pc, fc, fuc, n;
	n = m->len;
	fc = m->pat[0];
	fuc = m->up[0];
	for(; str + n <= e; str++){
		c = *str;
		if(c != fc && c != fuc)
			continue;
		s = str + 1;
		up = m->up + 1;
		for(pat = m->pat + 1; pc = *pat; pat++){
			c = *s;
			if(c != pc && c != *up)
				break;
			up++;
			s++;
		}
		if(pc == 0)
			return 1;
	}
	return 0;
}
/*
 * boyer-moore style pattern matching
 * implements an exact match for ascii
 * however, if mulitbyte upper-case and lower-case
 * characters differ in length or in more than one byte,
 * it only implements an approximate match
 */
void
quickmk(Quick *q, char *spat, int ignorecase)
{
	char *pat, *up;
	uchar *j;	
	int ep, ea, cp, ca, i, c, n;
	/*
	 * allocate the machine
	 */
	n = strlen(spat);
	if(n == 0){
		q->pat = nil;
		q->up = nil;
		q->len = -1;
		return;
	}
	pat = emalloc(2* n + 2);
	q->pat = pat;
	up = pat;
	if(ignorecase)
		up = pat + n + 1;
	q->up = up;
	while(c = *spat++){
		if(ignorecase){
			*up++ = toupper(c);
			c = tolower(c);
		}
		*pat++ = c;
	}
	pat = q->pat;
	up = q->up;
	pat[n] = up[n] = '\0';
	/*
	 * make the skipping table
	 */
	if(n > 255)
		n = 255;
	j = q->jump;
	memset(j, n, 256);
	n--;
	q->len = n;
	for(i = 0; i <= n; i++){
		j[(uchar)pat[i]] = n - i;
		j[(uchar)up[i]] = n - i;
	}
	
	/*
	 * find the minimum safe amount to skip
	 * if we match the last char but not the whole pat
	 */
	ep = pat[n];
	ea = up[n];
	for(i = n - 1; i >= 0; i--){
		cp = pat[i];
		ca = up[i];
		if(cp == ep || cp == ea || ca == ep || ca == ea)
			break;
	}
	q->miss = n - i;
}
void
quickfree(Quick *q)
{
	if(q->pat != nil)
		free(q->pat);
	q->pat = nil;
}
char *
quicksearch(Quick *q, char *s, char *e)
{
	char *pat, *up, *m, *ee;
	uchar *j;
	int len, n, c, mc;
	len = q->len;
	if(len < 0)
		return s;
	j = q->jump;
	pat = q->pat;
	up = q->up;
	s += len;
	ee = e - (len * 4 + 4);
	while(s < e){
		/*
		 * look for a match on the last char
		 */
		while(s < ee && (n = j[(uchar)*s])){
			s += n;
			s += j[(uchar)*s];
			s += j[(uchar)*s];
			s += j[(uchar)*s];
		}
		if(s >= e)
			return nil;
		while(n = j[(uchar)*s]){
			s += n;
			if(s >= e)
				return nil;
		}
		/*
		 * does the string match?
		 */
		m = s - len;
		for(n = 0; c = pat[n]; n++){
			mc = *m++;
			if(c != mc && mc != up[n])
				break;
		}
		if(!c)
			return s - len;
		s += q->miss;
	}
	return nil;
}
void
fsrun(Fs *fs, int fd)
{
	Fcall rpc;
	char *err;
	uchar *buf;
	int n;
	buf = emalloc(messagesize);
	while((n = read9pmsg(fd, buf, messagesize)) != 0){
		if(n < 0)
			fatal("mount read: %r");
		rpc.data = (char*)buf + IOHDRSZ;
		if(convM2S(buf, n, &rpc) != n)
			fatal("convM2S format error: %r");
		/*
		 * flushes are way too hard.
		 * a reply to the original message seems to work
		 */
		if(rpc.type == Tflush)
			continue;
		else if(rpc.type >= Tmax || !fcalls[rpc.type])
			err = "bad fcall type";
		else
			err = (*fcalls[rpc.type])(fs, &rpc);
		if(err){
			rpc.type = Rerror;
			rpc.ename = err;
		}else
			rpc.type++;
		n = convS2M(&rpc, buf, messagesize);
		// fprint(2, "send: %F\n", &rpc);
		if(write(fd, buf, n) != n)
			fatal("mount write");
	}
}
Fid*
mkfid(Fs *fs, uint fid)
{
	Fid *f;
	int h;
	h = fid % Nfidhash;
	for(f = fs->hash[h]; f; f = f->next){
		if(f->fid == fid)
			return nil;
	}
	f = emalloc(sizeof *f);
	f->next = fs->hash[h];
	if(f->next != nil)
		f->next->last = &f->next;
	f->last = &fs->hash[h];
	fs->hash[h] = f;
	f->fid = fid;
	f->ref = 1;
	f->attached = 1;
	f->open = 0;
	return f;
}
Fid*
getfid(Fs *fs, uint fid)
{
	Fid *f;
	int h;
	h = fid % Nfidhash;
	for(f = fs->hash[h]; f; f = f->next){
		if(f->fid == fid){
			if(f->attached == 0)
				break;
			f->ref++;
			return f;
		}
	}
	return nil;
}
void
putfid(Fs *, Fid *f)
{
	f->ref--;
	if(f->ref == 0 && f->attached == 0){
		*f->last = f->next;
		if(f->next != nil)
			f->next->last = f->last;
		if(f->search != nil)
			searchfree(f->search);
		free(f);
	}
}
char*
fsversion(Fs *, Fcall *rpc)
{
	if(rpc->msize < 256)
		return "version: message size too small";
	if(rpc->msize > messagesize)
		rpc->msize = messagesize;
	messagesize = rpc->msize;
	if(strncmp(rpc->version, "9P", 2) != 0)
		rpc->version = "unknown";
	else
		rpc->version = "9P2000";
	return nil;
}
char*
fsauth(Fs *, Fcall *)
{
	return "searchfs: authentication not required";
}
char*
fsattach(Fs *fs, Fcall *rpc)
{
	Fid *f;
	f = mkfid(fs, rpc->fid);
	if(f == nil)
		return Einuse;
	f->open = 0;
	f->qid.type = QTDIR;
	f->qid.path = Qroot;
	f->qid.vers = 0;
	rpc->qid = f->qid;
	putfid(fs, f);
	return nil;
}
char*
fswalk(Fs *fs, Fcall *rpc)
{
	Fid *f, *nf;
	int nqid, nwname, type;
	char *err, *name;
	ulong path;
	f = getfid(fs, rpc->fid);
	if(f == nil)
		return Enofid;
	nf = nil;
	if(rpc->fid != rpc->newfid){
		nf = mkfid(fs, rpc->newfid);
		if(nf == nil){
			putfid(fs, f);
			return Einuse;
		}
		nf->qid = f->qid;
		putfid(fs, f);
		f = nf;	/* walk f */
	}
	err = nil;
	path = f->qid.path;
	nwname = rpc->nwname;
	for(nqid=0; nqid<nwname; nqid++){
		if(path != Qroot){
			err = Enotdir;
			break;
		}
		name = rpc->wname[nqid];
		if(strcmp(name, "search") == 0){
			type = QTFILE;
			path = Qsearch;
		}else if(strcmp(name, "stats") == 0){
			type = QTFILE;
			path = Qstats;
		}else if(strcmp(name, ".") == 0 || strcmp(name, "..") == 0){
			type = QTDIR;
			path = path;
		}else{
			err = Enotexist;
			break;
		}
		rpc->wqid[nqid] = (Qid){path, 0, type};
	}
	if(nwname > 0){
		if(nf != nil && nqid < nwname)
			nf->attached = 0;
		if(nqid == nwname)
			f->qid = rpc->wqid[nqid-1];
	}
	putfid(fs, f);
	rpc->nwqid = nqid;
	f->open = 0;
	return err;
}
char *
fsopen(Fs *fs, Fcall *rpc)
{
	Fid *f;
	int mode;
	f = getfid(fs, rpc->fid);
	if(f == nil)
		return Enofid;
	if(f->open){
		putfid(fs, f);
		return Eisopen;
	}
	mode = rpc->mode & OPERM;
	if(mode == OEXEC
	|| f->qid.path == Qroot && (mode == OWRITE || mode == ORDWR)){
		putfid(fs, f);
		return Eperm;
	}
	f->open = 1;
	f->where = nil;
	f->n = 0;
	f->search = nil;
	rpc->qid = f->qid;
	rpc->iounit = messagesize-IOHDRSZ;
	putfid(fs, f);
	return nil;
}
char *
fscreate(Fs *, Fcall *)
{
	return Eperm;
}
char*
fsread(Fs *fs, Fcall *rpc)
{
	Fid *f;
	int n, off, count, len;
	f = getfid(fs, rpc->fid);
	if(f == nil)
		return Enofid;
	if(!f->open){
		putfid(fs, f);
		return Enotopen;
	}
	count = rpc->count;
	off = rpc->offset;
	rpc->count = 0;
	if(f->qid.path == Qroot){
		if(off > 0)
			rpc->count = 0;
		else
			rpc->count = dostat(Qsearch, (uchar*)rpc->data, count);
		putfid(fs, f);
		if(off == 0 && rpc->count <= BIT16SZ)
			return "directory read count too small";
		return nil;
	}
	if(f->qid.path == Qstats){
		len = 0;
	}else{
		for(len = 0; len < count; len += n){
			if(f->where == nil || f->search == nil)
				break;
			if(f->n == 0)
				f->where = searchsearch(f->search, f->where, edatabase, &f->n);
			n = f->n;
			if(n != 0){
				if(n > count-len)
					n = count-len;
				memmove(rpc->data+len, f->where, n);
				f->where += n;
				f->n -= n;
			}
		}
	}
	putfid(fs, f);
	rpc->count = len;
	return nil;
}
char*
fswrite(Fs *fs, Fcall *rpc)
{
	Fid *f;
	f = getfid(fs, rpc->fid);
	if(f == nil)
		return Enofid;
	if(!f->open || f->qid.path != Qsearch){
		putfid(fs, f);
		return Enotopen;
	}
	if(f->search != nil)
		searchfree(f->search);
	f->search = searchparse(rpc->data, rpc->data + rpc->count);
	if(f->search == nil){
		putfid(fs, f);
		return Ebadsearch;
	}
	f->where = database;
	f->n = 0;
	putfid(fs, f);
	return nil;
}
char *
fsclunk(Fs *fs, Fcall *rpc)
{
	Fid *f;
	f = getfid(fs, rpc->fid);
	if(f != nil){
		f->attached = 0;
		putfid(fs, f);
	}
	return nil;
}
char *
fsremove(Fs *, Fcall *)
{
	return Eperm;
}
char *
fsstat(Fs *fs, Fcall *rpc)
{
	Fid *f;
	f = getfid(fs, rpc->fid);
	if(f == nil)
		return Enofid;
	rpc->stat = fs->statbuf;
	rpc->nstat = dostat(f->qid.path, rpc->stat, sizeof fs->statbuf);
	putfid(fs, f);
	if(rpc->nstat <= BIT16SZ)
		return "stat count too small";
	return nil;
}
char *
fswstat(Fs *, Fcall *)
{
	return Eperm;
}
int
dostat(int path, uchar *buf, int nbuf)
{
	Dir d;
	switch(path){
	case Qroot:
		d.name = ".";
		d.mode = DMDIR|0555;
		d.qid.type = QTDIR;
		break;
	case Qsearch:
		d.name = "search";
		d.mode = 0666;
		d.qid.type = QTFILE;
		break;
	case Qstats:
		d.name = "stats";
		d.mode = 0666;
		d.qid.type = QTFILE;
		break;
	}
	d.qid.path = path;
	d.qid.vers = 0;
	d.length = 0;
	d.uid = d.gid = d.muid = "none";
	d.atime = d.mtime = time(nil);
	return convD2M(&d, buf, nbuf);
}
char *
urlunesc(char *s, char *e)
{
	char *t, *v;
	int c, n;
	v = emalloc((e - s) + 1);
	for(t = v; s < e; s++){
		c = *s;
		if(c == '%'){
			if(s + 2 >= e)
				break;
			n = s[1];
			if(n >= '0' && n <= '9')
				n = n - '0';
			else if(n >= 'A' && n <= 'F')
				n = n - 'A' + 10;
			else if(n >= 'a' && n <= 'f')
				n = n - 'a' + 10;
			else
				break;
			c = n;
			n = s[2];
			if(n >= '0' && n <= '9')
				n = n - '0';
			else if(n >= 'A' && n <= 'F')
				n = n - 'A' + 10;
			else if(n >= 'a' && n <= 'f')
				n = n - 'a' + 10;
			else
				break;
			s += 2;
			c = c * 16 + n;
		}
		*t++ = c;
	}
	*t = 0;
	return v;
}
int
toupper(int c)
{
	if(c >= 'a' && c <= 'z')
		c += 'A' - 'a';
	return c;
}
int
tolower(int c)
{
	if(c >= 'A' && c <= 'Z')
		c += 'a' - 'A';
	return c;
}
void
fatal(char *fmt, ...)
{
	va_list arg;
	char buf[1024];
	write(2, "searchfs: ", 8);
	va_start(arg, fmt);
	vseprint(buf, buf+1024, fmt, arg);
	va_end(arg);
	write(2, buf, strlen(buf));
	write(2, "\n", 1);
	exits(fmt);
}
void *
emalloc(uint n)
{
	void *p;
	p = malloc(n);
	if(p == nil)
		fatal("out of memory");
	memset(p, 0, n);
	return p;
}
void
usage(void)
{
	fprint(2, "usage: searchfs [-m mountpoint] [-s srvfile] database\n");
	exits("usage");
}