code: plan9front

ref: 88a8ca5c8de86f1f3e5b43a490e0e70c6b456072
dir: /sys/lib/man/permind/ptx1.c/

View raw version
/*
	permuted title index
	ptx [-t] [-i ignore] [-o only] [-w num] [-r]
	    [-c commands] [-g gap] [-h hole] [-b breaks] [-f] [input]

	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternatively, it includes only the words in the only file.
	If neither is given, it excludes the words in
	/sys/lib/man/permind/ignore.

	The width of the output line (except for the -r field)
	can be changed to num,
	which is a troff width measure, ens by default.
	With no -w, num is 72n, or 100n under -t.
	The -f flag tells sort to fold upper and lower case together.
	The -t flag says the output is for troff;
	font specifier -F implies -t.
	-g sets the gutter.
	-h sets the hole between wrapped segments.
	-b names a file whose characters are added to the word breaks
	(blank, tab and newline are always breaks).
	-r takes the first word on each line and makes it
	into a fifth field.
	-c inserts troff commands for font-setting etc. at the beginning.
 */

#include <u.h>
#include <libc.h>
#include <stdio.h>
#include <ctype.h>

#define DEFLTX "/sys/lib/man/permind/ignore"	/* ignore file used when neither -i nor -o is given */
#define TILDE	0177		/* actually RUBOUT, not ~ */
#define	N	30		/* word buffer size, in units of BUFSIZ */
#define	MAX	(N*BUFSIZ)	/* word buffer size in bytes; parenthesized so it is safe in any expression */
#define LMAX	4096		/* size of the line and mark buffers */
#define MAXT	2048		/* number of slots in hasht */
#define MASK	03777		/* MAXT-1: reduces a hash value to a hasht index */
#define ON	1

/*
 * Nonzero when c is a word-break character.  The argument is converted
 * to unsigned char so that a plain (possibly signed) char holding a byte
 * >= 0x80 cannot index btable with a negative subscript.
 */
#define isabreak(c) (btable[(unsigned char)(c)])

/*
 * Forward declarations.  Parameter names mirror the definitions below;
 * rtrim and ltrim have no definition in this part of the file, so their
 * parameters are left unnamed.
 */
char	*getline(void);
void	msg(char *s, char *arg);
void	extra(int c);
void	diag(char *s, char *arg);
void	cmpline(char *pend);
int	cmpword(char *cpp, char *pend, char *hpp);
void	putline(char *strt, char *end);
void	makek(void);
void	getsort(void);
char	*rtrim(char *, char *, int);
char	*ltrim(char *, char *, int);
void	putout(char *strt, char *end);
void	setlen(void);
void	getlen(void);
int	hash(char *strtp, char *endp);
int	storeh(int num, char *strtp);

int status;	/* NOTE(review): not referenced anywhere in the visible code */

char *hasht[MAXT];	/* hash table of ignore/only words: filled by storeh, probed by cmpline */
char line[LMAX];	/* current input line (getline/cmpline) or current sorted line (getsort) */
char mark[LMAX];	/* line of widths read from the width file in getsort */
/* one entry per white-space-delimited boundary of a sorted line (see getsort) */
struct word {
	char *p;	/* start of line (entry 0) or the white-space character at this boundary */
	int w;		/* number read for this boundary from the width file; 0 for entry 0 */
} word[LMAX/2];
char btable[256];	/* btable[c] is ON when c is a word-break character */
int ignore;		/* set by -i */
int only;		/* set by -o */
char *lenarg;		/* -w argument: line length as a troff measure */
char *gutarg;		/* -g argument: gutter as a troff measure */
char *holarg;		/* -h argument: hole as a troff measure; defaults to gutarg */
int llen;		/* set by getlen from the first line of widths */
int spacesl;		/* set by getlen from the second line (requested as the width of " /") */
int gutter;		/* set by getlen from the third line */
int hole;		/* set by getlen from the fourth line; at least 2*spacesl */
int mlen = LMAX;	/* limit on characters getline stores in line */
int halflen;		/* (llen-gutter)/2, computed in getsort */
int rflag;		/* set by -r */
char *strtbufp, *endbufp;	/* word buffer: start of the word being read, and end of buffer */


char *empty = "";
char *font = "R";	/* NOTE(review): not referenced anywhere in the visible code */
char *roff = "/bin/nroff";	/* formatter run to measure widths; -t and -c switch it to troff */
char *troff = "/bin/troff";

char *infile = "/fd/0";	/* input file name; replaced by the file-name argument if one is given */
FILE *inptr;

FILE *outptr = stdout;	/* stream putout writes to */

char *sortfile = "ptxsort";	/* output of sort program */
char nofold[] = {'-', 'd', 't', TILDE, 0};	/* sort option string: -d, with TILDE as the -t field separator */
char fold[] = {'-', 'd', 'f', 't', TILDE, 0};	/* same plus -f; selected by the -f flag */
char *sortopt = nofold;
FILE *sortptr;

char *kfile = "ptxmark";	/* ptxsort + troff goo for widths */
FILE *kptr;

char *wfile = "ptxwidth";	/* widths of words in ptxsort */
FILE *wptr;

char *bfile;	/*contains user supplied break chars */
FILE *bptr;

char *cmds;	/* -c argument: written as the first line of the mark file by makek */

void
main(int argc, char **argv)
{
	int c;
	char *bufp;
	char *pend;
	char *xfile;
	FILE *xptr;
	Waitmsg *w;

	/* argument decoding */
	xfile = DEFLTX;
	ARGBEGIN {
	case 'r':
		rflag = 1;
		break;
	case 'f':
		sortopt = fold;
		break;
	case 'w':
		if(lenarg)
			extra(ARGC());
		lenarg = ARGF();
		break;
	case 'c':
		if(cmds)
			extra(ARGC());
		cmds = ARGF();
	case 't':
		roff = troff;
		break;
	case 'g':
		if(gutarg)
			extra(ARGC());
		gutarg =  ARGF();
		break;
	case 'h':
		if(holarg)
			extra(ARGC());
		holarg =  ARGF();
		break;

	case 'i':
		if(only|ignore)
			extra(ARGC());
		ignore++;
		xfile = ARGF();
		break;

	case 'o':
		if(only|ignore)
			extra(ARGC());
		only++;
		xfile = ARGF();
		break;

	case 'b':
		if(bfile)
			extra(ARGC());
		bfile = ARGF();
		break;

	default:
		diag("Illegal argument:",*argv);
	} ARGEND

	if(lenarg == 0)
		lenarg = troff? "100n": "72n";
	if(gutarg == 0)
		gutarg = "3n";
	if(holarg == 0)
		holarg = gutarg;

	if(argc > 1)
		diag("Too many filenames",empty);
	if(argc == 1)
		infile = *argv;

	/* Default breaks of blank, tab and newline */
	btable[' '] = ON;
	btable['\t'] = ON;
	btable['\n'] = ON;
	if(bfile) {
		if((bptr = fopen(bfile,"r")) == NULL)
			diag("Cannot open break char file",bfile);

		while((c = getc(bptr)) != EOF)
			btable[c] = ON;
	}

	/*
	Allocate space for a buffer.  If only or ignore file present
	read it into buffer. Else read in default ignore file
	and put resulting words in buffer.
	*/

	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
		diag("Out of memory space",empty);
	bufp = strtbufp;
	endbufp = strtbufp+MAX;

	if((xptr = fopen(xfile,"r")) == NULL)
		diag("Cannot open  file",xfile);

	while(bufp < endbufp && (c = getc(xptr)) != EOF)
		if(isabreak(c)) {
			if(storeh(hash(strtbufp,bufp),strtbufp))
				diag("Too many words",xfile);
			*bufp++ = '\0';
			strtbufp = bufp;
		} else
			*bufp++ = (isupper(c)?tolower(c):c);
	if (bufp >= endbufp)
		diag("Too many words in file",xfile);
	endbufp = --bufp;

	/* open output file for sorting */

	if((sortptr = fopen(sortfile, "w")) == NULL)
		diag("Cannot open output for sorting:",sortfile);

	/*
	get a line of data and compare each word for
	inclusion or exclusion in the sort phase
	*/
	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
		diag("Cannot open data: ",infile);
	while((pend = getline()) != NULL)
		cmpline(pend);
	fclose(sortptr);

	if(fork()==0){
		execl("/bin/sort", "sort", sortopt, "+0", "-1", "+1", "-o",
			sortfile, sortfile, 0);
		diag("sort exec failed","");
	}
	if((w = wait()) == NULL || w->msg[0] != '\0')
		diag("sort failed","");
	free(w);

	makek();
	if(fork()==0){
		if(dup(create(wfile,OWRITE|OTRUNC,0666),1) == -1)
			diag("Cannot create width file:",wfile);
		execl(roff, roff, "-a", kfile, 0);
		diag("troff exec failed","");
	}
	if((w = wait()) == NULL || w->msg[0] != '\0')
		diag("troff failed", "");
	free(w);

	getsort();
	fflush(0);
	_exits(0);
/* I don't know what's wrong with the atexit func... */
/*	exits(0);	*/
}

/*
 * Print a diagnostic on standard error in the form
 * "ptx: <s> <arg>" followed by a newline.
 */
void
msg(char *s, char *arg)
{
	fprintf(stderr, "ptx: %s %s\n", s, arg);
}

/*
 * Complain that option letter c was given more than once (or
 * conflicts with an earlier option) and terminate through diag.
 * The option is reported as "-c.".
 */
void
extra(int c)
{
	char opt[4];

	opt[0] = '-';
	opt[1] = c;
	opt[2] = '.';
	opt[3] = '\0';
	diag("Extra option", opt);
}

/*
 * Fatal error: report s and arg via msg, then terminate the
 * process with s as the exit string.
 */
void
diag(char *s, char *arg)
{
	msg(s, arg);
	exits(s);
}


/*
 * Read the next non-blank input line from inptr into line[].
 *
 * Leading white space (including empty lines) is skipped, tabs are
 * stored as blanks, and trailing white space is removed.  The stored
 * text is terminated by a single '\n'; the return value points at
 * that newline.  Returns 0 when no further line is available.
 *
 * At most mlen-1 characters are stored so that the terminating
 * newline always fits inside line[]; characters beyond that are
 * read and dropped.  A last line that ends at EOF without a newline
 * is returned like any other line instead of being discarded.
 */
char*
getline(void)
{
	int c;
	char *linep;
	char *endlinep;

	/* keep one byte in reserve for the terminating newline */
	endlinep = line + mlen - 1;
	linep = line;

	/* Throw away leading white space */
	while(isspace(c = getc(inptr)))
		;
	if(c==EOF)
		return(0);
	ungetc(c,inptr);

	while((c = getc(inptr)) != EOF && c != '\n'){
		if(c == '\t')
			c = ' ';
		if(linep < endlinep)
			*linep++ = c;
	}

	/* strip trailing white space; line[0] is known not to be white space */
	while(linep > line && isspace((unsigned char)linep[-1]))
		linep--;
	*linep = '\n';
	return(linep);
}

void
cmpline(char *pend)
{
	char *pstrt, *pchar, *cp;
	char **hp;
	int flag;

	pchar = line;
	if(rflag)
		while(pchar < pend && !isspace(*pchar))
			pchar++;
	while(pchar < pend){
		/* eliminate white space */
		if(isabreak(*pchar++))
			continue;
		pstrt = --pchar;

		flag = 1;
		while(flag){
			if(isabreak(*pchar)) {
				hp = &hasht[hash(pstrt,pchar)];
				pchar--;
				while(cp = *hp++){
					if(hp == &hasht[MAXT])
						hp = hasht;
					/* possible match */
					if(cmpword(pstrt,pchar,cp)){
						/* exact match */
						if(!ignore && only)
							putline(pstrt,pend);
						flag = 0;
						break;
					}
				}
				/* no match */
				if(flag){
					if(ignore || !only)
						putline(pstrt,pend);
					flag = 0;
				}
			}
			pchar++;
		}
	}
}

/*
 * Compare a word of the input line with a stored (lower-case) word.
 *
 *	cpp	first character of the word in the line
 *	pend	last character of the word in the line
 *	hpp	NUL-terminated word from the hash table
 *
 * Upper-case letters of the line are folded to lower case before the
 * comparison.  Returns 1 when the two words are identical in length
 * and content, 0 otherwise.
 */
int
cmpword(char *cpp, char *pend, char *hpp)
{
	char ch;
	int folded;

	for(; *hpp != '\0'; hpp++){
		ch = *cpp++;
		folded = ch;
		if(isupper(ch))
			folded = tolower(ch);
		if(folded != *hpp)
			return 0;
	}
	/* all of hpp matched; it is the whole word only if we stopped just past pend */
	if(cpp - 1 == pend)
		return 1;
	return 0;
}

/*
 * Write one rotated copy of the current line to sortptr:
 * the text from strt up to (not including) end, a blank, TILDE,
 * then the part of line[] that precedes strt, and a newline.
 */
void
putline(char *strt, char *end)
{
	char *p;

	/* keyword and everything after it */
	p = strt;
	while(p < end){
		putc(*p, sortptr);
		p++;
	}

	/* Add extra blank before TILDE to sort correctly with -fd option */
	putc(' ', sortptr);
	putc(TILDE, sortptr);

	/* the text that came before the keyword */
	p = line;
	while(p < strt){
		putc(*p, sortptr);
		p++;
	}
	putc('\n', sortptr);
}

/*
 * Build the mark file (kfile) that the formatter is run on to
 * measure the sorted lines.
 *
 * The file starts with the -c commands (if any), a fixed prologue
 * and the width requests written by setlen.  Every line of sortfile
 * is then copied inside a ".di xx" diversion with a \k(NN mark
 * inserted before each white-space character, and is followed by a
 * line that interpolates the number registers 01 .. NN so set.
 * Both files are closed before returning.
 */
void
makek(void)
{
	int ch, reg, nmarks;

	sortptr = fopen(sortfile,"r");
	if(sortptr == NULL)
		diag("Cannot open sorted data:",sortfile);
	kptr = fopen(kfile,"w");
	if(kptr == NULL)
		diag("Cannot create mark file:",kfile);

	if(cmds)
		fprintf(kptr,"%s\n",cmds);
	fprintf(kptr,
		".nf\n"
		".pl 1\n"
		".tr %c\\&\n", TILDE);
	setlen();

	/* nmarks is 0 between lines, otherwise the number of the next mark */
	nmarks = 0;
	for(;;){
		ch = getc(sortptr);
		if(ch == EOF)
			break;
		if(nmarks == 0){
			fprintf(kptr,".di xx\n");
			nmarks = 1;
		}
		if(ch == '\n'){
			/* end the diversion and ask for every mark that was set */
			fprintf(kptr,"\n.di\n");
			for(reg = 1; reg < nmarks; reg++)
				fprintf(kptr,"\\n(%.2d ",reg);
			fprintf(kptr,"\n");
			nmarks = 0;
		}else{
			if(isspace(ch)){
				fprintf(kptr,"\\k(%.2d",nmarks);
				nmarks++;
			}
			putc(ch,kptr);
		}
	}

	fclose(sortptr);
	fclose(kptr);
}

/*
 * Final pass: read the sorted lines (sortfile) and the matching lines
 * of widths (wfile) in step and print one ".xx" request per line on
 * standard output.
 *
 * For every sorted line the word[] array is filled with one entry per
 * white-space character: word[k].p points at the k-th white-space
 * character (word[0].p at the start of the line) and word[k].w is the
 * k-th number on the corresponding width line.  i3 is the index of the
 * last boundary seen before TILDE and i7 the index of the last
 * boundary on the line.  The indices i1, i2, i5 and i6 are then chosen
 * by comparing width differences against halflen, spacesl and hole,
 * and the line is printed as four quoted fields (five with -r), with
 * '/' written where a field is cut short.
 *
 * NOTE(review): i7 is not checked against the size of word[]
 * (LMAX/2 entries); a line containing more white-space characters
 * than that would run past the array - confirm whether the input can
 * produce such a line.
 */
void
getsort(void)
{
	char *tilde, *linep, *markp;
	int i0, i1, i2, i3, i4, i5, i6, i7, w0, w6;

	if((sortptr = fopen(sortfile, "r")) == NULL)
		diag("Cannot open sorted data:", sortfile);
	if((wptr = fopen(wfile, "r")) == NULL)
		diag("Cannot open width file:", wfile);
	/* fetch llen, spacesl, gutter and hole */
	getlen();

	halflen = (llen-gutter)/2;

	while(fgets(line, sizeof(line), sortptr) != NULL) {
		if(fgets(mark, sizeof(mark), wptr) == NULL)
			diag("Phase error 1: premature EOF on width file",
				wfile);
		linep = line;
		markp = mark;
		i3 = i7 = 0;
		word[i7].p = linep;
		word[i7].w = 0;
		/* record every white-space boundary and its width */
		for(linep=line; *linep; linep++) {
			if(*linep == TILDE)
				i3 = i7;
			else if(*linep == '\n')
				break;
			else if(isspace(*linep)) {
				i7++;
				word[i7].p = linep;
				/* markp is nil once the width line has run out of numbers */
				if(!markp)
					diag("Phase error 2: no widths for summary",
						line);
				word[i7].w = atoi(markp);
				markp = strchr(markp+1, ' ');
			}
		}
		/* i1: advance from the start while the text is narrower than halflen-spacesl */
		i0 = 0;
		for(i1=i0; i1<i3; i1++)
			if(word[i1+1].w - word[i0].w >= halflen - spacesl)
				break;
		w0 = word[i1].w - word[i0].w;
		/* with -r, i4 steps over one more boundary after the TILDE */
		i4 = i3 + rflag;
		/* i6: back up from the end while the text is narrower than halflen */
		for(i6 = i7; i6>i4; i6--)
			if(word[i7].w - word[i6-1].w >= halflen)
				break;
		w6 = word[i7].w - word[i6].w - spacesl;
		/* i2: continue past i1 while that text plus w6 stays below halflen-hole */
		for(i2=i1 ; i2<i3; i2++)
			if(word[i2+1].w - word[i1].w + w6 >= halflen-hole)
				break;
		/* i5: continue back from i6 while that text plus w0 stays below halflen-hole */
		for(i5=i6; i5>i4; i5--)
			if(word[i6].w - word[i5-1].w + w0 >= halflen-hole)
				break;

		printf(".xx \"");
		putout(word[i1].p+1,word[i2].p);
		if(i1<i2 && i2<i3) putchar('/');
		printf("\" \"");
		if(i5>i4 && i6==i5) putchar('/');
		putout(word[i6].p+1+(i6==i3),word[i7].p);
		printf("\" \"");
		putout(word[i0].p,word[i1].p);
		if(i2<i3 && i1==i2) putchar('/');
		printf("\" \"");
		if(i5>i4 && i6>i5) putchar('/');
		putout(word[i5].p+1+(i5==i3),word[i6].p);
		if(rflag) {
			/* fifth field: the text between boundaries i3 and i4 */
			printf("\" \"");
			putout(word[i3].p+2,word[i4].p);
		}
		printf("\"\n");
	}
}

/*
 * Copy the characters from strt up to (not including) end to outptr.
 * Nothing is written when strt is not below end.
 */
void
putout(char *strt, char *end)
{
	char *p;

	p = strt;
	while(p < end){
		putc(*p, outptr);
		p++;
	}
}

/*
 * Write four width requests to the mark file, one per line:
 * the line length (lenarg), the string " /", the gutter (gutarg)
 * and the hole (holarg).  getlen reads four values in this order.
 */
void
setlen(void)
{
	fprintf(kptr, "\\w'\\h'%s''\n", lenarg);
	fprintf(kptr, "\\w' /'\n");
	fprintf(kptr, "\\w'\\h'%s''\n", gutarg);
	fprintf(kptr, "\\w'\\h'%s''\n", holarg);
}

void
getlen(void)
{
	char s[128];

	s[0] = '\0';
	fgets(s,sizeof(s),kptr);
	llen = atoi(s);

	fgets(s,sizeof(s),kptr);
	spacesl = atoi(s);

	fgets(s,sizeof(s),kptr);
	gutter = atoi(s);

	fgets(s,sizeof(s),kptr);
	hole = atoi(s);
	if(hole < 2*spacesl)
		hole = 2*spacesl;
}

/*
 * Hash the word occupying [strtp, endp) to an index into hasht.
 *
 * The value is built from the first two and the last two characters
 * of the word, each folded to lower case, and masked with MASK.
 * Words of length 0 or 1 hash to 0; returning early for the empty
 * word avoids reading the characters before strtp.
 */
int
hash(char *strtp, char *endp)
{
	char *cp, c;
	int i, j, k;

	/* Return zero hash number for empty and single letter words */
	if((endp - strtp) <= 1)
		return(0);

	/* product of the first two characters */
	cp = strtp;
	c = *cp++;
	i = (isupper(c)?tolower(c):c);
	c = *cp;
	j = (isupper(c)?tolower(c):c);
	i = i*j;

	/* product of the last two characters */
	cp = --endp;
	c = *cp--;
	k = (isupper(c)?tolower(c):c);
	c = *cp;
	j = (isupper(c)?tolower(c):c);
	j = k*j;

	return (i ^ (j>>2)) & MASK;
}

/*
 * Enter the word strtp in hasht.  The search for a free slot starts at
 * index num, runs to the end of the table and then continues from the
 * beginning up to num-1.  Returns 0 on success, 1 when no slot is free.
 */
int
storeh(int num, char *strtp)
{
	int slot;

	/* from the home slot to the end of the table */
	slot = num;
	while(slot < MAXT){
		if(hasht[slot] == 0){
			hasht[slot] = strtp;
			return 0;
		}
		slot++;
	}

	/* wrap around: from the start of the table up to the home slot */
	slot = 0;
	while(slot < num){
		if(hasht[slot] == 0){
			hasht[slot] = strtp;
			return 0;
		}
		slot++;
	}

	return 1;
}