ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /appl/cmd/yacc.b/
implement Yacc;
include "sys.m";
sys: Sys;
print, fprint, sprint: import sys;
UTFmax: import Sys;
include "bufio.m";
bufio: Bufio;
Iobuf: import bufio;
include "draw.m";
Yacc: module
{
init: fn(ctxt: ref Draw->Context, argv: list of string);
};
Arg: adt
{
argv: list of string;
c: int;
opts: string;
init: fn(argv: list of string): ref Arg;
opt: fn(arg: self ref Arg): int;
arg: fn(arg: self ref Arg): string;
};
PARSER: con "/lib/yaccpar";
OFILE: con "tab.b";
FILEU: con "output";
FILED: con "tab.m";
FILEDEBUG: con "debug";
# the following are adjustable
# according to memory size
ACTSIZE: con 30000;
NSTATES: con 2000;
TEMPSIZE: con 2000;
SYMINC: con 50; # increase for non-term or term
RULEINC: con 50; # increase for max rule length prodptr[i]
PRODINC: con 100; # increase for productions prodptr
WSETINC: con 50; # increase for working sets wsets
STATEINC: con 200; # increase for states statemem
NAMESIZE: con 50;
NTYPES: con 63;
ISIZE: con 400;
PRIVATE: con 16rE000; # unicode private use
# relationships which must hold:
# TEMPSIZE >= NTERMS + NNONTERM + 1
# TEMPSIZE >= NSTATES
#
NTBASE: con 8r10000;
ERRCODE: con 8190;
ACCEPTCODE: con 8191;
YYLEXUNK: con 3;
TOKSTART: con 4; #index of first defined token
# no, left, right, binary assoc.
NOASC, LASC, RASC, BASC: con iota;
# flags for state generation
DONE, MUSTDO, MUSTLOOKAHEAD: con iota;
# flags for a rule having an action, and being reduced
ACTFLAG: con 16r4;
REDFLAG: con 16r8;
# output parser flags
YYFLAG1: con -1000;
# parse tokens
IDENTIFIER, MARK, TERM, LEFT, RIGHT, BINARY, PREC, LCURLY, IDENTCOLON, NUMBER, START, TYPEDEF, TYPENAME, MODULE: con PRIVATE+iota;
ENDFILE: con 0;
EMPTY: con 1;
WHOKNOWS: con 0;
OK: con 1;
NOMORE: con -1000;
# macros for getting associativity and precedence levels
ASSOC(i: int): int
{
return i & 3;
}
PLEVEL(i: int): int
{
return (i >> 4) & 16r3f;
}
TYPE(i: int): int
{
return (i >> 10) & 16r3f;
}
# macros for setting associativity and precedence levels
SETASC(i, j: int): int
{
return i | j;
}
SETPLEV(i, j: int): int
{
return i | (j << 4);
}
SETTYPE(i, j: int): int
{
return i | (j << 10);
}
# I/O descriptors
stderr: ref Sys->FD;
fdefine: ref Iobuf; # file for module definition
fdebug: ref Iobuf; # y.debug for strings for debugging
ftable: ref Iobuf; # y.tab.c file
finput: ref Iobuf; # input file
foutput: ref Iobuf; # y.output file
CodeData, CodeMod, CodeAct: con iota;
NCode: con 8192;
Code: adt
{
kind: int;
data: array of byte;
ndata: int;
next: cyclic ref Code;
};
codehead: ref Code;
codetail: ref Code;
modname: string; # name of module
suppressmod: int; # suppress module definition
stacksize := 200;
# communication variables between various I/O routines
infile: string; # input file name
numbval: int; # value of an input number
tokname: string; # input token name, slop for runes and 0
# structure declarations
Lkset: type array of int;
Pitem: adt
{
prod: array of int;
off: int; # offset within the production
first: int; # first term or non-term in item
prodno: int; # production number for sorting
};
Item: adt
{
pitem: Pitem;
look: Lkset;
};
Symb: adt
{
name: string;
value: int;
};
Wset: adt
{
pitem: Pitem;
flag: int;
ws: Lkset;
};
# storage of names
parser := PARSER;
yydebug: string;
# storage of types
ntypes: int; # number of types defined
typeset := array[NTYPES] of string; # pointers to type tags
# token information
ntokens := 0; # number of tokens
tokset: array of Symb;
toklev: array of int; # vector with the precedence of the terminals
# nonterminal information
nnonter := -1; # the number of nonterminals
nontrst: array of Symb;
start: int; # start symbol
# state information
nstate := 0; # number of states
pstate := array[NSTATES+2] of int; # index into statemem to the descriptions of the states
statemem : array of Item;
tystate := array[NSTATES] of int; # contains type information about the states
tstates : array of int; # states generated by terminal gotos
ntstates : array of int; # states generated by nonterminal gotos
mstates := array[NSTATES] of {* => 0}; # chain of overflows of term/nonterm generation lists
lastred: int; # number of last reduction of a state
defact := array[NSTATES] of int; # default actions of states
# lookahead set information
lkst: array of Lkset;
nolook := 0; # flag to turn off lookahead computations
tbitset := 0; # size of lookahead sets
clset: Lkset; # temporary storage for lookahead computations
# working set information
wsets: array of Wset;
cwp: int;
# storage for action table
amem: array of int; # action table storage
memp: int; # next free action table position
indgo := array[NSTATES] of int; # index to the stored goto table
# temporary vector, indexable by states, terms, or ntokens
temp1 := array[TEMPSIZE] of int; # temporary storage, indexed by terms + ntokens or states
lineno := 1; # current input line number
fatfl := 1; # if on, error is fatal
nerrors := 0; # number of errors
# assigned token type values
extval := 0;
ytabc := OFILE; # name of y.tab.c
# grammar rule information
nprod := 1; # number of productions
prdptr: array of array of int; # pointers to descriptions of productions
levprd: array of int; # precedence levels for the productions
rlines: array of int; # line number for this rule
# statistics collection variables
zzgoent := 0;
zzgobest := 0;
zzacent := 0;
zzexcp := 0;
zzclose := 0;
zzrrconf := 0;
zzsrconf := 0;
zzstate := 0;
# optimizer arrays
yypgo: array of array of int;
optst: array of array of int;
ggreed: array of int;
pgo: array of int;
maxspr: int; # maximum spread of any entry
maxoff: int; # maximum offset into a array
maxa: int;
# storage for information about the nonterminals
pres: array of array of array of int; # vector of pointers to productions yielding each nonterminal
pfirst: array of Lkset;
pempty: array of int; # vector of nonterminals nontrivially deriving e
# random stuff picked out from between functions
indebug := 0; # debugging flag for cpfir
pidebug := 0; # debugging flag for putitem
gsdebug := 0; # debugging flag for stagen
cldebug := 0; # debugging flag for closure
pkdebug := 0; # debugging flag for apack
g2debug := 0; # debugging for go2gen
adb := 0; # debugging for callopt
Resrv : adt
{
name: string;
value: int;
};
resrv := array[] of {
Resrv("binary", BINARY),
Resrv("module", MODULE),
Resrv("left", LEFT),
Resrv("nonassoc", BINARY),
Resrv("prec", PREC),
Resrv("right", RIGHT),
Resrv("start", START),
Resrv("term", TERM),
Resrv("token", TERM),
Resrv("type", TYPEDEF),};
zznewstate := 0;
init(nil: ref Draw->Context, argv: list of string)
{
sys = load Sys Sys->PATH;
bufio = load Bufio Bufio->PATH;
stderr = sys->fildes(2);
setup(argv); # initialize and read productions
tbitset = (ntokens+32)/32;
cpres(); # make table of which productions yield a given nonterminal
cempty(); # make a table of which nonterminals can match the empty string
cpfir(); # make a table of firsts of nonterminals
stagen(); # generate the states
yypgo = array[nnonter+1] of array of int;
optst = array[nstate] of array of int;
output(); # write the states and the tables
go2out();
hideprod();
summary();
callopt();
others();
if(fdefine != nil)
fdefine.close();
if(fdebug != nil)
fdebug.close();
if(ftable != nil)
ftable.close();
if(foutput != nil)
foutput.close();
}
setup(argv: list of string)
{
j, ty: int;
ytab := 0;
vflag := 0;
dflag := 0;
stem := 0;
stemc := "y";
foutput = nil;
fdefine = nil;
fdebug = nil;
arg := Arg.init(argv);
while(c := arg.opt()){
case c{
'v' or 'V' =>
vflag++;
'D' =>
yydebug = arg.arg();
'd' =>
dflag++;
'n' =>
stacksize = int arg.arg();
'o' =>
ytab++;
ytabc = arg.arg();
's' =>
stem++;
stemc = arg.arg();
'm' =>
suppressmod++;
* =>
usage();
}
}
argv = arg.argv;
if(len argv != 1)
usage();
if (suppressmod && dflag) {
sys->fprint(stderr, "yacc: -m and -d are exclusive\n");
usage();
}
if (stacksize < 1) {
sys->fprint(stderr, "yacc: stack size too small\n");
usage();
}
infile = hd argv;
finput = bufio->open(infile, Bufio->OREAD);
if(finput == nil)
error("cannot open '"+infile+"'");
openup(stemc, dflag, vflag, ytab, ytabc);
defin(0, "$end");
extval = PRIVATE; # tokens start in unicode 'private use'
defin(0, "error");
defin(1, "$accept");
defin(0, "$unk");
i := 0;
for(t := gettok(); t != MARK && t != ENDFILE; )
case t {
';' =>
t = gettok();
START =>
if(gettok() != IDENTIFIER)
error("bad %%start construction");
start = chfind(1, tokname);
t = gettok();
TYPEDEF =>
if(gettok() != TYPENAME)
error("bad syntax in %%type");
ty = numbval;
for(;;) {
t = gettok();
case t {
IDENTIFIER =>
if((t=chfind(1, tokname)) < NTBASE) {
j = TYPE(toklev[t]);
if(j != 0 && j != ty)
error("type redeclaration of token "+
tokset[t].name);
else
toklev[t] = SETTYPE(toklev[t], ty);
} else {
j = nontrst[t-NTBASE].value;
if(j != 0 && j != ty)
error("type redeclaration of nonterminal "+
nontrst[t-NTBASE].name);
else
nontrst[t-NTBASE].value = ty;
}
continue;
',' =>
continue;
';' =>
t = gettok();
}
break;
}
MODULE =>
cpymodule();
t = gettok();
LEFT or BINARY or RIGHT or TERM =>
# nonzero means new prec. and assoc.
lev := t-TERM;
if(lev)
i++;
ty = 0;
# get identifiers so defined
t = gettok();
# there is a type defined
if(t == TYPENAME) {
ty = numbval;
t = gettok();
}
for(;;) {
case t {
',' =>
t = gettok();
continue;
';' =>
break;
IDENTIFIER =>
j = chfind(0, tokname);
if(j >= NTBASE)
error(tokname+" defined earlier as nonterminal");
if(lev) {
if(ASSOC(toklev[j]))
error("redeclaration of precedence of "+tokname);
toklev[j] = SETASC(toklev[j], lev);
toklev[j] = SETPLEV(toklev[j], i);
}
if(ty) {
if(TYPE(toklev[j]))
error("redeclaration of type of "+tokname);
toklev[j] = SETTYPE(toklev[j],ty);
}
t = gettok();
if(t == NUMBER) {
tokset[j].value = numbval;
t = gettok();
}
continue;
}
break;
}
LCURLY =>
cpycode();
t = gettok();
* =>
error("syntax error");
}
if(t == ENDFILE)
error("unexpected EOF before %%");
if(modname == nil)
error("missing %module specification");
moreprod();
prdptr[0] = array[4] of {
NTBASE, # added production
start, # if start is 0, we will overwrite with the lhs of the first rule
1,
0
};
nprod = 1;
curprod := array[RULEINC] of int;
t = gettok();
if(t != IDENTCOLON)
error("bad syntax on first rule");
if(!start)
prdptr[0][1] = chfind(1, tokname);
# read rules
# put into prdptr array in the format
# target
# followed by id's of terminals and non-terminals
# followd by -nprod
while(t != MARK && t != ENDFILE) {
mem := 0;
# process a rule
rlines[nprod] = lineno;
if(t == '|')
curprod[mem++] = prdptr[nprod-1][0];
else if(t == IDENTCOLON) {
curprod[mem] = chfind(1, tokname);
if(curprod[mem] < NTBASE)
error("token illegal on LHS of grammar rule");
mem++;
} else
error("illegal rule: missing semicolon or | ?");
# read rule body
t = gettok();
for(;;){
while(t == IDENTIFIER) {
curprod[mem] = chfind(1, tokname);
if(curprod[mem] < NTBASE)
levprd[nprod] = toklev[curprod[mem]];
mem++;
if(mem >= len curprod){
ncurprod := array[mem+RULEINC] of int;
ncurprod[0:] = curprod;
curprod = ncurprod;
}
t = gettok();
}
if(t == PREC) {
if(gettok() != IDENTIFIER)
error("illegal %%prec syntax");
j = chfind(2, tokname);
if(j >= NTBASE)
error("nonterminal "+nontrst[j-NTBASE].name+" illegal after %%prec");
levprd[nprod] = toklev[j];
t = gettok();
}
if(t != '=')
break;
levprd[nprod] |= ACTFLAG;
addcode(CodeAct, "\n"+string nprod+"=>");
cpyact(curprod, mem);
# action within rule...
if((t=gettok()) == IDENTIFIER) {
# make it a nonterminal
j = chfind(1, "$$"+string nprod);
#
# the current rule will become rule number nprod+1
# enter null production for action
#
prdptr[nprod] = array[2] of {j, -nprod};
# update the production information
nprod++;
moreprod();
levprd[nprod] = levprd[nprod-1] & ~ACTFLAG;
levprd[nprod-1] = ACTFLAG;
rlines[nprod] = lineno;
# make the action appear in the original rule
curprod[mem++] = j;
if(mem >= len curprod){
ncurprod := array[mem+RULEINC] of int;
ncurprod[0:] = curprod;
curprod = ncurprod;
}
}
}
while(t == ';')
t = gettok();
curprod[mem++] = -nprod;
# check that default action is reasonable
if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[curprod[0]-NTBASE].value) {
# no explicit action, LHS has value
tempty := curprod[1];
if(tempty < 0)
error("must return a value, since LHS has a type");
else
if(tempty >= NTBASE)
tempty = nontrst[tempty-NTBASE].value;
else
tempty = TYPE(toklev[tempty]);
if(tempty != nontrst[curprod[0]-NTBASE].value)
error("default action causes potential type clash");
else{
addcodec(CodeAct, '\n');
addcode(CodeAct, string nprod);
addcode(CodeAct, "=>\nyyval.");
addcode(CodeAct, typeset[tempty]);
addcode(CodeAct, " = yys[yyp+1].yyv.");
addcode(CodeAct, typeset[tempty]);
addcodec(CodeAct, ';');
}
}
moreprod();
prdptr[nprod] = array[mem] of int;
prdptr[nprod][0:] = curprod[:mem];
nprod++;
moreprod();
levprd[nprod] = 0;
}
#
# end of all rules
# dump out the prefix code
#
ftable.puts("implement ");
ftable.puts(modname);
ftable.puts(";\n");
dumpcode(CodeMod);
dumpmod();
dumpcode(CodeAct);
ftable.puts("YYEOFCODE: con 1;\n");
ftable.puts("YYERRCODE: con 2;\n");
ftable.puts("YYMAXDEPTH: con " + string stacksize + ";\n"); # was 150
#ftable.puts("yyval: YYSTYPE;\n");
#
# copy any postfix code
#
if(t == MARK) {
ftable.puts("\n#line\t");
ftable.puts(string lineno);
ftable.puts("\t\"");
ftable.puts(infile);
ftable.puts("\"\n");
while((c=finput.getc()) != Bufio->EOF)
ftable.putc(c);
}
finput.close();
}
#
# allocate enough room to hold another production
#
moreprod()
{
n := len prdptr;
if(nprod < n)
return;
n += PRODINC;
aprod := array[n] of array of int;
aprod[0:] = prdptr;
prdptr = aprod;
alevprd := array[n] of int;
alevprd[0:] = levprd;
levprd = alevprd;
arlines := array[n] of int;
arlines[0:] = rlines;
rlines = arlines;
}
#
# define s to be a terminal if t=0
# or a nonterminal if t=1
#
defin(nt: int, s: string): int
{
val := 0;
if(nt) {
nnonter++;
if(nnonter >= len nontrst){
anontrst := array[nnonter + SYMINC] of Symb;
anontrst[0:] = nontrst;
nontrst = anontrst;
}
nontrst[nnonter] = Symb(s, 0);
return NTBASE + nnonter;
}
# must be a token
ntokens++;
if(ntokens >= len tokset){
atokset := array[ntokens + SYMINC] of Symb;
atokset[0:] = tokset;
tokset = atokset;
atoklev := array[ntokens + SYMINC] of int;
atoklev[0:] = toklev;
toklev = atoklev;
}
tokset[ntokens].name = s;
toklev[ntokens] = 0;
# establish value for token
# single character literal
if(s[0] == ' ' && len s == 1+1){
val = s[1];
}else if(s[0] == ' ' && s[1] == '\\') { # escape sequence
if(len s == 2+1) {
# single character escape sequence
case s[2] {
'\'' => val = '\'';
'"' => val = '"';
'\\' => val = '\\';
'a' => val = '\a';
'b' => val = '\b';
'n' => val = '\n';
'r' => val = '\r';
't' => val = '\t';
'v' => val = '\v';
* =>
error("invalid escape "+s[1:3]);
}
}else if(s[2] == 'u' && len s == 2+1+4) { # \unnnn sequence
val = 0;
s = s[3:];
while(s != ""){
c := s[0];
if(c >= '0' && c <= '9')
c -= '0';
else if(c >= 'a' && c <= 'f')
c -= 'a' - 10;
else if(c >= 'A' && c <= 'F')
c -= 'A' - 10;
else
error("illegal \\unnnn construction");
val = val * 16 + c;
s = s[1:];
}
if(val == 0)
error("'\\u0000' is illegal");
}else
error("unknown escape");
}else
val = extval++;
tokset[ntokens].value = val;
return ntokens;
}
peekline := 0;
gettok(): int
{
i, match, c: int;
tokname = "";
for(;;){
lineno += peekline;
peekline = 0;
c = finput.getc();
while(c == ' ' || c == '\n' || c == '\t' || c == '\v' || c == '\r') {
if(c == '\n')
lineno++;
c = finput.getc();
}
# skip comment
if(c != '#')
break;
lineno += skipcom();
}
case c {
Bufio->EOF =>
return ENDFILE;
'{' =>
finput.ungetc();
return '=';
'<' =>
# get, and look up, a type name (union member name)
i = 0;
while((c=finput.getc()) != '>' && c != Bufio->EOF && c != '\n')
tokname[i++] = c;
if(c != '>')
error("unterminated < ... > clause");
for(i=1; i<=ntypes; i++)
if(typeset[i] == tokname) {
numbval = i;
return TYPENAME;
}
ntypes++;
numbval = ntypes;
typeset[numbval] = tokname;
return TYPENAME;
'"' or '\'' =>
match = c;
tokname[0] = ' ';
i = 1;
for(;;) {
c = finput.getc();
if(c == '\n' || c == Bufio->EOF)
error("illegal or missing ' or \"" );
if(c == '\\') {
tokname[i++] = '\\';
c = finput.getc();
} else if(c == match)
return IDENTIFIER;
tokname[i++] = c;
}
'%' =>
case c = finput.getc(){
'%' => return MARK;
'=' => return PREC;
'{' => return LCURLY;
}
getword(c);
# find a reserved word
for(c=0; c < len resrv; c++)
if(tokname == resrv[c].name)
return resrv[c].value;
error("invalid escape, or illegal reserved word: "+tokname);
'0' to '9' =>
numbval = c - '0';
while(isdigit(c = finput.getc()))
numbval = numbval*10 + c-'0';
finput.ungetc();
return NUMBER;
* =>
if(isword(c) || c=='.' || c=='$')
getword(c);
else
return c;
}
# look ahead to distinguish IDENTIFIER from IDENTCOLON
c = finput.getc();
while(c == ' ' || c == '\t'|| c == '\n' || c == '\v' || c == '\r' || c == '#') {
if(c == '\n')
peekline++;
# look for comments
if(c == '#')
peekline += skipcom();
c = finput.getc();
}
if(c == ':')
return IDENTCOLON;
finput.ungetc();
return IDENTIFIER;
}
getword(c: int)
{
i := 0;
while(isword(c) || isdigit(c) || c == '_' || c=='.' || c=='$') {
tokname[i++] = c;
c = finput.getc();
}
finput.ungetc();
}
#
# determine the type of a symbol
#
fdtype(t: int): int
{
v : int;
s: string;
if(t >= NTBASE) {
v = nontrst[t-NTBASE].value;
s = nontrst[t-NTBASE].name;
} else {
v = TYPE(toklev[t]);
s = tokset[t].name;
}
if(v <= 0)
error("must specify type for "+s);
return v;
}
chfind(t: int, s: string): int
{
if(s[0] == ' ')
t = 0;
for(i:=0; i<=ntokens; i++)
if(s == tokset[i].name)
return i;
for(i=0; i<=nnonter; i++)
if(s == nontrst[i].name)
return NTBASE+i;
# cannot find name
if(t > 1)
error(s+" should have been defined earlier");
return defin(t, s);
}
#
# saves module definition in Code
#
cpymodule()
{
if(gettok() != IDENTIFIER)
error("bad %%module construction");
if(modname != nil)
error("duplicate %%module construction");
modname = tokname;
level := 0;
for(;;) {
if((c:=finput.getc()) == Bufio->EOF)
error("EOF encountered while processing %%module");
case c {
'\n' =>
lineno++;
'{' =>
level++;
if(level == 1)
continue;
'}' =>
level--;
# we are finished copying
if(level == 0)
return;
}
addcodec(CodeMod, c);
}
if(codehead == nil || codetail.kind != CodeMod)
addcodec(CodeMod, '\n'); # ensure we add something
}
#
# saves code between %{ and %}
#
cpycode()
{
c := finput.getc();
if(c == '\n') {
c = finput.getc();
lineno++;
}
addcode(CodeData, "\n#line\t" + string lineno + "\t\"" + infile + "\"\n");
while(c != Bufio->EOF) {
if(c == '%') {
if((c=finput.getc()) == '}')
return;
addcodec(CodeData, '%');
}
addcodec(CodeData, c);
if(c == '\n')
lineno++;
c = finput.getc();
}
error("eof before %%}");
}
addcode(k: int, s: string)
{
for(i := 0; i < len s; i++)
addcodec(k, s[i]);
}
addcodec(k, c: int)
{
if(codehead == nil
|| k != codetail.kind
|| codetail.ndata >= NCode){
cd := ref Code(k, array[NCode+UTFmax] of byte, 0, nil);
if(codehead == nil)
codehead = cd;
else
codetail.next = cd;
codetail = cd;
}
codetail.ndata += sys->char2byte(c, codetail.data, codetail.ndata);
}
dumpcode(til: int)
{
for(; codehead != nil; codehead = codehead.next){
if(codehead.kind == til)
return;
if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata)
error("can't write output file");
}
}
#
# write out the module declaration and any token info
#
dumpmod()
{
if(fdefine != nil) {
fdefine.puts(modname);
fdefine.puts(": module {\n");
}
if (!suppressmod) {
ftable.puts(modname);
ftable.puts(": module {\n");
}
for(; codehead != nil; codehead = codehead.next){
if(codehead.kind != CodeMod)
break;
if(ftable.write(codehead.data, codehead.ndata) != codehead.ndata)
error("can't write output file");
if(fdefine != nil && fdefine.write(codehead.data, codehead.ndata) != codehead.ndata)
error("can't write define file");
}
for(i:=TOKSTART; i<=ntokens; i++) {
# non-literals
c := tokset[i].name[0];
if(c != ' ' && c != '$') {
s := tokset[i].name+": con "+string tokset[i].value+";\n";
ftable.puts(s);
if(fdefine != nil)
fdefine.puts(s);
}
}
if(fdefine != nil)
fdefine.puts("};\n");
if (!suppressmod)
ftable.puts("\n};\n");
if(fdebug != nil) {
fdebug.puts("yytoknames = array[] of {\n");
for(i=1; i<=ntokens; i++) {
if(tokset[i].name != nil)
fdebug.puts("\t\""+chcopy(tokset[i].name)+"\",\n");
else
fdebug.puts("\t\"\",\n");
}
fdebug.puts("};\n");
}
}
#
# skip over comments
# skipcom is called after reading a '#'
#
skipcom(): int
{
c := finput.getc();
while(c != Bufio->EOF) {
if(c == '\n')
return 1;
c = finput.getc();
}
error("EOF inside comment");
return 0;
}
#
# copy limbo action to the next ; or closing }
#
cpyact(curprod: array of int, max: int)
{
addcode(CodeAct, "\n#line\t");
addcode(CodeAct, string lineno);
addcode(CodeAct, "\t\"");
addcode(CodeAct, infile);
addcode(CodeAct, "\"\n");
brac := 0;
loop: for(;;){
c := finput.getc();
swt: case c {
';' =>
if(brac == 0) {
addcodec(CodeAct, c);
return;
}
'{' =>
brac++;
'$' =>
s := 1;
tok := -1;
c = finput.getc();
# type description
if(c == '<') {
finput.ungetc();
if(gettok() != TYPENAME)
error("bad syntax on $<ident> clause");
tok = numbval;
c = finput.getc();
}
if(c == '$') {
addcode(CodeAct, "yyval");
# put out the proper tag...
if(ntypes) {
if(tok < 0)
tok = fdtype(curprod[0]);
addcode(CodeAct, "."+typeset[tok]);
}
continue loop;
}
if(c == '-') {
s = -s;
c = finput.getc();
}
j := 0;
if(isdigit(c)) {
while(isdigit(c)) {
j = j*10 + c-'0';
c = finput.getc();
}
finput.ungetc();
j = j*s;
if(j >= max)
error("Illegal use of $" + string j);
}else if(isword(c) || c == '_' || c == '.') {
# look for $name
finput.ungetc();
if(gettok() != IDENTIFIER)
error("$ must be followed by an identifier");
tokn := chfind(2, tokname);
fnd := -1;
if((c = finput.getc()) != '@')
finput.ungetc();
else if(gettok() != NUMBER)
error("@ must be followed by number");
else
fnd = numbval;
for(j=1; j<max; j++){
if(tokn == curprod[j]) {
fnd--;
if(fnd <= 0)
break;
}
}
if(j >= max)
error("$name or $name@number not found");
}else{
addcodec(CodeAct, '$');
if(s < 0)
addcodec(CodeAct, '-');
finput.ungetc();
continue loop;
}
addcode(CodeAct, "yys[yypt-" + string(max-j-1) + "].yyv");
# put out the proper tag
if(ntypes) {
if(j <= 0 && tok < 0)
error("must specify type of $" + string j);
if(tok < 0)
tok = fdtype(curprod[j]);
addcodec(CodeAct, '.');
addcode(CodeAct, typeset[tok]);
}
continue loop;
'}' =>
brac--;
if(brac)
break;
addcodec(CodeAct, c);
return;
'#' =>
# a comment
addcodec(CodeAct, c);
c = finput.getc();
while(c != Bufio->EOF) {
if(c == '\n') {
lineno++;
break swt;
}
addcodec(CodeAct, c);
c = finput.getc();
}
error("EOF inside comment");
'\''or '"' =>
# character string or constant
match := c;
addcodec(CodeAct, c);
while(c = finput.getc()) {
if(c == '\\') {
addcodec(CodeAct, c);
c = finput.getc();
if(c == '\n')
lineno++;
} else if(c == match)
break swt;
if(c == '\n')
error("newline in string or char const.");
addcodec(CodeAct, c);
}
error("EOF in string or character constant");
Bufio->EOF =>
error("action does not terminate");
'\n' =>
lineno++;
}
addcodec(CodeAct, c);
}
}
openup(stem: string, dflag, vflag, ytab: int, ytabc: string)
{
buf: string;
if(vflag) {
buf = stem + "." + FILEU;
foutput = bufio->create(buf, Bufio->OWRITE, 8r666);
if(foutput == nil)
error("can't create " + buf);
}
if(yydebug != nil) {
buf = stem + "." + FILEDEBUG;
fdebug = bufio->create(buf, Bufio->OWRITE, 8r666);
if(fdebug == nil)
error("can't create " + buf);
}
if(dflag) {
buf = stem + "." + FILED;
fdefine = bufio->create(buf, Bufio->OWRITE, 8r666);
if(fdefine == nil)
error("can't create " + buf);
}
if(ytab == 0)
buf = stem + "." + OFILE;
else
buf = ytabc;
ftable = bufio->create(buf, Bufio->OWRITE, 8r666);
if(ftable == nil)
error("can't create file " + buf);
}
#
# return a pointer to the name of symbol i
#
symnam(i: int): string
{
s: string;
if(i >= NTBASE)
s = nontrst[i-NTBASE].name;
else
s = tokset[i].name;
if(s[0] == ' ')
s = s[1:];
return s;
}
#
# write out error comment
#
error(s: string)
{
nerrors++;
fprint(stderr, "yacc: fatal error: %s, %s:%d\n", s, infile, lineno);
if(!fatfl)
return;
summary();
raise "fail:error";
}
#
# set elements 0 through n-1 to c
#
aryfil(v: array of int, n, c: int)
{
for(i:=0; i<n; i++)
v[i] = c;
}
#
# compute an array with the beginnings of productions yielding given nonterminals
# The array pres points to these lists
# the array pyield has the lists: the total size is only NPROD+1
#
cpres()
{
pres = array[nnonter+1] of array of array of int;
curres := array[nprod] of array of int;
for(i:=0; i<=nnonter; i++) {
n := 0;
c := i+NTBASE;
fatfl = 0; # make undefined symbols nonfatal
for(j:=0; j<nprod; j++)
if(prdptr[j][0] == c)
curres[n++] = prdptr[j][1:];
if(n == 0)
error("nonterminal " + nontrst[i].name + " not defined!");
else{
pres[i] = array[n] of array of int;
pres[i][0:] = curres[:n];
}
}
fatfl = 1;
if(nerrors) {
summary();
raise "fail:error";
}
}
dumppres()
{
for(i := 0; i <= nnonter; i++){
print("nonterm %d\n", i);
curres := pres[i];
for(j := 0; j < len curres; j++){
print("\tproduction %d:", j);
prd := curres[j];
for(k := 0; k < len prd; k++)
print(" %d", prd[k]);
print("\n");
}
}
}
#
# mark nonterminals which derive the empty string
# also, look for nonterminals which don't derive any token strings
#
cempty()
{
i, p, np: int;
prd: array of int;
pempty = array[nnonter+1] of int;
# first, use the array pempty to detect productions that can never be reduced
# set pempty to WHONOWS
aryfil(pempty, nnonter+1, WHOKNOWS);
# now, look at productions, marking nonterminals which derive something
more: for(;;){
for(i=0; i<nprod; i++) {
prd = prdptr[i];
if(pempty[prd[0] - NTBASE])
continue;
np = len prd - 1;
for(p = 1; p < np; p++)
if(prd[p] >= NTBASE && pempty[prd[p]-NTBASE] == WHOKNOWS)
break;
# production can be derived
if(p == np) {
pempty[prd[0]-NTBASE] = OK;
continue more;
}
}
break;
}
# now, look at the nonterminals, to see if they are all OK
for(i=0; i<=nnonter; i++) {
# the added production rises or falls as the start symbol ...
if(i == 0)
continue;
if(pempty[i] != OK) {
fatfl = 0;
error("nonterminal " + nontrst[i].name + " never derives any token string");
}
}
if(nerrors) {
summary();
raise "fail:error";
}
# now, compute the pempty array, to see which nonterminals derive the empty string
# set pempty to WHOKNOWS
aryfil(pempty, nnonter+1, WHOKNOWS);
# loop as long as we keep finding empty nonterminals
again: for(;;){
next: for(i=1; i<nprod; i++) {
# not known to be empty
prd = prdptr[i];
if(pempty[prd[0]-NTBASE] != WHOKNOWS)
continue;
np = len prd - 1;
for(p = 1; p < np; p++)
if(prd[p] < NTBASE || pempty[prd[p]-NTBASE] != EMPTY)
continue next;
# we have a nontrivially empty nonterminal
pempty[prd[0]-NTBASE] = EMPTY;
# got one ... try for another
continue again;
}
return;
}
}
dumpempty()
{
for(i := 0; i <= nnonter; i++)
if(pempty[i] == EMPTY)
print("non-term %d %s matches empty\n", i, symnam(i+NTBASE));
}
#
# compute an array with the first of nonterminals
#
cpfir()
{
s, n, p, np, ch: int;
curres: array of array of int;
prd: array of int;
wsets = array[nnonter+WSETINC] of Wset;
pfirst = array[nnonter+1] of Lkset;
for(i:=0; i<=nnonter; i++) {
wsets[i].ws = mkset();
pfirst[i] = mkset();
curres = pres[i];
n = len curres;
# initially fill the sets
for(s = 0; s < n; s++) {
prd = curres[s];
np = len prd - 1;
for(p = 0; p < np; p++) {
ch = prd[p];
if(ch < NTBASE) {
setbit(pfirst[i], ch);
break;
}
if(!pempty[ch-NTBASE])
break;
}
}
}
# now, reflect transitivity
changes := 1;
while(changes) {
changes = 0;
for(i=0; i<=nnonter; i++) {
curres = pres[i];
n = len curres;
for(s = 0; s < n; s++) {
prd = curres[s];
np = len prd - 1;
for(p = 0; p < np; p++) {
ch = prd[p] - NTBASE;
if(ch < 0)
break;
changes |= setunion(pfirst[i], pfirst[ch]);
if(!pempty[ch])
break;
}
}
}
}
if(!indebug)
return;
if(foutput != nil){
for(i=0; i<=nnonter; i++) {
foutput.putc('\n');
foutput.puts(nontrst[i].name);
foutput.puts(": ");
prlook(pfirst[i]);
foutput.putc(' ');
foutput.puts(string pempty[i]);
foutput.putc('\n');
}
}
}
#
# generate the states
#
stagen()
{
# initialize
nstate = 0;
tstates = array[ntokens+1] of {* => 0}; # states generated by terminal gotos
ntstates = array[nnonter+1] of {* => 0};# states generated by nonterminal gotos
amem = array[ACTSIZE] of {* => 0};
memp = 0;
clset = mkset();
pstate[0] = pstate[1] = 0;
aryfil(clset, tbitset, 0);
putitem(Pitem(prdptr[0], 0, 0, 0), clset);
tystate[0] = MUSTDO;
nstate = 1;
pstate[2] = pstate[1];
#
# now, the main state generation loop
# first pass generates all of the states
# later passes fix up lookahead
# could be sped up a lot by remembering
# results of the first pass rather than recomputing
#
first := 1;
for(more := 1; more; first = 0){
more = 0;
for(i:=0; i<nstate; i++) {
if(tystate[i] != MUSTDO)
continue;
tystate[i] = DONE;
aryfil(temp1, nnonter+1, 0);
# take state i, close it, and do gotos
closure(i);
# generate goto's
for(p:=0; p<cwp; p++) {
pi := wsets[p];
if(pi.flag)
continue;
wsets[p].flag = 1;
c := pi.pitem.first;
if(c <= 1) {
if(pstate[i+1]-pstate[i] <= p)
tystate[i] = MUSTLOOKAHEAD;
continue;
}
# do a goto on c
putitem(wsets[p].pitem, wsets[p].ws);
for(q:=p+1; q<cwp; q++) {
# this item contributes to the goto
if(c == wsets[q].pitem.first) {
putitem(wsets[q].pitem, wsets[q].ws);
wsets[q].flag = 1;
}
}
if(c < NTBASE)
state(c); # register new state
else
temp1[c-NTBASE] = state(c);
}
if(gsdebug && foutput != nil) {
foutput.puts(string i + ": ");
for(j:=0; j<=nnonter; j++)
if(temp1[j])
foutput.puts(nontrst[j].name + " " + string temp1[j] + ", ");
foutput.putc('\n');
}
if(first)
indgo[i] = apack(temp1[1:], nnonter-1) - 1;
more++;
}
}
}
#
# generate the closure of state i
#
closure(i: int)
{
zzclose++;
# first, copy kernel of state i to wsets
cwp = 0;
q := pstate[i+1];
for(p:=pstate[i]; p<q; p++) {
wsets[cwp].pitem = statemem[p].pitem;
wsets[cwp].flag = 1; # this item must get closed
wsets[cwp].ws[0:] = statemem[p].look;
cwp++;
}
# now, go through the loop, closing each item
work := 1;
while(work) {
work = 0;
for(u:=0; u<cwp; u++) {
if(wsets[u].flag == 0)
continue;
# dot is before c
c := wsets[u].pitem.first;
if(c < NTBASE) {
wsets[u].flag = 0;
# only interesting case is where . is before nonterminal
continue;
}
# compute the lookahead
aryfil(clset, tbitset, 0);
# find items involving c
for(v:=u; v<cwp; v++) {
if(wsets[v].flag != 1
|| wsets[v].pitem.first != c)
continue;
pi := wsets[v].pitem.prod;
ipi := wsets[v].pitem.off + 1;
wsets[v].flag = 0;
if(nolook)
continue;
while((ch := pi[ipi++]) > 0) {
# terminal symbol
if(ch < NTBASE) {
setbit(clset, ch);
break;
}
# nonterminal symbol
setunion(clset, pfirst[ch-NTBASE]);
if(!pempty[ch-NTBASE])
break;
}
if(ch <= 0)
setunion(clset, wsets[v].ws);
}
#
# now loop over productions derived from c
#
curres := pres[c - NTBASE];
n := len curres;
# initially fill the sets
nexts: for(s := 0; s < n; s++) {
prd := curres[s];
#
# put these items into the closure
# is the item there
#
for(v=0; v<cwp; v++) {
# yes, it is there
if(wsets[v].pitem.off == 0
&& wsets[v].pitem.prod == prd) {
if(!nolook && setunion(wsets[v].ws, clset))
wsets[v].flag = work = 1;
continue nexts;
}
}
# not there; make a new entry
if(cwp >= len wsets){
awsets := array[cwp + WSETINC] of Wset;
awsets[0:] = wsets;
wsets = awsets;
}
wsets[cwp].pitem = Pitem(prd, 0, prd[0], -prd[len prd-1]);
wsets[cwp].flag = 1;
wsets[cwp].ws = mkset();
if(!nolook) {
work = 1;
wsets[cwp].ws[0:] = clset;
}
cwp++;
}
}
}
# have computed closure; flags are reset; return
if(cldebug && foutput != nil) {
foutput.puts("\nState " + string i + ", nolook = " + string nolook + "\n");
for(u:=0; u<cwp; u++) {
if(wsets[u].flag)
foutput.puts("flag set!\n");
wsets[u].flag = 0;
foutput.putc('\t');
foutput.puts(writem(wsets[u].pitem));
prlook(wsets[u].ws);
foutput.putc('\n');
}
}
}
#
# sorts last state,and sees if it equals earlier ones. returns state number
#
state(c: int): int
{
zzstate++;
p1 := pstate[nstate];
p2 := pstate[nstate+1];
if(p1 == p2)
return 0; # null state
# sort the items
k, l: int;
for(k = p1+1; k < p2; k++) { # make k the biggest
for(l = k; l > p1; l--) {
if(statemem[l].pitem.prodno < statemem[l-1].pitem.prodno
|| statemem[l].pitem.prodno == statemem[l-1].pitem.prodno
&& statemem[l].pitem.off < statemem[l-1].pitem.off) {
s := statemem[l];
statemem[l] = statemem[l-1];
statemem[l-1] = s;
}else
break;
}
}
size1 := p2 - p1; # size of state
if(c >= NTBASE)
i := ntstates[c-NTBASE];
else
i = tstates[c];
look: for(; i != 0; i = mstates[i]) {
# get ith state
q1 := pstate[i];
q2 := pstate[i+1];
size2 := q2 - q1;
if(size1 != size2)
continue;
k = p1;
for(l = q1; l < q2; l++) {
if(statemem[l].pitem.prod != statemem[k].pitem.prod
|| statemem[l].pitem.off != statemem[k].pitem.off)
continue look;
k++;
}
# found it
pstate[nstate+1] = pstate[nstate]; # delete last state
# fix up lookaheads
if(nolook)
return i;
k = p1;
for(l = q1; l < q2; l++) {
if(setunion(statemem[l].look, statemem[k].look))
tystate[i] = MUSTDO;
k++;
}
return i;
}
# state is new
zznewstate++;
if(nolook)
error("yacc state/nolook error");
pstate[nstate+2] = p2;
if(nstate+1 >= NSTATES)
error("too many states");
if(c >= NTBASE) {
mstates[nstate] = ntstates[c-NTBASE];
ntstates[c-NTBASE] = nstate;
} else {
mstates[nstate] = tstates[c];
tstates[c] = nstate;
}
tystate[nstate] = MUSTDO;
return nstate++;
}
putitem(p: Pitem, set: Lkset)
{
p.off++;
p.first = p.prod[p.off];
if(pidebug && foutput != nil)
foutput.puts("putitem(" + writem(p) + "), state " + string nstate + "\n");
j := pstate[nstate+1];
if(j >= len statemem){
asm := array[j + STATEINC] of Item;
asm[0:] = statemem;
statemem = asm;
}
statemem[j].pitem = p;
if(!nolook){
s := mkset();
s[0:] = set;
statemem[j].look = s;
}
j++;
pstate[nstate+1] = j;
}
#
# creates output string for item pointed to by pp
#
writem(pp: Pitem): string
{
i: int;
p := pp.prod;
q := chcopy(nontrst[prdptr[pp.prodno][0]-NTBASE].name) + ": ";
npi := pp.off;
pi := p == prdptr[pp.prodno];
for(;;){
c := ' ';
if(pi == npi)
c = '.';
q[len q] = c;
i = p[pi++];
if(i <= 0)
break;
q += chcopy(symnam(i));
}
# an item calling for a reduction
i = p[npi];
if(i < 0)
q += " (" + string -i + ")";
return q;
}
#
# pack state i from temp1 into amem
#
apack(p: array of int, n: int): int
{
#
# we don't need to worry about checking because
# we will only look at entries known to be there...
# eliminate leading and trailing 0's
#
off := 0;
for(pp := 0; pp <= n && p[pp] == 0; pp++)
off--;
# no actions
if(pp > n)
return 0;
for(; n > pp && p[n] == 0; n--)
;
p = p[pp:n+1];
# now, find a place for the elements from p to q, inclusive
r := len amem - len p;
nextk: for(rr := 0; rr <= r; rr++) {
qq := rr;
for(pp = 0; pp < len p; pp++) {
if(p[pp] != 0)
if(p[pp] != amem[qq] && amem[qq] != 0)
continue nextk;
qq++;
}
# we have found an acceptable k
if(pkdebug && foutput != nil)
foutput.puts("off = " + string(off+rr) + ", k = " + string rr + "\n");
qq = rr;
for(pp = 0; pp < len p; pp++) {
if(p[pp]) {
if(qq > memp)
memp = qq;
amem[qq] = p[pp];
}
qq++;
}
if(pkdebug && foutput != nil) {
for(pp = 0; pp <= memp; pp += 10) {
foutput.putc('\t');
for(qq = pp; qq <= pp+9; qq++)
foutput.puts(string amem[qq] + " ");
foutput.putc('\n');
}
}
return off + rr;
}
error("no space in action table");
return 0;
}
#
# print the output for the states
#
output()
{
c, u, v: int;
ftable.puts("yyexca := array[] of {");
if(fdebug != nil)
fdebug.puts("yystates = array [] of {\n");
noset := mkset();
# output the stuff for state i
for(i:=0; i<nstate; i++) {
nolook = tystate[i]!=MUSTLOOKAHEAD;
closure(i);
# output actions
nolook = 1;
aryfil(temp1, ntokens+nnonter+1, 0);
for(u=0; u<cwp; u++) {
c = wsets[u].pitem.first;
if(c > 1 && c < NTBASE && temp1[c] == 0) {
for(v=u; v<cwp; v++)
if(c == wsets[v].pitem.first)
putitem(wsets[v].pitem, noset);
temp1[c] = state(c);
} else
if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
temp1[c+ntokens] = amem[indgo[i]+c];
}
if(i == 1)
temp1[1] = ACCEPTCODE;
# now, we have the shifts; look at the reductions
lastred = 0;
for(u=0; u<cwp; u++) {
c = wsets[u].pitem.first;
# reduction
if(c > 0)
continue;
lastred = -c;
us := wsets[u].ws;
for(k:=0; k<=ntokens; k++) {
if(!bitset(us, k))
continue;
if(temp1[k] == 0)
temp1[k] = c;
else
if(temp1[k] < 0) { # reduce/reduce conflict
if(foutput != nil)
foutput.puts(
"\n" + string i + ": reduce/reduce conflict (red'ns "
+ string -temp1[k] + " and " + string lastred + " ) on " + symnam(k));
if(-temp1[k] > lastred)
temp1[k] = -lastred;
zzrrconf++;
} else
# potential shift/reduce conflict
precftn(lastred, k, i);
}
}
wract(i);
}
if(fdebug != nil)
fdebug.puts("};\n");
ftable.puts("};\n");
ftable.puts("YYNPROD: con " + string nprod + ";\n");
ftable.puts("YYPRIVATE: con " + string PRIVATE + ";\n");
ftable.puts("yytoknames: array of string;\n");
ftable.puts("yystates: array of string;\n");
if(yydebug != nil){
ftable.puts("include \"y.debug\";\n");
ftable.puts("yydebug: con " + yydebug + ";\n");
}else{
ftable.puts("yydebug: con 0;\n");
}
}
#
# decide a shift/reduce conflict by precedence.
# r is a rule number, t a token number
# the conflict is in state s
# temp1[t] is changed to reflect the action
#
precftn(r, t, s: int)
{
action: int;
lp := levprd[r];
lt := toklev[t];
if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
# conflict
if(foutput != nil)
foutput.puts(
"\n" + string s + ": shift/reduce conflict (shift "
+ string temp1[t] + "(" + string PLEVEL(lt) + "), red'n "
+ string r + "(" + string PLEVEL(lp) + ")) on " + symnam(t));
zzsrconf++;
return;
}
if(PLEVEL(lt) == PLEVEL(lp))
action = ASSOC(lt);
else if(PLEVEL(lt) > PLEVEL(lp))
action = RASC; # shift
else
action = LASC; # reduce
case action{
BASC => # error action
temp1[t] = ERRCODE;
LASC => # reduce
temp1[t] = -r;
}
}
#
# output state i
# temp1 has the actions, lastred the default
#
wract(i: int)
{
p, p1: int;
# find the best choice for lastred
lastred = 0;
ntimes := 0;
for(j:=0; j<=ntokens; j++) {
if(temp1[j] >= 0)
continue;
if(temp1[j]+lastred == 0)
continue;
# count the number of appearances of temp1[j]
count := 0;
tred := -temp1[j];
levprd[tred] |= REDFLAG;
for(p=0; p<=ntokens; p++)
if(temp1[p]+tred == 0)
count++;
if(count > ntimes) {
lastred = tred;
ntimes = count;
}
}
#
# for error recovery, arrange that, if there is a shift on the
# error recovery token, `error', that the default be the error action
#
if(temp1[2] > 0)
lastred = 0;
# clear out entries in temp1 which equal lastred
# count entries in optst table
n := 0;
for(p=0; p<=ntokens; p++) {
p1 = temp1[p];
if(p1+lastred == 0)
temp1[p] = p1 = 0;
if(p1 > 0 && p1 != ACCEPTCODE && p1 != ERRCODE)
n++;
}
wrstate(i);
defact[i] = lastred;
flag := 0;
os := array[n*2] of int;
n = 0;
for(p=0; p<=ntokens; p++) {
if((p1=temp1[p]) != 0) {
if(p1 < 0) {
p1 = -p1;
} else if(p1 == ACCEPTCODE) {
p1 = -1;
} else if(p1 == ERRCODE) {
p1 = 0;
} else {
os[n++] = p;
os[n++] = p1;
zzacent++;
continue;
}
if(flag++ == 0)
ftable.puts("-1, " + string i + ",\n");
ftable.puts("\t" + string p + ", " + string p1 + ",\n");
zzexcp++;
}
}
if(flag) {
defact[i] = -2;
ftable.puts("\t-2, " + string lastred + ",\n");
}
optst[i] = os;
}
#
# writes state i
#
wrstate(i: int)
{
j0, j1, u: int;
pp, qq: int;
if(fdebug != nil) {
if(lastred) {
fdebug.puts(" nil, #" + string i + "\n");
} else {
fdebug.puts(" \"");
qq = pstate[i+1];
for(pp=pstate[i]; pp<qq; pp++){
fdebug.puts(writem(statemem[pp].pitem));
fdebug.puts("\\n");
}
if(tystate[i] == MUSTLOOKAHEAD)
for(u = pstate[i+1] - pstate[i]; u < cwp; u++)
if(wsets[u].pitem.first < 0){
fdebug.puts(writem(wsets[u].pitem));
fdebug.puts("\\n");
}
fdebug.puts("\", #" + string i + "/\n");
}
}
if(foutput == nil)
return;
foutput.puts("\nstate " + string i + "\n");
qq = pstate[i+1];
for(pp=pstate[i]; pp<qq; pp++){
foutput.putc('\t');
foutput.puts(writem(statemem[pp].pitem));
foutput.putc('\n');
}
if(tystate[i] == MUSTLOOKAHEAD) {
# print out empty productions in closure
for(u = pstate[i+1] - pstate[i]; u < cwp; u++) {
if(wsets[u].pitem.first < 0) {
foutput.putc('\t');
foutput.puts(writem(wsets[u].pitem));
foutput.putc('\n');
}
}
}
# check for state equal to another
for(j0=0; j0<=ntokens; j0++)
if((j1=temp1[j0]) != 0) {
foutput.puts("\n\t" + symnam(j0) + " ");
# shift, error, or accept
if(j1 > 0) {
if(j1 == ACCEPTCODE)
foutput.puts("accept");
else if(j1 == ERRCODE)
foutput.puts("error");
else
foutput.puts("shift "+string j1);
} else
foutput.puts("reduce " + string -j1 + " (src line " + string rlines[-j1] + ")");
}
# output the final production
if(lastred)
foutput.puts("\n\t. reduce " + string lastred + " (src line " + string rlines[lastred] + ")\n\n");
else
foutput.puts("\n\t. error\n\n");
# now, output nonterminal actions
j1 = ntokens;
for(j0 = 1; j0 <= nnonter; j0++) {
j1++;
if(temp1[j1])
foutput.puts("\t" + symnam(j0+NTBASE) + " goto " + string temp1[j1] + "\n");
}
}
#
# output the gotos for the nontermninals
#
go2out()
{
for(i := 1; i <= nnonter; i++) {
go2gen(i);
# find the best one to make default
best := -1;
times := 0;
# is j the most frequent
for(j := 0; j < nstate; j++) {
if(tystate[j] == 0)
continue;
if(tystate[j] == best)
continue;
# is tystate[j] the most frequent
count := 0;
cbest := tystate[j];
for(k := j; k < nstate; k++)
if(tystate[k] == cbest)
count++;
if(count > times) {
best = cbest;
times = count;
}
}
# best is now the default entry
zzgobest += times-1;
n := 0;
for(j = 0; j < nstate; j++)
if(tystate[j] != 0 && tystate[j] != best)
n++;
goent := array[2*n+1] of int;
n = 0;
for(j = 0; j < nstate; j++)
if(tystate[j] != 0 && tystate[j] != best) {
goent[n++] = j;
goent[n++] = tystate[j];
zzgoent++;
}
# now, the default
if(best == -1)
best = 0;
zzgoent++;
goent[n] = best;
yypgo[i] = goent;
}
}
#
# output the gotos for nonterminal c
#
go2gen(c: int)
{
i, cc, p, q: int;
# first, find nonterminals with gotos on c
aryfil(temp1, nnonter+1, 0);
temp1[c] = 1;
work := 1;
while(work) {
work = 0;
for(i=0; i<nprod; i++) {
# cc is a nonterminal with a goto on c
cc = prdptr[i][1]-NTBASE;
if(cc >= 0 && temp1[cc] != 0) {
# thus, the left side of production i does too
cc = prdptr[i][0]-NTBASE;
if(temp1[cc] == 0) {
work = 1;
temp1[cc] = 1;
}
}
}
}
# now, we have temp1[c] = 1 if a goto on c in closure of cc
if(g2debug && foutput != nil) {
foutput.puts(nontrst[c].name);
foutput.puts(": gotos on ");
for(i=0; i<=nnonter; i++)
if(temp1[i]){
foutput.puts(nontrst[i].name);
foutput.putc(' ');
}
foutput.putc('\n');
}
# now, go through and put gotos into tystate
aryfil(tystate, nstate, 0);
for(i=0; i<nstate; i++) {
q = pstate[i+1];
for(p=pstate[i]; p<q; p++) {
if((cc = statemem[p].pitem.first) >= NTBASE) {
# goto on c is possible
if(temp1[cc-NTBASE]) {
tystate[i] = amem[indgo[i]+c];
break;
}
}
}
}
}
#
# in order to free up the mem and amem arrays for the optimizer,
# and still be able to output yyr1, etc., after the sizes of
# the action array is known, we hide the nonterminals
# derived by productions in levprd.
#
hideprod()
{
j := 0;
levprd[0] = 0;
for(i:=1; i<nprod; i++) {
if(!(levprd[i] & REDFLAG)) {
j++;
if(foutput != nil) {
foutput.puts("Rule not reduced: ");
foutput.puts(writem(Pitem(prdptr[i], 0, 0, i)));
foutput.putc('\n');
}
}
levprd[i] = prdptr[i][0] - NTBASE;
}
if(j)
print("%d rules never reduced\n", j);
}
callopt()
{
j, k, p, q: int;
v: array of int;
pgo = array[nnonter+1] of int;
pgo[0] = 0;
maxoff = 0;
maxspr = 0;
for(i := 0; i < nstate; i++) {
k = 32000;
j = 0;
v = optst[i];
q = len v;
for(p = 0; p < q; p += 2) {
if(v[p] > j)
j = v[p];
if(v[p] < k)
k = v[p];
}
# nontrivial situation
if(k <= j) {
# j is now the range
# j -= k; # call scj
if(k > maxoff)
maxoff = k;
}
tystate[i] = q + 2*j;
if(j > maxspr)
maxspr = j;
}
# initialize ggreed table
ggreed = array[nnonter+1] of int;
for(i = 1; i <= nnonter; i++) {
ggreed[i] = 1;
j = 0;
# minimum entry index is always 0
v = yypgo[i];
q = len v - 1;
for(p = 0; p < q ; p += 2) {
ggreed[i] += 2;
if(v[p] > j)
j = v[p];
}
ggreed[i] = ggreed[i] + 2*j;
if(j > maxoff)
maxoff = j;
}
# now, prepare to put the shift actions into the amem array
for(i = 0; i < ACTSIZE; i++)
amem[i] = 0;
maxa = 0;
for(i = 0; i < nstate; i++) {
if(tystate[i] == 0 && adb > 1)
ftable.puts("State " + string i + ": null\n");
indgo[i] = YYFLAG1;
}
while((i = nxti()) != NOMORE)
if(i >= 0)
stin(i);
else
gin(-i);
# print amem array
if(adb > 2)
for(p = 0; p <= maxa; p += 10) {
ftable.puts(string p + " ");
for(i = 0; i < 10; i++)
ftable.puts(string amem[p+i] + " ");
ftable.putc('\n');
}
aoutput();
osummary();
}
#
# finds the next i
#
nxti(): int
{
max := 0;
maxi := 0;
for(i := 1; i <= nnonter; i++)
if(ggreed[i] >= max) {
max = ggreed[i];
maxi = -i;
}
for(i = 0; i < nstate; i++)
if(tystate[i] >= max) {
max = tystate[i];
maxi = i;
}
if(max == 0)
return NOMORE;
return maxi;
}
gin(i: int)
{
s: int;
# enter gotos on nonterminal i into array amem
ggreed[i] = 0;
q := yypgo[i];
nq := len q - 1;
# now, find amem place for it
nextgp: for(p := 0; p < ACTSIZE; p++) {
if(amem[p])
continue;
for(r := 0; r < nq; r += 2) {
s = p + q[r] + 1;
if(s > maxa){
maxa = s;
if(maxa >= ACTSIZE)
error("a array overflow");
}
if(amem[s])
continue nextgp;
}
# we have found amem spot
amem[p] = q[nq];
if(p > maxa)
maxa = p;
for(r = 0; r < nq; r += 2) {
s = p + q[r] + 1;
amem[s] = q[r+1];
}
pgo[i] = p;
if(adb > 1)
ftable.puts("Nonterminal " + string i + ", entry at " + string pgo[i] + "\n");
return;
}
error("cannot place goto " + string i + "\n");
}
stin(i: int)
{
s: int;
tystate[i] = 0;
# enter state i into the amem array
q := optst[i];
nq := len q;
# find an acceptable place
nextn: for(n := -maxoff; n < ACTSIZE; n++) {
flag := 0;
for(r := 0; r < nq; r += 2) {
s = q[r] + n;
if(s < 0 || s > ACTSIZE)
continue nextn;
if(amem[s] == 0)
flag++;
else if(amem[s] != q[r+1])
continue nextn;
}
# check the position equals another only if the states are identical
for(j:=0; j<nstate; j++) {
if(indgo[j] == n) {
# we have some disagreement
if(flag)
continue nextn;
if(nq == len optst[j]) {
# states are equal
indgo[i] = n;
if(adb > 1)
ftable.puts("State " + string i + ": entry at "
+ string n + " equals state " + string j + "\n");
return;
}
# we have some disagreement
continue nextn;
}
}
for(r = 0; r < nq; r += 2) {
s = q[r] + n;
if(s > maxa)
maxa = s;
if(amem[s] != 0 && amem[s] != q[r+1])
error("clobber of a array, pos'n " + string s + ", by " + string q[r+1] + "");
amem[s] = q[r+1];
}
indgo[i] = n;
if(adb > 1)
ftable.puts("State " + string i + ": entry at " + string indgo[i] + "\n");
return;
}
error("Error; failure to place state " + string i + "\n");
}
#
# this version is for limbo
# write out the optimized parser
#
aoutput()
{
ftable.puts("YYLAST:\tcon "+string (maxa+1)+";\n");
arout("yyact", amem, maxa+1);
arout("yypact", indgo, nstate);
arout("yypgo", pgo, nnonter+1);
}
#
# put out other arrays, copy the parsers
#
others()
{
finput = bufio->open(parser, Bufio->OREAD);
if(finput == nil)
error("cannot find parser " + parser);
arout("yyr1", levprd, nprod);
aryfil(temp1, nprod, 0);
#
#yyr2 is the number of rules for each production
#
for(i:=1; i<nprod; i++)
temp1[i] = len prdptr[i] - 2;
arout("yyr2", temp1, nprod);
aryfil(temp1, nstate, -1000);
for(i=0; i<=ntokens; i++)
for(j:=tstates[i]; j!=0; j=mstates[j])
temp1[j] = i;
for(i=0; i<=nnonter; i++)
for(j=ntstates[i]; j!=0; j=mstates[j])
temp1[j] = -i;
arout("yychk", temp1, nstate);
arout("yydef", defact, nstate);
# put out token translation tables
# table 1 has 0-256
aryfil(temp1, 256, 0);
c := 0;
for(i=1; i<=ntokens; i++) {
j = tokset[i].value;
if(j >= 0 && j < 256) {
if(temp1[j]) {
print("yacc bug -- cant have 2 different Ts with same value\n");
print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
nerrors++;
}
temp1[j] = i;
if(j > c)
c = j;
}
}
for(i = 0; i <= c; i++)
if(temp1[i] == 0)
temp1[i] = YYLEXUNK;
arout("yytok1", temp1, c+1);
# table 2 has PRIVATE-PRIVATE+256
aryfil(temp1, 256, 0);
c = 0;
for(i=1; i<=ntokens; i++) {
j = tokset[i].value - PRIVATE;
if(j >= 0 && j < 256) {
if(temp1[j]) {
print("yacc bug -- cant have 2 different Ts with same value\n");
print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
nerrors++;
}
temp1[j] = i;
if(j > c)
c = j;
}
}
arout("yytok2", temp1, c+1);
# table 3 has everything else
ftable.puts("yytok3 := array[] of {\n");
c = 0;
for(i=1; i<=ntokens; i++) {
j = tokset[i].value;
if(j >= 0 && j < 256)
continue;
if(j >= PRIVATE && j < 256+PRIVATE)
continue;
ftable.puts(sprint("%4d,%4d,", j, i));
c++;
if(c%5 == 0)
ftable.putc('\n');
}
ftable.puts(sprint("%4d\n};\n", 0));
# copy parser text
while((c=finput.getc()) != Bufio->EOF) {
if(c == '$') {
if((c = finput.getc()) != 'A')
ftable.putc('$');
else { # copy actions
if(codehead == nil)
ftable.puts("* => ;");
else
dumpcode(-1);
c = finput.getc();
}
}
ftable.putc(c);
}
ftable.close();
}
arout(s: string, v: array of int, n: int)
{
ftable.puts(s+" := array[] of {");
for(i := 0; i < n; i++) {
if(i%10 == 0)
ftable.putc('\n');
ftable.puts(sprint("%4d", v[i]));
ftable.putc(',');
}
ftable.puts("\n};\n");
}
#
# output the summary on y.output
#
summary()
{
if(foutput != nil) {
foutput.puts("\n" + string ntokens + " terminals, " + string(nnonter + 1) + " nonterminals\n");
foutput.puts("" + string nprod + " grammar rules, " + string nstate + "/" + string NSTATES + " states\n");
foutput.puts("" + string zzsrconf + " shift/reduce, " + string zzrrconf + " reduce/reduce conflicts reported\n");
foutput.puts("" + string len wsets + " working sets used\n");
foutput.puts("memory: parser " + string memp + "/" + string ACTSIZE + "\n");
foutput.puts(string (zzclose - 2*nstate) + " extra closures\n");
foutput.puts(string zzacent + " shift entries, " + string zzexcp + " exceptions\n");
foutput.puts(string zzgoent + " goto entries\n");
foutput.puts(string zzgobest + " entries saved by goto default\n");
}
if(zzsrconf != 0 || zzrrconf != 0) {
print("\nconflicts: ");
if(zzsrconf)
print("%d shift/reduce", zzsrconf);
if(zzsrconf && zzrrconf)
print(", ");
if(zzrrconf)
print("%d reduce/reduce", zzrrconf);
print("\n");
}
if(fdefine != nil)
fdefine.close();
}
#
# write optimizer summary
#
osummary()
{
if(foutput == nil)
return;
i := 0;
for(p := maxa; p >= 0; p--)
if(amem[p] == 0)
i++;
foutput.puts("Optimizer space used: output " + string (maxa+1) + "/" + string ACTSIZE + "\n");
foutput.puts(string(maxa+1) + " table entries, " + string i + " zero\n");
foutput.puts("maximum spread: " + string maxspr + ", maximum offset: " + string maxoff + "\n");
}
#
# copies and protects "'s in q
#
chcopy(q: string): string
{
s := "";
j := 0;
for(i := 0; i < len q; i++) {
if(q[i] == '"') {
s += q[j:i] + "\\";
j = i;
}
}
return s + q[j:i];
}
usage()
{
fprint(stderr, "usage: yacc [-vdm] [-Dn] [-o output] [-s stem] file\n");
raise "fail:usage";
}
bitset(set: Lkset, bit: int): int
{
return set[bit>>5] & (1<<(bit&31));
}
setbit(set: Lkset, bit: int): int
{
return set[bit>>5] |= (1<<(bit&31));
}
mkset(): Lkset
{
return array[tbitset] of {* => 0};
}
#
# set a to the union of a and b
# return 1 if b is not a subset of a, 0 otherwise
#
setunion(a, b: array of int): int
{
sub := 0;
for(i:=0; i<tbitset; i++) {
x := a[i];
y := x | b[i];
a[i] = y;
if(y != x)
sub = 1;
}
return sub;
}
prlook(p: Lkset)
{
if(p == nil){
foutput.puts("\tNULL");
return;
}
foutput.puts(" { ");
for(j:=0; j<=ntokens; j++){
if(bitset(p, j)){
foutput.puts(symnam(j));
foutput.putc(' ');
}
}
foutput.putc('}');
}
#
# utility routines
#
isdigit(c: int): int
{
return c >= '0' && c <= '9';
}
isword(c: int): int
{
return c >= 16ra0 || c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
}
mktemp(t: string): string
{
return t;
}
#
# arg processing
#
Arg.init(argv: list of string): ref Arg
{
if(argv != nil)
argv = tl argv;
return ref Arg(argv, 0, "");
}
Arg.opt(arg: self ref Arg): int
{
opts := arg.opts;
if(opts != ""){
arg.c = opts[0];
arg.opts = opts[1:];
return arg.c;
}
argv := arg.argv;
if(argv == nil)
return arg.c = 0;
opts = hd argv;
if(len opts < 2 || opts[0] != '-')
return arg.c = 0;
arg.argv = tl argv;
if(opts == "--")
return arg.c = 0;
arg.opts = opts[2:];
return arg.c = opts[1];
}
Arg.arg(arg: self ref Arg): string
{
s := arg.opts;
arg.opts = "";
if(s != "")
return s;
argv := arg.argv;
if(argv == nil)
return "";
arg.argv = tl argv;
return hd argv;
}