ref: 8e18df2cbaae08f7075076572f26f80a296701c1
dir: /sys/src/cmd/rc/lex.c/
#include "rc.h"
#include "io.h"
#include "getflags.h"
#include "fns.h"
lexer *lex;
int doprompt = 1;
int nerror;
int
wordchr(int c)
{
return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
}
int
idchr(int c)
{
/*
* Formerly:
* return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
* || c=='_' || c=='*';
*/
return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
}
lexer*
newlexer(io *input, char *file)
{
lexer *n = new(struct lexer);
n->input = input;
n->file = file;
n->line = 1;
n->eof = 0;
n->future = EOF;
n->peekc = '{';
n->epilog = "}\n";
n->lastc = 0;
n->inquote = 0;
n->incomm = 0;
n->lastword = 0;
n->lastdol = 0;
n->iflast = 0;
n->qflag = 0;
n->tok[0] = 0;
return n;
}
void
freelexer(lexer *p)
{
closeio(p->input);
free(p->file);
free(p);
}
/*
* read a character from the input stream
*/
static int
getnext(void)
{
int c;
if(lex->peekc!=EOF){
c = lex->peekc;
lex->peekc = EOF;
return c;
}
if(lex->eof){
epilog:
if(*lex->epilog)
return *lex->epilog++;
doprompt = 1;
return EOF;
}
if(doprompt)
pprompt();
c = rchr(lex->input);
if(c=='\\' && !lex->inquote){
c = rchr(lex->input);
if(c=='\n' && !lex->incomm){ /* don't continue a comment */
doprompt = 1;
c=' ';
}
else{
lex->peekc = c;
c='\\';
}
}
if(c==EOF){
lex->eof = 1;
goto epilog;
} else {
if(c=='\n')
doprompt = 1;
if((!lex->qflag && flag['v']!=0) || flag['V'])
pchr(err, c);
}
return c;
}
/*
* Look ahead in the input stream
*/
static int
nextc(void)
{
if(lex->future==EOF)
lex->future = getnext();
return lex->future;
}
/*
* Consume the lookahead character.
*/
static int
advance(void)
{
int c = nextc();
lex->lastc = lex->future;
lex->future = EOF;
if(c == '\n')
lex->line++;
return c;
}
static void
skipwhite(void)
{
int c;
for(;;){
c = nextc();
/* Why did this used to be if(!inquote && c=='#') ?? */
if(c=='#'){
lex->incomm = 1;
for(;;){
c = nextc();
if(c=='\n' || c==EOF) {
lex->incomm = 0;
break;
}
advance();
}
}
if(c==' ' || c=='\t')
advance();
else return;
}
}
void
skipnl(void)
{
int c;
for(;;){
skipwhite();
c = nextc();
if(c!='\n')
return;
advance();
}
}
static int
nextis(int c)
{
if(nextc()==c){
advance();
return 1;
}
return 0;
}
static char*
addtok(char *p, int val)
{
if(p==0)
return 0;
if(p==&lex->tok[NTOK-1]){
*p = 0;
yyerror("token buffer too short");
return 0;
}
*p++=val;
return p;
}
static char*
addutf(char *p, int c)
{
int i, n;
p = addtok(p, c); /* 1-byte UTF runes are special */
if(onebyte(c))
return p;
if(twobyte(c))
n = 2;
else if(threebyte(c))
n = 3;
else
n = 4;
for(i = 1; i < n; i++) {
c = nextc();
if(c == EOF || !xbyte(c))
break;
p = addtok(p, advance());
}
return p;
}
int
yylex(void)
{
int glob, c, d = nextc();
char *tok = lex->tok;
char *w = tok;
tree *t;
yylval.tree = 0;
/*
* Embarassing sneakiness: if the last token read was a quoted or unquoted
* WORD then we alter the meaning of what follows. If the next character
* is `(', we return SUB (a subscript paren) and consume the `('. Otherwise,
* if the next character is the first character of a simple or compound word,
* we insert a `^' before it.
*/
if(lex->lastword){
lex->lastword = 0;
if(d=='('){
advance();
strcpy(tok, "( [SUB]");
return SUB;
}
if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
strcpy(tok, "^");
return '^';
}
}
lex->inquote = 0;
skipwhite();
switch(c = advance()){
case EOF:
lex->lastdol = 0;
strcpy(tok, "EOF");
return EOF;
case '$':
lex->lastdol = 1;
if(nextis('#')){
strcpy(tok, "$#");
return COUNT;
}
if(nextis('"')){
strcpy(tok, "$\"");
return '"';
}
strcpy(tok, "$");
return '$';
case '&':
lex->lastdol = 0;
if(nextis('&')){
skipnl();
strcpy(tok, "&&");
return ANDAND;
}
strcpy(tok, "&");
return '&';
case '|':
lex->lastdol = 0;
if(nextis(c)){
skipnl();
strcpy(tok, "||");
return OROR;
}
case '<':
case '>':
lex->lastdol = 0;
/*
* funny redirection tokens:
* redir: arrow | arrow '[' fd ']'
* arrow: '<' | '<<' | '>' | '>>' | '|'
* fd: digit | digit '=' | digit '=' digit
* digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
* some possibilities are nonsensical and get a message.
*/
*w++=c;
t = newtree();
switch(c){
case '|':
t->type = PIPE;
t->fd0 = 1;
t->fd1 = 0;
break;
case '>':
t->type = REDIR;
if(nextis(c)){
t->rtype = APPEND;
*w++=c;
}
else t->rtype = WRITE;
t->fd0 = 1;
break;
case '<':
t->type = REDIR;
if(nextis(c)){
t->rtype = HERE;
*w++=c;
} else if (nextis('>')){
t->rtype = RDWR;
*w++=c;
} else t->rtype = READ;
t->fd0 = 0;
break;
}
if(nextis('[')){
*w++='[';
c = advance();
*w++=c;
if(c<'0' || '9'<c){
RedirErr:
*w = 0;
yyerror(t->type==PIPE?"pipe syntax"
:"redirection syntax");
return EOF;
}
t->fd0 = 0;
do{
t->fd0 = t->fd0*10+c-'0';
*w++=c;
c = advance();
}while('0'<=c && c<='9');
if(c=='='){
*w++='=';
if(t->type==REDIR)
t->type = DUP;
c = advance();
if('0'<=c && c<='9'){
t->rtype = DUPFD;
t->fd1 = t->fd0;
t->fd0 = 0;
do{
t->fd0 = t->fd0*10+c-'0';
*w++=c;
c = advance();
}while('0'<=c && c<='9');
}
else{
if(t->type==PIPE)
goto RedirErr;
t->rtype = CLOSE;
}
}
if(c!=']'
|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
goto RedirErr;
*w++=']';
}
*w='\0';
yylval.tree = t;
if(t->type==PIPE)
skipnl();
return t->type;
case '\'':
lex->lastdol = 0;
lex->lastword = 1;
lex->inquote = 1;
for(;;){
c = advance();
if(c==EOF)
break;
if(c=='\''){
if(nextc()!='\'')
break;
advance();
}
w = addutf(w, c);
}
if(w!=0)
*w='\0';
t = token(tok, WORD);
t->quoted = 1;
yylval.tree = t;
return t->type;
}
if(!wordchr(c)){
lex->lastdol = 0;
tok[0] = c;
tok[1]='\0';
return c;
}
glob = 0;
for(;;){
if(c=='*' || c=='[' || c=='?' || c==GLOB){
glob = 1;
w = addtok(w, GLOB);
}
w = addutf(w, c);
c = nextc();
if(lex->lastdol?!idchr(c):!wordchr(c)) break;
advance();
}
lex->lastword = 1;
lex->lastdol = 0;
if(w!=0)
*w='\0';
t = klook(tok);
if(t->type!=WORD)
lex->lastword = 0;
else
t->glob = glob;
t->quoted = 0;
yylval.tree = t;
return t->type;
}
void
yyerror(char *m)
{
pfln(err, lex->file, lex->line);
pstr(err, ": ");
if(lex->tok[0] && lex->tok[0]!='\n')
pfmt(err, "token %q: ", lex->tok);
pfmt(err, "%s\n", m);
flushio(err);
lex->lastword = 0;
lex->lastdol = 0;
while(lex->lastc!='\n' && lex->lastc!=EOF) advance();
nerror++;
setstatus(m);
}