ref: aae224a92023099c786253c706297fd619f3f352
parent: c228fe24de2e00830b175cd12ff8ac44f16a5d41
author: qwx <qwx@sciops.net>
date: Sat Nov 8 18:36:02 EST 2025
awk: avoid reparsing numbers and calling strtod
--- a/sys/src/cmd/awk/lex.c
+++ b/sys/src/cmd/awk/lex.c
@@ -103,7 +103,7 @@
return c;
}
-int gettok(char **pbuf, int *psz) /* get next input token */
+static int gettok(char **pbuf, int *psz, Awkfloat *fp) /* get next input token */
{
int c;
char *buf = *pbuf;
@@ -132,6 +132,7 @@
break;
}
}
+ c = 'a';
} else { /* it's a number */
char *rem;
/* read input until can't be a number */
@@ -148,13 +149,16 @@
}
}
*bp = 0;
- strtod(buf, &rem); /* parse the number */
+ if(to_number(buf, fp, &rem)) /* parse the number */
+ c = '0';
+ else
+ c = buf[0];
unputstr(rem); /* put rest back for later */
rem[0] = 0;
}
*pbuf = buf;
*psz = sz;
- return buf[0];
+ return c;
}
int word(char *);
@@ -166,6 +170,7 @@
int yylex(void)
{
int c;
+ Awkfloat f;
static char *buf = 0;
static int bufsize = 500;
@@ -180,14 +185,16 @@
return regexpr();
}
for (;;) {
- c = gettok(&buf, &bufsize);
+ c = gettok(&buf, &bufsize, &f);
if (c == 0)
return 0;
- if (isalpha(c) || c == '_')
+ if (c == 'a')
return word(buf);
- if (isdigit(c) || c == '.') {
- yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
- /* should this also have STR set? */
+ /* may be unsuitable for printing (T.strnum) so don't set STR,
+ * but may be a regex to be treated literally (T.coerce[23])
+ * via strnode, so save a copy. */
+ if (c == '0') {
+ yylval.cp = setsymtab(buf, tostring(buf), f, CON|NUM, symtab);
RET(NUMBER);
}
@@ -297,7 +304,7 @@
case '$':
/* BUG: awkward, if not wrong */
- c = gettok(&buf, &bufsize);
+ c = gettok(&buf, &bufsize, &f);
if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
unputstr(buf);
RET(INDIRECT);
@@ -449,7 +456,7 @@
Keyword *kp;
int c, n;
- n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
+ n = binsearch(w, keywords, nelem(keywords));
kp = keywords + n;
if (n != -1) { /* found in table */
yylval.i = kp->sub;
--- a/sys/src/cmd/awk/lib.c
+++ b/sys/src/cmd/awk/lib.c
@@ -145,10 +145,8 @@
xfree(fldtab[0]->sval);
fldtab[0]->sval = buf; /* buf == record */
fldtab[0]->tval = REC | STR | DONTFREE;
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (to_number(fldtab[0]->sval, &fldtab[0]->fval, nil))
fldtab[0]->tval |= NUM;
- }
}
setfval(nrloc, nrloc->fval+1);
setfval(fnrloc, fnrloc->fval+1);
@@ -237,10 +235,8 @@
p = qstring(p, '\0');
q = setsymtab(s, p, 0.0, STR, symtab);
setsval(q, p);
- if (is_number(q->sval)) {
- q->fval = atof(q->sval);
+ if (to_number(q->sval, &q->fval, nil))
q->tval |= NUM;
- }
dprint( ("command line set %s to |%s|\n", s, p) );
}
@@ -329,10 +325,8 @@
donefld = 1;
for (j = 1; j <= lastfld; j++) {
p = fldtab[j];
- if(is_number(p->sval)) {
- p->fval = atof(p->sval);
+ if (to_number(p->sval, &p->fval, nil))
p->tval |= NUM;
- }
}
setfval(nfloc, (Awkfloat) lastfld);
if (dbg) {
@@ -666,45 +660,81 @@
return *s == '=' && s > os && *(s+1) != '=';
}
-/* strtod is supposed to be a proper test of what's a valid number */
+static int is_float(char *s, Awkfloat *fp, char **tp)
+{
+ char c, *p, *q;
+ Awkfloat f;
-int is_number(char *s)
+ f = *fp = strtod(s, &p);
+ if (tp != nil)
+ *tp = p;
+ if (p == s)
+ return 0;
+ else if (isInf(f, 1) || isInf(f, -1) || isNaN(f))
+ return 0;
+ else if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))
+ return 0;
+ else if (tp != nil)
+ return 1;
+ for (; (c = *p) != '\0'; p++) {
+ switch(c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return 1;
+ default:
+ return 0;
+ }
+ }
+ return 1;
+}
+
+int to_number(char *s, Awkfloat *fp, char **tp)
{
- double r;
- char *ep;
+ vlong v;
+ char c, *p, *q;
- /*
- * fast could-it-be-a-number check before calling strtod,
- * which takes a surprisingly long time to reject non-numbers.
- */
- switch (*s) {
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case '\t':
- case '\n':
- case '\v':
- case '\f':
- case '\r':
- case ' ':
- case '-':
- case '+':
+ v = strtoll(s, &p, 0);
+ *fp = (Awkfloat)v;
+ if (tp != nil)
+ *tp = p;
+ switch(*p){
case '.':
- case 'n': /* nans */
- case 'N':
- case 'i': /* infs */
- case 'I':
- break;
- default:
- return 0; /* can't be a number */
+ case 'E':
+ case 'I': /* inf */
+ case 'N': /* nan */
+ case 'e':
+ case 'i':
+ case 'n':
+ if (is_float(s, fp, tp))
+ return NUM;
+ return 0;
}
-
- r = strtod(s, &ep);
- if (ep == s || isInf(r, 1) || isInf(r, -1) || isNaN(r))
+ if (p == s)
return 0;
- while (*ep == ' ' || *ep == '\t' || *ep == '\n')
- ep++;
- if (*ep == '\0')
- return 1;
- else
+ else if (v == 0 && ((q = strchr(s, '0')) == nil || q > p))
return 0;
+ else if (tp != nil)
+ return NUM;
+ for (; (c = *p) != '\0'; p++) {
+ switch(c) {
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\f':
+ case '\r':
+ case '\v':
+ continue;
+ case '\0':
+ return NUM;
+ default:
+ return 0;
+ }
+ }
+ return NUM;
}
--- a/sys/src/cmd/awk/proto.h
+++ b/sys/src/cmd/awk/proto.h
@@ -118,7 +118,7 @@
extern void bclass(int);
extern double errcheck(double, char *);
extern int isclvar(char *);
-extern int is_number(char *);
+extern int to_number(char *, Awkfloat *, char **);
extern int adjbuf(char **pb, int *sz, int min, int q, char **pbp, char *what);
extern void run(Node *);
--- a/sys/src/cmd/awk/run.c
+++ b/sys/src/cmd/awk/run.c
@@ -416,10 +416,8 @@
tfree(x);
} else { /* getline <file */
setsval(fldtab[0], buf);
- if (is_number(fldtab[0]->sval)) {
- fldtab[0]->fval = atof(fldtab[0]->sval);
+ if (to_number(fldtab[0]->sval, &fldtab[0]->fval, nil))
fldtab[0]->tval |= NUM;
- }
}
} else { /* bare getline; use current input */
if (a[0] == nil) /* getline */
@@ -724,9 +722,11 @@
x = execute(a[0]);
m = (int) getfval(x);
- if (m == 0 && !is_number(s = getsval(x))) /* suspicion! */
- FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
- /* BUG: can x->nval ever be null??? */
+ if (m == 0) {
+ if (!to_number(s = getsval(x), &x->fval, nil)) /* suspicion! */
+ FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
+ /* BUG: can x->nval ever be null??? */
+ }
if (istemp(x))
tfree(x);
x = fieldadr(m);
@@ -1251,6 +1251,7 @@
Cell *x = 0, *y, *ap;
char *s, *ds, *t, *fs = 0;
char temp, num[50];
+ Awkfloat f;
int n, nb, sep, arg3type;
y = execute(a[0]); /* source string */
@@ -1290,8 +1291,8 @@
sprint(num, "%d", n);
temp = *patbeg;
*patbeg = '\0';
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (to_number(t, &f, nil))
+ setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
*patbeg = temp;
@@ -1306,8 +1307,8 @@
}
n++;
sprint(num, "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (to_number(t, &f, nil))
+ setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
spdone:
@@ -1326,8 +1327,8 @@
temp = *s;
*s = '\0';
sprint(num, "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (to_number(t, &f, nil))
+ setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
*s = temp;
@@ -1344,8 +1345,8 @@
nb = chartorune(&r, s);
memmove(buf, s, nb);
buf[nb] = '\0';
- if (isdigit(buf[0]))
- setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
+ if (to_number(buf, &f, nil))
+ setsymtab(num, buf, f, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
}
@@ -1358,8 +1359,8 @@
temp = *s;
*s = '\0';
sprint(num, "%d", n);
- if (is_number(t))
- setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+ if (to_number(t, &f, nil))
+ setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
else
setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
*s = temp;
--- a/sys/src/cmd/awk/tran.c
+++ b/sys/src/cmd/awk/tran.c
@@ -100,6 +100,7 @@
Cell *cp;
int i;
char temp[50];
+ Awkfloat f;
AARGC = &setsymtab("ARGC", EMPTY, (Awkfloat) ac, NUM, symtab)->fval;
cp = setsymtab("ARGV", EMPTY, 0.0, ARR, symtab);
@@ -107,8 +108,8 @@
cp->sval = (char *) ARGVtab;
for (i = 0; i < ac; i++) {
sprint(temp, "%d", i);
- if (is_number(*av))
- setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
+ if (to_number(*av, &f, nil))
+ setsymtab(temp, *av, f, STR|NUM, ARGVtab);
else
setsymtab(temp, *av, 0.0, STR, ARGVtab);
av++;
@@ -120,6 +121,7 @@
int fd, i, n;
char *k, *v;
Dir *buf;
+ Awkfloat f;
ENVtab = makesymtab(NSYMTAB);
if ((fd = open("/env", OREAD)) < 0)
@@ -133,8 +135,8 @@
continue;
if ((v = getenv(k)) == nil)
continue;
- if (is_number(v))
- setsymtab(k, v, atof(v), STR|NUM, ENVtab);
+ if (to_number(v, &f, nil))
+ setsymtab(k, v, f, STR|NUM, ENVtab);
else
setsymtab(k, v, 0.0, STR, ENVtab);
free(v);
@@ -364,9 +366,11 @@
else if (isrec(vp) && donerec == 0)
recbld();
if (!isnum(vp)) { /* not a number */
- vp->fval = atof(vp->sval); /* best guess */
- if (is_number(vp->sval) && !(vp->tval&CON))
- vp->tval |= NUM; /* make NUM only sparingly */
+ vp->fval = 0;
+ if (to_number(vp->sval, &vp->fval, nil)) {
+ if (!(vp->tval&CON))
+ vp->tval |= NUM; /* make NUM only sparingly */
+ }
}
dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
return(vp->fval);
--
⑨