git: 9front

Download patch

ref: aae224a92023099c786253c706297fd619f3f352
parent: c228fe24de2e00830b175cd12ff8ac44f16a5d41
author: qwx <qwx@sciops.net>
date: Sat Nov 8 18:36:02 EST 2025

awk: avoid reparsing numbers and calling strtod

--- a/sys/src/cmd/awk/lex.c
+++ b/sys/src/cmd/awk/lex.c
@@ -103,7 +103,7 @@
 	return c;
 }
 
-int gettok(char **pbuf, int *psz)	/* get next input token */
+static int gettok(char **pbuf, int *psz, Awkfloat *fp)	/* get next input token */
 {
 	int c;
 	char *buf = *pbuf;
@@ -132,6 +132,7 @@
 				break;
 			}
 		}
+		c = 'a';
 	} else {	/* it's a number */
 		char *rem;
 		/* read input until can't be a number */
@@ -148,13 +149,16 @@
 			}
 		}
 		*bp = 0;
-		strtod(buf, &rem);	/* parse the number */
+		if(to_number(buf, fp, &rem))	/* parse the number */
+			c = '0';
+		else
+			c = buf[0];
 		unputstr(rem);		/* put rest back for later */
 		rem[0] = 0;
 	}
 	*pbuf = buf;
 	*psz = sz;
-	return buf[0];
+	return c;
 }
 
 int	word(char *);
@@ -166,6 +170,7 @@
 int yylex(void)
 {
 	int c;
+	Awkfloat f;
 	static char *buf = 0;
 	static int bufsize = 500;
 
@@ -180,14 +185,16 @@
 		return regexpr();
 	}
 	for (;;) {
-		c = gettok(&buf, &bufsize);
+		c = gettok(&buf, &bufsize, &f);
 		if (c == 0)
 			return 0;
-		if (isalpha(c) || c == '_')
+		if (c == 'a')
 			return word(buf);
-		if (isdigit(c) || c == '.') {
-			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
-			/* should this also have STR set? */
+		/* may be unsuitable for printing (T.strnum) so don't set STR,
+		 * but may be a regex to be treated literally (T.coerce[23])
+		 * via strnode, so save a copy. */
+		if (c == '0') {
+			yylval.cp = setsymtab(buf, tostring(buf), f, CON|NUM, symtab);
 			RET(NUMBER);
 		}
 	
@@ -297,7 +304,7 @@
 	
 		case '$':
 			/* BUG: awkward, if not wrong */
-			c = gettok(&buf, &bufsize);
+			c = gettok(&buf, &bufsize, &f);
 			if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
 				unputstr(buf);
 				RET(INDIRECT);
@@ -449,7 +456,7 @@
 	Keyword *kp;
 	int c, n;
 
-	n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
+	n = binsearch(w, keywords, nelem(keywords));
 	kp = keywords + n;
 	if (n != -1) {	/* found in table */
 		yylval.i = kp->sub;
--- a/sys/src/cmd/awk/lib.c
+++ b/sys/src/cmd/awk/lib.c
@@ -145,10 +145,8 @@
 					xfree(fldtab[0]->sval);
 				fldtab[0]->sval = buf;	/* buf == record */
 				fldtab[0]->tval = REC | STR | DONTFREE;
-				if (is_number(fldtab[0]->sval)) {
-					fldtab[0]->fval = atof(fldtab[0]->sval);
+				if (to_number(fldtab[0]->sval, &fldtab[0]->fval, nil))
 					fldtab[0]->tval |= NUM;
-				}
 			}
 			setfval(nrloc, nrloc->fval+1);
 			setfval(fnrloc, fnrloc->fval+1);
@@ -237,10 +235,8 @@
 	p = qstring(p, '\0');
 	q = setsymtab(s, p, 0.0, STR, symtab);
 	setsval(q, p);
-	if (is_number(q->sval)) {
-		q->fval = atof(q->sval);
+	if (to_number(q->sval, &q->fval, nil))
 		q->tval |= NUM;
-	}
 	   dprint( ("command line set %s to |%s|\n", s, p) );
 }
 
@@ -329,10 +325,8 @@
 	donefld = 1;
 	for (j = 1; j <= lastfld; j++) {
 		p = fldtab[j];
-		if(is_number(p->sval)) {
-			p->fval = atof(p->sval);
+		if (to_number(p->sval, &p->fval, nil))
 			p->tval |= NUM;
-		}
 	}
 	setfval(nfloc, (Awkfloat) lastfld);
 	if (dbg) {
@@ -666,45 +660,104 @@
 	return *s == '=' && s > os && *(s+1) != '=';
 }
 
-/* strtod is supposed to be a proper test of what's a valid number */
-
-int is_number(char *s)
-{
-	double r;
-	char *ep;
-
-	/*
-	 * fast could-it-be-a-number check before calling strtod,
-	 * which takes a surprisingly long time to reject non-numbers.
-	 */
-	switch (*s) {
-	case '0': case '1': case '2': case '3': case '4':
-	case '5': case '6': case '7': case '8': case '9':
-	case '\t':
-	case '\n':
-	case '\v':
-	case '\f':
-	case '\r':
-	case ' ':
-	case '-':
-	case '+':
-	case '.':
-	case 'n':		/* nans */
-	case 'N':
-	case 'i':		/* infs */
-	case 'I':
-		break;
-	default:
-		return 0;	/* can't be a number */
-	}
-
-	r = strtod(s, &ep);
-	if (ep == s || isInf(r, 1) || isInf(r, -1) || isNaN(r))
-		return 0;
-	while (*ep == ' ' || *ep == '\t' || *ep == '\n')
-		ep++;
-	if (*ep == '\0')
-		return 1;
-	else
-		return 0;
+/*
+ * Numeric-string recognition: to_number tries a cheap integer parse
+ * and only calls strtod (through is_float) when the text cannot be
+ * handled as a plain decimal integer.
+ */
+
+enum {
+	/* 10^18 < 2^63, so this many characters cannot overflow a 64-bit vlong */
+	Maxintlen = 18,
+};
+
+/* return the first character at or after p that is not white space */
+static char *skip_space(char *p)
+{
+	for (;;) {
+		switch (*p) {
+		case ' ':
+		case '\t':
+		case '\n':
+		case '\f':
+		case '\r':
+		case '\v':
+			p++;
+			continue;
+		}
+		return p;
+	}
+}
+
+/*
+ * Parse s with strtod.  The parsed value is always stored in *fp and,
+ * when tp is not nil, the end of the parsed text in *tp.
+ * Returns 1 if s is an acceptable number, 0 otherwise: nothing parsed,
+ * infinities and NaNs are rejected, as is a zero result with no '0'
+ * character at or before the end of the parsed text.  With tp nil the
+ * number must also be followed by nothing but white space; with tp set
+ * the caller deals with whatever follows.
+ */
+static int is_float(char *s, Awkfloat *fp, char **tp)
+{
+	char *p, *q;
+	Awkfloat f;
+
+	f = *fp = strtod(s, &p);
+	if (tp != nil)
+		*tp = p;
+	if (p == s)
+		return 0;
+	if (isInf(f, 1) || isInf(f, -1) || isNaN(f))
+		return 0;
+	if (f == 0.0 && ((q = strchr(s, '0')) == nil || q > p))
+		return 0;
+	if (tp != nil)
+		return 1;
+	return *skip_space(p) == '\0';
+}
+
+/*
+ * Decide whether s is a number, storing a best-effort value in *fp
+ * even when it is not.  Returns NUM if it is, 0 otherwise.
+ * With tp nil only white space may follow the number; otherwise *tp
+ * is set to the end of the parsed text and trailing text is allowed.
+ */
+int to_number(char *s, Awkfloat *fp, char **tp)
+{
+	vlong v;
+	char *p, *q;
+
+	/* base 10, not 0: "010" is ten, not octal eight; "0x10" is not read as hex */
+	v = strtoll(s, &p, 10);
+	*fp = (Awkfloat)v;
+	if (tp != nil)
+		*tp = p;
+	q = p;
+	if (p == s) {
+		/* no integer part; look past white space and sign for ".5" or "inf" */
+		q = skip_space(s);
+		if (*q == '+' || *q == '-')
+			q++;
+	}
+	switch (*q) {
+	case '.':
+	case 'E':
+	case 'I':	/* inf */
+	case 'N':	/* nan */
+	case 'e':
+	case 'i':
+	case 'n':
+		return is_float(s, fp, tp) ? NUM : 0;
+	}
+	if (p == s)
+		return 0;
+	/* strtoll may have overflowed; let strtod produce the value */
+	if (p - s > Maxintlen)
+		return is_float(s, fp, tp) ? NUM : 0;
+	if (v == 0 && ((q = strchr(s, '0')) == nil || q > p))
+		return 0;
+	if (tp != nil)
+		return NUM;
+	return *skip_space(p) == '\0' ? NUM : 0;
 }
--- a/sys/src/cmd/awk/proto.h
+++ b/sys/src/cmd/awk/proto.h
@@ -118,7 +118,7 @@
 extern	void	bclass(int);
 extern	double	errcheck(double, char *);
 extern	int	isclvar(char *);
-extern	int	is_number(char *);
+extern	int	to_number(char *, Awkfloat *, char **);
 
 extern	int	adjbuf(char **pb, int *sz, int min, int q, char **pbp, char *what);
 extern	void	run(Node *);
--- a/sys/src/cmd/awk/run.c
+++ b/sys/src/cmd/awk/run.c
@@ -416,10 +416,8 @@
 				tfree(x);
 		} else {			/* getline <file */
 			setsval(fldtab[0], buf);
-			if (is_number(fldtab[0]->sval)) {
-				fldtab[0]->fval = atof(fldtab[0]->sval);
+			if (to_number(fldtab[0]->sval, &fldtab[0]->fval, nil))
 				fldtab[0]->tval |= NUM;
-			}
 		}
 	} else {			/* bare getline; use current input */
 		if (a[0] == nil)	/* getline */
@@ -724,9 +722,11 @@
 
 	x = execute(a[0]);
 	m = (int) getfval(x);
-	if (m == 0 && !is_number(s = getsval(x)))	/* suspicion! */
-		FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
-		/* BUG: can x->nval ever be null??? */
+	if (m == 0) {
+		if (!to_number(s = getsval(x), &x->fval, nil))	/* suspicion! */
+			FATAL("illegal field $(%s), name \"%s\"", s, x->nval);
+			/* BUG: can x->nval ever be null??? */
+	}
 	if (istemp(x))
 		tfree(x);
 	x = fieldadr(m);
@@ -1251,6 +1251,7 @@
 	Cell *x = 0, *y, *ap;
 	char *s, *ds, *t, *fs = 0;
 	char temp, num[50];
+	Awkfloat f;
 	int n, nb, sep, arg3type;
 
 	y = execute(a[0]);	/* source string */
@@ -1290,8 +1291,8 @@
 				sprint(num, "%d", n);
 				temp = *patbeg;
 				*patbeg = '\0';
-				if (is_number(t))
-					setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+				if (to_number(t, &f, nil))
+					setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
 				else
 					setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
 				*patbeg = temp;
@@ -1306,8 +1307,8 @@
 		}
 		n++;
 		sprint(num, "%d", n);
-		if (is_number(t))
-			setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+		if (to_number(t, &f, nil))
+			setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
 		else
 			setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
   spdone:
@@ -1326,8 +1327,8 @@
 			temp = *s;
 			*s = '\0';
 			sprint(num, "%d", n);
-			if (is_number(t))
-				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+			if (to_number(t, &f, nil))
+				setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
 			else
 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
 			*s = temp;
@@ -1344,8 +1345,8 @@
 			nb = chartorune(&r, s);
 			memmove(buf, s, nb);
 			buf[nb] = '\0';
-			if (isdigit(buf[0]))
-				setsymtab(num, buf, atof(buf), STR|NUM, (Array *) ap->sval);
+			if (to_number(buf, &f, nil))
+				setsymtab(num, buf, f, STR|NUM, (Array *) ap->sval);
 			else
 				setsymtab(num, buf, 0.0, STR, (Array *) ap->sval);
 		}
@@ -1358,8 +1359,8 @@
 			temp = *s;
 			*s = '\0';
 			sprint(num, "%d", n);
-			if (is_number(t))
-				setsymtab(num, t, atof(t), STR|NUM, (Array *) ap->sval);
+			if (to_number(t, &f, nil))
+				setsymtab(num, t, f, STR|NUM, (Array *) ap->sval);
 			else
 				setsymtab(num, t, 0.0, STR, (Array *) ap->sval);
 			*s = temp;
--- a/sys/src/cmd/awk/tran.c
+++ b/sys/src/cmd/awk/tran.c
@@ -100,6 +100,7 @@
 	Cell *cp;
 	int i;
 	char temp[50];
+	Awkfloat f;
 
 	AARGC = &setsymtab("ARGC", EMPTY, (Awkfloat) ac, NUM, symtab)->fval;
 	cp = setsymtab("ARGV", EMPTY, 0.0, ARR, symtab);
@@ -107,8 +108,8 @@
 	cp->sval = (char *) ARGVtab;
 	for (i = 0; i < ac; i++) {
 		sprint(temp, "%d", i);
-		if (is_number(*av))
-			setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
+		if (to_number(*av, &f, nil))
+			setsymtab(temp, *av, f, STR|NUM, ARGVtab);
 		else
 			setsymtab(temp, *av, 0.0, STR, ARGVtab);
 		av++;
@@ -120,6 +121,7 @@
 	int	fd, i, n;
 	char	*k, *v;
 	Dir	*buf;
+	Awkfloat f;
 
 	ENVtab = makesymtab(NSYMTAB);
 	if ((fd = open("/env", OREAD)) < 0)
@@ -133,8 +135,8 @@
 				continue;
 			if ((v = getenv(k)) == nil)
 				continue;
-			if (is_number(v))
-				setsymtab(k, v, atof(v), STR|NUM, ENVtab);
+			if (to_number(v, &f, nil))
+				setsymtab(k, v, f, STR|NUM, ENVtab);
 			else
 				setsymtab(k, v, 0.0, STR, ENVtab);
 			free(v);
@@ -364,9 +366,11 @@
 	else if (isrec(vp) && donerec == 0)
 		recbld();
 	if (!isnum(vp)) {	/* not a number */
-		vp->fval = atof(vp->sval);	/* best guess */
-		if (is_number(vp->sval) && !(vp->tval&CON))
-			vp->tval |= NUM;	/* make NUM only sparingly */
+		vp->fval = 0;
+		if (to_number(vp->sval, &vp->fval, nil)) {
+			if (!(vp->tval&CON))
+				vp->tval |= NUM;	/* make NUM only sparingly */
+		}
 	}
 	   dprint( ("getfval %p: %s = %g, t=%o\n", vp, vp->nval, vp->fval, vp->tval) );
 	return(vp->fval);
--