git: 9front

Download patch

ref: 2c32f151555ed98d2509f12c90c8acdc2ee2016e
parent: 937b2afc145b074e18c73d3615780c449813046d
author: qwx <qwx@sciops.net>
date: Sat Sep 27 04:01:16 EDT 2025

awk: make tolower and toupper builtins Unicode-aware

--- a/sys/src/cmd/awk/run.c
+++ b/sys/src/cmd/awk/run.c
@@ -1538,13 +1538,14 @@
 {
 	Cell *x, *y;
 	Awkfloat u, tmp;
-	int t;
+	int n, t;
 	Rune wc;
-	char *p, *buf;
+	char *p, *s, *buf, *rbuf;
 	char mbc[50];
 	Node *nextarg;
 	Biobuf *fp;
 	void flush_all(void);
+	Rune (*conv)(Rune);
 
 	t = ptoi(a[0]);
 	x = execute(a[1]);
@@ -1600,21 +1601,22 @@
 		break;
 	case FTOUPPER:
 	case FTOLOWER:
-		buf = tostring(getsval(x));
-		if (t == FTOUPPER) {
-			for (p = buf; *p; p++)
-				if (islower(*p))
-					*p = toupper(*p);
-		} else {
-			for (p = buf; *p; p++)
-				if (isupper(*p))
-					*p = tolower(*p);
+		buf = getsval(x);
+		n = utflen(buf) * UTFmax + 1;	/* just in case size differs... */
+		if ((rbuf = malloc(n)) == nil)
+			FATAL("out of space in %s", t == FTOUPPER ? "toupper" : "tolower");
+		conv = t == FTOUPPER ? toupperrune : tolowerrune;
+		for (p = rbuf, s = buf; *s != '\0';) {
+			s += chartorune(&wc, s);
+			wc = conv(wc);
+			p += runetochar(p, &wc);
 		}
+		*p = 0;
 		if (istemp(x))
 			tfree(x);
 		x = gettemp();
-		setsval(x, buf);
-		free(buf);
+		setsval(x, rbuf);
+		free(rbuf);
 		return x;
 	case FFLUSH:
 		if (isrec(x) || strlen(getsval(x)) == 0) {
--