ref: 2c32f151555ed98d2509f12c90c8acdc2ee2016e
parent: 937b2afc145b074e18c73d3615780c449813046d
author: qwx <qwx@sciops.net>
date: Sat Sep 27 04:01:16 EDT 2025
awk: make tolower and toupper builtins unicode-aware
--- a/sys/src/cmd/awk/run.c
+++ b/sys/src/cmd/awk/run.c
@@ -1538,13 +1538,14 @@
{
Cell *x, *y;
Awkfloat u, tmp;
- int t;
+ int n, t;
Rune wc;
- char *p, *buf;
+ char *p, *s, *buf, *rbuf;
char mbc[50];
Node *nextarg;
Biobuf *fp;
void flush_all(void);
+ Rune (*conv)(Rune);
t = ptoi(a[0]);
x = execute(a[1]);
@@ -1600,21 +1601,22 @@
break;
case FTOUPPER:
case FTOLOWER:
- buf = tostring(getsval(x));
- if (t == FTOUPPER) {
- for (p = buf; *p; p++)
- if (islower(*p))
- *p = toupper(*p);
- } else {
- for (p = buf; *p; p++)
- if (isupper(*p))
- *p = tolower(*p);
+ buf = getsval(x);
+ n = utflen(buf) * UTFmax + 1; /* just in case size differs... */
+ if ((rbuf = malloc(n)) == nil)
+ FATAL("out of space in %s", t == FTOUPPER ? "toupper" : "tolower");
+ conv = t == FTOUPPER ? toupperrune : tolowerrune;
+ for (p = rbuf, s = buf; *s != '\0';) {
+ s += chartorune(&wc, s);
+ wc = conv(wc);
+ p += runetochar(p, &wc);
}
+ *p = 0;
if (istemp(x))
tfree(x);
x = gettemp();
- setsval(x, buf);
- free(buf);
+ setsval(x, rbuf);
+ free(rbuf);
return x;
case FFLUSH:
if (isrec(x) || strlen(getsval(x)) == 0) {
--
⑨