git: 9front

Download patch

ref: 87ec4550f392180df2ccd3fc48b3ca91f3bcd09c
parent: b94dfaaaab3574dff6ada00297eeb189b43fd672
author: Jacob Moody <moody@posixcafe.org>
date: Sun Jan 25 19:18:08 EST 2026

libc: simplify utf* functions

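utfrune() and utfrrune() now encode the rune with runetochar() and
search for it with strstr().
utfutf() is no longer needed; it is kept only as a wrapper around
strstr(), and its entry in rune(2) is replaced by a HISTORY note.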

--- a/sys/man/2/rune
+++ b/sys/man/2/rune
@@ -38,9 +38,6 @@
 char*	utfrrune(char *s, long c)
 .PP
 .B
-char*	utfutf(char *s1, char *s2)
-.PP
-.B
 int	utfncmp(char *s1, char *s2, long n)
 .SH DESCRIPTION
 These routines convert to and from a
@@ -170,24 +167,6 @@
 be part of the string
 .IR s .
 .PP
-.I Utfutf
-returns a pointer to the first occurrence of
-the
-.SM UTF
-string
-.I s2
-as a
-.SM UTF
-substring of
-.IR s1 ,
-or 0 if there is none.
-If
-.I s2
-is the null string,
-.I utfutf
-returns
-.IR s1 .
-.PP
 .I Utfncmp
 compares at most the first
 .I n
@@ -218,8 +197,6 @@
 .br
 .B /sys/src/libc/port/utfnlen.c
 .br
-.B /sys/src/libc/port/utfutf.c
-.br
 .B /sys/src/libc/port/utfncmp.c
 .SH SEE ALSO
 .IR utf (6),
@@ -234,3 +211,8 @@
 (0xFFFD)
 that resulted from invalid encoded input can yield
 a longer UTF sequence on the output.
+.SH HISTORY
+.I Utfutf
+is deprecated.
+Historically, utfutf was written to process a UTF format that predated UTF-8.
+Because UTF-8 is self-synchronizing, utfutf is equivalent to strstr when both arguments point to valid UTF strings.
--- a/sys/src/libc/port/utfrrune.c
+++ b/sys/src/libc/port/utfrrune.c
@@ -4,27 +4,20 @@
 char*
 utfrrune(char *s, long c)
 {
-	long c1;
 	Rune r;
-	char *s1;
+	int n;
+	char *p;
+	char buf[UTFmax + 1] = {0};
 
 	if(c < Runesync)		/* not part of utf sequence */
 		return strrchr(s, c);
 
-	s1 = 0;
-	for(;;) {
-		c1 = *(uchar*)s;
-		if(c1 < Runeself) {	/* one byte rune */
-			if(c1 == 0)
-				return s1;
-			if(c1 == c)
-				s1 = s;
-			s++;
-			continue;
-		}
-		c1 = chartorune(&r, s);
-		if(r == c)
-			s1 = s;
-		s += c1;
+	r = c;
+	n = runetochar(buf, &r);
+	p = nil;
+	while(s = strstr(s, buf)){
+		p = s;
+		s += n;
 	}
+	return p;
 }
--- a/sys/src/libc/port/utfrune.c
+++ b/sys/src/libc/port/utfrune.c
@@ -4,26 +4,13 @@
 char*
 utfrune(char *s, long c)
 {
-	long c1;
 	Rune r;
-	int n;
+	char buf[UTFmax + 1] = {0};
 
 	if(c < Runesync)		/* not part of utf sequence */
 		return strchr(s, c);
 
-	for(;;) {
-		c1 = *(uchar*)s;
-		if(c1 < Runeself) {	/* one byte rune */
-			if(c1 == 0)
-				return 0;
-			if(c1 == c)
-				return s;
-			s++;
-			continue;
-		}
-		n = chartorune(&r, s);
-		if(r == c)
-			return s;
-		s += n;
-	}
+	r = c;
+	runetochar(buf, &r);
+	return strstr(s, buf);
 }
--- a/sys/src/libc/port/utfutf.c
+++ b/sys/src/libc/port/utfutf.c
@@ -1,7 +1,6 @@
 #include <u.h>
 #include <libc.h>
 
-
 /*
  * Return pointer to first occurrence of s2 in s1,
  * 0 if none
@@ -9,18 +8,5 @@
 char*
 utfutf(char *s1, char *s2)
 {
-	char *p;
-	long f, n1, n2;
-	Rune r;
-
-	n1 = chartorune(&r, s2);
-	f = r;
-	if(f <= Runesync)		/* represents self */
-		return strstr(s1, s2);
-
-	n2 = strlen(s2);
-	for(p=s1; p=utfrune(p, f); p+=n1)
-		if(strncmp(p, s2, n2) == 0)
-			return p;
-	return 0;
+	return strstr(s1, s2);
 }
--