ref: 87ec4550f392180df2ccd3fc48b3ca91f3bcd09c
parent: b94dfaaaab3574dff6ada00297eeb189b43fd672
author: Jacob Moody <moody@posixcafe.org>
date: Sun Jan 25 19:18:08 EST 2026
libc: simplify utf* functions; utfutf() is no longer needed.
--- a/sys/man/2/rune
+++ b/sys/man/2/rune
@@ -38,9 +38,6 @@
char* utfrrune(char *s, long c)
.PP
.B
-char* utfutf(char *s1, char *s2)
-.PP
-.B
int utfncmp(char *s1, char *s2, long n)
.SH DESCRIPTION
These routines convert to and from a
@@ -170,24 +167,6 @@
be part of the string
.IR s .
.PP
-.I Utfutf
-returns a pointer to the first occurrence of
-the
-.SM UTF
-string
-.I s2
-as a
-.SM UTF
-substring of
-.IR s1 ,
-or 0 if there is none.
-If
-.I s2
-is the null string,
-.I utfutf
-returns
-.IR s1 .
-.PP
.I Utfncmp
compares at most the first
.I n
@@ -218,8 +197,6 @@
.br
.B /sys/src/libc/port/utfnlen.c
.br
-.B /sys/src/libc/port/utfutf.c
-.br
.B /sys/src/libc/port/utfncmp.c
.SH SEE ALSO
.IR utf (6),
@@ -234,3 +211,8 @@
(0xFFFD)
that resulted from invalid encoded input can yield
a longer UTF sequence on the output.
+.SH HISTORY
+.I Utfutf
+is deprecated.
+Historically, utfutf was written to process a UTF format that predated UTF-8.
+Given the self-synchronizing property of UTF-8, utfutf behaves the same as strstr provided both arguments point to valid UTF strings.
--- a/sys/src/libc/port/utfrrune.c
+++ b/sys/src/libc/port/utfrrune.c
@@ -4,27 +4,20 @@
char*
utfrrune(char *s, long c)
{- long c1;
Rune r;
- char *s1;
+ int n;
+ char *p;
+ char buf[UTFmax + 1] = {0};if(c < Runesync) /* not part of utf sequence */
return strrchr(s, c);
- s1 = 0;
- for(;;) {- c1 = *(uchar*)s;
- if(c1 < Runeself) { /* one byte rune */- if(c1 == 0)
- return s1;
- if(c1 == c)
- s1 = s;
- s++;
- continue;
- }
- c1 = chartorune(&r, s);
- if(r == c)
- s1 = s;
- s += c1;
+ r = c;
+ n = runetochar(buf, &r);
+ p = nil;
+ while(s = strstr(s, buf)){+ p = s;
+ s += n;
}
+ return p;
}
--- a/sys/src/libc/port/utfrune.c
+++ b/sys/src/libc/port/utfrune.c
@@ -4,26 +4,13 @@
char*
utfrune(char *s, long c)
{- long c1;
Rune r;
- int n;
+ char buf[UTFmax + 1] = {0};if(c < Runesync) /* not part of utf sequence */
return strchr(s, c);
- for(;;) {- c1 = *(uchar*)s;
- if(c1 < Runeself) { /* one byte rune */- if(c1 == 0)
- return 0;
- if(c1 == c)
- return s;
- s++;
- continue;
- }
- n = chartorune(&r, s);
- if(r == c)
- return s;
- s += n;
- }
+ r = c;
+ runetochar(buf, &r);
+ return strstr(s, buf);
}
--- a/sys/src/libc/port/utfutf.c
+++ b/sys/src/libc/port/utfutf.c
@@ -1,7 +1,6 @@
#include <u.h>
#include <libc.h>
-
/*
* Return pointer to first occurrence of s2 in s1,
* 0 if none
@@ -9,18 +8,5 @@
char*
utfutf(char *s1, char *s2)
{- char *p;
- long f, n1, n2;
- Rune r;
-
- n1 = chartorune(&r, s2);
- f = r;
- if(f <= Runesync) /* represents self */
- return strstr(s1, s2);
-
- n2 = strlen(s2);
- for(p=s1; p=utfrune(p, f); p+=n1)
- if(strncmp(p, s2, n2) == 0)
- return p;
- return 0;
+ return strstr(s1, s2);
}
--
⑨