/*
 * git: 9front
 * ref: 6504d5040a0a7414be760801c349c7d0bc665cc9
 * dir: /sys/src/libc/test/runenorm.c
 */
#include <u.h>
#include <libc.h>
#include <bio.h>

/*
 * Failures in the tests below are reported through print().
 * This define maps print onto sysfatal, so the first reported
 * failure terminates the program.  Bailing on the first failure
 * makes it annoying to gauge how broken things are; presumably
 * the define is meant to be removed when a full list of failures
 * is wanted.
 */
#define print sysfatal

/*
 * Convert the hexadecimal string s to a code point.
 *
 * Calls sysfatal when s contains no hex digits at all, or when
 * anything is left over after the number, so that malformed test
 * data is reported instead of being silently truncated.
 */
static int
estrtoul(char *s)
{
	char *epr;
	Rune code;

	code = strtoul(s, &epr, 16);
	/* epr == s: nothing parsed; *epr != 0: trailing junk after the digits */
	if(s == epr || *epr != '\0')
		sysfatal("bad code point hex string");
	return code;
}

#pragma	   varargck    type  "V"   Rune*
/*
 * Format routine installed for the 'V' verb.
 * Takes a nul-terminated Rune* argument and prints it as text via %S,
 * followed by a parenthesised list of each rune's value in hex.
 * Returns the sum of the fmtprint results.
 */
static int
vrunefmt(Fmt *f)
{
	Rune *r;
	int total;

	r = va_arg(f->args, Rune*);
	total = fmtprint(f, "%S(", r);
	while(*r != 0){
		total += fmtprint(f, "%X ", *r);
		r++;
	}
	total += fmtprint(f, ")");
	return total;
}

/*
 * One normalization test vector.  All three members are
 * nul-terminated rune strings.
 */
typedef struct {
	Rune src[64];	/* input handed to the normalizer */
	Rune nfc[70];	/* expected output of the composing pass */
	Rune nfd[70];	/* expected output of the decomposing pass */
} Line;

/*
 * Read cursor over a rune buffer; this is the context object
 * that getrune receives.
 */
typedef struct {
	Rune *s, *p;	/* s: start of the buffer, p: next rune to hand out */
	int n;		/* number of runes available starting at s */
} Ctx;

static long
getrune(void *ctx)
{
	Ctx *c;

	c = ctx;
	if(c->p >= c->s + c->n)
		return -1;
	return *c->p++;
}

/*
 * Run one test vector through the rune-level normalizer.
 *
 * Two Norm states read from the same rune source: rd (norminit
 * with 0) whose output is checked against l->nfd, and rc (norminit
 * with 1) whose output is checked against l->nfc.  Each is exercised
 * twice: once pulling the whole result in a single normpull call,
 * and once pulling a single rune per call.
 *
 * Mismatches are reported through print(); a missing terminator or
 * an output that would overrun the local buffers is fatal.
 */
static void
testline(Line *l)
{
	Norm rd, rc;
	static Ctx ctx;
	Rune out1[70];
	Rune out2[70];
	int i, n;

	norminit(&rd, 0, &ctx, getrune);
	norminit(&rc, 1, &ctx, getrune);
	ctx.s = ctx.p = l->src;
	/* +1 so the terminating nul is fed through the normalizer as well */
	ctx.n = runestrlen(l->src) + 1;

	n = normpull(&rd, out1, nelem(out1), 1);
	/* n < 1 keeps out1[n-1] in bounds when nothing was produced */
	if(n < 1 || out1[n-1] != '\0')
		sysfatal("norm ate null");
	if(runestrcmp(l->nfd, out1) != 0)
		print("(1) %V %V %V\n", l->src, l->nfd, out1);

	/* rewind the source before every pass */
	ctx.p = ctx.s;
	n = normpull(&rc, out2, nelem(out2), 1);
	if(n < 1 || out2[n-1] != '\0')
		sysfatal("norm ate null");
	if(runestrcmp(l->nfc, out2) != 0)
		print("(2) %V %V %V\n", l->src, l->nfc, out2);

	/* same input again, but asking for one rune per call */
	ctx.p = ctx.s;
	i = 0;
	do {
		/* never let a runaway normalizer write past out1 */
		if(i >= nelem(out1))
			sysfatal("rune-by-rune nfd overran output buffer");
		n = normpull(&rd, out1 + i, 1, 1);
		i += n;
	} while(n > 0);
	if(runestrcmp(l->nfd, out1) != 0)
		print("rune-by-rune nfd fail %V %V\n", l->nfd, out1);

	ctx.p = ctx.s;
	i = 0;
	do {
		if(i >= nelem(out2))
			sysfatal("rune-by-rune nfc overran output buffer");
		n = normpull(&rc, out2 + i, 1, 1);
		i += n;
	} while(n > 0);
	if(runestrcmp(l->nfc, out2) != 0)
		print("rune-by-rune nfc fail %V %V\n", l->nfc, out2);
}

/*
 * Run one test vector through the UTF-8 string interface.
 *
 * The three rune strings of l are converted to UTF-8 with %S, the
 * source is passed to utfcomp and utfdecomp (length includes the
 * terminating nul), and the results are compared against the
 * converted nfc and nfd strings.  Mismatches go through print().
 */
static void
testutfline(Line *l)
{
	char src[128], wantnfd[128], wantnfc[128];
	char gotnfc[128], gotnfd[128];

	snprint(src, sizeof src, "%S", l->src);
	snprint(wantnfd, sizeof wantnfd, "%S", l->nfd);
	snprint(wantnfc, sizeof wantnfc, "%S", l->nfc);

	utfcomp(gotnfc, sizeof gotnfc, src, strlen(src)+1);
	utfdecomp(gotnfd, sizeof gotnfd, src, strlen(src)+1);

	if(strcmp(gotnfc, wantnfc))
		print("utfline fail nfc: %s %s %s\n", src, wantnfc, gotnfc);

	if(strcmp(gotnfd, wantnfd))
		print("utfline fail nfd: %s %s %s\n", src, wantnfd, gotnfd);
}

/*
 * Hand-built vectors for long runs of combining runes.
 * Each case fills in a Line by hand (source plus the expected
 * nfd and nfc results) and passes it to testline.
 *
 * The Line is reused between cases and memcpy always copies the
 * whole src array, so runes left over from earlier cases may sit
 * past the terminator; every case writes its own terminators.
 */
static void
testedge(void)
{
	Line l;
	int i;

	/*
	 * Test that we correctly break up long attacher
	 * runs with U+034F.
	 *
	 * Case 1: 'U' followed by 62 copies of U+0308.
	 */
	l.src[0] = L'U';
	for(i = 1; i < nelem(l.src)-1; i++)
		l.src[i] = 0x308;
	l.src[nelem(l.src)-1] = 0;

	/* expected nfd: U+034F at index 31 and 62, remaining U+0308 pushed back */
	memcpy(l.nfd, l.src, sizeof(l.src));
	l.nfd[31] = 0x34F;
	l.nfd[62] = 0x34F;
	l.nfd[63] = 0x308;
	l.nfd[64] = 0x308;
	l.nfd[65] = 0;

	/*
	 * expected nfc: 0xDC stands in for 'U' plus the first U+0308,
	 * so the U+034F positions are one lower than in nfd
	 */
	memcpy(l.nfc, l.src, sizeof l.src);
	l.nfc[0] = 0xDC;
	l.nfc[30] = 0x34F;
	l.nfc[61] = 0x34F;
	l.nfc[62] = 0x308;
	l.nfc[63] = 0x308;
	l.nfc[64] = 0;

	testline(&l);

	/*
	 * Case 2: 63 copies of U+0308 with no leading base rune.
	 * nfd expects U+034F at index 30 and 61; nfc is expected
	 * to be identical to nfd.
	 */
	for(i = 0; i < nelem(l.src)-1; i++)
		l.src[i] = 0x308;
	l.src[nelem(l.src)-1] = 0;
	memcpy(l.nfd, l.src, sizeof l.src);
	l.nfd[30] = 0x034F;
	l.nfd[61] = 0x034F;
	l.nfd[62] = 0x308;
	l.nfd[63] = 0x308;
	l.nfd[64] = 0x308;
	l.nfd[65] = 0;
	memcpy(l.nfc, l.nfd, sizeof l.nfd);

	testline(&l);

	/*
	 * Case 3: 'U', 29 copies of U+0300, then U+0344.
	 * Both expected forms end in 0x308 0x301 where the source has
	 * U+0344, with U+034F placed directly in front of that pair.
	 * nfc additionally expects 0xD9 in place of 'U' plus the first
	 * U+0300, which moves everything after it down by one.
	 */
	l.src[0] = L'U';
	for(i = 1; i < 30; i++)
		l.src[i] = 0x300;
	l.src[i++] = 0x0344;
	l.src[i] = 0;
	memcpy(l.nfc, l.src, sizeof l.src);
	memcpy(l.nfd, l.src, sizeof l.src);
	l.nfd[30] = 0x034F;
	l.nfd[31] = 0x308;
	l.nfd[32] = 0x301;
	l.nfd[33] = 0;
	l.nfc[0] = 0xD9;
	l.nfc[29] = 0x034F;
	l.nfc[30] = 0x308;
	l.nfc[31] = 0x301;
	l.nfc[32] = 0;

	testline(&l);

	/*
	 * Case 4: 59 copies of U+0300 followed by U+0344, no base rune.
	 * nfd expects U+034F at index 30, the rest of the U+0300 run,
	 * then a second U+034F at index 60 in front of 0x308 0x301.
	 * nfc is expected to be identical to nfd.
	 */
	for(i = 0; i < 59; i++)
		l.src[i] = 0x300;
	l.src[i++] = 0x0344;
	l.src[i] = 0;
	memcpy(l.nfd, l.src, sizeof l.src);
	l.nfd[30] = 0x34F;
	l.nfd[59] = 0x300;
	l.nfd[60] = 0x34F;
	l.nfd[61] = 0x308;
	l.nfd[62] = 0x301;
	l.nfd[63] = 0x0;
	memcpy(l.nfc, l.nfd, sizeof l.nfd);

	testline(&l);

	/*
	 * Case 5: 0x16D63 followed by 32 copies of 0x16D67.
	 * nfd is expected to equal the source unchanged; nfc is
	 * expected to be 0x16D6A followed by 15 copies of 0x16D68.
	 */
	l.src[0] = 0x16D63;
	for(i = 1; i < 33; i++)
		l.src[i] = 0x16D67;
	l.src[i] = 0;
	memcpy(l.nfd, l.src, sizeof l.src);

	l.nfc[0] = 0x16D6A;
	for(i = 1; i < 1+15; i++)
		l.nfc[i] = 0x16D68;
	l.nfc[i] = 0;

	testline(&l);
}

/*
 * Check that normpull holds output back when not told to flush.
 *
 * A composing Norm is first fed the single rune 'u' with flush = 0;
 * nothing may come out, since a later rune could still combine with
 * it.  The source is then swapped for U+0308 followed by 'a' and
 * pulled with flush = 1; the output must start with U+00FC followed
 * by 'a'.
 *
 * The rune values are spelled as hex code points rather than as
 * character literals so the test does not depend on how this source
 * file is encoded or normalized.
 */
static void
testeof(void)
{
	Norm n;
	Ctx ctx;
	Rune buf[16], out[16], *p;

	/* start from a known state so short output cannot read garbage */
	memset(out, 0, sizeof out);

	buf[0] = L'u';
	ctx.s = ctx.p = buf;
	ctx.n = 1;

	norminit(&n, 1, &ctx, getrune);
	for(p = out; ctx.p < ctx.s + ctx.n;){
		/* capacity is counted in runes, hence nelem and not sizeof */
		p += normpull(&n, p, nelem(out) - (p - out), 0);
	}
	if(p != out)
		print("norm flushed when we told it not to");
	buf[0] = 0x308;
	buf[1] = L'a';
	ctx.s = ctx.p = buf;
	ctx.n = 2;
	normpull(&n, p, nelem(out) - (p - out), 1);
	if(out[0] != 0xFC || out[1] != 'a')
		print("eof test fail: %X\n", out[0]);
}

void
main(int, char)
{
	char *fields[10];
	char *runes[32];
	char *p;
	int n;
	int i;
	Biobuf *b;

	fmtinstall('V', vrunefmt);
	b = Bopen("/lib/ucd/NormalizationTest.txt", OREAD);
	if(b == nil)
		sysfatal("could not load composition exclusions: %r");

	Line test;
	while((p = Brdline(b, '\n')) != nil){
		p[Blinelen(b)-1] = 0;
		if(p[0] == 0 || p[0] == '#' || p[0] == '@')
			continue;
		getfields(p, fields, 6 + 1, 0, ";");
		n = getfields(fields[0], runes, nelem(runes), 0, " ");
		for(i = 0; i < n; i++)
			test.src[i] = estrtoul(runes[i]);
		test.src[i] = 0;

		n = getfields(fields[1], runes, nelem(runes), 0, " ");
		for(i = 0; i < n; i++)
			test.nfc[i] = estrtoul(runes[i]);
		test.nfc[i] = 0;

		n = getfields(fields[2], runes, nelem(runes), 0, " ");
		for(i = 0; i < n; i++)
			test.nfd[i] = estrtoul(runes[i]);
		test.nfd[i] = 0;

		testline(&test);
		testutfline(&test);
	}
	testedge();
	testeof();
	exits(nil);
}