ref: ab929ed38193804a483090b92a36b41c6af6fa39
dir: /sys/src/libc/port/runenorm.c/
#include <u.h>
#include <libc.h>
#include "runenormdata"
//Unicode Standard: Section 3.12 Conjoining Jamo Behavior
enum {
SBase = 0xAC00,
LBase = 0x1100,
VBase = 0x1161,
TBase = 0x11A7,
LCount = 19,
VCount = 21,
TCount = 28,
NCount = VCount * TCount,
SCount = LCount * NCount,
LLast = LBase + LCount - 1,
SLast = SBase + SCount - 1,
VLast = VBase + VCount - 1,
TLast = TBase + TCount - 1,
};
static void
_runedecomp(Rune dst[2], Rune c)
{
uint x;
if(c >= SBase && c <= SLast){
c -= SBase;
x = c % TCount;
if(x){
dst[0] = SBase + ((c / TCount) * TCount);
dst[1] = TBase + x;
return;
}
dst[0] = LBase + (c / NCount);
dst[1] = VBase + ((c % NCount) / TCount);
return;
}
x = decomplkup(c);
if((x & 0xFFFF) != 0){
dst[0] = x>>16;
dst[1] = x & 0xFFFF;
return;
}
x >>= 16;
if(x >= 0xEEEE && x <0xF8FF){
memmove(dst, _decompexceptions[x - 0xEEEE], sizeof(Rune)*2);
return;
}
dst[0] = x;
dst[1] = 0;
}
static Rune
_runerecomp(Rune r[2])
{
uint x, y, *p, next;
if(r[0] >= LBase && r[0] <= LLast){
if(r[1] < VBase || r[1] > VLast)
return 0;
x = (r[0] - LBase) * NCount + (r[1] - VBase) * TCount;
return SBase + x;
}
if(r[0] >= SBase && r[0] <= SLast && (r[0] - SBase) % TCount == 0){
if(r[1] > TBase && r[1] <= TLast)
return r[0] + (r[1] - TBase);
return 0;
}
if(r[0] > 0xFFFF || r[1] > 0xFFFF){
for(x = 0; x < nelem(_recompexceptions); x++)
if(r[0] == _recompexceptions[x][1] && r[1] == _recompexceptions[x][2])
return _recompexceptions[x][0];
return 0;
}
y = x = r[0]<<16 | r[1];
x ^= x >> 16;
x *= 0x21f0aaad;
x ^= x >> 15;
x *= 0xd35a2d97;
x ^= x >> 15;
p = _recompdata + (x%512)*2;
while(p[0] != y){
next = p[1]>>16;
if(!next)
return 0;
p = _recompcoll + (next-1)*2;
}
return p[1] & 0xFFFF;
}
static void
runecccsort(Rune *a, int len)
{
Rune r;
int i;
int fail;
do {
fail = 0;
for(i = 0; i < len - 1; i++){
if(ccclkup(a[i]) > ccclkup(a[i+1]) > 0){
r = a[i];
a[i] = a[i+1];
a[i + 1] = r;
fail = 1;
}
}
} while(fail);
}
char*
fullutfnorm(char *s, int n)
{
Rune r, peek;
char *p, *p2;
p = s;
if(fullrune(p, n) == 0)
return s;
p += chartorune(&r, p);
n -= (p - s);
if((r >= LBase && r <= LLast) || (r >= SBase && r <= SLast)){
do {
if(fullrune(p, n) == 0)
return s;
p2 = p + chartorune(&peek, p);
n -= (p2 - p);
p = p2;
} while(n > 0 && (peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast));
if(n <= 0)
return s;
return p;
}
do {
if(fullrune(p, n) == 0)
return s;
p2 = p + chartorune(&peek, p);
n -= (p2 - p);
p = p2;
if(ccclkup(peek) == 0)
return p;
} while(n > 0);
return s;
}
Rune*
fullrunenorm(Rune *r, int n)
{
Rune *e, *p;
p = r;
e = p + n;
if((*p >= LBase && *p <= LLast) || (*p >= SBase && *p <= SLast)){
p++;
while(p < e && (*p >= VBase && *p <= VLast) || (*p > TBase && *p <= TLast))
p++;
if(p >= e)
return r;
return p;
}
for(; p < e && p + 1 < e; p++)
if(ccclkup(p[1]) == 0)
return p + 1;
return r;
}
static int
runenorm(Rune *dst, Rune *src, char *sdst, char *ssrc, int max, int compose)
{
Rune c, r[2], _stack[32];
Rune *p, *stack, *sp, *tp;
char *strp, *strstop;
Rune *rp, *rrp;
Rune *stop;
Rune peek;
int w, w2, size;
int mode;
if(src){
mode = 1;
p = src;
stop = dst + (max - 1);
strp = "";
strstop = nil;
} else {
mode = 0;
p = L"";
stop = nil;
strp = ssrc;
strstop = sdst + (max - 1);
}
stack = _stack + nelem(_stack)/2;
size = 0;
w = w2 = 0;
while(*strp || *p){
if(mode)
c = *p;
else
w = chartorune(&c, strp);
sp = stack - 1;
tp = stack;
_runedecomp(r, c);
while(r[0] != 0){
c = r[0];
if(r[1] != 0){
*sp-- = r[1];
if(sp == _stack)
break;
}
_runedecomp(r, c);
}
*sp = c;
if(mode)
peek = p[1];
else
w2 = chartorune(&peek, strp+w);
if((*sp >= LBase && *sp <= LLast) || (*sp >= SBase && *sp <= SLast)){
while(peek != 0 && (peek >= VBase && peek <= VLast) || (peek > TBase && peek <= TLast)){
*tp++ = peek;
if(mode){
p++;
peek = p[1];
} else {
strp += w;
w = w2;
w2 = chartorune(&peek, strp+w);
}
if(tp == _stack + nelem(_stack))
break;
}
}
while(peek != 0 && ccclkup(peek) != 0){
_runedecomp(r, peek);
if(r[1] != 0){
if(tp+1 >= _stack + nelem(_stack))
break;
*tp++ = r[0];
*tp++ = r[1];
} else if(r[0] != 0)
*tp++ = r[0];
else
*tp++ = peek;
if(mode){
p++;
peek = p[1];
} else {
strp += w;
w = w2;
w2 = chartorune(&peek, strp+w);
}
if(tp == _stack + nelem(_stack))
break;
}
runecccsort(sp, tp - sp);
if(compose && ccclkup(*sp) == 0){
for(rp = sp + 1; rp < tp; rp++){
r[0] = *sp;
r[1] = *rp;
c = _runerecomp(r);
if(c != 0){
*sp = c;
for(rrp = rp; rrp > sp; rrp--)
*rrp = rrp[-1];
sp++;
} else while(rp + 1 < tp && ccclkup(*rp) == ccclkup(*(rp+1)))
rp++;
}
}
for(; sp < tp; sp++){
if(mode){
if(dst < stop)
*dst++ = *sp;
size++;
} else {
w2 = runelen(*sp);
if(sdst+w2 < strstop)
sdst += runetochar(sdst, sp);
size += w2;
}
}
if(mode)
p++;
else
strp += w;
}
if(mode)
*dst = 0;
else
*sdst = 0;
return size;
}
int
runecomp(Rune *dst, Rune *src, int max)
{
return runenorm(dst, src, nil, nil, max, 1);
}
int
runedecomp(Rune *dst, Rune *src, int max)
{
return runenorm(dst, src, nil, nil, max, 0);
}
int
utfcomp(char *dst, char *src, int max)
{
return runenorm(nil, nil, dst, src, max, 1);
}
int
utfdecomp(char *dst, char *src, int max)
{
return runenorm(nil, nil, dst, src, max, 0);
}