ref: e6749bff14ee7320b78ee5d5fc1cde31267fefee
dir: /sys/src/cmd/grep/comp.c/
#include "grep.h" /* * incremental compiler. * add the branch c to the * state s. */ void increment(State *s, int c) { int i; State *t, **tt; Re *re1, *re2; nfollow = 0; gen++; matched = 0; for(i=0; i<s->count; i++) fol1(s->re[i], c); qsort(follow, nfollow, sizeof(*follow), fcmp); for(tt=&state0; t = *tt;) { if(t->count > nfollow) { tt = &t->linkleft; goto cont; } if(t->count < nfollow) { tt = &t->linkright; goto cont; } for(i=0; i<nfollow; i++) { re1 = t->re[i]; re2 = follow[i]; if(re1 > re2) { tt = &t->linkleft; goto cont; } if(re1 < re2) { tt = &t->linkright; goto cont; } } if(!!matched && !t->match) { tt = &t->linkleft; goto cont; } if(!matched && !!t->match) { tt = &t->linkright; goto cont; } s->next[c] = t; return; cont:; } t = sal(nfollow); *tt = t; for(i=0; i<nfollow; i++) { re1 = follow[i]; t->re[i] = re1; } s->next[c] = t; t->match = matched; } int fcmp(void *va, void *vb) { Re **aa, **bb; Re *a, *b; aa = va; bb = vb; a = *aa; b = *bb; if(a > b) return 1; if(a < b) return -1; return 0; } void fol1(Re *r, int c) { Re *r1; loop: if(r->gen == gen) return; if(nfollow >= maxfollow) error("nfollow"); r->gen = gen; switch(r->type) { default: error("fol1"); case Tcase: if(c >= 0 && c < 256) if(r1 = r->cases[c]) follow[nfollow++] = r1; if(r = r->next) goto loop; break; case Talt: case Tor: fol1(r->alt, c); r = r->next; goto loop; case Tbegin: if(c == '\n' || c == Cbegin) follow[nfollow++] = r->next; break; case Tend: if(c == '\n') { if(r->next == 0) { matched = 1; break; } r = r->next; goto loop; } break; case Tclass: if(c >= r->lo && c <= r->hi) follow[nfollow++] = r->next; break; } } int tab1[] = { 0x007f, 0x07ff, 0xffff, }; int tab2[] = { 0x003f, 0x0fff, 0x3ffff, }; Re2 rclass(Rune p0, Rune p1) { char xc0[6], xc1[6]; int i, n, m; Re2 x; if(p0 > p1) return re2char(0xff, 0xff); // no match /* * bust range into same length * character sequences */ for(i=0; i<nelem(tab1); i++) { m = tab1[i]; if(p0 <= m && p1 > m) return re2or(rclass(p0, m), rclass(m+1, p1)); } /* * bust range into part of a single page * or into full pages */ for(i=0; i<nelem(tab2); i++) { m = tab2[i]; if((p0 & ~m) != (p1 & ~m)) { if((p0 & m) != 0) return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1)); if((p1 & m) != m) return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1)); } } n = runetochar(xc0, &p0); i = runetochar(xc1, &p1); if(i != n) error("length"); x = re2char(xc0[0], xc1[0]); for(i=1; i<n; i++) x = re2cat(x, re2char(xc0[i], xc1[i])); return x; } int pcmp(void *va, void *vb) { int n; Rune *a, *b; a = va; b = vb; n = a[0] - b[0]; if(n) return n; return a[1] - b[1]; } /* * convert character chass into * run-pair ranges of matches. * this is 10646/utf specific and * needs to be changed for some * other input character set. * this is the key to a fast * regular search of characters * by looking at sequential bytes. */ Re2 re2class(char *s) { Rune pairs[200+2], *p, *q, ov; int nc; Re2 x; nc = 0; if(*s == '^') { nc = 1; s++; } p = pairs; s += chartorune(p, s); for(;;) { if(*p == '\\') s += chartorune(p, s); if(*p == 0) break; p[1] = *p; p += 2; if(p >= pairs + nelem(pairs) - 2) error("class too big"); s += chartorune(p, s); if(*p != '-') continue; s += chartorune(p, s); if(*p == '\\') s += chartorune(p, s); if(*p == 0) break; p[-1] = *p; s += chartorune(p, s); } *p = 0; qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp); q = pairs; for(p=pairs+2; *p; p+=2) { if(p[0] > p[1]) continue; if(p[0] > q[1] || p[1] < q[0]) { q[2] = p[0]; q[3] = p[1]; q += 2; continue; } if(p[0] < q[0]) q[0] = p[0]; if(p[1] > q[1]) q[1] = p[1]; } q[2] = 0; p = pairs; if(nc) { x = rclass(0, p[0]-1); ov = p[1]+1; for(p+=2; *p; p+=2) { x = re2or(x, rclass(ov, p[0]-1)); ov = p[1]+1; } x = re2or(x, rclass(ov, Runemax)); } else { x = rclass(p[0], p[1]); for(p+=2; *p; p+=2) x = re2or(x, rclass(p[0], p[1])); } return x; }