ref: c2ea6907c920de6ed52aeb7ecc1bc4512acd4bcd
parent: eb910cf71edb43c5aae57a7605d186109f2cbd14
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Dec 6 12:24:22 EST 2025
libregexec: add basic test case The test data contained in basic.rc has been lifted from Glenn Fowler's testregex at AT&T Research, with the license reproduced in the file itself.
--- /dev/null
+++ b/sys/src/libregexp/test/basic.rc
@@ -1,0 +1,237 @@
+#!/bin/rc
+
+# Test driver run by M below; the mkfile in this directory builds it
+# from regexec.c.  $O is not assigned in this script, so it has to be
+# in the environment already (presumably exported by mk -- confirm).
+x=./$O.regexec
+
+# fail message ...
+#
+# Abort the test run: the arguments are written to standard error and
+# then, joined with spaces into one string, used as the exit status.
+fn fail {
+	>[1=2] echo $*
+	exit $"*
+}
+
+# M pattern string expected
+#
+# Run the driver on pattern and string and compare what it prints with
+# expected, a run of "(start,end)" groups.  Both texts are split on '('
+# so that every list element holds one group; the number of expected
+# groups is handed to the driver as its third argument.  A failing
+# driver or any difference between the two lists ends the run via fail.
+fn M {
+	want=`'('{echo $3}
+	if(! got=`'('{$x $1 $2 $#want})
+		fail 'did not match:' $*
+	# keep only as many groups as were expected
+	got=$got(1-$#want)
+	if(! ~ $"got $"want)
+		fail 'did not match:' $* '!=' $got
+}
+
+# The following license covers associated test data.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+# without restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, and/or sell copies of the
+# Software, and to permit persons to whom the Software is furnished to do
+# so, subject to the following disclaimer:
+#
+# THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+# IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Test vectors.  Each line is one call
+#	M pattern string expected
+# where expected lists "(start,end)" offsets into string, in the order
+# the driver prints them; a group the driver finds unset is written
+# "(?,?)".  The first failing vector stops the script.
+M 'abracadabra$' 'abracadabracadabra' '(7,18)'
+M 'a...b' 'abababbb' '(2,7)'
+M 'XXXXXX' '..XXXXXX' '(2,8)'
+M '\)' '()' '(1,2)'
+M 'a]' 'a]a' '(0,2)'
+M '}' '}' '(0,1)'
+M '\}' '}' '(0,1)'
+M '\]' ']' '(0,1)'
+M ']' ']' '(0,1)'
+M ']' ']' '(0,1)'
+M '{' '{' '(0,1)'
+M '}' '}' '(0,1)'
+M '^a' 'ax' '(0,1)'
+M '\^a' 'a^a' '(1,3)'
+M 'a\^' 'a^' '(0,2)'
+M 'a$' 'aa' '(1,2)'
+M 'a\$' 'a$' '(0,2)'
+M '^$' '' '(0,0)'
+M '$^' '' '(0,0)'
+M 'a($)' 'aa' '(1,2)(2,2)'
+M 'a*(^a)' 'aa' '(0,1)(0,1)'
+M '(..)*(...)*' 'a' '(0,0)'
+M '(..)*(...)*' 'abcd' '(0,4)(2,4)'
+M '(ab|a)(bc|c)' 'abc' '(0,3)(0,2)(2,3)'
+M '(ab)c|abc' 'abc' '(0,3)(0,2)'
+M '((a|a)|a)' 'a' '(0,1)(0,1)(0,1)'
+M '(a*)(a|aa)' 'aaaa' '(0,4)(0,3)(3,4)'
+M 'a*(a.|aa)' 'aaaa' '(0,4)(2,4)'
+M 'a(b)|c(d)|a(e)f' 'aef' '(0,3)(?,?)(?,?)(1,2)'
+M '(a|b)?.*' 'b' '(0,1)(0,1)'
+M '(a|b)c|a(b|c)' 'ac' '(0,2)(0,1)'
+M '(a|b)c|a(b|c)' 'ab' '(0,2)(?,?)(1,2)'
+M '(a|b)*c|(a|ab)*c' 'abc' '(0,3)(1,2)'
+M '(a|b)*c|(a|ab)*c' 'xc' '(1,2)'
+M '(.a|.b).*|.*(.a|.b)' 'xa' '(0,2)(0,2)'
+M 'a?(ab|ba)ab' 'abab' '(0,4)(0,2)'
+M 'ab|abab' 'abbabab' '(0,2)'
+M 'aba|bab|bba' 'baaabbbaba' '(5,8)'
+M 'aba|bab' 'baaabbbaba' '(6,9)'
+M '(aa|aaa)*|(a|aaaaa)' 'aa' '(0,2)(0,2)'
+M '(a.|.a.)*|(a|.a...)' 'aa' '(0,2)(0,2)'
+M 'ab|a' 'xabc' '(1,3)'
+M 'ab|a' 'xxabc' '(2,4)'
+#M '(Ab|cD)*' 'aBcD' '(0,4)(2,4)'
+M '[^\-]' '--a' '(2,3)'
+M '[a\-]*' '--a' '(0,3)'
+M '[a-m\-]*' '--amoma--' '(0,4)'
+M ':::1:::0:|:::1:1:0:' ':::0:::1:::1:::0:' '(8,17)'
+M ':::1:::0:|:::1:1:1:' ':::0:::1:::1:::0:' '(8,17)'
+M '(a)(b)(c)' 'abc' '(0,3)(0,1)(1,2)(2,3)'
+M 'xxx' 'xxx' '(0,3)'
+M '(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)' 'feb 6,' '(0,6)'
+M '(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)' '2/7' '(0,3)'
+M '(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)' 'feb 1,Feb 6' '(5,11)'
+M '((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))' 'x' '(0,1)(0,1)(0,1)'
+M '((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*' 'xx' '(0,2)(1,2)(1,2)'
+M 'a?(ab|ba)*' 'ababababababababababababababababababababababababababababababababababababababababa' '(0,81)(79,81)'
+M 'abaa|abbaa|abbbaa|abbbbaa' 'ababbabbbabbbabbbbabbbbaa' '(18,25)'
+M 'abaa|abbaa|abbbaa|abbbbaa' 'ababbabbbabbbabbbbabaa' '(18,22)'
+M 'aaac|aabc|abac|abbc|baac|babc|bbac|bbbc' 'baaabbbabac' '(7,11)'
+M 'a*a*a*a*a*b' 'aaaaaaaaab' '(0,10)'
+M '^' '' '(0,0)'
+M '$' '' '(0,0)'
+M '^$' '' '(0,0)'
+M '^a$' 'a' '(0,1)'
+M 'abc' 'abc' '(0,3)'
+M 'abc' 'xabcy' '(1,4)'
+M 'abc' 'ababc' '(2,5)'
+M 'ab*c' 'abc' '(0,3)'
+M 'ab*bc' 'abc' '(0,3)'
+M 'ab*bc' 'abbc' '(0,4)'
+M 'ab*bc' 'abbbbc' '(0,6)'
+M 'ab+bc' 'abbc' '(0,4)'
+M 'ab+bc' 'abbbbc' '(0,6)'
+M 'ab?bc' 'abbc' '(0,4)'
+M 'ab?bc' 'abc' '(0,3)'
+M 'ab?c' 'abc' '(0,3)'
+M '^abc$' 'abc' '(0,3)'
+M '^abc' 'abcc' '(0,3)'
+M 'abc$' 'aabc' '(1,4)'
+M '^' 'abc' '(0,0)'
+M '$' 'abc' '(3,3)'
+M 'a.c' 'abc' '(0,3)'
+M 'a.c' 'axc' '(0,3)'
+M 'a.*c' 'axyzc' '(0,5)'
+M 'a[bc]d' 'abd' '(0,3)'
+M 'a[b-d]e' 'ace' '(0,3)'
+M 'a[b-d]' 'aac' '(1,3)'
+M 'a[\-b]' 'a-' '(0,2)'
+M 'a[b\-]' 'a-' '(0,2)'
+M 'a]' 'a]' '(0,2)'
+M 'a[\]]b' 'a]b' '(0,3)'
+M 'a[^bc]d' 'aed' '(0,3)'
+M 'a[^\-b]c' 'adc' '(0,3)'
+M 'a[^\]b]c' 'adc' '(0,3)'
+M 'ab|cd' 'abc' '(0,2)'
+M 'ab|cd' 'abcd' '(0,2)'
+M 'a\(b' 'a(b' '(0,3)'
+M 'a\(*b' 'ab' '(0,2)'
+M 'a\(*b' 'a((b' '(0,4)'
+M '((a))' 'abc' '(0,1)(0,1)(0,1)'
+M '(a)b(c)' 'abc' '(0,3)(0,1)(2,3)'
+M 'a+b+c' 'aabbabc' '(4,7)'
+M 'a*' 'aaa' '(0,3)'
+M '(^)*' '-' '(0,0)(?,?)'
+M '(a*)*' '-' '(0,0)(?,?)'
+M '(a*)+' '-' '(0,0)(0,0)'
+M '(a*|b)*' '-' '(0,0)(?,?)'
+M '((a*|b))*' '-' '(0,0)(?,?)(?,?)'
+M '(a+|b)*' 'ab' '(0,2)(1,2)'
+M '(a+|b)+' 'ab' '(0,2)(1,2)'
+M '(a+|b)?' 'ab' '(0,1)(0,1)'
+M '[^ab]*' 'cde' '(0,3)'
+M 'a*' '' '(0,0)'
+M '([abc])*d' 'abbbcd' '(0,6)(4,5)'
+M '([abc])*bcd' 'abcd' '(0,4)(0,1)'
+M 'a|b|c|d|e' 'e' '(0,1)'
+M '(a|b|c|d|e)f' 'ef' '(0,2)(0,1)'
+M 'abcd*efg' 'abcdefg' '(0,7)'
+M 'ab*' 'xabyabbbz' '(1,3)'
+M 'ab*' 'xayabbbz' '(1,2)'
+M '(ab|cd)e' 'abcde' '(2,5)(2,4)'
+M '[abhgefdc]ij' 'hij' '(0,3)'
+M '(a|b)c*d' 'abcd' '(1,4)(1,2)'
+M '(ab|ab*)bc' 'abc' '(0,3)(0,1)'
+M 'a([bc]*)c*' 'abc' '(0,3)(1,3)'
+M 'a([bc]*)(c*d)' 'abcd' '(0,4)(1,3)(3,4)'
+M 'a([bc]+)(c*d)' 'abcd' '(0,4)(1,3)(3,4)'
+M 'a([bc]*)(c+d)' 'abcd' '(0,4)(1,2)(2,4)'
+M 'a[bcd]*dcdcde' 'adcdcde' '(0,7)'
+M '(ab|a)b*c' 'abc' '(0,3)(0,2)'
+M '((a)(b)c)(d)' 'abcd' '(0,4)(0,3)(0,1)(1,2)(3,4)'
+M '[A-Za-z_][A-Za-z0-9_]*' 'alpha' '(0,5)'
+M '^a(bc+|b[eh])g|.h$' 'abh' '(1,3)'
+M '(bc+d$|ef*g.|h?i(j|k))' 'effgz' '(0,5)(0,5)'
+M '(bc+d$|ef*g.|h?i(j|k))' 'ij' '(0,2)(0,2)(1,2)'
+M '(bc+d$|ef*g.|h?i(j|k))' 'reffgz' '(1,6)(1,6)'
+M '(((((((((a)))))))))' 'a' '(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)'
+M 'multiple words' 'multiple words yeah' '(0,14)'
+M '(.*)c(.*)' 'abcde' '(0,5)(0,2)(3,5)'
+M 'abcd' 'abcd' '(0,4)'
+M 'a(bc)d' 'abcd' '(0,4)(1,3)'
+M 'a[-]?c' 'ac' '(0,3)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Qaddafi' '(0,15)(?,?)(10,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Mo''ammar Gadhafi' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Kaddafi' '(0,15)(?,?)(10,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Qadhafi' '(0,15)(?,?)(10,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Gadafi' '(0,14)(?,?)(10,11)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Mu''ammar Qadafi' '(0,15)(?,?)(11,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Moamar Gaddafi' '(0,14)(?,?)(9,11)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Mu''ammar Qadhdhafi' '(0,18)(?,?)(13,15)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Khaddafi' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Ghaddafy' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Ghadafi' '(0,15)(?,?)(11,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Ghaddafi' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muamar Kaddafi' '(0,14)(?,?)(9,11)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Quathafi' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Muammar Gheddafi' '(0,16)(?,?)(11,13)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Moammar Khadafy' '(0,15)(?,?)(11,12)'
+M 'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]' 'Moammar Qudhafi' '(0,15)(?,?)(10,12)'
+M 'a+(b|c)*d+' 'aabcdd' '(0,6)(3,4)'
+M '^.+$' 'vivi' '(0,4)'
+M '^(.+)$' 'vivi' '(0,4)(0,4)'
+M '^([^!.]+).att.com!(.+)$' 'gryphon.att.com!eby' '(0,19)(0,7)(16,19)'
+M '^([^!]+!)?([^!]+)$' 'bas' '(0,3)(?,?)(0,3)'
+M '^([^!]+!)?([^!]+)$' 'bar!bas' '(0,7)(0,4)(4,7)'
+M '^([^!]+!)?([^!]+)$' 'foo!bas' '(0,7)(0,4)(4,7)'
+M '^.+!([^!]+!)([^!]+)$' 'foo!bar!bas' '(0,11)(4,8)(8,11)'
+M '((foo)|(bar))!bas' 'bar!bas' '(0,7)(0,3)(?,?)(0,3)'
+M '((foo)|(bar))!bas' 'foo!bar!bas' '(4,11)(4,7)(?,?)(4,7)'
+M '((foo)|(bar))!bas' 'foo!bas' '(0,7)(0,3)(0,3)'
+M '((foo)|bar)!bas' 'bar!bas' '(0,7)(0,3)'
+M '((foo)|bar)!bas' 'foo!bar!bas' '(4,11)(4,7)'
+M '((foo)|bar)!bas' 'foo!bas' '(0,7)(0,3)(0,3)'
+M '(foo|(bar))!bas' 'bar!bas' '(0,7)(0,3)(0,3)'
+M '(foo|(bar))!bas' 'foo!bar!bas' '(4,11)(4,7)(4,7)'
+M '(foo|(bar))!bas' 'foo!bas' '(0,7)(0,3)'
+M '(foo|bar)!bas' 'bar!bas' '(0,7)(0,3)'
+M '(foo|bar)!bas' 'foo!bar!bas' '(4,11)(4,7)'
+M '(foo|bar)!bas' 'foo!bas' '(0,7)(0,3)'
+M '^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$' 'foo!bar!bas' '(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)'
+M '^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$' 'bas' '(0,3)(?,?)(0,3)'
+M '^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$' 'bar!bas' '(0,7)(0,4)(4,7)'
+M '^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$' 'foo!bar!bas' '(0,11)(?,?)(?,?)(4,8)(8,11)'
+M '^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$' 'foo!bas' '(0,7)(0,4)(4,7)'
+M '^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$' 'bas' '(0,3)(0,3)(?,?)(0,3)'
+M '^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$' 'bar!bas' '(0,7)(0,7)(0,4)(4,7)'
+M '^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$' 'foo!bar!bas' '(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)'
+M '^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$' 'foo!bas' '(0,7)(0,7)(0,4)(4,7)'
+M '.*(/XXX).*' '/XXX' '(0,4)(0,4)'
+M '.*(\\XXX).*' '\XXX' '(0,4)(0,4)'
+M '\\XXX' '\XXX' '(0,4)'
+M '.*(/000).*' '/000' '(0,4)(0,4)'
+M '.*(\\000).*' '\000' '(0,4)(0,4)'
+M '\\000' '\000' '(0,4)'
+
+# Reached only when no vector called fail: leave with an empty status.
+exit ''
--- /dev/null
+++ b/sys/src/libregexp/test/mkfile
@@ -1,0 +1,9 @@
+# NOTE(review): presumably supplies the per-architecture tool names and $O -- confirm.
+</$objtype/mkfile
+
+# Name of the test to run; presumably refers to basic.rc in this directory.
+TEST=basic
+
+# NOTE(review): the generic test rules are expected to come from this file -- confirm.
+</sys/src/cmd/mktest
+
+# The driver binary depends on the object compiled from regexec.c.
+$O.regexec: regexec.$O
+
+# Every test target needs the driver, which basic.rc runs as ./$O.regexec.
+%.test: $O.regexec
--- /dev/null
+++ b/sys/src/libregexp/test/regexec.c
@@ -1,0 +1,47 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+
+/* compiled form of the pattern argument, assigned once in main */
+Reprog *re;
+/* match slots handed to regexec and printed by main; the array size caps nsub */
+Resub m[10];
+
+/*
+ * Write the expected command line to file descriptor 2 and
+ * terminate the program with exit status "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "usage: %s pattern string [nsub]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * regexec pattern string [nsub]
+ *
+ * Compile pattern, match it against string and print one
+ * "(start,end)" pair per requested match slot on a single line.
+ * Offsets are counted in bytes from the start of string; a nil
+ * start or end pointer is printed as "?".  nsub selects how many
+ * slots are requested and printed; without it all of m is used.
+ * Exits with status "no match" when regexec reports no match.
+ */
+void
+main(int argc, char *argv[])
+{
+	char *end;
+	int i, n;
+
+	ARGBEGIN {
+	default:
+		/*
+		 * No options are defined.  Reject stray flags instead of
+		 * silently consuming them; a pattern starting with '-'
+		 * can still be passed after "--".
+		 */
+		usage();
+	} ARGEND;
+
+	if(argc < 2 || argc > 3)
+		usage();
+	re = regcomp(argv[0]);
+	if(re == nil)
+		sysfatal("regcomp");
+	n = nelem(m);
+	if(argc == 3) {
+		n = strtol(argv[2], &end, 10);
+		/*
+		 * atoi would turn garbage into 0 and pass negative counts
+		 * on to regexec; test for both explicitly rather than
+		 * relying on how the comparison with nelem(m) behaves.
+		 */
+		if(end == argv[2] || *end != '\0' || n < 0)
+			usage();
+	}
+	if(n > nelem(m))
+		sysfatal("too many substitutions");
+	if(regexec(re, argv[1], m, n) <= 0)
+		exits("no match");
+	for(i = 0; i < n; i++) {
+		if(m[i].sp == nil)
+			print("(?");
+		else
+			print("(%d", (int)(m[i].sp - argv[1]));
+		if(m[i].ep == nil)
+			print(",?)");
+		else
+			print(",%d)", (int)(m[i].ep - argv[1]));
+	}
+	print("\n");
+	exits(nil);
+}
--
⑨