git: 9front

Download patch

ref: c2ea6907c920de6ed52aeb7ecc1bc4512acd4bcd
parent: eb910cf71edb43c5aae57a7605d186109f2cbd14
author: cinap_lenrek <cinap_lenrek@felloff.net>
date: Sat Dec 6 12:24:22 EST 2025

libregexp: add basic test case

The test data contained in basic.rc
has been lifted from Glenn Fowler's
testregex at AT&T Research with the license
reproduced in the file itself.

--- /dev/null
+++ b/sys/src/libregexp/test/basic.rc
@@ -1,0 +1,237 @@
+#!/bin/rc
+
+x=./$O.regexec	# helper program run by M for every test case
+
+fn fail {	# report a failed test case and abort the script
+	echo $* >[1=2]	# print all arguments on standard error
+	exit $"*	# exit status is the arguments joined into one string
+}
+
+fn M {	# usage: M pattern string ranges; fails unless $x reports exactly these ranges
+	sub=`'('{echo $3}	# split the expected ranges at each open paren; $#sub is their count
+	if(! match=`'('{$x $1 $2 $#sub}) {	# run the helper asking for $#sub ranges, split its output the same way
+		fail 'did not match:' $*
+	}
+	match=$match(1-$#sub)	# keep only elements 1 through $#sub of the output
+	if(! ~ $"match $"sub){	# compare actual and expected lists as single strings
+		fail 'did not match:' $* '!=' $match
+	}
+}
+
+#  The following license covers associated test data.
+# 
+#  Permission is hereby granted, free of charge, to any person obtaining a
+#  copy of THIS SOFTWARE FILE (the "Software"), to deal in the Software
+#  without restriction, including without limitation the rights to use,
+#  copy, modify, merge, publish, distribute, and/or sell copies of the
+#  Software, and to permit persons to whom the Software is furnished to do
+#  so, subject to the following disclaimer:
+# 
+#  THIS SOFTWARE IS PROVIDED BY AT&T ``AS IS'' AND ANY EXPRESS OR IMPLIED
+#  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+#  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+#  IN NO EVENT SHALL AT&T BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+M	'abracadabra$'	'abracadabracadabra'	'(7,18)'
+M	'a...b'	'abababbb'	'(2,7)'
+M	'XXXXXX'	'..XXXXXX'	'(2,8)'
+M	'\)'	'()'	'(1,2)'
+M	'a]'	'a]a'	'(0,2)'
+M	'}'	'}'	'(0,1)'
+M	'\}'	'}'	'(0,1)'
+M	'\]'	']'	'(0,1)'
+M	']'	']'	'(0,1)'
+M	']'	']'	'(0,1)'
+M	'{'	'{'	'(0,1)'
+M	'}'	'}'	'(0,1)'
+M	'^a'	'ax'	'(0,1)'
+M	'\^a'	'a^a'	'(1,3)'
+M	'a\^'	'a^'	'(0,2)'
+M	'a$'	'aa'	'(1,2)'
+M	'a\$'	'a$'	'(0,2)'
+M	'^$'	''	'(0,0)'
+M	'$^'	''	'(0,0)'
+M	'a($)'	'aa'	'(1,2)(2,2)'
+M	'a*(^a)'	'aa'	'(0,1)(0,1)'
+M	'(..)*(...)*'	'a'	'(0,0)'
+M	'(..)*(...)*'	'abcd'	'(0,4)(2,4)'
+M	'(ab|a)(bc|c)'	'abc'	'(0,3)(0,2)(2,3)'
+M	'(ab)c|abc'	'abc'	'(0,3)(0,2)'
+M	'((a|a)|a)'	'a'	'(0,1)(0,1)(0,1)'
+M	'(a*)(a|aa)'	'aaaa'	'(0,4)(0,3)(3,4)'
+M	'a*(a.|aa)'	'aaaa'	'(0,4)(2,4)'
+M	'a(b)|c(d)|a(e)f'	'aef'	'(0,3)(?,?)(?,?)(1,2)'
+M	'(a|b)?.*'	'b'	'(0,1)(0,1)'
+M	'(a|b)c|a(b|c)'	'ac'	'(0,2)(0,1)'
+M	'(a|b)c|a(b|c)'	'ab'	'(0,2)(?,?)(1,2)'
+M	'(a|b)*c|(a|ab)*c'	'abc'	'(0,3)(1,2)'
+M	'(a|b)*c|(a|ab)*c'	'xc'	'(1,2)'
+M	'(.a|.b).*|.*(.a|.b)'	'xa'	'(0,2)(0,2)'
+M	'a?(ab|ba)ab'	'abab'	'(0,4)(0,2)'
+M	'ab|abab'	'abbabab'	'(0,2)'
+M	'aba|bab|bba'	'baaabbbaba'	'(5,8)'
+M	'aba|bab'	'baaabbbaba'	'(6,9)'
+M	'(aa|aaa)*|(a|aaaaa)'	'aa'	'(0,2)(0,2)'
+M	'(a.|.a.)*|(a|.a...)'	'aa'	'(0,2)(0,2)'
+M	'ab|a'	'xabc'	'(1,3)'
+M	'ab|a'	'xxabc'	'(2,4)'
+#M	'(Ab|cD)*'	'aBcD'	'(0,4)(2,4)'	# disabled: expected ranges only hold for case-insensitive matching
+M	'[^\-]'	'--a'	'(2,3)'
+M	'[a\-]*'	'--a'	'(0,3)'
+M	'[a-m\-]*'	'--amoma--'	'(0,4)'
+M	':::1:::0:|:::1:1:0:'	':::0:::1:::1:::0:'	'(8,17)'
+M	':::1:::0:|:::1:1:1:'	':::0:::1:::1:::0:'	'(8,17)'
+M	'(a)(b)(c)'	'abc'	'(0,3)(0,1)(1,2)(2,3)'
+M	'xxx'	'xxx'	'(0,3)'
+M	'(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)'	'feb 6,'	'(0,6)'
+M	'(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)'	'2/7'	'(0,3)'
+M	'(^|[ (,;])((([Ff]eb[^ ]* *|0*2/|\* */?)0*[6-7]))([^0-9]|$)'	'feb 1,Feb 6'	'(5,11)'
+M	'((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))'	'x'	'(0,1)(0,1)(0,1)'
+M	'((((((((((((((((((((((((((((((x))))))))))))))))))))))))))))))*'	'xx'	'(0,2)(1,2)(1,2)'
+M	'a?(ab|ba)*'	'ababababababababababababababababababababababababababababababababababababababababa'	'(0,81)(79,81)'
+M	'abaa|abbaa|abbbaa|abbbbaa'	'ababbabbbabbbabbbbabbbbaa'	'(18,25)'
+M	'abaa|abbaa|abbbaa|abbbbaa'	'ababbabbbabbbabbbbabaa'	'(18,22)'
+M	'aaac|aabc|abac|abbc|baac|babc|bbac|bbbc'	'baaabbbabac'	'(7,11)'
+M	'a*a*a*a*a*b'	'aaaaaaaaab'	'(0,10)'
+M	'^'	''	'(0,0)'
+M	'$'	''	'(0,0)'
+M	'^$'	''	'(0,0)'
+M	'^a$'	'a'	'(0,1)'
+M	'abc'	'abc'	'(0,3)'
+M	'abc'	'xabcy'	'(1,4)'
+M	'abc'	'ababc'	'(2,5)'
+M	'ab*c'	'abc'	'(0,3)'
+M	'ab*bc'	'abc'	'(0,3)'
+M	'ab*bc'	'abbc'	'(0,4)'
+M	'ab*bc'	'abbbbc'	'(0,6)'
+M	'ab+bc'	'abbc'	'(0,4)'
+M	'ab+bc'	'abbbbc'	'(0,6)'
+M	'ab?bc'	'abbc'	'(0,4)'
+M	'ab?bc'	'abc'	'(0,3)'
+M	'ab?c'	'abc'	'(0,3)'
+M	'^abc$'	'abc'	'(0,3)'
+M	'^abc'	'abcc'	'(0,3)'
+M	'abc$'	'aabc'	'(1,4)'
+M	'^'	'abc'	'(0,0)'
+M	'$'	'abc'	'(3,3)'
+M	'a.c'	'abc'	'(0,3)'
+M	'a.c'	'axc'	'(0,3)'
+M	'a.*c'	'axyzc'	'(0,5)'
+M	'a[bc]d'	'abd'	'(0,3)'
+M	'a[b-d]e'	'ace'	'(0,3)'
+M	'a[b-d]'	'aac'	'(1,3)'
+M	'a[\-b]'	'a-'	'(0,2)'
+M	'a[b\-]'	'a-'	'(0,2)'
+M	'a]'	'a]'	'(0,2)'
+M	'a[\]]b'	'a]b'	'(0,3)'
+M	'a[^bc]d'	'aed'	'(0,3)'
+M	'a[^\-b]c'	'adc'	'(0,3)'
+M	'a[^\]b]c'	'adc'	'(0,3)'
+M	'ab|cd'	'abc'	'(0,2)'
+M	'ab|cd'	'abcd'	'(0,2)'
+M	'a\(b'	'a(b'	'(0,3)'
+M	'a\(*b'	'ab'	'(0,2)'
+M	'a\(*b'	'a((b'	'(0,4)'
+M	'((a))'	'abc'	'(0,1)(0,1)(0,1)'
+M	'(a)b(c)'	'abc'	'(0,3)(0,1)(2,3)'
+M	'a+b+c'	'aabbabc'	'(4,7)'
+M	'a*'	'aaa'	'(0,3)'
+M	'(^)*'	'-'	'(0,0)(?,?)'
+M	'(a*)*'	'-'	'(0,0)(?,?)'
+M	'(a*)+'	'-'	'(0,0)(0,0)'
+M	'(a*|b)*'	'-'	'(0,0)(?,?)'
+M	'((a*|b))*'	'-'	'(0,0)(?,?)(?,?)'
+M	'(a+|b)*'	'ab'	'(0,2)(1,2)'
+M	'(a+|b)+'	'ab'	'(0,2)(1,2)'
+M	'(a+|b)?'	'ab'	'(0,1)(0,1)'
+M	'[^ab]*'	'cde'	'(0,3)'
+M	'a*'	''	'(0,0)'
+M	'([abc])*d'	'abbbcd'	'(0,6)(4,5)'
+M	'([abc])*bcd'	'abcd'	'(0,4)(0,1)'
+M	'a|b|c|d|e'	'e'	'(0,1)'
+M	'(a|b|c|d|e)f'	'ef'	'(0,2)(0,1)'
+M	'abcd*efg'	'abcdefg'	'(0,7)'
+M	'ab*'	'xabyabbbz'	'(1,3)'
+M	'ab*'	'xayabbbz'	'(1,2)'
+M	'(ab|cd)e'	'abcde'	'(2,5)(2,4)'
+M	'[abhgefdc]ij'	'hij'	'(0,3)'
+M	'(a|b)c*d'	'abcd'	'(1,4)(1,2)'
+M	'(ab|ab*)bc'	'abc'	'(0,3)(0,1)'
+M	'a([bc]*)c*'	'abc'	'(0,3)(1,3)'
+M	'a([bc]*)(c*d)'	'abcd'	'(0,4)(1,3)(3,4)'
+M	'a([bc]+)(c*d)'	'abcd'	'(0,4)(1,3)(3,4)'
+M	'a([bc]*)(c+d)'	'abcd'	'(0,4)(1,2)(2,4)'
+M	'a[bcd]*dcdcde'	'adcdcde'	'(0,7)'
+M	'(ab|a)b*c'	'abc'	'(0,3)(0,2)'
+M	'((a)(b)c)(d)'	'abcd'	'(0,4)(0,3)(0,1)(1,2)(3,4)'
+M	'[A-Za-z_][A-Za-z0-9_]*'	'alpha'	'(0,5)'
+M	'^a(bc+|b[eh])g|.h$'	'abh'	'(1,3)'
+M	'(bc+d$|ef*g.|h?i(j|k))'	'effgz'	'(0,5)(0,5)'
+M	'(bc+d$|ef*g.|h?i(j|k))'	'ij'	'(0,2)(0,2)(1,2)'
+M	'(bc+d$|ef*g.|h?i(j|k))'	'reffgz'	'(1,6)(1,6)'
+M	'(((((((((a)))))))))'	'a'	'(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)(0,1)'
+M	'multiple words'	'multiple words yeah'	'(0,14)'
+M	'(.*)c(.*)'	'abcde'	'(0,5)(0,2)(3,5)'
+M	'abcd'	'abcd'	'(0,4)'
+M	'a(bc)d'	'abcd'	'(0,4)(1,3)'
+M	'a[-]?c'	'ac'	'(0,3)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Qaddafi'	'(0,15)(?,?)(10,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Mo''ammar Gadhafi'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Kaddafi'	'(0,15)(?,?)(10,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Qadhafi'	'(0,15)(?,?)(10,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Gadafi'	'(0,14)(?,?)(10,11)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Mu''ammar Qadafi'	'(0,15)(?,?)(11,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Moamar Gaddafi'	'(0,14)(?,?)(9,11)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Mu''ammar Qadhdhafi'	'(0,18)(?,?)(13,15)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Khaddafi'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Ghaddafy'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Ghadafi'	'(0,15)(?,?)(11,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Ghaddafi'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muamar Kaddafi'	'(0,14)(?,?)(9,11)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Quathafi'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Muammar Gheddafi'	'(0,16)(?,?)(11,13)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Moammar Khadafy'	'(0,15)(?,?)(11,12)'
+M	'M[ou]''?am+[ae]r .*([AEae]l[\- ])?[GKQ]h?[aeu]+([dtz][dhz]?)+af[iy]'	'Moammar Qudhafi'	'(0,15)(?,?)(10,12)'
+M	'a+(b|c)*d+'	'aabcdd'	'(0,6)(3,4)'
+M	'^.+$'	'vivi'	'(0,4)'
+M	'^(.+)$'	'vivi'	'(0,4)(0,4)'
+M	'^([^!.]+).att.com!(.+)$'	'gryphon.att.com!eby'	'(0,19)(0,7)(16,19)'
+M	'^([^!]+!)?([^!]+)$'	'bas'	'(0,3)(?,?)(0,3)'
+M	'^([^!]+!)?([^!]+)$'	'bar!bas'	'(0,7)(0,4)(4,7)'
+M	'^([^!]+!)?([^!]+)$'	'foo!bas'	'(0,7)(0,4)(4,7)'
+M	'^.+!([^!]+!)([^!]+)$'	'foo!bar!bas'	'(0,11)(4,8)(8,11)'
+M	'((foo)|(bar))!bas'	'bar!bas'	'(0,7)(0,3)(?,?)(0,3)'
+M	'((foo)|(bar))!bas'	'foo!bar!bas'	'(4,11)(4,7)(?,?)(4,7)'
+M	'((foo)|(bar))!bas'	'foo!bas'	'(0,7)(0,3)(0,3)'
+M	'((foo)|bar)!bas'	'bar!bas'	'(0,7)(0,3)'
+M	'((foo)|bar)!bas'	'foo!bar!bas'	'(4,11)(4,7)'
+M	'((foo)|bar)!bas'	'foo!bas'	'(0,7)(0,3)(0,3)'
+M	'(foo|(bar))!bas'	'bar!bas'	'(0,7)(0,3)(0,3)'
+M	'(foo|(bar))!bas'	'foo!bar!bas'	'(4,11)(4,7)(4,7)'
+M	'(foo|(bar))!bas'	'foo!bas'	'(0,7)(0,3)'
+M	'(foo|bar)!bas'	'bar!bas'	'(0,7)(0,3)'
+M	'(foo|bar)!bas'	'foo!bar!bas'	'(4,11)(4,7)'
+M	'(foo|bar)!bas'	'foo!bas'	'(0,7)(0,3)'
+M	'^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$'	'foo!bar!bas'	'(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)'
+M	'^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$'	'bas'	'(0,3)(?,?)(0,3)'
+M	'^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$'	'bar!bas'	'(0,7)(0,4)(4,7)'
+M	'^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$'	'foo!bar!bas'	'(0,11)(?,?)(?,?)(4,8)(8,11)'
+M	'^([^!]+!)?([^!]+)$|^.+!([^!]+!)([^!]+)$'	'foo!bas'	'(0,7)(0,4)(4,7)'
+M	'^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$'	'bas'	'(0,3)(0,3)(?,?)(0,3)'
+M	'^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$'	'bar!bas'	'(0,7)(0,7)(0,4)(4,7)'
+M	'^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$'	'foo!bar!bas'	'(0,11)(0,11)(?,?)(?,?)(4,8)(8,11)'
+M	'^(([^!]+!)?([^!]+)|.+!([^!]+!)([^!]+))$'	'foo!bas'	'(0,7)(0,7)(0,4)(4,7)'
+M	'.*(/XXX).*'	'/XXX'	'(0,4)(0,4)'
+M	'.*(\\XXX).*'	'\XXX'	'(0,4)(0,4)'
+M	'\\XXX'	'\XXX'	'(0,4)'
+M	'.*(/000).*'	'/000'	'(0,4)(0,4)'
+M	'.*(\\000).*'	'\000'	'(0,4)(0,4)'
+M	'\\000'	'\000'	'(0,4)'
+
+exit ''
--- /dev/null
+++ b/sys/src/libregexp/test/mkfile
@@ -1,0 +1,9 @@
+</$objtype/mkfile
+
+TEST=basic
+
+</sys/src/cmd/mktest
+
+$O.regexec: regexec.$O	# helper binary that basic.rc runs as ./$O.regexec
+
+%.test: $O.regexec	# every .test target depends on the helper binary
--- /dev/null
+++ b/sys/src/libregexp/test/regexec.c
@@ -1,0 +1,65 @@
+#include <u.h>
+#include <libc.h>
+#include <regexp.h>
+
+/*
+ * Test driver for libregexp: compiles pattern, matches it against string
+ * and prints the first nsub match ranges on one line as (start,end) byte
+ * offsets into string.  A nil start or end pointer is printed as '?'.
+ * Exits with status "no match" if regexec does not report a match.
+ */
+Reprog *re;
+Resub m[10];
+
+/* Print the synopsis on standard error and exit with status "usage". */
+void
+usage(void)
+{
+	fprint(2, "usage: %s pattern string [nsub]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Arguments after option processing: pattern, string and optionally
+ * the number of Resub entries to pass to regexec and to print
+ * (default: all of m).
+ */
+void
+main(int argc, char *argv[])
+{
+	int i, n;
+
+	ARGBEGIN {
+	default:
+		usage();
+	} ARGEND;
+
+	/* reject extra arguments instead of silently ignoring them */
+	if(argc < 2 || argc > 3)
+		usage();
+	re = regcomp(argv[0]);
+	if(re == nil)
+		sysfatal("regcomp");
+	n = nelem(m);
+	if(argc == 3)
+		n = atoi(argv[2]);
+	/* atoi accepts a sign; do not hand a negative count to regexec */
+	if(n < 0)
+		sysfatal("negative number of substitutions");
+	if(n > nelem(m))
+		sysfatal("too many substitutions");
+	if(regexec(re, argv[1], m, n) <= 0)
+		exits("no match");
+	for(i = 0; i < n; i++) {
+		if(m[i].sp == nil)
+			print("(?");
+		else
+			print("(%d", (int)(m[i].sp - argv[1]));
+		if(m[i].ep == nil)
+			print(",?)");
+		else
+			print(",%d)", (int)(m[i].ep - argv[1]));
+	}
+	print("\n");
+	exits(nil);
+}
--