ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /appl/lib/w3c/xpointers.b/
implement Xpointers;
#
# Copyright © 2005 Vita Nuova Holdings Limited
#
include "sys.m";
sys: Sys;
include "xpointers.m";
init()
{
sys = load Sys Sys->PATH;
}
#
# XPointer framework syntax
#
# Pointer ::= Shorthand | SchemeBased
# Shorthand ::= NCName # from [XML-Names]
# SchemeBased ::= PointerPart (S? PointerPart)*
# PointerPart ::= SchemeName '(' SchemeData ')'
# SchemeName ::= QName # from [XML-Names]
# SchemeData ::= EscapedData*
# EscapedData ::= NormalChar | '^(' | '^)' | '^^' | '(' SchemeData ')'
# NormalChar ::= UnicodeChar - [()^]
# UnicodeChar ::= [#x0 - #x10FFFF]
framework(s: string): (string, list of (string, string, string), string)
{
(q, nm, i) := name(s, 0);
if(i >= len s){ # Shorthand
if(q != nil)
return (nil, nil, "shorthand pointer must be unqualified name");
if(nm == nil)
return (nil, nil, "missing pointer name");
return (nm, nil, nil);
}
# must be SchemeBased
l: list of (string, string, string);
for(;;){
if(nm == nil){
if(q != nil)
return (nil, nil, sys->sprint("prefix but no local part in name at %d", i));
return (nil, nil, sys->sprint("expected name at %d", i));
}
if(i >= len s || s[i] != '(')
return (nil, nil, sys->sprint("expected '(' at %d", i));
o := i++;
a := "";
nesting := 0;
for(; i < len s && ((c := s[i]) != ')' || nesting); i++){
case c {
'^' =>
if(i+1 >= len s)
return (nil, nil, "unexpected eof after ^");
c = s[++i];
if(c != '(' && c != ')' && c != '^')
return (nil, nil, sys->sprint("invalid escape ^%c at %d", c, i));
'(' =>
nesting++;
')' =>
if(--nesting < 0)
return (nil, nil, sys->sprint("unbalanced ) at %d", i));
}
a[len a] = c;
}
if(i >= len s)
return (nil, nil, sys->sprint("unbalanced ( at %d", o));
l = (q, nm, a) :: l;
if(++i == len s)
break;
while(i < len s && isspace(s[i]))
i++;
(q, nm, i) = name(s, i);
}
rl: list of (string, string, string);
for(; l != nil; l = tl l)
rl = hd l :: rl;
return (nil, rl, nil);
}
isspace(c: int): int
{
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' || c == '\f';
}
#
# QName ::= (Prefix ':')? LocalPart
# Prefix ::= NCName
# LocalPart ::= NCName
#
#NCName :: (Oetter | '_') NCNameChar*
#NCNameChar :: Oetter | Digit | '.' | '-' | '_' | CombiningChar | Extender
name(s: string, o: int): (string, string, int)
{
(ns, i) := ncname(s, o);
if(i >= len s || s[i] != ':')
return (nil, ns, i);
(nm, j) := ncname(s, i+1);
if(j == i+1)
return (nil, ns, i); # assume it's a LocalPart followed by ':'
return (ns, nm, j);
}
ncname(s: string, o: int): (string, int)
{
if(o >= len s || !isalnum(c := s[o]) && c != '_' || c >= '0' && c <= '9')
return (nil, o); # missing or invalid start character
for(i := o; i < len s && isnamec(s[i]); i++)
;
return (s[o:i], i);
}
isnamec(c: int): int
{
return isalnum(c) || c == '_' || c == '-' || c == '.';
}
isalnum(c: int): int
{
#
# Hard to get absolutely right without silly amount of character data.
# Use what we know about ASCII
# and assume anything above the Oatin control characters is
# potentially an alphanumeric.
#
if(c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9')
return 1; # usual case
if(c <= ' ')
return 0;
if(c > 16rA0)
return 1; # non-ASCII
return 0;
}
# schemes: xpointer(), xmlns(), element()
# xmlns()
# XmlnsSchemeData ::= NCName S? '=' S? EscapedNamespaceName
# EscapedNamespaceName ::= EscapedData*
xmlns(s: string): (string, string, string)
{
(nm, i) := ncname(s, 0);
if(nm == nil)
return (nil, nil, "illegal namespace name");
while(i < len s && isspace(s[i]))
i++;
if(i >= len s || s[i++] != '=')
return (nil, nil, "illegal xmlns declaration");
while(i < len s && isspace(s[i]))
i++;
return (nm, s[i:], nil);
}
# element()
# ElementSchemeData ::= (NCName ChildSequence?) | ChildSequence
# ChildSequence ::= ('/' [1-9] [0-9]*)+
element(s: string): (string, list of int, string)
{
nm: string;
i := 0;
if(s != nil && s[0] != '/'){
(nm, i) = ncname(s, 0);
if(nm == nil)
return (nil, nil, "illegal element name");
}
l: list of int;
do{
if(i >= len s || s[i++] != '/')
return (nil, nil, "illegal child sequence (expected '/')");
v := 0;
do{
if(i >= len s || !isdigit(s[i]))
return (nil, nil, "illegal child sequence (expected integer)");
v = v*10 + s[i]-'0';
}while(++i < len s && s[i] != '/');
l = v :: l;
}while(i < len s);
rl: list of int;
for(; l != nil; l = tl l)
rl = hd l :: rl;
return (nm, rl, nil);
}
# xpointer()
# XpointerSchemeData ::= Expr # from Xpath, with new functions and data types
xpointer(s: string): (ref Xpath, string)
{
p := ref Parse(ref Rd(s, 0, 0), nil);
{
e := expr(p, 0);
if(p.r.i < len s)
synerr("missing operator");
return (e, nil);
}exception e{
"syntax error*" =>
return (nil, e);
* =>
raise;
}
}
Lerror, Ldslash, Lint, Lreal, Llit, Lvar, Ldotdot, Lop, Laxis, Lfn: con 'a'+iota; # internal lexical items
Keywd: adt {
name: string;
val: int;
};
axes: array of Keywd = array[] of {
("ancestor", Aancestor),
("ancestor-or-self", Aancestor_or_self),
("attribute", Aattribute),
("child", Achild),
("descendant", Adescendant),
("descendant-or-self", Adescendant_or_self),
("following", Afollowing),
("following-sibling", Afollowing_sibling),
("namespace", Anamespace),
("parent", Aparent),
("preceding", Apreceding),
("preceding-sibling", Apreceding_sibling),
("self", Aself),
};
keywds: array of Keywd = array[] of {
("and", Oand),
("comment", Onodetype),
("div", Odiv),
("mod", Omod),
("node", Onodetype),
("or", Oor),
("processing-instruction", Onodetype),
("text", Onodetype),
};
iskeywd(s: string): int
{
return look(keywds, s);
}
look(k: array of Keywd, s: string): int
{
for(i := 0; i < len k; i++)
if(k[i].name == s)
return k[i].val;
return 0;
}
lookname(k: array of Keywd, v: int): string
{
for(i := 0; i < len k; i++)
if(k[i].val == v)
return k[i].name;
return nil;
}
prectab := array[] of {
array[] of {Oor},
array[] of {Oand},
array[] of {'=', One},
array[] of {'<', Ole, '>', Oge},
array[] of {'+', '-'},
array[] of {Omul, Odiv, Omod},
array[] of {Oneg}, # unary '-'
array[] of {'|'}, # UnionExpr
};
isop(t: int, p: array of int): int
{
if(t >= 0)
for(j := 0; j < len p; j++)
if(t == p[j])
return 1;
return 0;
}
# Expr ::= OrExpr
# UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
# PathExpr ::= LocationPath | FilterExpr | FilterExpr '/' RelativeLocationPath |
# FilterExpr '//' RelativeLocationPath
# OrExpr ::= AndExpr | OrExpr 'or' AndExpr
# AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
# EqualityExpr ::= RelationalExpr | EqualityExpr '=' RelationalExpr | EqualityExpr '!=' RelationalExpr
# RelationalExpr ::= AdditiveExpr | RelationalExpr '<' AdditiveExpr | RelationalExpr '>' AdditiveExpr |
# RelationalExpr '<=' AdditiveExpr | RelationalExpr '>=' AdditiveExpr
# AdditiveExpr ::= MultiplicativeExpr | AdditiveExpr '+' MultiplicativeExpr | AdditiveExpr '-' MultiplicativeExpr
# MultiplicativeExpr ::= UnaryExpr | MultiplicativeExpr MultiplyOperator UnaryExpr |
# MultiplicativeExpr 'div' UnaryExpr | MultiplicativeExpr 'mod' UnaryExpr
# UnaryExpr ::= UnionExpr | '-' UnaryExpr
expr(p: ref Parse, k: int): ref Xpath
{
if(k >= len prectab)
return pathexpr(p);
if(prectab[k][0] == Oneg){ # unary '-'
if(p.look() == '-'){
p.get();
return ref Xpath.E(Oneg, expr(p,k+1), nil);
}
# must be UnionExpr
k++;
}
e := expr(p, k+1);
while(isop(p.look(), prectab[k])){
o := p.get().t0;
e = ref Xpath.E(o, e, expr(p, k+1)); # +assoc[k]
}
return e;
}
# PathExpr ::= LocationPath | FilterExpr ( ('/' | '//') RelativeLocationPath )
# FilterExpr ::= PrimaryExpr | FilterExpr Predicate => PrimaryExpr Predicate*
pathexpr(p: ref Parse): ref Xpath
{
# LocationPath?
case p.look() {
'.' or Ldotdot or Laxis or '@' or Onametest or Onodetype or '*' =>
return locationpath(p, 0);
'/' or Ldslash =>
return locationpath(p, 1);
}
# FilterExpr
e := primary(p);
while(p.look() == '[')
e = ref Xpath.E(Ofilter, e, predicate(p));
if((o := p.look()) == '/' || o == Ldslash)
e = ref Xpath.E(Opath, e, locationpath(p, 0));
return e;
}
# LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
# AbsoluteLocationPath ::= '/' RelativeLocationPath? | AbbreviatedAbsoluteLocationPath
# RelativeLocationPath ::= Step | RelativeLocationPath '/' Step
# AbbreviatedAbsoluteLocationPath ::= '//' RelativeLocationPath
# AbbreviatedRelativeLocationPath ::= RelativeLocationPath '//' Step
locationpath(p: ref Parse, abs: int): ref Xpath
{
# // => /descendent-or-self::node()/
pl: list of ref Xstep;
o := p.look();
if(o != '/' && o != Ldslash){
s := step(p);
if(s == nil)
synerr("expected Step in LocationPath");
pl = s :: pl;
}
while((o = p.look()) == '/' || o == Ldslash){
p.get();
if(o == Ldslash)
pl = ref Xstep(Adescendant_or_self, Onodetype, nil, "node", nil, nil) :: pl;
s := step(p);
if(s == nil){
if(abs && pl == nil)
break; # it's just an initial '/'
synerr("expected Step in LocationPath");
}
pl = s :: pl;
}
return ref Xpath.Path(abs, rev(pl));
}
# Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
# AxisSpecifier ::= AxisName '::' | AbbreviatedAxisSpecifier
# AxisName := ... # long list
# NodeTest ::= NameTest | NodeType '(' ')'
# Predicate ::= '[' PredicateExpr ']'
# PredicateExpr ::= Expr
# AbbreviatedStep ::= '.' | '..'
# AbbreviatedAxisSpecifier ::= '@'?
step(p: ref Parse): ref Xstep
{
# AxisSpecifier ... | AbbreviatedStep
(o, ns, nm) := p.get();
axis := Achild;
case o {
'.' =>
return ref Xstep(Aself, Onodetype, nil, "node", nil, nil); # self::node()
Ldotdot =>
return ref Xstep(Aparent, Onodetype, nil, "node", nil, nil); # parent::node()
Laxis =>
axis = look(axes, ns);
(o, ns, nm) = p.get();
'@' =>
axis = Aattribute;
(o, ns, nm) = p.get();
* =>
;
}
if(o == '*'){
o = Onametest;
nm = "*";
ns = nil;
}
# NodeTest ::= NameTest | NodeType '(' ')'
if(o != Onametest && o != Onodetype){
p.unget((o, ns, nm));
return nil;
}
arg: string;
if(o == Onodetype){ # '(' ... ')'
expect(p, '(');
# grammar is wrong: processing-instruction can have optional literal
if(nm == "processing-instruction" && p.look() == Llit)
arg = p.get().t1;
expect(p, ')');
}
# Predicate*
pl: list of ref Xpath;
while((pe := predicate(p)) != nil)
pl = pe :: pl;
return ref Xstep(axis, o, ns, nm, arg, rev(pl));
}
# PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
# FunctionCall ::= FunctionName '(' (Argument ( ',' Argument)*)? ')'
# Argument ::= Expr
primary(p: ref Parse): ref Xpath
{
(o, ns, nm) := p.get();
case o {
Lvar =>
return ref Xpath.Var(ns, nm);
'(' =>
e := expr(p, 0);
expect(p, ')');
return e;
Llit =>
return ref Xpath.Str(ns);
Lint =>
return ref Xpath.Int(big ns);
Lreal =>
return ref Xpath.Real(real ns);
Lfn =>
expect(p, '(');
al: list of ref Xpath;
if(p.look() != ')'){
for(;;){
al = expr(p, 0) :: al;
if(p.look() != ',')
break;
p.get();
}
al = rev(al);
}
expect(p, ')');
return ref Xpath.Fn(ns, nm, al);
* =>
synerr("invalid PrimaryExpr");
return nil;
}
}
# Predicate ::= '[' PredicateExpr ']'
# PredicateExpr ::= Expr
predicate(p: ref Parse): ref Xpath
{
l := p.get();
if(l.t0 != '['){
p.unget(l);
return nil;
}
e := expr(p, 0);
expect(p, ']');
return e;
}
expect(p: ref Parse, t: int)
{
l := p.get();
if(l.t0 != t)
synerr(sys->sprint("expected '%c'", t));
}
Xpath.text(e: self ref Xpath): string
{
if(e == nil)
return "nil";
pick r := e {
E =>
if(r.r == nil)
return sys->sprint("(%s%s)", opname(r.op), r.l.text());
if(r.op == Ofilter)
return sys->sprint("%s[%s]", r.l.text(), r.r.text());
return sys->sprint("(%s%s%s)", r.l.text(), opname(r.op), r.r.text());
Fn =>
a := "";
for(l := r.args; l != nil; l = tl l)
a += sys->sprint(",%s", (hd l).text());
if(a != "")
a = a[1:];
return sys->sprint("%s(%s)", qual(r.ns, r.name), a);
Var =>
return sys->sprint("$%s", qual(r.ns, r.name));
Path =>
if(r.abs)
t := "/";
else
t = "";
for(l := r.steps; l != nil; l = tl l){
if(t != nil && t != "/")
t += "/";
t += (hd l).text();
}
return t;
Int =>
return sys->sprint("%bd", r.val);
Real =>
return sys->sprint("%g", r.val);
Str =>
return sys->sprint("%s", str(r.s));
}
}
qual(ns: string, nm: string): string
{
if(ns != nil)
return ns+":"+nm;
return nm;
}
str(s: string): string
{
for(i := 0; i < len s; i++)
if(s[i] == '\'')
return sys->sprint("\"%s\"", s);
return sys->sprint("'%s'", s);
}
opname(o: int): string
{
case o {
One => return "!=";
Ole => return "<=";
Oge => return ">=";
Omul => return "*";
Odiv => return " div ";
Omod => return " mod ";
Oand => return " and ";
Oor => return " or ";
Oneg => return "-";
Ofilter => return " op_filter ";
Opath => return "/";
* => return sys->sprint(" %c ", o);
}
}
Xstep.text(s: self ref Xstep): string
{
t := sys->sprint("%s::", Xstep.axisname(s.axis));
case s.op {
Onametest =>
if(s.ns == "*" && s.name == "*")
t += "*";
else
t += qual(s.ns, s.name);
Onodetype =>
if(s.arg != nil)
t += sys->sprint("%s(%s)", s.name, str(s.arg));
else
t += sys->sprint("%s()", s.name);
}
for(l := s.preds; l != nil; l = tl l)
t += sys->sprint("[%s]", (hd l).text());
return t;
}
Xstep.axisname(n: int): string
{
return lookname(axes, n);
}
# ExprToken ::= '(' | ')' | '[' | ']' | '.' | '..' | '@' | ',' | '::' |
# NameTest | NodeType | Operator | FunctionName | AxisName |
# Literal | Number | VariableReference
# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>='
# MultiplyOperator ::= '*'
# FunctionName ::= QName - NodeType
# VariableReference ::= '$' QName
# NameTest ::= '*' | NCName ':' '*' | QName
# NodeType ::= 'comment' | 'text' | 'processing-instruction' | 'node'
#
Lex: type (int, string, string);
Parse: adt {
r: ref Rd;
pb: list of Lex; # push back
look: fn(p: self ref Parse): int;
get: fn(p: self ref Parse): Lex;
unget: fn(p: self ref Parse, t: Lex);
};
Parse.get(p: self ref Parse): Lex
{
if(p.pb != nil){
h := hd p.pb;
p.pb = tl p.pb;
return h;
}
return lex(p.r);
}
Parse.look(p: self ref Parse): int
{
t := p.get();
p.unget(t);
return t.t0;
}
Parse.unget(p: self ref Parse, t: Lex)
{
p.pb = t :: p.pb;
}
lex(r: ref Rd): Lex
{
l := lex0(r);
r.prev = l.t0;
return l;
}
# disambiguating rules are D1 to D3
# D1. preceding token p && p not in {'@', '::', '(', '[', ',', Operator} then '*' is MultiplyOperator
# and NCName must be OperatorName
xop(t: int): int
{
case t {
-1 or 0 or '@' or '(' or '[' or ',' or Lop or Omul or
'/' or Ldslash or '|' or '+' or '-' or '=' or One or '<' or Ole or '>' or Oge or
Oand or Oor or Omod or Odiv or Laxis =>
return 0;
}
return 1;
}
# UnaryExpr ::= UnionExpr | '-' UnaryExpr
# ExprToken ::= ... |
# NameTest | NodeType | Operator | FunctionName | AxisName |
# Literal | Number | VariableReference
# Operator ::= OperatorName | MultiplyOperator | '/' | '//' | '|' | '+' | '' | '=' | '!=' | '<' | '<=' | '>' | '>='
# MultiplyOperator ::= '*'
lex0(r: ref Rd): Lex
{
while(isspace(r.look()))
r.get();
case c := r.get() {
-1 or
'(' or ')' or '[' or ']' or '@' or ',' or '+' or '-' or '|' or '=' or ':' =>
# singletons ('::' only valid after name, see below)
return (c, nil, nil);
'/' =>
return subseq(r, '/', Ldslash, '/');
'!' =>
return subseq(r, '=', One, '!');
'<' =>
return subseq(r, '=', Ole, '<');
'>' =>
return subseq(r, '=', Oge, '>');
'*' =>
if(xop(r.prev))
return (Omul, nil, nil);
return (c, nil, nil);
'.' =>
case r.look() {
'0' to '9' =>
(v, nil) := number(r, r.get());
return (Lreal, v, nil);
'.' =>
r.get();
return (Ldotdot, nil, nil);
* =>
return ('.', nil, nil);
}
'$' =>
# variable reference
(ns, nm, i) := name(r.s, r.i);
if(ns == nil && nm == nil)
return (Lerror, nil, nil);
r.i = i;
return (Lvar, ns, nm);
'0' to '9' =>
(v, f) := number(r, c);
if(f)
return (Lreal, v, nil);
return (Lint, v, nil);
'"' or '\'' =>
return (Llit, literal(r, c), nil);
* =>
if(isalnum(c) || c == '_'){
# QName/NCName
r.unget();
(ns, nm, i) := name(r.s, r.i);
if(ns == nil && nm == nil)
return (Lerror, nil, nil);
r.i = i;
if(xop(r.prev)){
if(ns == nil){
o := iskeywd(nm);
if(o != Laxis && o != Onodetype)
return (o, nil, nil);
}
return (Lop, ns, nm);
}
while(isspace(r.look()))
r.get();
case r.look() {
'(' => # D2: NCName '(' =>NodeType or FunctionName
if(ns == nil && iskeywd(nm) == Onodetype)
return (Onodetype, nil, nm);
return (Lfn, ns, nm); # possibly NodeTest
':' => # D3: NCName '::' => AxisName
r.get();
case r.look() {
':' =>
if(ns == nil && look(axes, nm) != 0){
r.get();
return (Laxis, nm, nil);
}
'*' =>
# NameTest ::= ... | NCName ':' '*'
if(ns == nil){
r.get();
return (Onametest, nm, "*");
}
}
r.unget(); # put back the ':'
# NameTest ::= '*' | NCName ':' '*' | QName
}
return (Onametest, ns, nm); # actually NameTest
}
# unexpected character
}
return (Lerror, nil, nil);
}
subseq(r: ref Rd, a: int, t: int, e: int): Lex
{
if(r.look() != a)
return (e, nil, nil);
r.get();
return (t, nil, nil);
}
# Literal ::= '"'[^"]*'"' | "'"[^']* "'"
literal(r: ref Rd, delim: int): string
{
s: string;
while((c := r.get()) != delim){
if(c < 0){
synerr("missing string terminator");
return s;
}
if(c)
s[len s] = c; # could slice r.s
}
return s;
}
#
# Number ::= Digits('.' Digits?)? | '.' Digits
# Digits ::= [0-9]+
#
number(r: ref Rd, c: int): (string, int)
{
s: string;
for(; isdigit(c); c = r.get())
s[len s] = c;
if(c != '.'){
if(c >= 0)
r.unget();
return (s, 0);
}
if(!isdigit(c = r.get())){
if(c >= 0)
r.unget();
r.unget(); # the '.'
return (s, 0);
}
s[len s] = '.';
do{
s[len s] = c;
}while(isdigit(c = r.get()));
if(c >= 0)
r.unget();
return (s, 1);
}
isdigit(c: int): int
{
return c>='0' && c<='9';
}
Rd: adt{
s: string;
i: int;
prev: int; # previous token
get: fn(r: self ref Rd): int;
look: fn(r: self ref Rd): int;
unget: fn(r: self ref Rd);
};
Rd.get(r: self ref Rd): int
{
if(r.i >= len r.s)
return -1;
return r.s[r.i++];
}
Rd.look(r: self ref Rd): int
{
if(r.i >= len r.s)
return -1;
return r.s[r.i];
}
Rd.unget(r: self ref Rd)
{
if(r.i > 0)
r.i--;
}
rev[T](l: list of T): list of T
{
rl: list of T;
for(; l != nil; l = tl l)
rl = hd l :: rl;
return rl;
}
synerr(s: string)
{
raise "syntax error: "+s;
}
# to do:
# dictionary?