ref: babf901b4a508c3ec5d1f89655f10377bbdf9637
dir: /appl/lib/url.b/
implement Url;
include "sys.m";
sys: Sys;
include "string.m";
S: String;
include "url.m";
schemes = array[] of {
NOSCHEME => "",
HTTP => "http",
HTTPS => "https",
FTP => "ftp",
FILE => "file",
GOPHER => "gopher",
MAILTO => "mailto",
NEWS => "news",
NNTP => "nntp",
TELNET => "telnet",
WAIS => "wais",
PROSPERO => "prospero",
JAVASCRIPT => "javascript",
UNKNOWN => "unknown"
};
init()
{
sys = load Sys Sys->PATH;
S = load String String->PATH;
}
# To allow relative urls, only fill in specified pieces (don't apply defaults)
# general syntax: <scheme>:<scheme-specific>
# for IP schemes, <scheme-specific> is
# //<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
makeurl(surl: string): ref ParsedUrl
{
scheme := NOSCHEME;
user := "";
passwd := "";
host := "";
port := "";
pstart := "";
path := "";
query := "";
frag := "";
(sch, url) := split(surl, ":");
if(url == "") {
url = sch;
sch = "";
}
else {
(nil, x) := S->splitl(sch, "^-a-zA-Z0-9.+");
if(x != nil) {
url = surl;
sch = "";
}
else {
scheme = UNKNOWN;
sch = S->tolower(sch);
for(i := 0; i < len schemes; i++)
if(schemes[i] == sch) {
scheme = i;
break;
}
}
}
if(scheme == MAILTO)
path = url;
else if (scheme == JAVASCRIPT)
path = url;
else {
if(S->prefix("//", url)) {
netloc: string;
(netloc, path) = S->splitl(url[2:], "/");
if(path != "")
path = path[1:];
pstart = "/";
if(scheme == FILE)
host = netloc;
else {
(up,hp) := split(netloc, "@");
if(hp == "")
hp = up;
else
(user, passwd) = split(up, ":");
(host, port) = split(hp, ":");
}
}
else {
if(S->prefix("/", url)) {
pstart = "/";
path = url[1:];
}
else
path = url;
}
if(scheme == FILE) {
if(host == "")
host = "localhost";
}
else {
(path, frag) = split(path, "#");
(path, query) = split(path, "?");
}
}
return ref ParsedUrl(scheme, 1, user, passwd, host, port, pstart, path, query, frag);
}
ParsedUrl.tostring(u: self ref ParsedUrl) : string
{
if (u == nil)
return nil;
ans := "";
if(u.scheme > 0 && u.scheme < len schemes)
ans = schemes[u.scheme] + ":";
if(u.host != "") {
ans = ans + "//";
if(u.user != "") {
ans = ans + u.user;
if(u.passwd != "")
ans = ans + ":" + u.passwd;
ans = ans + "@";
}
ans = ans + u.host;
if(u.port != "")
ans = ans + ":" + u.port;
}
ans = ans + u.pstart + u.path;
if(u.query != "")
ans = ans + "?" + u.query;
if(u.frag != "")
ans = ans + "#" + u.frag;
return ans;
}
ParsedUrl.makeabsolute(u: self ref ParsedUrl, b: ref ParsedUrl)
{
# The following is correct according to RFC 1808, but is violated
# by various extant web pages.
if(u.scheme != NOSCHEME && u.scheme != HTTP)
return;
if(u.host == "" && u.path == "" && u.pstart == "" && u.query == "" && u.frag == "") {
u.scheme = b.scheme;
u.user = b.user;
u.passwd = b.passwd;
u.host = b.host;
u.port = b.port;
u.path = b.path;
u.pstart = b.pstart;
u.query = b.query;
u.frag = b.frag;
return;
}
if(u.scheme == NOSCHEME)
u.scheme = b.scheme;
if(u.host != "")
return;
u.user = b.user;
u.passwd = b.passwd;
u.host = b.host;
u.port = b.port;
if(u.pstart == "/")
return;
u.pstart = "/";
if(u.path == "") {
u.path = b.path;
if(u.query == "")
u.query = b.query;
}
else {
(p1,nil) := S->splitr(b.path, "/");
u.path = canonize(p1 + u.path);
}
}
# Like splitl, but assume one char match, and omit that from second part.
# If c doesn't appear in s, the return is (s, "").
split(s, c: string) : (string, string)
{
(a,b) := S->splitl(s, c);
if(b != "")
b = b[1:];
return (a,b);
}
# remove ./ and ../ from s
canonize(s: string): string
{
(base, file) := S->splitr(s, "/");
(nil, path) := sys->tokenize(base, "/");
revpath : list of string = nil;
for(p := path; p != nil; p = tl p) {
if(hd p == "..") {
if(revpath != nil)
revpath = tl revpath;
}
else if(hd p != ".")
revpath = (hd p) :: revpath;
}
while(revpath != nil && hd revpath == "..")
revpath = tl revpath;
ans := "";
if(revpath != nil) {
ans = hd revpath;
revpath = tl revpath;
while(revpath != nil) {
ans = (hd revpath) + "/" + ans;
revpath = tl revpath;
}
}
if (ans != nil)
ans += "/";
ans += file;
return ans;
}