git: 9front

Download patch

ref: 1487a98138fe6fcd5c7c295ff5de3102a020a478
parent: 5e781b5c85b6b6a0075ebe52bf6a25485693f48b
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Fri Oct 5 19:14:23 EDT 2012

replace urlencode with C version that isn't broken for utf-8

--- a/rc/bin/urlencode
+++ /dev/null
@@ -1,36 +1,0 @@
-#!/bin/awk -f
-BEGIN {
-# We assume an awk implementation that is just plain dumb.
-# We will convert an character to its ASCII value with the
-# table ord[], and produce two-digit hexadecimal output
-# without the printf("%02X") feature.
-
-EOL = "%0A"	# "end of line" string (encoded)
-split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")
-hextab [0] = 0
-for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0
-}
-{
-encoded = ""
-for ( i=1; i<=length ($0); ++i ) {
-	c = substr ($0, i, 1)
-	if ( c ~ /[a-zA-Z0-9.-]/ ) {
-	encoded = encoded c	# safe character
-	} else if ( c == " " ) {
-	encoded = encoded "+"	# special handling
-	} else {
-	# unsafe character, encode it as a two-digit hex-number
-	lo = ord [c] % 16
-	hi = int (ord [c] / 16);
-	encoded = encoded "%" hextab [hi] hextab [lo]
-	}
-}
-if ( EncodeEOL ) {
-	printf ("%s", encoded EOL)
-} else {
-	print encoded
-}
-}
-END {
-	#if ( EncodeEOL ) print ""
-}
--- /dev/null
+++ b/sys/src/cmd/urlencode.c
@@ -1,0 +1,146 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+Biobuf	bin;
+Biobuf	bout;
+int	dflag;
+
+char	hex[] = "0123456789abcdef";
+char	Hex[] = "0123456789ABCDEF";
+
+/*
+ * Return the value (0-15) of the hexadecimal digit c, in either
+ * case, or -1 if c is not a hexadecimal digit.
+ */
+int
+hexdigit(int c)
+{
+	char *p;
+
+	/*
+	 * c must be strictly positive: strchr(s, 0) matches the
+	 * terminating NUL, which would make a zero byte look like 16.
+	 */
+	if(c > 0){
+		if((p = strchr(Hex, c)) != 0)
+			return p - Hex;
+		if((p = strchr(hex, c)) != 0)
+			return p - hex;
+	}
+	return -1;
+}
+
+/* Print the usage message on fd 2 and exit with status "usage". */
+void
+usage(void)
+{
+	fprint(2, "Usage: %s [ -d ] [ file ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Copy bin to bout, replacing each %XX escape with the byte it
+ * names and each '+' with a space.  Malformed escapes, including
+ * ones cut short by end of input, are copied through unchanged.
+ */
+static void
+decode(void)
+{
+	int c, c1, c2, x1, x2;
+
+	while((c = Bgetc(&bin)) >= 0){
+		if(c == '+')
+			c = ' ';
+		else if(c == '%'){
+			if((c1 = Bgetc(&bin)) < 0){
+				/* lone '%' at end of input */
+				Bputc(&bout, c);
+				break;
+			}
+			if((x1 = hexdigit(c1)) < 0){
+				/* not an escape: push c1 back, it may be a '%' itself */
+				Bungetc(&bin);
+				Bputc(&bout, c);
+				continue;
+			}
+			if((c2 = Bgetc(&bin)) < 0){
+				/* "%X" at end of input */
+				Bputc(&bout, c);
+				Bputc(&bout, c1);
+				break;
+			}
+			if((x2 = hexdigit(c2)) < 0){
+				Bungetc(&bin);
+				Bputc(&bout, c);
+				Bputc(&bout, c1);
+				continue;
+			}
+			c = x1<<4 | x2;
+		}
+		Bputc(&bout, c);
+	}
+}
+
+/*
+ * Copy bin to bout one byte at a time.  ASCII letters, digits and
+ * the punctuation listed below are copied as is, a space becomes
+ * '+', and any other byte becomes %XX with upper-case hex digits.
+ */
+static void
+encode(void)
+{
+	int c;
+
+	while((c = Bgetc(&bin)) >= 0){
+		/* c > 0 keeps strchr from matching a zero byte to the terminator */
+		if((c > 0 && strchr("/$-_@.!*'(),", c) != 0)
+		|| ('a'<=c && c<='z')
+		|| ('A'<=c && c<='Z')
+		|| ('0'<=c && c<='9'))
+			Bputc(&bout, c);
+		else if(c == ' ')
+			Bputc(&bout, '+');
+		else {
+			Bputc(&bout, '%');
+			Bputc(&bout, Hex[c>>4]);
+			Bputc(&bout, Hex[c&15]);
+		}
+	}
+}
+
+/*
+ * Encode (or, with -d, decode) the named file, or standard input
+ * when no file is given, onto standard output.
+ */
+void
+main(int argc, char *argv[])
+{
+	ARGBEGIN {
+	case 'd':
+		dflag = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+	if(argc == 1){
+		/* close fd 0 first; the open below is then expected to take its place */
+		close(0);
+		if(open(*argv, OREAD) < 0)
+			sysfatal("%r");
+	} else if(argc > 1)
+		usage();
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+
+	if(dflag)
+		decode();
+	else
+		encode();
+
+	/* report write errors instead of exiting successfully */
+	if(Bflush(&bout) < 0)
+		sysfatal("write: %r");
+	exits(0);
+}
--