ref: 1487a98138fe6fcd5c7c295ff5de3102a020a478
parent: 5e781b5c85b6b6a0075ebe52bf6a25485693f48b
author: cinap_lenrek <cinap_lenrek@gmx.de>
date: Fri Oct 5 19:14:23 EDT 2012
replace urlencode with C version that isn't broken for UTF-8
--- a/rc/bin/urlencode
+++ /dev/null
@@ -1,36 +1,0 @@
-#!/bin/awk -f
-BEGIN {-# We assume an awk implementation that is just plain dumb.
-# We will convert an character to its ASCII value with the
-# table ord[], and produce two-digit hexadecimal output
-# without the printf("%02X") feature.-
-EOL = "%0A" # "end of line" string (encoded)
-split ("1 2 3 4 5 6 7 8 9 A B C D E F", hextab, " ")-hextab [0] = 0
-for ( i=1; i<=255; ++i ) ord [ sprintf ("%c", i) "" ] = i + 0-}
-{-encoded = ""
-for ( i=1; i<=length ($0); ++i ) {- c = substr ($0, i, 1)
- if ( c ~ /[a-zA-Z0-9.-]/ ) {- encoded = encoded c # safe character
- } else if ( c == " " ) {- encoded = encoded "+" # special handling
- } else {- # unsafe character, encode it as a two-digit hex-number
- lo = ord [c] % 16
- hi = int (ord [c] / 16);
- encoded = encoded "%" hextab [hi] hextab [lo]
- }
-}
-if ( EncodeEOL ) {- printf ("%s", encoded EOL)-} else {- print encoded
-}
-}
-END {- #if ( EncodeEOL ) print ""
-}
--- /dev/null
+++ b/sys/src/cmd/urlencode.c
@@ -1,0 +1,98 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+
+/* Buffered input and output; main attaches them to fds 0 and 1 with Binit. */
+Biobuf bin;
+Biobuf bout;
+/* Non-zero when -d was given: main then decodes instead of encoding. */
+int dflag;
+
+/*
+ * Hex digit tables indexed by digit value.  hexdigit searches both so
+ * either case is accepted; the encoder in main emits digits from Hex.
+ */
+char hex[] = "0123456789abcdef";
+char Hex[] = "0123456789ABCDEF";
+
+/*
+ * Return the value (0-15) of the hexadecimal digit c, accepting
+ * upper or lower case, or -1 if c is not a hexadecimal digit.
+ */
+int
+hexdigit(int c)
+{
+	char *p;
+
+	/*
+	 * c must be strictly positive: strchr(s, 0) matches the
+	 * terminating NUL of the table, which would make a NUL byte
+	 * look like a digit with the bogus value 16.
+	 */
+	if(c > 0){
+		if((p = strchr(Hex, c)) != 0)
+			return p - Hex;
+		if((p = strchr(hex, c)) != 0)
+			return p - hex;
+	}
+	return -1;
+}
+
+/*
+ * Write the command synopsis to file descriptor 2 and exit with
+ * the status string "usage".
+ */
+void
+usage(void)
+{
+	fprint(2, "Usage: %s [ -d ] [ file ]\n", argv0);
+	exits("usage");
+}
+
+/*
+ * Copy bin to bout, replacing each %XX escape with the byte it
+ * names and each '+' with a space (the inverse of urlencode).
+ * A '%' that is not followed by two hex digits is not an escape
+ * and is copied through literally, including when the input ends
+ * in the middle of one.
+ */
+static void
+urldecode(void)
+{
+	int c, c1, c2, x1, x2;
+
+	while((c = Bgetc(&bin)) >= 0){
+		if(c == '+'){
+			Bputc(&bout, ' ');
+			continue;
+		}
+		if(c != '%'){
+			Bputc(&bout, c);
+			continue;
+		}
+
+		/* first digit */
+		if((c1 = Bgetc(&bin)) < 0){
+			Bputc(&bout, c);
+			break;
+		}
+		if((x1 = hexdigit(c1)) < 0){
+			/* push c1 back so it is processed as ordinary input */
+			Bungetc(&bin);
+			Bputc(&bout, c);
+			continue;
+		}
+
+		/* second digit */
+		if((c2 = Bgetc(&bin)) < 0){
+			Bputc(&bout, c);
+			Bputc(&bout, c1);
+			break;
+		}
+		if((x2 = hexdigit(c2)) < 0){
+			/* push c2 back; '%' and c1 are emitted literally */
+			Bungetc(&bin);
+			Bputc(&bout, c);
+			Bputc(&bout, c1);
+			continue;
+		}
+
+		Bputc(&bout, x1<<4 | x2);
+	}
+}
+
+/*
+ * Copy bin to bout byte by byte: letters, digits and the
+ * punctuation in the safe set pass through, a space becomes '+',
+ * and every other byte becomes %XX with upper case hex digits.
+ * Working on bytes means multi-byte UTF-8 sequences are escaped
+ * one byte at a time.
+ */
+static void
+urlencode(void)
+{
+	int c;
+
+	while((c = Bgetc(&bin)) >= 0){
+		/*
+		 * c != 0: strchr would otherwise match the string's
+		 * terminating NUL and emit a raw NUL byte.
+		 */
+		if(c != 0 && strchr("/$-_@.!*'(),", c) != 0
+		|| 'a'<=c && c<='z'
+		|| 'A'<=c && c<='Z'
+		|| '0'<=c && c<='9')
+			Bputc(&bout, c);
+		else if(c == ' ')
+			Bputc(&bout, '+');
+		else {
+			Bputc(&bout, '%');
+			Bputc(&bout, Hex[c>>4]);
+			Bputc(&bout, Hex[c&15]);
+		}
+	}
+}
+
+/*
+ * urlencode [ -d ] [ file ]
+ *
+ * Encode (or, with -d, decode) the named file, or standard input
+ * when no file is given, writing the result to standard output.
+ */
+void
+main(int argc, char *argv[])
+{
+	ARGBEGIN {
+	case 'd':
+		dflag = 1;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if(argc > 1)
+		usage();
+	if(argc == 1){
+		/* reopen the file as fd 0 so bin reads from it */
+		close(0);
+		if(open(*argv, OREAD) < 0)
+			sysfatal("%r");
+	}
+
+	Binit(&bin, 0, OREAD);
+	Binit(&bout, 1, OWRITE);
+
+	if(dflag)
+		urldecode();
+	else
+		urlencode();
+
+	/* a failed write must not be reported as success */
+	if(Bflush(&bout) < 0)
+		sysfatal("write: %r");
+	exits(0);
+}
--
⑨