diff options
author | Alex Dowad <alexinbeijing@gmail.com> | 2014-04-14 21:56:25 +0200 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2014-04-25 10:18:45 +0300 |
commit | 39e52e8179972d92fe7cfa51da7f9dd5371b75d8 (patch) | |
tree | d535fe55dfa8f96e3399329d57b015fa286fcc3c /src | |
parent | 234f61a74e7ba4be512026d67d4ec9975b80632c (diff) | |
download | squark-39e52e8179972d92fe7cfa51da7f9dd5371b75d8.tar.bz2 squark-39e52e8179972d92fe7cfa51da7f9dd5371b75d8.tar.xz |
squark-filter: correctly identify URLs which use percent encoding
Diffstat (limited to 'src')
-rw-r--r-- | src/blob.c | 44 | ||||
-rwxr-xr-x | src/sqdb-build.lua | 1 |
2 files changed, 44 insertions, 1 deletion
```diff
@@ -139,6 +139,24 @@ int blob_icmp(blob_t a, blob_t b)
 	return strncasecmp(a.ptr, b.ptr, a.len);
 }
 
+int blob_find_char(blob_t blob, char c)
+{
+	int i;
+	char *ptr = blob.ptr;
+	for (i = 0; i < blob.len; i++)
+		if (ptr[i] == c)
+			return i;
+	return -1;
+}
+
+unsigned char blob_read_hexbyte(blob_t *b, int i)
+{
+	if (b->len >= i+2)
+		return (dx(b->ptr[i]) << 4) + dx(b->ptr[i+1]);
+	else
+		return 0;
+}
+
 void blob_lowercase(blob_t blob)
 {
 	int i;
@@ -147,6 +165,30 @@ void blob_lowercase(blob_t blob)
 		ptr[i] = tolower(ptr[i]);
 }
 
+void blob_percent_decode(blob_t *blob)
+{
+	int i = blob_find_char(*blob, '%');
+
+	if (i >= 0) {
+		int dest = i;
+		int len = blob->len;
+		char* ptr = blob->ptr;
+
+		for ( ; i < blob->len; i++, dest++) {
+			if (ptr[i] == '%') {
+				ptr[dest] = blob_read_hexbyte(blob, i+1);
+				i += 2;
+				len -= 2;
+			}
+			else {
+				ptr[dest] = ptr[i];
+			}
+		}
+
+		blob->len = len;
+	}
+}
+
 int blob_pull_inet_addr(blob_t *b, struct in_addr *saddr)
 {
 	unsigned long ip = 0;
@@ -280,7 +322,7 @@ void blob_push_urldecode(blob_t *to, blob_t url)
 			blob_expand_head(to, orig, '/');
 			blob_expand_head_bytes(to, 1); /* back up past the '/' separator */
 		} else {
-			/* copy decoded; FIXME decode percent encoding */
+			blob_percent_decode(&b);
 			blob_push_byte(to, '/');
 			blob_push(to, b);
 		}
diff --git a/src/sqdb-build.lua b/src/sqdb-build.lua
index cd039e2..2806bb2 100755
--- a/src/sqdb-build.lua
+++ b/src/sqdb-build.lua
@@ -117,6 +117,7 @@ local function read_urls(filename, category, locked)
 		url = url:gsub("#.*", "")
 		url = url:gsub(" *^", "")
 		url = url:lower()
+		url = url:gsub("%%(%x%x)", function(h) return string.char(tonumber(h,16)) end)
 		url = url:gsub("^(www%d*[.])([^.]*[.])", "%2")
 		domain, path = url:match("([^/]*)/?(.*)")
 		domain = domain:gsub(":.*", "")
```