summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Alex Dowad <alexinbeijing@gmail.com> 2014-04-14 21:56:25 +0200
committer Timo Teräs <timo.teras@iki.fi> 2014-04-25 10:18:45 +0300
commit 39e52e8179972d92fe7cfa51da7f9dd5371b75d8 (patch)
tree d535fe55dfa8f96e3399329d57b015fa286fcc3c
parent 234f61a74e7ba4be512026d67d4ec9975b80632c (diff)
download squark-39e52e8179972d92fe7cfa51da7f9dd5371b75d8.tar.bz2
squark-39e52e8179972d92fe7cfa51da7f9dd5371b75d8.tar.xz
squark-filter: correctly identify URLs which use percent encoding
-rw-r--r-- src/blob.c 44
-rwxr-xr-x src/sqdb-build.lua 1
2 files changed, 44 insertions, 1 deletions
diff --git a/src/blob.c b/src/blob.c
index 41e8d75..4ddc156 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -139,6 +139,24 @@ int blob_icmp(blob_t a, blob_t b)
return strncasecmp(a.ptr, b.ptr, a.len);
}
+/*
+ * Locate the first occurrence of the byte c within blob.
+ *
+ * Returns the zero-based offset of the match, or -1 when c does not
+ * occur in the first blob.len bytes.
+ */
+int blob_find_char(blob_t blob, char c)
+{
+	char *data = blob.ptr;
+	int pos = 0;
+
+	while (pos < blob.len) {
+		if (data[pos] == c)
+			return pos;
+		pos++;
+	}
+
+	return -1;
+}
+
+/*
+ * Read the two characters at offsets i and i+1 of *b as one byte:
+ * dx() of the first character forms the high nibble, dx() of the
+ * second the low nibble.
+ *
+ * Returns 0 when the blob holds fewer than i+2 bytes.
+ * NOTE(review): dx() is defined outside this hunk; what it yields for a
+ * non-hex character is not visible here -- confirm before relying on it.
+ */
+unsigned char blob_read_hexbyte(blob_t *b, int i)
+{
+	/* Both digits must lie inside the blob. */
+	if (b->len < i+2)
+		return 0;
+
+	return (dx(b->ptr[i]) << 4) + dx(b->ptr[i+1]);
+}
+
void blob_lowercase(blob_t blob)
{
int i;
@@ -147,6 +165,30 @@ void blob_lowercase(blob_t blob)
ptr[i] = tolower(ptr[i]);
}
+/*
+ * Decode %XX escapes in *blob in place and shrink blob->len to match.
+ *
+ * An escape is decoded only when '%' is followed by two hexadecimal
+ * digits that both lie inside the blob, mirroring the "%%(%x%x)"
+ * pattern applied by sqdb-build.lua. Any other '%' (a truncated or
+ * malformed escape) is copied through unchanged, so no input bytes are
+ * dropped and the resulting length always equals the bytes written.
+ */
+void blob_percent_decode(blob_t *blob)
+{
+	char *ptr = blob->ptr;
+	int src = blob_find_char(*blob, '%');
+	int dest = src;
+
+	/* No '%' anywhere: the blob is already in decoded form. */
+	if (src < 0)
+		return;
+
+	/* Bytes before the first '%' are already in place; compact the rest. */
+	while (src < blob->len) {
+		if (ptr[src] == '%' && blob->len >= src+3 &&
+		    isxdigit((unsigned char) ptr[src+1]) &&
+		    isxdigit((unsigned char) ptr[src+2])) {
+			ptr[dest++] = blob_read_hexbyte(blob, src+1);
+			src += 3;
+		} else {
+			ptr[dest++] = ptr[src++];
+		}
+	}
+
+	/* dest only ever trails src, so it is the exact decoded length. */
+	blob->len = dest;
+}
+
int blob_pull_inet_addr(blob_t *b, struct in_addr *saddr)
{
unsigned long ip = 0;
@@ -280,7 +322,7 @@ void blob_push_urldecode(blob_t *to, blob_t url)
blob_expand_head(to, orig, '/');
blob_expand_head_bytes(to, 1); /* back up past the '/' separator */
} else {
- /* copy decoded; FIXME decode percent encoding */
+ blob_percent_decode(&b);
blob_push_byte(to, '/');
blob_push(to, b);
}
diff --git a/src/sqdb-build.lua b/src/sqdb-build.lua
index cd039e2..2806bb2 100755
--- a/src/sqdb-build.lua
+++ b/src/sqdb-build.lua
@@ -117,6 +117,7 @@ local function read_urls(filename, category, locked)
url = url:gsub("#.*", "")
url = url:gsub(" *^", "")
url = url:lower()
+ url = url:gsub("%%(%x%x)", function(h) return string.char(tonumber(h,16)) end)
url = url:gsub("^(www%d*[.])([^.]*[.])", "%2")
domain, path = url:match("([^/]*)/?(.*)")
domain = domain:gsub(":.*", "")