summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-19 22:56:23 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-19 22:56:23 +0300
commit9ee4f03712925b1fe7634ca66d8d421e676e5b58 (patch)
tree0d83e191de91b27bafdd28583e8963e6908fd61c
parentaa008d0769a8e2c1f529b92585659336c0f11953 (diff)
downloadsquark-9ee4f03712925b1fe7634ca66d8d421e676e5b58.tar.bz2
squark-9ee4f03712925b1fe7634ca66d8d421e676e5b58.tar.xz
filter: do not modify deniedurl cgi parameter
Keep the modifications (which are needed for key lookup) inside the lookup routine. This includes e.g. lowercasing the URL. This way we can pass the exact original request string to our block page script. This also changes the way 'www123.' is stripped from the request.
-rw-r--r--blob.c22
-rw-r--r--blob.h1
-rw-r--r--squark-filter.c61
3 files changed, 44 insertions, 40 deletions
diff --git a/blob.c b/blob.c
index 8f630c2..f2a80f6 100644
--- a/blob.c
+++ b/blob.c
@@ -1,3 +1,6 @@
+#include <ctype.h>
+#include <string.h>
+
#include "blob.h"
/* RFC 3986 section 2.3 Unreserved Characters (January 2005) */
@@ -162,6 +165,20 @@ void blob_push(blob_t *b, blob_t d)
}
}
+/* Append a lower-cased copy of d to the space described by b.
+ *
+ * Each byte of d is passed through tolower() and stored at b->ptr; b is
+ * then advanced past the written bytes (ptr moves forward and len shrinks
+ * by d.len). If b has fewer than d.len bytes left, nothing is written and
+ * *b is set to BLOB_NULL. */
+void blob_push_lower(blob_t *b, blob_t d)
+{
+	int i;
+
+	if (b->len < d.len) {
+		*b = BLOB_NULL;
+		return;
+	}
+	for (i = 0; i < d.len; i++) {
+		/* <ctype.h> functions require a value representable as
+		 * unsigned char (or EOF); a plain char byte >= 0x80 may be
+		 * negative, so convert before calling tolower(). The cast
+		 * is a no-op if ptr already points to unsigned char. */
+		b->ptr[i] = tolower((unsigned char) d.ptr[i]);
+	}
+	b->ptr += d.len;
+	b->len -= d.len;
+}
+
void blob_push_byte(blob_t *b, unsigned char byte)
{
if (b->len) {
@@ -219,9 +236,6 @@ void blob_push_urldecode(blob_t *to, blob_t url)
do {
blob_pull_matching(&url, BLOB_STR("/"));
b = blob_pull_cspn(&url, BLOB_STR("/"));
- if (blob_is_null(url) && blob_is_null(b))
- break;
-
if (blob_is_null(b) || blob_cmp(b, BLOB_STR(".")) == 0) {
/* skip '.' or two consecutive / */
} else if (blob_cmp(b, BLOB_STR("..")) == 0) {
@@ -232,7 +246,7 @@ void blob_push_urldecode(blob_t *to, blob_t url)
blob_push_byte(to, '/');
blob_push(to, b);
}
- } while (1);
+ } while (!blob_is_null(url));
}
void blob_push_urlencode(blob_t *to, blob_t url)
diff --git a/blob.h b/blob.h
index f5c57eb..0c10ca6 100644
--- a/blob.h
+++ b/blob.h
@@ -42,6 +42,7 @@ unsigned long blob_inet_addr(blob_t buf);
blob_t blob_pushed(blob_t buffer, blob_t left);
void blob_push(blob_t *b, blob_t d);
+void blob_push_lower(blob_t *b, blob_t d);
void blob_push_byte(blob_t *b, unsigned char byte);
void blob_push_uint(blob_t *to, unsigned int value, int radix);
void blob_push_hexdump(blob_t *to, blob_t binary);
diff --git a/squark-filter.c b/squark-filter.c
index 8fab0bf..9bc6bb2 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -31,6 +31,7 @@ struct url_info {
blob_t username;
blob_t password;
blob_t host;
+ blob_t significant_host;
blob_t path;
blob_t query;
blob_t fragment;
@@ -66,7 +67,6 @@ void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp)
case '5': case '6': case '7': case '8': case '9':
break;
default:
- t.ptr[i] = tolower(c);
numeric = 0;
break;
}
@@ -155,7 +155,17 @@ static int url_parse(blob_t uri, struct url_info *nfo)
if (blob_is_null(nfo->path))
nfo->path = BLOB_STR("/");
+ /* significant host name */
+ nfo->significant_host = nfo->host;
+ if (nfo->num_dots > 1) {
+ blob_t b = nfo->significant_host;
+ if (blob_pull_matching(&b, BLOB_STR("www")) &&
+ (blob_pull_uint(&b, 10), 1) &&
+ blob_pull_matching(&b, BLOB_STR(".")))
+ nfo->significant_host = b;
+ }
return 1;
+
error:
return 0;
}
@@ -178,8 +188,8 @@ static void url_print(struct url_info *nfo)
static int url_classify(struct url_info *url, struct sqdb *db)
{
- unsigned char buffer[1024];
- blob_t b, key, got, tld, mkey;
+ unsigned char buffer[512];
+ blob_t key, got, tld, keybuf, keylimits;
void *cmph;
struct sqdb_index_entry *indx;
cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
@@ -188,10 +198,13 @@ static int url_classify(struct url_info *url, struct sqdb *db)
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL);
+ keybuf = BLOB_BUF(buffer);
+ blob_push_lower(&keybuf, url->significant_host);
+ key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
+
/* search for most qualified domain match; do first lookup
* with two domain components */
if (url->is_ipv4) {
- key = url->host;
i = cmph_search_packed(cmph, key.ptr, key.len);
if (indx[i].parent != SQDB_PARENT_IPV4 ||
@@ -200,12 +213,12 @@ static int url_classify(struct url_info *url, struct sqdb *db)
goto parent_dns_match;
}
} else {
- key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
- tld = blob_expand_head(&key, url->host, '.');
+ key = BLOB_PTR_LEN(key.ptr + key.len, 0);
+ tld = blob_expand_head(&key, keylimits, '.');
do {
/* add one more domain component */
- got = blob_expand_head(&key, url->host, '.');
+ got = blob_expand_head(&key, keylimits, '.');
if (blob_is_null(got))
break;
@@ -232,45 +245,22 @@ static int url_classify(struct url_info *url, struct sqdb *db)
i = previ;
goto parent_dns_match;
}
- mkey = key;
dots_done++;
} while (indx[i].has_subdomains);
}
/* No paths to match for */
- if (i == SQDB_PARENT_ROOT || !indx[i].has_paths)
+ if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr)
goto parent_dns_match;
- if (key.ptr != url->host.ptr) {
- blob_t tmpkey = key;
-
- /* Not exact dns match, but there's paths. Check if we
- * have only one more dns entry and it's of form www1 or
- * such. If so, this should be treated as exact match. */
- if (dots_done != url->num_dots)
- goto parent_dns_match;
-
- got = blob_expand_head(&tmpkey, url->host, '.');
- if (blob_is_null(got) ||
- !blob_pull_matching(&got, BLOB_STR("www")))
- goto parent_dns_match;
-
- blob_pull_uint(&got, 10);
- if (got.len != 0)
- goto parent_dns_match;
- }
-
/* and then search for path matches -- construct hashing
* string of url decoded path */
- b = BLOB_BUF(buffer);
- blob_push(&b, key);
- key = blob_pushed(BLOB_BUF(buffer), b);
- blob_push_urldecode(&b, url->path);
- b = blob_pushed(BLOB_BUF(buffer), b);
+ blob_push_urldecode(&keybuf, url->path);
+ key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
while (indx[i].has_paths) {
/* add one more path component */
- got = blob_expand_tail(&key, b, '/');
+ got = blob_expand_tail(&key, keylimits, '/');
if (blob_is_null(got))
break;
previ = i;
@@ -282,7 +272,6 @@ static int url_classify(struct url_info *url, struct sqdb *db)
i = previ;
goto parent_dns_match;
}
- mkey = key;
}
parent_dns_match:
@@ -432,7 +421,7 @@ int main(int argc, char **argv)
struct sqdb db;
int opt;
- sqdb_open(&db, "squark.db");
+ sqdb_open(&db, "/var/lib/squark/squark.db");
while ((opt = getopt(argc, argv, "r:b:")) != -1) {
switch (opt) {