summary | refs | log | tree | commit | diff | stats
path: root/squark-filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- squark-filter.c 61
1 files changed, 25 insertions, 36 deletions
diff --git a/squark-filter.c b/squark-filter.c
index 8fab0bf..9bc6bb2 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -31,6 +31,7 @@ struct url_info {
blob_t username;
blob_t password;
blob_t host;
+ blob_t significant_host;
blob_t path;
blob_t query;
blob_t fragment;
@@ -66,7 +67,6 @@ void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp)
case '5': case '6': case '7': case '8': case '9':
break;
default:
- t.ptr[i] = tolower(c);
numeric = 0;
break;
}
@@ -155,7 +155,17 @@ static int url_parse(blob_t uri, struct url_info *nfo)
if (blob_is_null(nfo->path))
nfo->path = BLOB_STR("/");
+ /* significant host name */
+ nfo->significant_host = nfo->host;
+ if (nfo->num_dots > 1) {
+ blob_t b = nfo->significant_host;
+ if (blob_pull_matching(&b, BLOB_STR("www")) &&
+ (blob_pull_uint(&b, 10), 1) &&
+ blob_pull_matching(&b, BLOB_STR(".")))
+ nfo->significant_host = b;
+ }
return 1;
+
error:
return 0;
}
@@ -178,8 +188,8 @@ static void url_print(struct url_info *nfo)
static int url_classify(struct url_info *url, struct sqdb *db)
{
- unsigned char buffer[1024];
- blob_t b, key, got, tld, mkey;
+ unsigned char buffer[512];
+ blob_t key, got, tld, keybuf, keylimits;
void *cmph;
struct sqdb_index_entry *indx;
cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
@@ -188,10 +198,13 @@ static int url_classify(struct url_info *url, struct sqdb *db)
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL);
+ keybuf = BLOB_BUF(buffer);
+ blob_push_lower(&keybuf, url->significant_host);
+ key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
+
/* search for most qualified domain match; do first lookup
* with two domain components */
if (url->is_ipv4) {
- key = url->host;
i = cmph_search_packed(cmph, key.ptr, key.len);
if (indx[i].parent != SQDB_PARENT_IPV4 ||
@@ -200,12 +213,12 @@ static int url_classify(struct url_info *url, struct sqdb *db)
goto parent_dns_match;
}
} else {
- key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
- tld = blob_expand_head(&key, url->host, '.');
+ key = BLOB_PTR_LEN(key.ptr + key.len, 0);
+ tld = blob_expand_head(&key, keylimits, '.');
do {
/* add one more domain component */
- got = blob_expand_head(&key, url->host, '.');
+ got = blob_expand_head(&key, keylimits, '.');
if (blob_is_null(got))
break;
@@ -232,45 +245,22 @@ static int url_classify(struct url_info *url, struct sqdb *db)
i = previ;
goto parent_dns_match;
}
- mkey = key;
dots_done++;
} while (indx[i].has_subdomains);
}
/* No paths to match for */
- if (i == SQDB_PARENT_ROOT || !indx[i].has_paths)
+ if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr)
goto parent_dns_match;
- if (key.ptr != url->host.ptr) {
- blob_t tmpkey = key;
-
- /* Not exact dns match, but there's paths. Check if we
- * have only one more dns entry and it's of form www1 or
- * such. If so, this should be treated as exact match. */
- if (dots_done != url->num_dots)
- goto parent_dns_match;
-
- got = blob_expand_head(&tmpkey, url->host, '.');
- if (blob_is_null(got) ||
- !blob_pull_matching(&got, BLOB_STR("www")))
- goto parent_dns_match;
-
- blob_pull_uint(&got, 10);
- if (got.len != 0)
- goto parent_dns_match;
- }
-
/* and then search for path matches -- construct hashing
* string of url decoded path */
- b = BLOB_BUF(buffer);
- blob_push(&b, key);
- key = blob_pushed(BLOB_BUF(buffer), b);
- blob_push_urldecode(&b, url->path);
- b = blob_pushed(BLOB_BUF(buffer), b);
+ blob_push_urldecode(&keybuf, url->path);
+ key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
while (indx[i].has_paths) {
/* add one more path component */
- got = blob_expand_tail(&key, b, '/');
+ got = blob_expand_tail(&key, keylimits, '/');
if (blob_is_null(got))
break;
previ = i;
@@ -282,7 +272,6 @@ static int url_classify(struct url_info *url, struct sqdb *db)
i = previ;
goto parent_dns_match;
}
- mkey = key;
}
parent_dns_match:
@@ -432,7 +421,7 @@ int main(int argc, char **argv)
struct sqdb db;
int opt;
- sqdb_open(&db, "squark.db");
+ sqdb_open(&db, "/var/lib/squark/squark.db");
while ((opt = getopt(argc, argv, "r:b:")) != -1) {
switch (opt) {