diff options
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- | squark-filter.c | 61 |
1 file changed, 25 insertions, 36 deletions
diff --git a/squark-filter.c b/squark-filter.c index 8fab0bf..9bc6bb2 100644 --- a/squark-filter.c +++ b/squark-filter.c @@ -31,6 +31,7 @@ struct url_info { blob_t username; blob_t password; blob_t host; + blob_t significant_host; blob_t path; blob_t query; blob_t fragment; @@ -66,7 +67,6 @@ void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp) case '5': case '6': case '7': case '8': case '9': break; default: - t.ptr[i] = tolower(c); numeric = 0; break; } @@ -155,7 +155,17 @@ static int url_parse(blob_t uri, struct url_info *nfo) if (blob_is_null(nfo->path)) nfo->path = BLOB_STR("/"); + /* significant host name */ + nfo->significant_host = nfo->host; + if (nfo->num_dots > 1) { + blob_t b = nfo->significant_host; + if (blob_pull_matching(&b, BLOB_STR("www")) && + (blob_pull_uint(&b, 10), 1) && + blob_pull_matching(&b, BLOB_STR("."))) + nfo->significant_host = b; + } return 1; + error: return 0; } @@ -178,8 +188,8 @@ static void url_print(struct url_info *nfo) static int url_classify(struct url_info *url, struct sqdb *db) { - unsigned char buffer[1024]; - blob_t b, key, got, tld, mkey; + unsigned char buffer[512]; + blob_t key, got, tld, keybuf, keylimits; void *cmph; struct sqdb_index_entry *indx; cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT; @@ -188,10 +198,13 @@ static int url_classify(struct url_info *url, struct sqdb *db) cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL); + keybuf = BLOB_BUF(buffer); + blob_push_lower(&keybuf, url->significant_host); + key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); + /* search for most qualified domain match; do first lookup * with two domain components */ if (url->is_ipv4) { - key = url->host; i = cmph_search_packed(cmph, key.ptr, key.len); if (indx[i].parent != SQDB_PARENT_IPV4 || @@ -200,12 +213,12 @@ static int url_classify(struct url_info *url, struct sqdb *db) goto parent_dns_match; } } else { - key = 
BLOB_PTR_LEN(url->host.ptr + url->host.len, 0); - tld = blob_expand_head(&key, url->host, '.'); + key = BLOB_PTR_LEN(key.ptr + key.len, 0); + tld = blob_expand_head(&key, keylimits, '.'); do { /* add one more domain component */ - got = blob_expand_head(&key, url->host, '.'); + got = blob_expand_head(&key, keylimits, '.'); if (blob_is_null(got)) break; @@ -232,45 +245,22 @@ static int url_classify(struct url_info *url, struct sqdb *db) i = previ; goto parent_dns_match; } - mkey = key; dots_done++; } while (indx[i].has_subdomains); } /* No paths to match for */ - if (i == SQDB_PARENT_ROOT || !indx[i].has_paths) + if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr) goto parent_dns_match; - if (key.ptr != url->host.ptr) { - blob_t tmpkey = key; - - /* Not exact dns match, but there's paths. Check if we - * have only one more dns entry and it's of form www1 or - * such. If so, this should be treated as exact match. */ - if (dots_done != url->num_dots) - goto parent_dns_match; - - got = blob_expand_head(&tmpkey, url->host, '.'); - if (blob_is_null(got) || - !blob_pull_matching(&got, BLOB_STR("www"))) - goto parent_dns_match; - - blob_pull_uint(&got, 10); - if (got.len != 0) - goto parent_dns_match; - } - /* and then search for path matches -- construct hashing * string of url decoded path */ - b = BLOB_BUF(buffer); - blob_push(&b, key); - key = blob_pushed(BLOB_BUF(buffer), b); - blob_push_urldecode(&b, url->path); - b = blob_pushed(BLOB_BUF(buffer), b); + blob_push_urldecode(&keybuf, url->path); + key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); while (indx[i].has_paths) { /* add one more path component */ - got = blob_expand_tail(&key, b, '/'); + got = blob_expand_tail(&key, keylimits, '/'); if (blob_is_null(got)) break; previ = i; @@ -282,7 +272,6 @@ static int url_classify(struct url_info *url, struct sqdb *db) i = previ; goto parent_dns_match; } - mkey = key; } parent_dns_match: @@ -432,7 +421,7 @@ int main(int argc, char **argv) 
struct sqdb db; int opt; - sqdb_open(&db, "squark.db"); + sqdb_open(&db, "/var/lib/squark/squark.db"); while ((opt = getopt(argc, argv, "r:b:")) != -1) { switch (opt) { |