diff options
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- | squark-filter.c | 79 |
1 files changed, 45 insertions, 34 deletions
diff --git a/squark-filter.c b/squark-filter.c index fac9241..bc88e9a 100644 --- a/squark-filter.c +++ b/squark-filter.c @@ -122,12 +122,12 @@ static int url_parse(blob_t uri, struct url_info *nfo) if (!blob_is_null(prev.word)) { nfo->host = prev.word; nfo->num_dots = prev.num_dots; - nfo->is_ipv4 = prev.numeric && prev.num_dots == 4; + nfo->is_ipv4 = prev.numeric && prev.num_dots == 3; nfo->port = blob_pull_uint(&cur.word, 10); } else { nfo->host = cur.word; nfo->num_dots = cur.num_dots; - nfo->is_ipv4 = cur.numeric && cur.num_dots == 4; + nfo->is_ipv4 = cur.numeric && cur.num_dots == 3; } break; } @@ -182,7 +182,7 @@ static int url_classify(struct url_info *url, struct sqdb *db) blob_t b, key, got, tld, mkey; void *cmph; struct sqdb_index_entry *indx; - cmph_uint32 i = SQDB_PARENT_ROOT, previ; + cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT; int dots_done = 1; cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); @@ -190,41 +190,52 @@ static int url_classify(struct url_info *url, struct sqdb *db) /* search for most qualified domain match; do first lookup * with two domain components */ - key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0); - tld = blob_expand_head(&key, url->host, '.'); - - do { - /* add one more domain component */ - got = blob_expand_head(&key, url->host, '.'); - if (blob_is_null(got)) - break; - - previ = i; + if (url->is_ipv4) { + key = url->host; i = cmph_search_packed(cmph, key.ptr, key.len); - if (!blob_is_null(tld)) { - int p = indx[i].parent; - - if (p == SQDB_PARENT_ROOT || - p == SQDB_PARENT_IPV4 || - indx[p].parent != SQDB_PARENT_ROOT || - blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) { - /* top level domain did not match */ - i = -1; - goto parent_dns_match; - } - tld = BLOB_NULL; - previ = p; - } - if (indx[i].parent != previ || - blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { - /* the subdomain did no longer match, use - * parents classification */ + + if (indx[i].parent != SQDB_PARENT_IPV4 || + indx[i].component != blob_inet_addr(url->host)) { i = previ; goto parent_dns_match; } - mkey = key; - dots_done++; - } while (indx[i].has_subdomains); + } else { + key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0); + tld = blob_expand_head(&key, url->host, '.'); + + do { + /* add one more domain component */ + got = blob_expand_head(&key, url->host, '.'); + if (blob_is_null(got)) + break; + + previ = i; + i = cmph_search_packed(cmph, key.ptr, key.len); + if (!blob_is_null(tld)) { + int p = indx[i].parent; + + if (p == SQDB_PARENT_ROOT || + p == SQDB_PARENT_IPV4 || + indx[p].parent != SQDB_PARENT_ROOT || + blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) { + /* top level domain did not match */ + i = -1; + goto parent_dns_match; + } + tld = BLOB_NULL; + previ = p; + } + if (indx[i].parent != previ || + blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { + /* the subdomain did no longer match, use + * parents classification */ + i = previ; + goto parent_dns_match; + } + mkey = key; + dots_done++; + } while (indx[i].has_subdomains); + } /* No paths to match for */ if (i == SQDB_PARENT_ROOT || !indx[i].has_paths) |