From 2c4ddb6620101ebad2bff0c007a99aea97a15de1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timo=20Ter=C3=A4s?=
Date: Thu, 19 Aug 2010 13:40:31 +0300
Subject: filter: properly filter ipv4 address from urls

properly match them against db data.
---
Review notes (this section is not part of the commit message):

 * This copy of the patch had its whitespace collapsed; line breaks and
   tab indentation were reconstructed. If a context line fails to match,
   apply with "git apply --ignore-whitespace".
 * blob_inet_addr() is stricter than in the original commit: each octet
   must be one to three decimal digits and at most 255, so a host such as
   "1.2.3.260" no longer parses as 1.2.4.4 and "1..2.3" is rejected.
   The blob.c hunk size and the diffstat were updated to match; the
   blob.c index line still carries the original blob ids.
 * NOTE(review): the IPv4 branch of url_classify() does not assign mkey
   or increment dots_done. Confirm that the code following the last hunk
   does not read mkey on that path.

 blob.c          | 30 +++++++++++++++++++++
 blob.h          |  2 ++
 squark-filter.c | 79 ++++++++++++++++++++++++++++++++------------------------
 3 files changed, 77 insertions(+), 34 deletions(-)

diff --git a/blob.c b/blob.c
index 81722d2..f6daef1 100644
--- a/blob.c
+++ b/blob.c
@@ -52,6 +52,36 @@ int blob_cmp(blob_t a, blob_t b)
 	return memcmp(a.ptr, b.ptr, a.len);
 }
 
+/*
+ * Parse a dotted-quad IPv4 address ("a.b.c.d") held in the blob.
+ *
+ * Returns the address with the first octet in the most significant
+ * byte, or 0 when the text is not exactly four decimal octets of one
+ * to three digits each, all in the range 0-255.
+ */
+unsigned long blob_inet_addr(blob_t b)
+{
+	unsigned long ip = 0, octet, len;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		/* every octet but the first is preceded by a dot */
+		if (i != 0 && !blob_pull_matching(&b, BLOB_STR(".")))
+			return 0;
+		len = b.len;
+		octet = blob_pull_uint(&b, 10);
+		/* empty or over-long octets, and values such as 260 that
+		 * would carry into the neighbouring octet, are invalid */
+		if (len == b.len || len - b.len > 3 || octet > 255)
+			return 0;
+		ip = (ip << 8) | octet;
+	}
+	/* anything left over means this was not a plain address */
+	if (b.len != 0)
+		return 0;
+	return ip;
+}
+
 blob_t blob_pushed(blob_t buffer, blob_t left)
 {
 	if (buffer.ptr + buffer.len != left.ptr + left.len)
diff --git a/blob.h b/blob.h
index cfd8cd5..d1ef577 100644
--- a/blob.h
+++ b/blob.h
@@ -38,6 +38,8 @@ static inline int blob_is_null(blob_t b)
 char *blob_cstr_dup(blob_t b);
 blob_t blob_dup(blob_t b);
 int blob_cmp(blob_t a, blob_t b);
+unsigned long blob_inet_addr(blob_t buf);
+
 blob_t blob_pushed(blob_t buffer, blob_t left);
 void blob_push(blob_t *b, blob_t d);
 void blob_push_uint(blob_t *to, unsigned int value, int radix);
diff --git a/squark-filter.c b/squark-filter.c
index fac9241..bc88e9a 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -122,12 +122,12 @@ static int url_parse(blob_t uri, struct url_info *nfo)
 			if (!blob_is_null(prev.word)) {
 				nfo->host = prev.word;
 				nfo->num_dots = prev.num_dots;
-				nfo->is_ipv4 = prev.numeric && prev.num_dots == 4;
+				nfo->is_ipv4 = prev.numeric && prev.num_dots == 3;
 				nfo->port = blob_pull_uint(&cur.word, 10);
 			} else {
 				nfo->host = cur.word;
 				nfo->num_dots = cur.num_dots;
-				nfo->is_ipv4 = cur.numeric && cur.num_dots == 4;
+				nfo->is_ipv4 = cur.numeric && cur.num_dots == 3;
 			}
 			break;
 		}
@@ -182,7 +182,7 @@ static int url_classify(struct url_info *url, struct sqdb *db)
 	blob_t b, key, got, tld, mkey;
 	void *cmph;
 	struct sqdb_index_entry *indx;
-	cmph_uint32 i = SQDB_PARENT_ROOT, previ;
+	cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
 	int dots_done = 1;
 
 	cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -190,41 +190,52 @@ static int url_classify(struct url_info *url, struct sqdb *db)
 
 	/* search for most qualified domain match; do first lookup
 	 * with two domain components */
-	key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
-	tld = blob_expand_head(&key, url->host, '.');
-
-	do {
-		/* add one more domain component */
-		got = blob_expand_head(&key, url->host, '.');
-		if (blob_is_null(got))
-			break;
-
-		previ = i;
+	if (url->is_ipv4) {
+		key = url->host;
 		i = cmph_search_packed(cmph, key.ptr, key.len);
-		if (!blob_is_null(tld)) {
-			int p = indx[i].parent;
-
-			if (p == SQDB_PARENT_ROOT ||
-			    p == SQDB_PARENT_IPV4 ||
-			    indx[p].parent != SQDB_PARENT_ROOT ||
-			    blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
-				/* top level domain did not match */
-				i = -1;
-				goto parent_dns_match;
-			}
-			tld = BLOB_NULL;
-			previ = p;
-		}
-		if (indx[i].parent != previ ||
-		    blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
-			/* the subdomain did no longer match, use
-			 * parents classification */
+
+		if (indx[i].parent != SQDB_PARENT_IPV4 ||
+		    indx[i].component != blob_inet_addr(url->host)) {
 			i = previ;
 			goto parent_dns_match;
 		}
-		mkey = key;
-		dots_done++;
-	} while (indx[i].has_subdomains);
+	} else {
+		key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
+		tld = blob_expand_head(&key, url->host, '.');
+
+		do {
+			/* add one more domain component */
+			got = blob_expand_head(&key, url->host, '.');
+			if (blob_is_null(got))
+				break;
+
+			previ = i;
+			i = cmph_search_packed(cmph, key.ptr, key.len);
+			if (!blob_is_null(tld)) {
+				int p = indx[i].parent;
+
+				if (p == SQDB_PARENT_ROOT ||
+				    p == SQDB_PARENT_IPV4 ||
+				    indx[p].parent != SQDB_PARENT_ROOT ||
+				    blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
+					/* top level domain did not match */
+					i = -1;
+					goto parent_dns_match;
+				}
+				tld = BLOB_NULL;
+				previ = p;
+			}
+			if (indx[i].parent != previ ||
+			    blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+				/* the subdomain did no longer match, use
+				 * parents classification */
+				i = previ;
+				goto parent_dns_match;
+			}
+			mkey = key;
+			dots_done++;
+		} while (indx[i].has_subdomains);
+	}
 
 	/* No paths to match for */
 	if (i == SQDB_PARENT_ROOT || !indx[i].has_paths)
-- 
cgit v1.2.3