summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-19 13:40:31 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-19 13:40:31 +0300
commit2c4ddb6620101ebad2bff0c007a99aea97a15de1 (patch)
tree7c91a5a2eb874edddc26fdc468d5706789c2f831
parent2e58fc0a7a69ecbe4a48b296bcf6313825fcfa7c (diff)
downloadsquark-2c4ddb6620101ebad2bff0c007a99aea97a15de1.tar.bz2
squark-2c4ddb6620101ebad2bff0c007a99aea97a15de1.tar.xz
filter: properly filter URLs whose host is in IPv4 address form
Properly match them against the db data.
-rw-r--r--blob.c18
-rw-r--r--blob.h2
-rw-r--r--squark-filter.c79
3 files changed, 65 insertions, 34 deletions
diff --git a/blob.c b/blob.c
index 81722d2..f6daef1 100644
--- a/blob.c
+++ b/blob.c
@@ -52,6 +52,24 @@ int blob_cmp(blob_t a, blob_t b)
return memcmp(a.ptr, b.ptr, a.len);
}
+/*
+ * Parse a decimal dotted-quad IPv4 address ("a.b.c.d") from blob b.
+ *
+ * Returns the address as the integer (a << 24) | (b << 16) | (c << 8) | d.
+ * Returns 0 when blob_pull_matching() does not find a "." after one of the
+ * first three octets, when any octet is greater than 255, or when data
+ * remains after the fourth octet. Note that 0 is also the value of
+ * "0.0.0.0", so callers cannot tell that address apart from a failure.
+ */
+unsigned long blob_inet_addr(blob_t b)
+{
+	unsigned long ip = 0, octet;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		/* octets two to four must be preceded by a dot */
+		if (i != 0 && !blob_pull_matching(&b, BLOB_STR(".")))
+			return 0;
+		octet = blob_pull_uint(&b, 10);
+		/* an out-of-range octet would otherwise spill into the
+		 * neighbouring bytes and alias a different address */
+		if (octet > 255)
+			return 0;
+		ip = (ip << 8) | octet;
+	}
+	/* anything left over means this was not a plain dotted quad */
+	if (b.len != 0)
+		return 0;
+	return ip;
+}
+
+
blob_t blob_pushed(blob_t buffer, blob_t left)
{
if (buffer.ptr + buffer.len != left.ptr + left.len)
diff --git a/blob.h b/blob.h
index cfd8cd5..d1ef577 100644
--- a/blob.h
+++ b/blob.h
@@ -38,6 +38,8 @@ static inline int blob_is_null(blob_t b)
char *blob_cstr_dup(blob_t b);
blob_t blob_dup(blob_t b);
int blob_cmp(blob_t a, blob_t b);
+unsigned long blob_inet_addr(blob_t buf);
+
blob_t blob_pushed(blob_t buffer, blob_t left);
void blob_push(blob_t *b, blob_t d);
void blob_push_uint(blob_t *to, unsigned int value, int radix);
diff --git a/squark-filter.c b/squark-filter.c
index fac9241..bc88e9a 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -122,12 +122,12 @@ static int url_parse(blob_t uri, struct url_info *nfo)
if (!blob_is_null(prev.word)) {
nfo->host = prev.word;
nfo->num_dots = prev.num_dots;
- nfo->is_ipv4 = prev.numeric && prev.num_dots == 4;
+ nfo->is_ipv4 = prev.numeric && prev.num_dots == 3;
nfo->port = blob_pull_uint(&cur.word, 10);
} else {
nfo->host = cur.word;
nfo->num_dots = cur.num_dots;
- nfo->is_ipv4 = cur.numeric && cur.num_dots == 4;
+ nfo->is_ipv4 = cur.numeric && cur.num_dots == 3;
}
break;
}
@@ -182,7 +182,7 @@ static int url_classify(struct url_info *url, struct sqdb *db)
blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
- cmph_uint32 i = SQDB_PARENT_ROOT, previ;
+ cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
int dots_done = 1;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -190,41 +190,52 @@ static int url_classify(struct url_info *url, struct sqdb *db)
/* search for most qualified domain match; do first lookup
* with two domain components */
- key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
- tld = blob_expand_head(&key, url->host, '.');
-
- do {
- /* add one more domain component */
- got = blob_expand_head(&key, url->host, '.');
- if (blob_is_null(got))
- break;
-
- previ = i;
+ if (url->is_ipv4) {
+ key = url->host;
i = cmph_search_packed(cmph, key.ptr, key.len);
- if (!blob_is_null(tld)) {
- int p = indx[i].parent;
-
- if (p == SQDB_PARENT_ROOT ||
- p == SQDB_PARENT_IPV4 ||
- indx[p].parent != SQDB_PARENT_ROOT ||
- blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
- /* top level domain did not match */
- i = -1;
- goto parent_dns_match;
- }
- tld = BLOB_NULL;
- previ = p;
- }
- if (indx[i].parent != previ ||
- blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
- /* the subdomain did no longer match, use
- * parents classification */
+
+ if (indx[i].parent != SQDB_PARENT_IPV4 ||
+ indx[i].component != blob_inet_addr(url->host)) {
i = previ;
goto parent_dns_match;
}
- mkey = key;
- dots_done++;
- } while (indx[i].has_subdomains);
+ } else {
+ key = BLOB_PTR_LEN(url->host.ptr + url->host.len, 0);
+ tld = blob_expand_head(&key, url->host, '.');
+
+ do {
+ /* add one more domain component */
+ got = blob_expand_head(&key, url->host, '.');
+ if (blob_is_null(got))
+ break;
+
+ previ = i;
+ i = cmph_search_packed(cmph, key.ptr, key.len);
+ if (!blob_is_null(tld)) {
+ int p = indx[i].parent;
+
+ if (p == SQDB_PARENT_ROOT ||
+ p == SQDB_PARENT_IPV4 ||
+ indx[p].parent != SQDB_PARENT_ROOT ||
+ blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
+ /* top level domain did not match */
+ i = -1;
+ goto parent_dns_match;
+ }
+ tld = BLOB_NULL;
+ previ = p;
+ }
+ if (indx[i].parent != previ ||
+ blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+ /* the subdomain did no longer match, use
+ * parents classification */
+ i = previ;
+ goto parent_dns_match;
+ }
+ mkey = key;
+ dots_done++;
+ } while (indx[i].has_subdomains);
+ }
/* No paths to match for */
if (i == SQDB_PARENT_ROOT || !indx[i].has_paths)