From b7eac294771a72711208d288657a5667d65e8882 Mon Sep 17 00:00:00 2001
From: Alex Dowad
Date: Thu, 10 Apr 2014 21:24:18 +0200
Subject: squark-filter: correctly identify URLs with uppercase chars in path

squark.db stores all filtered domains/URLs in lowercase. So when querying
the filter DB, we need to convert the input URL to lowercase.
---
 src/blob.c          | 8 ++++++++
 src/blob.h          | 1 +
 src/squark-filter.c | 1 +
 3 files changed, 10 insertions(+)

(limited to 'src')

diff --git a/src/blob.c b/src/blob.c
index c526d9b..196adf9 100644
--- a/src/blob.c
+++ b/src/blob.c
@@ -139,6 +139,14 @@ int blob_icmp(blob_t a, blob_t b)
 	return strncasecmp(a.ptr, b.ptr, a.len);
 }
 
+void blob_lowercase(blob_t blob)
+{
+	int i;
+	char *ptr = blob.ptr;
+	for (i = 0; i < blob.len; i++)
+		ptr[i] = tolower(ptr[i]);
+}
+
 int blob_pull_inet_addr(blob_t *b, struct in_addr *saddr)
 {
 	unsigned long ip = 0;
diff --git a/src/blob.h b/src/blob.h
index 6a8e51c..c2a394e 100644
--- a/src/blob.h
+++ b/src/blob.h
@@ -42,6 +42,7 @@ char *blob_cstr_dup(blob_t b);
 blob_t blob_dup(blob_t b);
 int blob_cmp(blob_t a, blob_t b);
 int blob_icmp(blob_t a, blob_t b);
+void blob_lowercase(blob_t blob);
 unsigned long blob_inet_addr(blob_t buf);
 
 blob_t blob_pushed(blob_t buffer, blob_t left);
diff --git a/src/squark-filter.c b/src/squark-filter.c
index 9eed4a7..50bd798 100644
--- a/src/squark-filter.c
+++ b/src/squark-filter.c
@@ -275,6 +275,7 @@ static int url_classify(struct url_info *url, struct sqdb *db)
 
 	/* and then search for path matches -- construct hashing
 	 * string of url decoded path */
+	blob_lowercase(url->path); /* filter DB stores all URLs in lowercase */
 	blob_push_urldecode(&keybuf, url->path);
 	keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
 
-- 
cgit v1.2.3