summary refs log tree commit diff stats
path: root/squark-filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- squark-filter.c 39
1 files changed, 32 insertions, 7 deletions
diff --git a/squark-filter.c b/squark-filter.c
index e47cbf5..f3a4aed 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -108,7 +108,8 @@ static void url_print(struct url_info *nfo)
static blob_t url_classify(struct url_info *url, struct sqdb *db)
{
- blob_t key, got, tld;
+ unsigned char buffer[1024];
+ blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
uint32_t *categories;
@@ -144,23 +145,45 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
}
tld = BLOB_NULL;
}
+ mkey = key;
} while (indx[i].has_subdomains);
- if (key.ptr != url->host.ptr) {
- /* the full of dns part did not match, so we skip the
- * path name search */
+ if (key.ptr != url->host.ptr || !indx[i].has_paths) {
+ /* the full dns part did not match, or there are no more
+ * specific paths in db -- skip the path name search */
goto parent_dns_match;
}
- /* and then search for path matches */
-
+ /* and then search for path matches -- construct hashing
+ * string of url decoded path */
+ b = BLOB_BUF(buffer);
+ blob_push(&b, key);
+ key = blob_pushed(BLOB_BUF(buffer), b);
+ blob_push_urldecode(&b, url->path);
+ b = blob_pushed(BLOB_BUF(buffer), b);
+
+ while (indx[i].has_paths) {
+ /* add one more path component */
+ got = blob_expand_tail(&key, b, '/');
+ if (blob_is_null(got))
+ break;
+ previ = i;
+ i = cmph_search_packed(cmph, key.ptr, key.len);
+ tld = sqdb_get_string_literal(db, indx[i].component);
+ if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+ /* the path component no longer matches, use
+ * the parent's classification */
+ i = previ;
+ goto parent_dns_match;
+ }
+ mkey = key;
+ }
parent_dns_match:
if (i == -1)
return BLOB_STR("unknown");
categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL);
- printf("%d\n", indx[i].category);
return sqdb_get_string_literal(db, categories[indx[i].category]);
}
@@ -171,10 +194,12 @@ int main(int argc, char **argv)
"http://facebook.com:1234/",
"https://slashdot.org/path/to/me",
"http://user:pass@paistortuga.com/~mocosoft",
+ "http://user:pass@paistortuga.com",
"user@weather.whenu.speedera.net",
"zedo1.speedera.net",
"foo.com/stuff?query;bar#frag",
"foo.com?query;bar#frag",
+ "aapracingandsports.com.au/racing/",
};
struct sqdb db;
struct url_info nfo;