summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-18 23:03:58 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-18 23:03:58 +0300
commit2e58fc0a7a69ecbe4a48b296bcf6313825fcfa7c (patch)
tree64f0ba5cdd4cecd976ceed6dd1c3b3ac327a864f
parentcf8d9e12f5222e5e6173a82f58c14b7eb6842c46 (diff)
downloadsquark-2e58fc0a7a69ecbe4a48b296bcf6313825fcfa7c.tar.bz2
squark-2e58fc0a7a69ecbe4a48b296bcf6313825fcfa7c.tar.xz
db, filter: fix db generation of ipv4 style addresses
Properly embed the IPv4 address in the database now. Teach the filter to understand the two new reserved component IDs.
-rwxr-xr-xsqdb-build.lua34
-rw-r--r--squark-filter.c27
-rw-r--r--squarkdb.h3
3 files changed, 46 insertions, 18 deletions
diff --git a/sqdb-build.lua b/sqdb-build.lua
index 2b301fc..cd039e2 100755
--- a/sqdb-build.lua
+++ b/sqdb-build.lua
@@ -193,6 +193,14 @@ local function enum_tree(cb, category, dns, data)
end
end
+function iptonumber(str)
+ local num = 0
+ for elem in str:gmatch("%d+") do
+ num = num * 256 + assert(tonumber(elem))
+ end
+ return num
+end
+
local function enum_all(cb)
local ipaddr, data, category
@@ -204,8 +212,7 @@ local function enum_all(cb)
if data.paths ~= nil then
enum_paths(cb, data.category, ipaddr, data.paths)
end
- -- fixme, calculate ip as 32-bit value
- cb(ipaddr, nil, 0, data.category, nil, data.paths)
+ cb(ipaddr, nil, iptonumber(ipaddr), data.category, nil, data.paths)
end
end
@@ -307,11 +314,22 @@ db:write_section("categories", all_categories_by_id)
db:create_index(num_entries)
enum_all(
function(uri, parent_uri, component, category, childs, paths)
- db:assign_index(db:hash(uri),
- category and category.id or 0,
- childs and true or false,
- paths and true or false,
- all_strings[component] or 0,
- parent_uri and db:hash(parent_uri) or 0)
+ if parent_uri == nil and type(component) == "number" then
+ -- Embedded IPv4 address
+ db:assign_index(db:hash(uri),
+ category and category.id or 0,
+ childs and true or false,
+ paths and true or false,
+ component,
+ -2)
+ else
+ -- Regular entry
+ db:assign_index(db:hash(uri),
+ category and category.id or 0,
+ childs and true or false,
+ paths and true or false,
+ all_strings[component] or 0,
+ parent_uri and db:hash(parent_uri) or -1)
+ end
end
)
diff --git a/squark-filter.c b/squark-filter.c
index 097f420..fac9241 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -182,7 +182,7 @@ static int url_classify(struct url_info *url, struct sqdb *db)
blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
- cmph_uint32 i = -1, previ;
+ cmph_uint32 i = SQDB_PARENT_ROOT, previ;
int dots_done = 1;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -201,26 +201,33 @@ static int url_classify(struct url_info *url, struct sqdb *db)
previ = i;
i = cmph_search_packed(cmph, key.ptr, key.len);
- if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
- /* the subdomain did no longer match, use
- * parents classification */
- i = previ;
- goto parent_dns_match;
- }
if (!blob_is_null(tld)) {
- if (blob_cmp(tld, sqdb_get_string_literal(db, indx[indx[i].parent].component)) != 0) {
+ int p = indx[i].parent;
+
+ if (p == SQDB_PARENT_ROOT ||
+ p == SQDB_PARENT_IPV4 ||
+ indx[p].parent != SQDB_PARENT_ROOT ||
+ blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
/* top level domain did not match */
i = -1;
goto parent_dns_match;
}
tld = BLOB_NULL;
+ previ = p;
+ }
+ if (indx[i].parent != previ ||
+ blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+ /* the subdomain did no longer match, use
+ * parents classification */
+ i = previ;
+ goto parent_dns_match;
}
mkey = key;
dots_done++;
} while (indx[i].has_subdomains);
/* No paths to match for */
- if (!indx[i].has_paths)
+ if (i == SQDB_PARENT_ROOT || !indx[i].has_paths)
goto parent_dns_match;
if (key.ptr != url->host.ptr) {
@@ -268,7 +275,7 @@ static int url_classify(struct url_info *url, struct sqdb *db)
}
parent_dns_match:
- if (i == -1)
+ if (i == SQDB_PARENT_ROOT)
return 0; /* no category */
return indx[i].category;
diff --git a/squarkdb.h b/squarkdb.h
index 743325e..68c1a2a 100644
--- a/squarkdb.h
+++ b/squarkdb.h
@@ -34,6 +34,9 @@ struct sqdb_header {
struct sqdb_section section[SQDB_SECTION_MAX];
};
+#define SQDB_PARENT_ROOT 0xffffff
+#define SQDB_PARENT_IPV4 0xfffffe
+
struct sqdb_index_entry {
uint32_t has_subdomains : 1;
uint32_t has_paths : 1;