diff options
-rw-r--r-- | lua-squarkdb.c | 21 | ||||
-rw-r--r-- | squark-filter.c | 8 | ||||
-rw-r--r-- | squarkdb.c | 16 | ||||
-rw-r--r-- | squarkdb.h | 5 |
4 files changed, 38 insertions, 12 deletions
diff --git a/lua-squarkdb.c b/lua-squarkdb.c index dbac6d0..09d4afe 100644 --- a/lua-squarkdb.c +++ b/lua-squarkdb.c @@ -227,7 +227,7 @@ static int Lsqdb_map_strings(lua_State *L) { struct sqdb *db; const char *str; - char *ptr; + unsigned char *ptr; size_t len, total, pos; db = Lsqdb_checkarg(L, 1); @@ -238,7 +238,9 @@ static int Lsqdb_map_strings(lua_State *L) lua_pushnil(L); while (lua_next(L, 2) != 0) { str = luaL_checklstring(L, -2, &len); - total += len + 1; + total += len; + if (len >= (1 << SQDB_LENGTH_BITS)) + total++; lua_pop(L, 1); } @@ -252,15 +254,20 @@ static int Lsqdb_map_strings(lua_State *L) lua_pushnil(L); while (lua_next(L, 2) != 0) { str = lua_tolstring(L, -2, &len); - memcpy(&ptr[pos], str, len + 1); lua_pop(L, 1); - /* table[key] = pos */ + /* table[key] = encoded_string_pointer */ lua_pushvalue(L, -1); - lua_pushinteger(L, pos); - lua_rawset(L, 2); + if (len >= (1 << SQDB_LENGTH_BITS)) { + lua_pushinteger(L, pos << SQDB_LENGTH_BITS); + ptr[pos++] = len; + } else { + lua_pushinteger(L, (pos << SQDB_LENGTH_BITS) + len); + } + memcpy(&ptr[pos], str, len); + pos += len; - pos += len + 1; + lua_rawset(L, 2); } return 0; diff --git a/squark-filter.c b/squark-filter.c index 8973d04..e47cbf5 100644 --- a/squark-filter.c +++ b/squark-filter.c @@ -112,12 +112,10 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db) void *cmph; struct sqdb_index_entry *indx; uint32_t *categories; - char *strings; cmph_uint32 i = -1, previ; cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL); - strings = sqdb_section_get(db, SQDB_SECTION_STRINGS, NULL); /* search for most qualified domain match; do first lookup * with two domain components */ @@ -132,14 +130,14 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db) previ = i; i = cmph_search_packed(cmph, key.ptr, key.len); - if (blob_cmp(got, BLOB_STR(&strings[indx[i].component])) != 0) { + if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { /* the subdomain did no longer match, use * parents classification */ i = previ; goto parent_dns_match; } if (!blob_is_null(tld)) { - if (blob_cmp(tld, BLOB_STR(&strings[indx[indx[i].parent].component])) != 0) { + if (blob_cmp(tld, sqdb_get_string_literal(db, indx[indx[i].parent].component)) != 0) { /* top level domain did not match */ i = -1; goto parent_dns_match; @@ -163,7 +161,7 @@ parent_dns_match: categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL); printf("%d\n", indx[i].category); - return BLOB_STR(&strings[categories[indx[i].category]]); + return sqdb_get_string_literal(db, categories[indx[i].category]); } int main(int argc, char **argv) @@ -138,3 +138,19 @@ void *sqdb_section_get(struct sqdb *db, int id, uint32_t *size) return db->mmap_base + hdr->section[id].offset; } +blob_t sqdb_get_string_literal(struct sqdb *db, uint32_t encoded_ptr) +{ + unsigned char *ptr; + unsigned int len, off; + + ptr = sqdb_section_get(db, SQDB_SECTION_STRINGS, NULL); + if (ptr == NULL) + return BLOB_NULL; + + off = encoded_ptr >> SQDB_LENGTH_BITS; + len = encoded_ptr & ((1 << SQDB_LENGTH_BITS) - 1); + if (len == 0) + len = ptr[off++]; + + return BLOB_PTR_LEN(ptr + off, len); +} @@ -3,6 +3,9 @@ #include <stddef.h> #include <stdint.h> +#include "blob.h" + +#define SQDB_LENGTH_BITS 5 #define SQDB_SECTION_STRINGS 0 #define SQDB_SECTION_CATEGORIES 1 @@ -39,6 +42,7 @@ struct sqdb_index_entry { uint32_t component; }; + const char *sqdb_section_names[SQDB_SECTION_MAX]; int sqdb_create(struct sqdb *db, const char *fn); @@ -47,5 +51,6 @@ void sqdb_close(struct sqdb *db); void *sqdb_section_create(struct sqdb *db, int id, uint32_t size); void *sqdb_section_get(struct sqdb *db, int id, uint32_t *size); +blob_t sqdb_get_string_literal(struct sqdb *db, uint32_t encoded_ptr); #endif |