summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r--  lua-squarkdb.c   21
-rw-r--r--  squark-filter.c   8
-rw-r--r--  squarkdb.c       16
-rw-r--r--  squarkdb.h        5
4 files changed, 38 insertions, 12 deletions
diff --git a/lua-squarkdb.c b/lua-squarkdb.c
index dbac6d0..09d4afe 100644
--- a/lua-squarkdb.c
+++ b/lua-squarkdb.c
@@ -227,7 +227,7 @@ static int Lsqdb_map_strings(lua_State *L)
{
struct sqdb *db;
const char *str;
- char *ptr;
+ unsigned char *ptr;
size_t len, total, pos;
db = Lsqdb_checkarg(L, 1);
@@ -238,7 +238,9 @@ static int Lsqdb_map_strings(lua_State *L)
lua_pushnil(L);
while (lua_next(L, 2) != 0) {
str = luaL_checklstring(L, -2, &len);
- total += len + 1;
+ total += len;
+ if (len >= (1 << SQDB_LENGTH_BITS))
+ total++;
lua_pop(L, 1);
}
@@ -252,15 +254,20 @@ static int Lsqdb_map_strings(lua_State *L)
lua_pushnil(L);
while (lua_next(L, 2) != 0) {
str = lua_tolstring(L, -2, &len);
- memcpy(&ptr[pos], str, len + 1);
lua_pop(L, 1);
- /* table[key] = pos */
+ /* table[key] = encoded_string_pointer */
lua_pushvalue(L, -1);
- lua_pushinteger(L, pos);
- lua_rawset(L, 2);
+ if (len >= (1 << SQDB_LENGTH_BITS)) {
+ lua_pushinteger(L, pos << SQDB_LENGTH_BITS);
+ ptr[pos++] = len;
+ } else {
+ lua_pushinteger(L, (pos << SQDB_LENGTH_BITS) + len);
+ }
+ memcpy(&ptr[pos], str, len);
+ pos += len;
- pos += len + 1;
+ lua_rawset(L, 2);
}
return 0;
diff --git a/squark-filter.c b/squark-filter.c
index 8973d04..e47cbf5 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -112,12 +112,10 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
void *cmph;
struct sqdb_index_entry *indx;
uint32_t *categories;
- char *strings;
cmph_uint32 i = -1, previ;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL);
- strings = sqdb_section_get(db, SQDB_SECTION_STRINGS, NULL);
/* search for most qualified domain match; do first lookup
* with two domain components */
@@ -132,14 +130,14 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
previ = i;
i = cmph_search_packed(cmph, key.ptr, key.len);
- if (blob_cmp(got, BLOB_STR(&strings[indx[i].component])) != 0) {
+ if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
/* the subdomain did no longer match, use
* parents classification */
i = previ;
goto parent_dns_match;
}
if (!blob_is_null(tld)) {
- if (blob_cmp(tld, BLOB_STR(&strings[indx[indx[i].parent].component])) != 0) {
+ if (blob_cmp(tld, sqdb_get_string_literal(db, indx[indx[i].parent].component)) != 0) {
/* top level domain did not match */
i = -1;
goto parent_dns_match;
@@ -163,7 +161,7 @@ parent_dns_match:
categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL);
printf("%d\n", indx[i].category);
- return BLOB_STR(&strings[categories[indx[i].category]]);
+ return sqdb_get_string_literal(db, categories[indx[i].category]);
}
int main(int argc, char **argv)
diff --git a/squarkdb.c b/squarkdb.c
index e05f514..f289b28 100644
--- a/squarkdb.c
+++ b/squarkdb.c
@@ -138,3 +138,19 @@ void *sqdb_section_get(struct sqdb *db, int id, uint32_t *size)
return db->mmap_base + hdr->section[id].offset;
}
+blob_t sqdb_get_string_literal(struct sqdb *db, uint32_t encoded_ptr) /* decode an encoded string reference into a blob pointing into the strings section */
+{
+ unsigned char *ptr;
+ unsigned int len, off;
+
+ ptr = sqdb_section_get(db, SQDB_SECTION_STRINGS, NULL);
+ if (ptr == NULL) /* no strings section pointer returned: hand back the null blob */
+ return BLOB_NULL;
+
+ off = encoded_ptr >> SQDB_LENGTH_BITS; /* bits above the length field: byte offset into the strings section */
+ len = encoded_ptr & ((1 << SQDB_LENGTH_BITS) - 1); /* low SQDB_LENGTH_BITS bits: inline length */
+ if (len == 0) /* inline length 0: the length is the single byte at off, the data follows it */
+ len = ptr[off++]; /* one byte, so at most 255 here. NOTE(review): a zero-length string would also carry inline length 0 - confirm the writer never emits one */
+
+ return BLOB_PTR_LEN(ptr + off, len);
+}
diff --git a/squarkdb.h b/squarkdb.h
index 3733ec1..743325e 100644
--- a/squarkdb.h
+++ b/squarkdb.h
@@ -3,6 +3,9 @@
#include <stddef.h>
#include <stdint.h>
+#include "blob.h"
+
+#define SQDB_LENGTH_BITS 5
#define SQDB_SECTION_STRINGS 0
#define SQDB_SECTION_CATEGORIES 1
@@ -39,6 +42,7 @@ struct sqdb_index_entry {
uint32_t component;
};
+
const char *sqdb_section_names[SQDB_SECTION_MAX];
int sqdb_create(struct sqdb *db, const char *fn);
@@ -47,5 +51,6 @@ void sqdb_close(struct sqdb *db);
void *sqdb_section_create(struct sqdb *db, int id, uint32_t size);
void *sqdb_section_get(struct sqdb *db, int id, uint32_t *size);
+blob_t sqdb_get_string_literal(struct sqdb *db, uint32_t encoded_ptr);
#endif