diff options
author | Timo Teräs <timo.teras@iki.fi> | 2010-08-09 15:07:26 +0300 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2010-08-09 15:07:26 +0300 |
commit | b5a5dd614101000f653e6ecb96ab34ae3f44353f (patch) | |
tree | 5b9cb2a8b9d56eefc2e044d8845bb5fbedb6ba49 /lua-squarkdb.c | |
parent | 02e7cfc6b4603be8ff3b69abbfad50193aaee845 (diff) | |
download | squark-b5a5dd614101000f653e6ecb96ab34ae3f44353f.tar.bz2 squark-b5a5dd614101000f653e6ecb96ab34ae3f44353f.tar.xz |
squarkdb: cmph based url database for squark filtering
Implement basics of squarkdb which will be used by squark-filter
to categorize URIs. Implementation is based on libcmph and uses
file format suitable to be mmap:ed from squark-filter.
Lua code is used to create the squark database from standard
domain / url blacklists.
Diffstat (limited to 'lua-squarkdb.c')
-rw-r--r-- | lua-squarkdb.c | 301 |
1 files changed, 301 insertions, 0 deletions
diff --git a/lua-squarkdb.c b/lua-squarkdb.c new file mode 100644 index 0000000..80a0d32 --- /dev/null +++ b/lua-squarkdb.c @@ -0,0 +1,301 @@ +#include <string.h> + +#include <lua.h> +#include <lualib.h> +#include <lauxlib.h> + +#include <cmph.h> + +#include "squarkdb.h" + +#define SQUARKDB_META "squarkdb" + +struct sqdb *Lsqdb_checkarg(lua_State *L, int index) +{ + struct sqdb *db; + + luaL_checktype(L, index, LUA_TUSERDATA); + db = (struct sqdb *) luaL_checkudata(L, index, SQUARKDB_META); + if (db == NULL) + luaL_typerror(L, index, SQUARKDB_META); + return db; +} + +static int Lsqdb_new(lua_State *L) +{ + struct sqdb *db; + const char *fn; + + fn = luaL_checklstring(L, 1, NULL); + + db = (struct sqdb *) lua_newuserdata(L, sizeof(struct sqdb)); + luaL_getmetatable(L, SQUARKDB_META); + lua_setmetatable(L, -2); + + if (sqdb_create(db, fn) < 0) + luaL_error(L, "Failed to create SquarkDB file '%s'", fn); + + return 1; +} + +static int Lsqdb_destroy(lua_State *L) +{ + struct sqdb *db; + + db = Lsqdb_checkarg(L, 1); + sqdb_close(db); + + return 1; +} + + +struct ioa_data { + lua_State *main; + lua_State *thread; +}; + +static void ioa_rewind(void *data) +{ + struct ioa_data *ioa = (struct ioa_data *) data; + + /* pop previous thread */ + lua_pop(ioa->main, 1); + + /* create a new lua thread */ + ioa->thread = lua_newthread(ioa->main); + lua_pushvalue(ioa->main, -2); /* copy function to top */ + lua_xmove(ioa->main, ioa->thread, 1); /* move function from L to NL */ +} + +static cmph_uint32 ioa_count_keys(void *data) +{ + struct ioa_data *ioa = (struct ioa_data *) data; + lua_State *NL; + cmph_uint32 cnt = 0; + + NL = lua_newthread(ioa->main); + lua_pushvalue(ioa->main, -2); /* copy function to top */ + lua_xmove(ioa->main, NL, 1); /* move function from L to NL */ + + do { + cnt++; + lua_settop(NL, 1); + } while (lua_resume(NL, 0) == LUA_YIELD); + ioa_rewind(data); + + return cnt - 1; +} + +static int ioa_read(void *data, char **key, cmph_uint32 *len) +{ + struct ioa_data *ioa = (struct ioa_data *) data; + lua_State *L = ioa->thread; + size_t l; + + /* get next key from lua thread */ + lua_settop(L, 1); + if (lua_resume(L, 0) != LUA_YIELD || + !lua_isstring(L, 1)) { + *key = NULL; + *len = 0; + return -1; + } + + *key = (char *) lua_tolstring(L, 1, &l); + *len = l; + + return l; +} + +static void ioa_dispose(void *data, char *key, cmph_uint32 len) +{ + /* LUA takes care of garbage collection */ +} + +static int Lsqdb_hash(lua_State *L) +{ + struct sqdb *db; + void *ptr; + cmph_uint32 hash; + const char *key; + size_t keylen; + + db = Lsqdb_checkarg(L, 1); + key = luaL_checklstring(L, 2, &keylen); + + ptr = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); + hash = cmph_search_packed(ptr, key, keylen); + + lua_pushinteger(L, hash); + + return 1; +} + +static int Lsqdb_generate_hash(lua_State *L) +{ + struct sqdb *db; + struct ioa_data ioa; + cmph_config_t *cfg; + cmph_t *cmph; + cmph_io_adapter_t io; + + char *ptr; + cmph_uint32 packed; + + db = Lsqdb_checkarg(L, 1); + luaL_argcheck(L, lua_isfunction(L, 2) && !lua_iscfunction(L, 2), + 2, "Lua function expected"); + + ioa.main = L; + io.data = &ioa; + io.nkeys = ioa_count_keys(io.data); + io.read = ioa_read; + io.dispose = ioa_dispose; + io.rewind = ioa_rewind; + + cfg = cmph_config_new(&io); + if (cfg == NULL) + luaL_error(L, "Failed to create CMPH config"); + + cmph_config_set_algo(cfg, CMPH_CHD); + cmph = cmph_new(cfg); + cmph_config_destroy(cfg); + + if (cmph == NULL) + luaL_error(L, "Failed to create minimal perfect hash"); + + packed = cmph_packed_size(cmph); + ptr = sqdb_section_create(db, SQDB_SECTION_INDEX_MPH, packed); + if (ptr == NULL) { + cmph_destroy(cmph); + luaL_error(L, "Unable to allocation MPH section from SquarkDB"); + } + + cmph_pack(cmph, ptr); + cmph_destroy(cmph); + + lua_pushinteger(L, io.nkeys); + lua_pushinteger(L, packed); + + return 2; +} + +static int Lsqdb_create_index(lua_State *L) +{ + struct sqdb *db; + lua_Integer num_entries; + void *ptr; + + db = Lsqdb_checkarg(L, 1); + num_entries = luaL_checkinteger(L, 2); + + ptr = sqdb_section_create(db, SQDB_SECTION_INDEX, sizeof(struct sqdb_index_entry) * num_entries); + if (ptr == NULL) + luaL_error(L, "Failed to create INDEX section"); + + return 0; +} + +static int Lsqdb_assign_index(lua_State *L) +{ + struct sqdb *db; + size_t size; + lua_Integer idx; + struct sqdb_index_entry *ptr; + + db = Lsqdb_checkarg(L, 1); + idx = luaL_checkinteger(L, 2); + + ptr = sqdb_section_get(db, SQDB_SECTION_INDEX, &size); + if (size <= 0 || idx * sizeof(struct sqdb_index_entry) >= size) + luaL_error(L, "Bad index assignment (idx=%d, section size=%d)", idx, size); + + ptr += idx; + if (ptr->component != 0) + luaL_error(L, "Index entry %d has been already assigned", idx); + + ptr->category = luaL_checkinteger(L, 3); + ptr->has_subdomains = lua_toboolean(L, 4); + ptr->has_paths = lua_toboolean(L, 5); + ptr->component = luaL_checkinteger(L, 6); + ptr->parent = luaL_checkinteger(L, 7); + + return 0; +} + +static int Lsqdb_map_strings(lua_State *L) +{ + struct sqdb *db; + const char *str; + char *ptr; + size_t len, total, pos; + + db = Lsqdb_checkarg(L, 1); + luaL_checktype(L, 2, LUA_TTABLE); + + /* go through the table and count total amount of data */ + total = 0; + lua_pushnil(L); + while (lua_next(L, 2) != 0) { + str = luaL_checklstring(L, -2, &len); + total += len + 1; + lua_pop(L, 1); + } + + /* create string literal section */ + ptr = sqdb_section_create(db, SQDB_SECTION_STRINGS, total); + if (ptr == NULL) + luaL_error(L, "Failed to create string literal section (%d bytes)", total); + + /* populate string literals and return their indices */ + pos = 0; + lua_pushnil(L); + while (lua_next(L, 2) != 0) { + str = lua_tolstring(L, -2, &len); + memcpy(&ptr[pos], str, len + 1); + lua_pop(L, 1); + + /* table[key] = pos */ + lua_pushvalue(L, -1); + lua_pushinteger(L, pos); + lua_rawset(L, 2); + + pos += len + 1; + } + + return 0; +} + +static const luaL_reg sqdb_meta_methods[] = { + { "__gc", Lsqdb_destroy }, + { NULL, NULL } +}; + +static const luaL_reg squarkdb_methods[] = { + { "new", Lsqdb_new }, + { "hash", Lsqdb_hash }, + { "generate_hash", Lsqdb_generate_hash }, + { "create_index", Lsqdb_create_index }, + { "assign_index", Lsqdb_assign_index }, + { "map_strings", Lsqdb_map_strings }, + { NULL, NULL } +}; + +LUALIB_API int luaopen_squarkdb(lua_State *L) +{ + /* Register squarkdb library */ + luaI_openlib(L, "squarkdb", squarkdb_methods, 0); + + /* And metatable for it */ + luaL_newmetatable(L, SQUARKDB_META); + luaI_openlib(L, NULL, sqdb_meta_methods, 0); + lua_pushliteral(L, "__index"); + lua_pushvalue(L, -3); + lua_rawset(L, -3); + lua_pushliteral(L, "__metatable"); + lua_pushvalue(L, -3); + lua_rawset(L, -3); + lua_pop(L, 1); + + return 1; +} |