summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-09 15:07:26 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-09 15:07:26 +0300
commitb5a5dd614101000f653e6ecb96ab34ae3f44353f (patch)
tree5b9cb2a8b9d56eefc2e044d8845bb5fbedb6ba49
parent02e7cfc6b4603be8ff3b69abbfad50193aaee845 (diff)
downloadsquark-b5a5dd614101000f653e6ecb96ab34ae3f44353f.tar.bz2
squark-b5a5dd614101000f653e6ecb96ab34ae3f44353f.tar.xz
squarkdb: cmph based url database for squark filtering
Implement basics of squarkdb which will be used by squark-filter to categorize URIs. Implementation is based on libcmph and uses file format suitable to be mmap:ed from squark-filter. Lua code is used to create the squark database from standard domain / url blacklists.
-rw-r--r--Makefile21
-rw-r--r--lua-squarkdb.c301
-rwxr-xr-xsqdb-build.lua302
-rw-r--r--squarkdb.c121
-rw-r--r--squarkdb.h50
5 files changed, 789 insertions, 6 deletions
diff --git a/Makefile b/Makefile
index 01d7400..329c66a 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,22 @@
+TARGETS=squark-auth squarkdb.so
+
+NETSNMP_CFLAGS:=$(shell net-snmp-config --cflags)
+NETSNMP_LIBS:=$(shell net-snmp-config --libs)
+LUA_CFLAGS:=$(shell pkg-config --cflags lua5.1)
+LUA_LIBS:=$(shell pkg-config --libs lua5.1)
+CMPH_CFLAGS:=$(shell pkg-config --cflags cmph)
+CMPH_LIBS:=$(shell pkg-config --libs cmph)
+
CC=gcc
-OBJS1=squark-auth.o
-TARGETS=squark-auth
-CFLAGS=-g -I. $(shell net-snmp-config --cflags) -std=gnu99 -Wall
-BUILDLIBS=$(shell net-snmp-config --libs)
+CFLAGS=-g -I. $(NETSNMP_CFLAGS) $(LUA_CFLAGS) $(CMPH_CFLAGS) -std=gnu99 -Wall
all: $(TARGETS)
-squark-auth: $(OBJS1)
- $(CC) -g -o $@ $(OBJS1) $(BUILDLIBS) -nopie
+squark-auth: squark-auth.o
+ $(CC) -o $@ $< $(NETSNMP_LIBS)
+
+squarkdb.so: lua-squarkdb.o squarkdb.o
+ $(CC) -shared -o $@ $^ $(LUA_LIBS) $(CMPH_LIBS)
clean:
rm $(OBJS1) $(TARGETS)
diff --git a/lua-squarkdb.c b/lua-squarkdb.c
new file mode 100644
index 0000000..80a0d32
--- /dev/null
+++ b/lua-squarkdb.c
@@ -0,0 +1,301 @@
+#include <string.h>
+
+#include <lua.h>
+#include <lualib.h>
+#include <lauxlib.h>
+
+#include <cmph.h>
+
+#include "squarkdb.h"
+
+#define SQUARKDB_META "squarkdb"
+
+struct sqdb *Lsqdb_checkarg(lua_State *L, int index)
+{
+ struct sqdb *db;
+
+ luaL_checktype(L, index, LUA_TUSERDATA);
+ db = (struct sqdb *) luaL_checkudata(L, index, SQUARKDB_META);
+ if (db == NULL)
+ luaL_typerror(L, index, SQUARKDB_META);
+ return db;
+}
+
+static int Lsqdb_new(lua_State *L)
+{
+ struct sqdb *db;
+ const char *fn;
+
+ fn = luaL_checklstring(L, 1, NULL);
+
+ db = (struct sqdb *) lua_newuserdata(L, sizeof(struct sqdb));
+ luaL_getmetatable(L, SQUARKDB_META);
+ lua_setmetatable(L, -2);
+
+ if (sqdb_create(db, fn) < 0)
+ luaL_error(L, "Failed to create SquarkDB file '%s'", fn);
+
+ return 1;
+}
+
+static int Lsqdb_destroy(lua_State *L)
+{
+ struct sqdb *db;
+
+ db = Lsqdb_checkarg(L, 1);
+ sqdb_close(db);
+
+ return 1;
+}
+
+
+struct ioa_data {
+ lua_State *main;
+ lua_State *thread;
+};
+
+static void ioa_rewind(void *data)
+{
+ struct ioa_data *ioa = (struct ioa_data *) data;
+
+ /* pop previous thread */
+ lua_pop(ioa->main, 1);
+
+ /* create a new lua thread */
+ ioa->thread = lua_newthread(ioa->main);
+ lua_pushvalue(ioa->main, -2); /* copy function to top */
+ lua_xmove(ioa->main, ioa->thread, 1); /* move function from L to NL */
+}
+
+static cmph_uint32 ioa_count_keys(void *data)
+{
+ struct ioa_data *ioa = (struct ioa_data *) data;
+ lua_State *NL;
+ cmph_uint32 cnt = 0;
+
+ NL = lua_newthread(ioa->main);
+ lua_pushvalue(ioa->main, -2); /* copy function to top */
+ lua_xmove(ioa->main, NL, 1); /* move function from L to NL */
+
+ do {
+ cnt++;
+ lua_settop(NL, 1);
+ } while (lua_resume(NL, 0) == LUA_YIELD);
+ ioa_rewind(data);
+
+ return cnt - 1;
+}
+
+static int ioa_read(void *data, char **key, cmph_uint32 *len)
+{
+ struct ioa_data *ioa = (struct ioa_data *) data;
+ lua_State *L = ioa->thread;
+ size_t l;
+
+ /* get next key from lua thread */
+ lua_settop(L, 1);
+ if (lua_resume(L, 0) != LUA_YIELD ||
+ !lua_isstring(L, 1)) {
+ *key = NULL;
+ *len = 0;
+ return -1;
+ }
+
+ *key = (char *) lua_tolstring(L, 1, &l);
+ *len = l;
+
+ return l;
+}
+
+static void ioa_dispose(void *data, char *key, cmph_uint32 len)
+{
+ /* LUA takes care of garbage collection */
+}
+
+static int Lsqdb_hash(lua_State *L)
+{
+ struct sqdb *db;
+ void *ptr;
+ cmph_uint32 hash;
+ const char *key;
+ size_t keylen;
+
+ db = Lsqdb_checkarg(L, 1);
+ key = luaL_checklstring(L, 2, &keylen);
+
+ ptr = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
+ hash = cmph_search_packed(ptr, key, keylen);
+
+ lua_pushinteger(L, hash);
+
+ return 1;
+}
+
+static int Lsqdb_generate_hash(lua_State *L)
+{
+ struct sqdb *db;
+ struct ioa_data ioa;
+ cmph_config_t *cfg;
+ cmph_t *cmph;
+ cmph_io_adapter_t io;
+
+ char *ptr;
+ cmph_uint32 packed;
+
+ db = Lsqdb_checkarg(L, 1);
+ luaL_argcheck(L, lua_isfunction(L, 2) && !lua_iscfunction(L, 2),
+ 2, "Lua function expected");
+
+ ioa.main = L;
+ io.data = &ioa;
+ io.nkeys = ioa_count_keys(io.data);
+ io.read = ioa_read;
+ io.dispose = ioa_dispose;
+ io.rewind = ioa_rewind;
+
+ cfg = cmph_config_new(&io);
+ if (cfg == NULL)
+ luaL_error(L, "Failed to create CMPH config");
+
+ cmph_config_set_algo(cfg, CMPH_CHD);
+ cmph = cmph_new(cfg);
+ cmph_config_destroy(cfg);
+
+ if (cmph == NULL)
+ luaL_error(L, "Failed to create minimal perfect hash");
+
+ packed = cmph_packed_size(cmph);
+ ptr = sqdb_section_create(db, SQDB_SECTION_INDEX_MPH, packed);
+ if (ptr == NULL) {
+ cmph_destroy(cmph);
+ luaL_error(L, "Unable to allocation MPH section from SquarkDB");
+ }
+
+ cmph_pack(cmph, ptr);
+ cmph_destroy(cmph);
+
+ lua_pushinteger(L, io.nkeys);
+ lua_pushinteger(L, packed);
+
+ return 2;
+}
+
+static int Lsqdb_create_index(lua_State *L)
+{
+ struct sqdb *db;
+ lua_Integer num_entries;
+ void *ptr;
+
+ db = Lsqdb_checkarg(L, 1);
+ num_entries = luaL_checkinteger(L, 2);
+
+ ptr = sqdb_section_create(db, SQDB_SECTION_INDEX, sizeof(struct sqdb_index_entry) * num_entries);
+ if (ptr == NULL)
+ luaL_error(L, "Failed to create INDEX section");
+
+ return 0;
+}
+
+static int Lsqdb_assign_index(lua_State *L)
+{
+ struct sqdb *db;
+ size_t size;
+ lua_Integer idx;
+ struct sqdb_index_entry *ptr;
+
+ db = Lsqdb_checkarg(L, 1);
+ idx = luaL_checkinteger(L, 2);
+
+ ptr = sqdb_section_get(db, SQDB_SECTION_INDEX, &size);
+ if (size <= 0 || idx * sizeof(struct sqdb_index_entry) >= size)
+ luaL_error(L, "Bad index assignment (idx=%d, section size=%d)", idx, size);
+
+ ptr += idx;
+ if (ptr->component != 0)
+ luaL_error(L, "Index entry %d has been already assigned", idx);
+
+ ptr->category = luaL_checkinteger(L, 3);
+ ptr->has_subdomains = lua_toboolean(L, 4);
+ ptr->has_paths = lua_toboolean(L, 5);
+ ptr->component = luaL_checkinteger(L, 6);
+ ptr->parent = luaL_checkinteger(L, 7);
+
+ return 0;
+}
+
+static int Lsqdb_map_strings(lua_State *L)
+{
+ struct sqdb *db;
+ const char *str;
+ char *ptr;
+ size_t len, total, pos;
+
+ db = Lsqdb_checkarg(L, 1);
+ luaL_checktype(L, 2, LUA_TTABLE);
+
+ /* go through the table and count total amount of data */
+ total = 0;
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ str = luaL_checklstring(L, -2, &len);
+ total += len + 1;
+ lua_pop(L, 1);
+ }
+
+ /* create string literal section */
+ ptr = sqdb_section_create(db, SQDB_SECTION_STRINGS, total);
+ if (ptr == NULL)
+ luaL_error(L, "Failed to create string literal section (%d bytes)", total);
+
+ /* populate string literals and return their indices */
+ pos = 0;
+ lua_pushnil(L);
+ while (lua_next(L, 2) != 0) {
+ str = lua_tolstring(L, -2, &len);
+ memcpy(&ptr[pos], str, len + 1);
+ lua_pop(L, 1);
+
+ /* table[key] = pos */
+ lua_pushvalue(L, -1);
+ lua_pushinteger(L, pos);
+ lua_rawset(L, 2);
+
+ pos += len + 1;
+ }
+
+ return 0;
+}
+
+static const luaL_reg sqdb_meta_methods[] = {
+ { "__gc", Lsqdb_destroy },
+ { NULL, NULL }
+};
+
+static const luaL_reg squarkdb_methods[] = {
+ { "new", Lsqdb_new },
+ { "hash", Lsqdb_hash },
+ { "generate_hash", Lsqdb_generate_hash },
+ { "create_index", Lsqdb_create_index },
+ { "assign_index", Lsqdb_assign_index },
+ { "map_strings", Lsqdb_map_strings },
+ { NULL, NULL }
+};
+
+LUALIB_API int luaopen_squarkdb(lua_State *L)
+{
+ /* Register squarkdb library */
+ luaI_openlib(L, "squarkdb", squarkdb_methods, 0);
+
+ /* And metatable for it */
+ luaL_newmetatable(L, SQUARKDB_META);
+ luaI_openlib(L, NULL, sqdb_meta_methods, 0);
+ lua_pushliteral(L, "__index");
+ lua_pushvalue(L, -3);
+ lua_rawset(L, -3);
+ lua_pushliteral(L, "__metatable");
+ lua_pushvalue(L, -3);
+ lua_rawset(L, -3);
+ lua_pop(L, 1);
+
+ return 1;
+}
diff --git a/sqdb-build.lua b/sqdb-build.lua
new file mode 100755
index 0000000..6cde349
--- /dev/null
+++ b/sqdb-build.lua
@@ -0,0 +1,302 @@
+#!/usr/bin/lua
+
+require("squarkdb")
+
+local all_strings = {}
+local all_domains = {}
+local all_ips = {}
+
+local all_categories = {}
+local num_categories = 0
+
+local strfind = string.find
+local strsub = string.sub
+local tinsert = table.insert
+
+local function strsplit(delimiter, text)
+ local list = {}
+ local pos = 1
+ --if strfind("", delimiter, 1) then -- this would result in endless loops
+ -- error("delimiter matches empty string!")
+ --end
+ while 1 do
+ local first, last = strfind(text, delimiter, pos)
+ if first then -- found?
+ tinsert(list, strsub(text, pos, first-1))
+ pos = last+1
+ else
+ tinsert(list, strsub(text, pos))
+ break
+ end
+ end
+ return list
+end
+
+local function account_string(s)
+ all_strings[s] = true
+end
+
+local function get_category(category_text)
+ local cat
+
+ cat = all_categories[category_text]
+ if cat ~= nil then return cat end
+
+ num_categories = num_categories + 1
+ cat = { desc=category_text, id=num_categories }
+ all_categories[category_text] = cat
+
+ account_string(category_text)
+
+ return cat
+end
+
+local function get_domain(domain, locked)
+ local parts, entry, idx, p, child
+
+ parts = strsplit("[.]", domain)
+ entry = all_domains
+ for idx=#parts,1,-1 do
+ p = parts[idx]
+ if entry.children == nil then
+ entry.children = {}
+ end
+ child = entry.children[p]
+ if child == nil then
+ child = {}
+ entry.children[p] = child
+ end
+ if child.locked and not locked then
+ return nil
+ end
+ entry = child
+ end
+ return child
+end
+
+local function get_path(domain_entry, path, locked)
+ local entry, p, n, component
+
+ entry = domain_entry
+ for n,component in pairs(strsplit("/", path)) do
+ if entry.paths == nil then
+ entry.paths = {}
+ end
+ p = entry.paths[component]
+ if p == nil then
+ p = {}
+ entry.paths[component] = p
+ end
+ if p.locked and not locked then
+ return nil
+ end
+ entry = p
+ end
+ return p
+end
+
+local function is_ip_addr(s)
+ return s:match("^%d+\.%d+\.%d+\.%d+$")
+end
+
+local function read_urls(filename, category, locked)
+ local fd, url, domain, path, d
+
+ fd = io.open(filename)
+ if fd == nil then
+ print("WARNING: File " .. filename .. " does not exist")
+ return
+ end
+ print("Reading " .. filename)
+ for url in fd:lines() do
+ url = url:gsub("#.*", "")
+ url = url:gsub(" *^", "")
+ url = url:lower()
+ url = url:gsub("^(www%d*[.])", "")
+ domain, path = url:match("([^/]*)/?(.*)")
+ domain = domain:gsub(":.*", "")
+ domain = domain:gsub("[.]$", "") -- trailing dot
+ if domain == "" then
+ d = nil
+ elseif not is_ip_addr(domain) then
+ d = get_domain(domain, locked)
+ else
+ d = all_ips[domain]
+ if d == nil then
+ d = {}
+ all_ips[domain] = d
+ end
+ end
+ if d == nil then
+ --if url ~= "" then
+ -- print(url .. " ignored due to locked record")
+ --end
+ elseif path ~= "" then
+ if d.category ~= category and #path < 100 and path:match("([?;&])") == nil then
+ path = path:gsub("^/", "")
+ path = path:gsub("/$", "")
+ p = get_path(d, path, locked)
+ if p ~= nil then
+ p.category = category
+ if locked then
+ p.locked = true
+ end
+ end
+ end
+ else
+ if d.category == nil then
+ d.category = category
+ if locked then
+ d.locked = true
+ end
+ end
+ end
+ end
+ fd:close()
+end
+
+local function enum_paths(cb, category, path, data)
+ local fpath, cpath, cdata, cat
+
+ for cpath, cdata in pairs(data) do
+ fpath = path .. "/" .. cpath
+ cat = cdata.category or category
+ cb(fpath, path, cpath, cat, false, cdata.paths)
+ if cdata.paths then
+ enum_paths(cb, cat, fpath, cdata.paths)
+ end
+ end
+end
+
+local function enum_tree(cb, category, dns, data)
+ local cdns, cdata, fdns
+ local cat = data.category or category
+
+ if data.paths ~= nil then
+ enum_paths(cb, cat, dns, data.paths)
+ end
+ if data.children ~= nil then
+ for cdns, cdata in pairs(data.children) do
+ if dns ~= nil then
+ fdns = cdns .. "." .. dns
+ else
+ fdns = cdns
+ end
+ cb(fdns, dns, cdns, cat, data.children, data.paths)
+ enum_tree(cb, cat, fdns, cdata)
+ end
+ end
+end
+
+local function enum_all(cb)
+ local ipaddr, data, category
+
+ -- enumerate all domains
+ enum_tree(cb, nil, nil, all_domains)
+
+ -- all IP addresses
+ for ipaddr, data in pairs(all_ips) do
+ if data.paths ~= nil then
+ enum_paths(cb, data.category, ipaddr, data.paths)
+ end
+ -- fixme, calculate ip as 32-bit value
+ cb(ipaddr, nil, 0, data.category, nil, data.paths)
+ end
+end
+
+local function prune_paths(paths, category)
+ local path, pdata, cat
+ local num_paths = 0
+
+ for path, pdata in pairs(paths) do
+ local sub_paths = 0
+
+ cat = pdata.category or category
+ if pdata.paths ~= nil then
+ sub_paths = prune_paths(pdata.paths, cat)
+ if sub_paths == 0 then
+ pdata.paths = nil
+ end
+ end
+ if cat == category and sub_paths == 0 then
+ paths[path] = nil
+ else
+ num_paths = num_paths + 1
+ account_string(path)
+ end
+ end
+ return num_paths
+end
+
+local function prune_tree(d, category)
+ local num_childs = 0
+ local num_paths = 0
+ local cat
+
+ cat = d.category or category
+ if d.children ~= nil then
+ for n, child in pairs(d.children) do
+ if prune_tree(child, cat, count) then
+ d.children[n] = nil
+ else
+ num_childs = num_childs + 1
+ account_string(n)
+ end
+ end
+ if num_childs == 0 then
+ d.children = nil
+ end
+ end
+ --print(name, d.category, category, d.num_paths, num_childs)
+ if d.paths ~= nil then
+ num_paths = prune_paths(d.paths, d.category)
+ end
+ if cat == category and num_paths == 0 and num_childs == 0 then
+ --num_pruned_leafs = num_pruned_leafs + 1
+ return true
+ end
+ return false
+end
+
+local function load_lists(conffile, part)
+ local line, fields, cat
+
+ for line in io.lines(conffile) do
+ line = line:gsub("#(.*)", "")
+ fields = strsplit("[\t ]", line)
+ if fields[1] == "STOP" then
+ break
+ end
+ if fields[3] then
+ read_urls("lists/" .. fields[2] .. "list/" .. fields[3] .. "/" .. part,
+ get_category(fields[1]),
+ fields[4] == "LOCK")
+ end
+ end
+end
+
+-- start by reading in all classification data
+load_lists("lists.conf", "domains")
+prune_tree(all_domains, nil)
+load_lists("lists.conf", "urls")
+prune_tree(all_domains, nil)
+
+-- generate database
+local db = squarkdb.new("squark.db")
+num_entries = db:generate_hash(function() enum_all(coroutine.yield) end)
+
+-- write string literals
+db:map_strings(all_strings)
+
+-- create master index
+db:create_index(num_entries)
+enum_all(
+ function(uri, parent_uri, component, category, childs, paths)
+ db:assign_index(db:hash(uri),
+ category and category.id or 0,
+ childs and true or false,
+ paths and true or false,
+ all_strings[component] or 0,
+ parent_uri and db:hash(parent_uri) or 0)
+ end
+)
diff --git a/squarkdb.c b/squarkdb.c
new file mode 100644
index 0000000..a62ea61
--- /dev/null
+++ b/squarkdb.c
@@ -0,0 +1,121 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "squarkdb.h"
+
+#define PAGE_SIZE 4096
+#define ALIGN(s,a) (((s) + a - 1) & ~(a - 1))
+
+const char *sqdb_section_names[SQDB_SECTION_MAX] = {
+ [SQDB_SECTION_STRINGS] = "strings",
+ [SQDB_SECTION_CATEGORIES] = "categories",
+ [SQDB_SECTION_INDEX] = "index",
+ [SQDB_SECTION_INDEX_MPH] = "index_mph",
+ [SQDB_SECTION_KEYWORD] = "keyword",
+ [SQDB_SECTION_KEYWORD_MPH] = "keyword_mph",
+};
+
+static int sqdb_allocate(struct sqdb *db, size_t s)
+{
+ size_t old_size, new_size;
+ void *base;
+
+ old_size = db->file_length;
+ new_size = ALIGN(db->file_length + s, PAGE_SIZE);
+
+ if (new_size == ALIGN(db->file_length, PAGE_SIZE)) {
+ db->file_length += s;
+ return old_size;
+ }
+
+ if (ftruncate(db->fd, new_size) < 0)
+ return -1;
+
+ if (db->mmap_base == NULL) {
+ base = mmap(NULL, new_size, PROT_READ|PROT_WRITE,
+ MAP_SHARED, db->fd, 0);
+ } else {
+ base = mremap(db->mmap_base, ALIGN(old_size, PAGE_SIZE),
+ new_size, MREMAP_MAYMOVE);
+ }
+ if (base == MAP_FAILED)
+ return -1;
+
+ db->mmap_base = base;
+ db->file_length += ALIGN(s, 16);
+
+ return old_size;
+}
+
+int sqdb_create(struct sqdb *db, const char *fn)
+{
+ struct sqdb_header *hdr;
+ int rc;
+
+ db->fd = open(fn, O_CREAT | O_TRUNC | O_RDWR, 0666);
+ if (db->fd < 0)
+ return -1;
+
+ db->file_length = 0;
+ db->mmap_base = NULL;
+
+ rc = sqdb_allocate(db, sizeof(struct sqdb_header));
+ if (rc < 0) {
+ close(db->fd);
+ return rc;
+ }
+
+ hdr = db->mmap_base;
+ strcpy(hdr->description, "Squark Filtering Database");
+ hdr->version = 1;
+ hdr->magic = 0xdbdbdbdb;
+ hdr->num_sections = SQDB_SECTION_MAX;
+
+ return 0;
+}
+
+int sqdb_open(struct sqdb *db, const char *fn);
+
+void sqdb_close(struct sqdb *db)
+{
+ if (db->mmap_base)
+ munmap(db->mmap_base, ALIGN(db->file_length, PAGE_SIZE));
+ close(db->fd);
+}
+
+void *sqdb_section_create(struct sqdb *db, int id, u_int32_t size)
+{
+ struct sqdb_header *hdr;
+ size_t pos;
+
+ hdr = db->mmap_base;
+ if (hdr->section[id].offset || hdr->section[id].length)
+ return NULL;
+
+ pos = sqdb_allocate(db, size);
+ if (pos < 0)
+ return NULL;
+
+ /* sqdb_allocate can remap mmap_base */
+ hdr = db->mmap_base;
+ hdr->section[id].offset = pos;
+ hdr->section[id].length = size;
+
+ return db->mmap_base + pos;
+}
+
+void *sqdb_section_get(struct sqdb *db, int id, u_int32_t *size)
+{
+ struct sqdb_header *hdr = db->mmap_base;
+
+ if (hdr->section[id].offset == 0)
+ return NULL;
+
+ if (size)
+ *size = hdr->section[id].length;
+
+ return db->mmap_base + hdr->section[id].offset;
+}
+
diff --git a/squarkdb.h b/squarkdb.h
new file mode 100644
index 0000000..2f96155
--- /dev/null
+++ b/squarkdb.h
@@ -0,0 +1,50 @@
+#ifndef SQUARKDB_H
+#define SQUARKDB_H
+
+#include <stdint.h>
+
+#define SQDB_SECTION_STRINGS 0
+#define SQDB_SECTION_CATEGORIES 1
+#define SQDB_SECTION_INDEX 2
+#define SQDB_SECTION_INDEX_MPH 3
+#define SQDB_SECTION_KEYWORD 4
+#define SQDB_SECTION_KEYWORD_MPH 5
+#define SQDB_SECTION_MAX 8
+
+struct sqdb {
+ int fd;
+ void * mmap_base;
+ size_t file_length;
+};
+
+struct sqdb_section {
+ u_int32_t offset;
+ u_int32_t length;
+};
+
+struct sqdb_header {
+ char description[116];
+ u_int32_t num_sections;
+ u_int32_t version;
+ u_int32_t magic;
+ struct sqdb_section section[SQDB_SECTION_MAX];
+};
+
+struct sqdb_index_entry {
+ u_int32_t has_subdomains : 1;
+ u_int32_t has_paths : 1;
+ u_int32_t category : 6;
+ u_int32_t parent : 24;
+ u_int32_t component;
+};
+
+const char *sqdb_section_names[SQDB_SECTION_MAX];
+
+int sqdb_create(struct sqdb *db, const char *fn);
+int sqdb_open(struct sqdb *db, const char *fn);
+void sqdb_close(struct sqdb *db);
+
+void *sqdb_section_create(struct sqdb *db, int id, u_int32_t size);
+void *sqdb_section_get(struct sqdb *db, int id, u_int32_t *size);
+
+#endif