summaryrefslogtreecommitdiffstats
path: root/sqdb-build.lua
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-13 13:40:41 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-13 13:40:41 +0300
commit7daf2874969fb6773d480e9776cd8418eeb6353f (patch)
tree32a9ca18ded660b18b4234c3311e09238d71c128 /sqdb-build.lua
parent8bc76c78a69360efc7a07a3c4e92f393cca22543 (diff)
downloadsquark-7daf2874969fb6773d480e9776cd8418eeb6353f.tar.bz2
squark-7daf2874969fb6773d480e9776cd8418eeb6353f.tar.xz
filter: fix db building issues and implement path component matching
Fixes the "has subdomains" and "has paths" hints so that they are correct. Matching of www<number> as the first domain component now checks that stripping it will not remove a second-level domain name. The filter code now also looks up path components from the db.
Diffstat (limited to 'sqdb-build.lua')
-rwxr-xr-xsqdb-build.lua17
1 files changed, 10 insertions, 7 deletions
diff --git a/sqdb-build.lua b/sqdb-build.lua
index fce1e7b..2b301fc 100755
--- a/sqdb-build.lua
+++ b/sqdb-build.lua
@@ -117,7 +117,7 @@ local function read_urls(filename, category, locked)
url = url:gsub("#.*", "")
url = url:gsub(" *^", "")
url = url:lower()
- url = url:gsub("^(www%d*[.])", "")
+ url = url:gsub("^(www%d*[.])([^.]*[.])", "%2")
domain, path = url:match("([^/]*)/?(.*)")
domain = domain:gsub(":.*", "")
domain = domain:gsub("[.]$", "") -- trailing dot
@@ -187,7 +187,7 @@ local function enum_tree(cb, category, dns, data)
fdns = cdns
end
cat = cdata.category or category
- cb(fdns, dns, cdns, cat, data.children, data.paths)
+ cb(fdns, dns, cdns, cat, cdata.children, cdata.paths)
enum_tree(cb, cat, fdns, cdata)
end
end
@@ -233,15 +233,15 @@ local function prune_paths(paths, category)
return num_paths
end
-local function prune_tree(d, category)
+local function prune_tree(d, pcategory)
local num_childs = 0
local num_paths = 0
local cat
- cat = d.category or category
+ cat = d.category or pcategory
if d.children ~= nil then
for n, child in pairs(d.children) do
- if prune_tree(child, cat, count) then
+ if prune_tree(child, cat, n) then
d.children[n] = nil
else
num_childs = num_childs + 1
@@ -254,9 +254,12 @@ local function prune_tree(d, category)
end
--print(name, d.category, category, d.num_paths, num_childs)
if d.paths ~= nil then
- num_paths = prune_paths(d.paths, d.category)
+ num_paths = prune_paths(d.paths, cat)
+ if num_paths == 0 then
+ d.paths = nil
+ end
end
- if cat == category and num_paths == 0 and num_childs == 0 then
+ if d.category == pcategory and num_paths == 0 and num_childs == 0 then
--num_pruned_leafs = num_pruned_leafs + 1
return true
end