diff options
author | Timo Teräs <timo.teras@iki.fi> | 2010-08-13 13:40:41 +0300 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2010-08-13 13:40:41 +0300 |
commit | 7daf2874969fb6773d480e9776cd8418eeb6353f (patch) | |
tree | 32a9ca18ded660b18b4234c3311e09238d71c128 /sqdb-build.lua | |
parent | 8bc76c78a69360efc7a07a3c4e92f393cca22543 (diff) | |
download | squark-7daf2874969fb6773d480e9776cd8418eeb6353f.tar.bz2 squark-7daf2874969fb6773d480e9776cd8418eeb6353f.tar.xz |
filter: fix db building issues and implement path component matching
Fixes the "has subdomains" / "has paths" hints so that they are correct.
Stripping a leading www<number> label from the domain now checks that it
won't remove a second-level domain name.
And the filter code now looks up path components from the db.
Diffstat (limited to 'sqdb-build.lua')
-rwxr-xr-x | sqdb-build.lua | 17 |
1 files changed, 10 insertions, 7 deletions
diff --git a/sqdb-build.lua b/sqdb-build.lua index fce1e7b..2b301fc 100755 --- a/sqdb-build.lua +++ b/sqdb-build.lua @@ -117,7 +117,7 @@ local function read_urls(filename, category, locked) url = url:gsub("#.*", "") url = url:gsub(" *^", "") url = url:lower() - url = url:gsub("^(www%d*[.])", "") + url = url:gsub("^(www%d*[.])([^.]*[.])", "%2") domain, path = url:match("([^/]*)/?(.*)") domain = domain:gsub(":.*", "") domain = domain:gsub("[.]$", "") -- trailing dot @@ -187,7 +187,7 @@ local function enum_tree(cb, category, dns, data) fdns = cdns end cat = cdata.category or category - cb(fdns, dns, cdns, cat, data.children, data.paths) + cb(fdns, dns, cdns, cat, cdata.children, cdata.paths) enum_tree(cb, cat, fdns, cdata) end end @@ -233,15 +233,15 @@ local function prune_paths(paths, category) return num_paths end -local function prune_tree(d, category) +local function prune_tree(d, pcategory) local num_childs = 0 local num_paths = 0 local cat - cat = d.category or category + cat = d.category or pcategory if d.children ~= nil then for n, child in pairs(d.children) do - if prune_tree(child, cat, count) then + if prune_tree(child, cat, n) then d.children[n] = nil else num_childs = num_childs + 1 @@ -254,9 +254,12 @@ local function prune_tree(d, category) end --print(name, d.category, category, d.num_paths, num_childs) if d.paths ~= nil then - num_paths = prune_paths(d.paths, d.category) + num_paths = prune_paths(d.paths, cat) + if num_paths == 0 then + d.paths = nil + end end - if cat == category and num_paths == 0 and num_childs == 0 then + if d.category == pcategory and num_paths == 0 and num_childs == 0 then --num_pruned_leafs = num_pruned_leafs + 1 return true end |