From 7daf2874969fb6773d480e9776cd8418eeb6353f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Fri, 13 Aug 2010 13:40:41 +0300 Subject: filter: fix db building issues and implement path component matching Fix the "has subdomains" and "has paths" hints so that they are correct. Matching of www as the first domain entry now checks that stripping it won't remove a second-level domain name. The filter code now looks up path components from the db. --- sqdb-build.lua | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'sqdb-build.lua') diff --git a/sqdb-build.lua b/sqdb-build.lua index fce1e7b..2b301fc 100755 --- a/sqdb-build.lua +++ b/sqdb-build.lua @@ -117,7 +117,7 @@ local function read_urls(filename, category, locked) url = url:gsub("#.*", "") url = url:gsub(" *^", "") url = url:lower() - url = url:gsub("^(www%d*[.])", "") + url = url:gsub("^(www%d*[.])([^.]*[.])", "%2") domain, path = url:match("([^/]*)/?(.*)") domain = domain:gsub(":.*", "") domain = domain:gsub("[.]$", "") -- trailing dot @@ -187,7 +187,7 @@ local function enum_tree(cb, category, dns, data) fdns = cdns end cat = cdata.category or category - cb(fdns, dns, cdns, cat, data.children, data.paths) + cb(fdns, dns, cdns, cat, cdata.children, cdata.paths) enum_tree(cb, cat, fdns, cdata) end end @@ -233,15 +233,15 @@ local function prune_paths(paths, category) return num_paths end -local function prune_tree(d, category) +local function prune_tree(d, pcategory) local num_childs = 0 local num_paths = 0 local cat - cat = d.category or category + cat = d.category or pcategory if d.children ~= nil then for n, child in pairs(d.children) do - if prune_tree(child, cat, count) then + if prune_tree(child, cat, n) then d.children[n] = nil else num_childs = num_childs + 1 @@ -254,9 +254,12 @@ local function prune_tree(d, category) end --print(name, d.category, category, d.num_paths, num_childs) if d.paths ~= nil then - num_paths = prune_paths(d.paths, d.category) + num_paths = prune_paths(d.paths, cat) + if
num_paths == 0 then + d.paths = nil + end end - if cat == category and num_paths == 0 and num_childs == 0 then + if d.category == pcategory and num_paths == 0 and num_childs == 0 then --num_pruned_leafs = num_pruned_leafs + 1 return true end -- cgit v1.2.3