summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNatanael Copa <ncopa@alpinelinux.org>2009-10-23 08:55:25 +0000
committerNatanael Copa <ncopa@alpinelinux.org>2009-10-23 08:55:25 +0000
commit33459f27ce11a7450ea6775da6bfe39c2b609c24 (patch)
tree032ffc6d391ad458f7172d1f2fdbdc7a28346aa0
parent12abd7f87902f16aac06fec36fc0903c4985adfd (diff)
downloadacf-weblog-33459f27ce11a7450ea6775da6bfe39c2b609c24.tar.bz2
acf-weblog-33459f27ce11a7450ea6775da6bfe39c2b609c24.tar.xz
use iterator functions to import logfiles
Rather than passing a table containing the entire parsed logfile, we pass an iterator function. This way we don't need to hold the entire logfile in memory at once, and we can handle extremely big logfiles without consuming lots of memory.
-rw-r--r--weblog-model.lua97
1 files changed, 65 insertions, 32 deletions
diff --git a/weblog-model.lua b/weblog-model.lua
index 4a2e16a..e28464b 100644
--- a/weblog-model.lua
+++ b/weblog-model.lua
@@ -180,7 +180,7 @@ end
local importsquidlog = function(logentries, sourcename)
con:execute("START TRANSACTION")
- for i,entry in pairs(logentries) do
+ for entry in logentries do
local sql = string.format("INSERT INTO weblog VALUES ('%s', '%s', '%s', '%s', '%s', '%s')",
escape(sourcename), escape(entry.clientip), escape(entry.clientuserid:lower()),
escape(entry.logdatetime), escape(entry.URL), escape(entry.bytes))
@@ -191,7 +191,7 @@ end
local importdglog = function(logentries, sourcename)
con:execute("START TRANSACTION")
- for i,entry in pairs(logentries) do
+ for entry in logentries do
local sql = string.format("INSERT INTO blocklog VALUES ('%s', '0.0.0.0', '%s', '%s', '%s', '%s', '%s', '%s', '%s')",
escape(sourcename), escape(entry.clientuserid:lower()), escape(entry.logdatetime), escape(entry.URL),
escape(entry.bytes), escape(entry.reason), escape(entry.score or "0"), escape(entry.shortreason))
@@ -552,39 +552,71 @@ end
-- ################################################################################
-- LOG FILE FUNCTIONS
-local parsesquidlog = function(f)
- local logentries = {}
- for line in f:lines() do
- -- Format of squid log (space separated):
- -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
- local words = {}
- for word in string.gmatch(line, "%S+") do
- words[#words+1] = word
- end
- local logentry = {logdatetime=words[1], elapsed=words[2], clientip=words[3], code=string.match(words[4], "^[^/]*"), status=string.match(words[4], "[^/]*$"), bytes=words[5], method=words[6], URL=words[7], clientuserid=words[8], peerstatus=string.match(words[9], "^[^/]*"), peerhost=string.match(words[9], "[^/]*$")}
- logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*")
- -- Don't care about local requests (from DG)
- if logentry.clientip ~= "127.0.0.1" then
- logentries[#logentries+1] = logentry
+local function parsesquidlog_line(line) -- Parse one squid access-log line (a string) into a table of named fields and return that table.
+ -- Format of squid log (space separated):
+ -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
+ local words = {}
+ for word in string.gmatch(line, "%S+") do -- split the line on runs of whitespace
+ words[#words+1] = word
+ end
+ local logentry = {logdatetime=words[1], -- NOTE(review): a line with fewer than 9 fields passes nil to string.match below, which raises an error
+ elapsed=words[2],
+ clientip=words[3],
+ code=string.match(words[4], "^[^/]*"), -- part of field 4 before the first "/"
+ status=string.match(words[4], "[^/]*$"), -- part of field 4 after the last "/"
+ bytes=words[5],
+ method=words[6],
+ URL=words[7],
+ clientuserid=words[8],
+ peerstatus=string.match(words[9], "^[^/]*"), -- part of field 9 before the first "/"
+ peerhost=string.match(words[9], "[^/]*$")} -- part of field 9 after the last "/"
+
+ logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*") -- formatted date/time plus the original text from the first "." onward; NOTE(review): the timestamp is passed to os.date as a string (relies on string->number coercion -- confirm on the target Lua version), and a timestamp without a "." makes the concatenation fail on nil
+ return logentry
+end
+
+local function parsesquidlog_iter(f) -- Build an iterator over file handle f: each call returns the next parsed squid log entry whose clientip is not 127.0.0.1, or nil once f has no more lines.
+ return function()
+ while true do -- keep reading until an entry is accepted or the input ends
+ local line = f:read("*line") -- declared local so the current line is not written to a global variable
+ if line == nil then
+ return nil -- end of input: terminates the caller's generic for loop
+ end
+ local logentry = parsesquidlog_line(line)
+ -- Don't care about local requests (from DG)
+ if logentry.clientip ~= "127.0.0.1" then
+ return logentry
+ end
end
end
- return logentries
end
-local parsedglog = function(f)
- local logentries = {}
- for line in f:lines() do
- local words = format.string_to_table(line, "\t")
- local logentry = {logdatetime=words[1], clientuserid=words[2], clientip=words[3], URL=words[4], reason=words[5], method=words[6], bytes=words[7], shortreason=words[9]}
- if logentry.reason ~= "" then
- if logentry.shortreason == "" then logentry.shortreason = logentry.reason end
- logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ")
- logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-")
+local function parsedglog_line(line) -- Turn one tab-separated log line (a string) into a table of named fields and return it.
+ local words = format.string_to_table(line, "\t") -- presumably splits line on the tab delimiter into an array of strings; confirm against format.string_to_table
+ return { logdatetime=words[1], clientuserid=words[2], clientip=words[3],
+ URL=words[4], reason=words[5], method=words[6], bytes=words[7],
+ shortreason=words[9]} -- field 8 is not used here
+end
+
+local function parsedglog_iter(f) -- Build an iterator over file handle f: each call returns the next parsed log entry whose reason is not the empty string, or nil once f has no more lines.
+ return function()
+ while true do -- keep reading until an entry is accepted or the input ends
+ local line = f:read("*line") -- declared local so the current line is not written to a global variable
+ if line == nil then
+ return nil -- end of input: terminates the caller's generic for loop
+ end
+ local logentry = parsedglog_line(line)
- logentries[#logentries+1] = logentry
+ if logentry.reason ~= "" then -- entries with an empty reason are skipped
+ if logentry.shortreason == "" then
+ logentry.shortreason = logentry.reason -- fall back to the full reason text
+ end
+ logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ") -- digits that follow ": " and precede a space; nil when the pattern does not match
+ logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-") -- replace every "." in the timestamp with "-"
+ return logentry
+ end
end
end
- return logentries
end
-- ################################################################################
@@ -846,11 +878,12 @@ end
-- import either squid or dg log file.
-- delete logfile after
-function importlogfile(source, cookiesfile, file, parselog_func, importlog_func)
+function importlogfile(source, cookiesfile, file, parselog_iter, importlog_func)
+ local logentries
logme("Processing " .. file )
logme("Getting " .. file )
loghandle = openlogfile(source, cookiesfile, file)
- logentries = parselog_func(loghandle)
+ logentries = parselog_iter(loghandle)
importlog_func(logentries, source.sourcename)
loghandle:close()
logme("Deleting " .. file )
@@ -876,10 +909,10 @@ function importlogs()
for j,file in ipairs(files) do
if string.match(file, "dansguardian/access%.log[%.%-]") then
count = count + 1
- importlogfile(source, cookeisfile, file, parsedglog, importdglog)
+ importlogfile(source, cookeisfile, file, parsedglog_iter, importdglog)
elseif string.match(file, "squid/access%.log[%.%-]") then
count = count + 1
- importlogfile(source, cookeisfile, file, parsesquidlog, importsquidlog)
+ importlogfile(source, cookeisfile, file, parsesquidlog_iter, importsquidlog)
end
end
end