diff options
-rw-r--r-- | weblog-model.lua | 97 |
1 files changed, 65 insertions, 32 deletions
Convert the squid and DansGuardian log parsers from functions that build a
full table of entries into iterator factories (parsesquidlog_iter,
parsedglog_iter). The import functions now consume entries with a generic
"for entry in logentries" loop, and importlogfile passes the iterator through.
The read-loop variable "line" in both iterators is declared local so that it
does not leak into the global environment.

NOTE(review): this patch was reconstructed from a whitespace-flattened
rendering. Indentation, blank context lines and the wrapping of the
string.format argument lists are best guesses, and the index hash predates
the "local line" change; regenerate from the repository before applying.

diff --git a/weblog-model.lua b/weblog-model.lua
index 4a2e16a..e28464b 100644
--- a/weblog-model.lua
+++ b/weblog-model.lua
@@ -180,7 +180,7 @@ end
 
 local importsquidlog = function(logentries, sourcename)
 	con:execute("START TRANSACTION")
-	for i,entry in pairs(logentries) do
+	for entry in logentries do
 		local sql = string.format("INSERT INTO weblog VALUES ('%s', '%s', '%s', '%s', '%s', '%s')",
 			escape(sourcename), escape(entry.clientip), escape(entry.clientuserid:lower()),
 			escape(entry.logdatetime), escape(entry.URL), escape(entry.bytes))
@@ -191,7 +191,7 @@ end
 
 local importdglog = function(logentries, sourcename)
 	con:execute("START TRANSACTION")
-	for i,entry in pairs(logentries) do
+	for entry in logentries do
 		local sql = string.format("INSERT INTO blocklog VALUES ('%s', '0.0.0.0', '%s', '%s', '%s', '%s', '%s', '%s', '%s')",
 			escape(sourcename), escape(entry.clientuserid:lower()), escape(entry.logdatetime), escape(entry.URL),
 			escape(entry.bytes), escape(entry.reason), escape(entry.score or "0"), escape(entry.shortreason))
@@ -552,39 +552,71 @@ end
 -- ################################################################################
 -- LOG FILE FUNCTIONS
 
-local parsesquidlog = function(f)
-	local logentries = {}
-	for line in f:lines() do
-		-- Format of squid log (space separated):
-		-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
-		local words = {}
-		for word in string.gmatch(line, "%S+") do
-			words[#words+1] = word
-		end
-		local logentry = {logdatetime=words[1], elapsed=words[2], clientip=words[3], code=string.match(words[4], "^[^/]*"), status=string.match(words[4], "[^/]*$"), bytes=words[5], method=words[6], URL=words[7], clientuserid=words[8], peerstatus=string.match(words[9], "^[^/]*"), peerhost=string.match(words[9], "[^/]*$")}
-		logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*")
-		-- Don't care about local requests (from DG)
-		if logentry.clientip ~= "127.0.0.1" then
-			logentries[#logentries+1] = logentry
+local function parsesquidlog_line(line)
+	-- Format of squid log (space separated):
+	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
+	local words = {}
+	for word in string.gmatch(line, "%S+") do
+		words[#words+1] = word
+	end
+	local logentry = {logdatetime=words[1],
+		elapsed=words[2],
+		clientip=words[3],
+		code=string.match(words[4], "^[^/]*"),
+		status=string.match(words[4], "[^/]*$"),
+		bytes=words[5],
+		method=words[6],
+		URL=words[7],
+		clientuserid=words[8],
+		peerstatus=string.match(words[9], "^[^/]*"),
+		peerhost=string.match(words[9], "[^/]*$")}
+
+	logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*")
+	return logentry
+end
+
+local function parsesquidlog_iter(f)
+	return function()
+		while true do
+			local line = f:read("*line") -- local: a bare assignment would create a global
+			if line == nil then
+				return nil
+			end
+			local logentry = parsesquidlog_line(line)
+			-- Don't care about local requests (from DG)
+			if logentry.clientip ~= "127.0.0.1" then
+				return logentry
+			end
 		end
 	end
-	return logentries
 end
 
 
-local parsedglog = function(f)
-	local logentries = {}
-	for line in f:lines() do
-		local words = format.string_to_table(line, "\t")
-		local logentry = {logdatetime=words[1], clientuserid=words[2], clientip=words[3], URL=words[4], reason=words[5], method=words[6], bytes=words[7], shortreason=words[9]}
-		if logentry.reason ~= "" then
-			if logentry.shortreason == "" then logentry.shortreason = logentry.reason end
-			logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ")
-			logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-")
+local function parsedglog_line(line)
+	local words = format.string_to_table(line, "\t")
+	return { logdatetime=words[1], clientuserid=words[2], clientip=words[3],
+		URL=words[4], reason=words[5], method=words[6], bytes=words[7],
+		shortreason=words[9]}
+end
+
+local function parsedglog_iter(f)
+	return function()
+		while true do
+			local line = f:read("*line") -- local: a bare assignment would create a global
+			if line == nil then
+				return nil
+			end
+			local logentry = parsedglog_line(line)
-			logentries[#logentries+1] = logentry
+			if logentry.reason ~= "" then
+				if logentry.shortreason == "" then
+					logentry.shortreason = logentry.reason
+				end
+				logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ")
+				logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-")
+				return logentry
+			end
 		end
 	end
-	return logentries
 end
 
 -- ################################################################################
@@ -846,11 +878,12 @@ end
 
 -- import either squid or dg log file.
 -- delete logfile after
-function importlogfile(source, cookiesfile, file, parselog_func, importlog_func)
+function importlogfile(source, cookiesfile, file, parselog_iter, importlog_func)
+	local logentries
 	logme("Processing " .. file )
 	logme("Getting " .. file )
 	loghandle = openlogfile(source, cookiesfile, file)
-	logentries = parselog_func(loghandle)
+	logentries = parselog_iter(loghandle)
 	importlog_func(logentries, source.sourcename)
 	loghandle:close()
 	logme("Deleting " .. file )
@@ -876,10 +909,10 @@ function importlogs()
 			for j,file in ipairs(files) do
 				if string.match(file, "dansguardian/access%.log[%.%-]") then
 					count = count + 1
-					importlogfile(source, cookeisfile, file, parsedglog, importdglog)
+					importlogfile(source, cookeisfile, file, parsedglog_iter, importdglog)
 				elseif string.match(file, "squid/access%.log[%.%-]") then
 					count = count + 1
-					importlogfile(source, cookeisfile, file, parsesquidlog, importsquidlog)
+					importlogfile(source, cookeisfile, file, parsesquidlog_iter, importsquidlog)
 				end
 			end
 		end