diff options
author | Ted Trask <ttrask01@yahoo.com> | 2011-09-01 14:39:26 +0000 |
---|---|---|
committer | Ted Trask <ttrask01@yahoo.com> | 2011-09-01 14:39:26 +0000 |
commit | 0e7f8ccb1a5b4d03d3df349269af92568fa82551 (patch) | |
tree | 00a85d9fd9240ba04b7d930c20b782753d83c783 | |
parent | 3120325cc04a0eb314004e827c11955a69d27ba1 (diff) | |
download | acf-weblog-0e7f8ccb1a5b4d03d3df349269af92568fa82551.tar.bz2 acf-weblog-0e7f8ccb1a5b4d03d3df349269af92568fa82551.tar.xz |
Cleaned up parsing and removed unused parsing of squark and dump logs
-rw-r--r-- | weblog-model.lua | 449 |
1 files changed, 79 insertions, 370 deletions
diff --git a/weblog-model.lua b/weblog-model.lua index a23a007..5fe4022 100644 --- a/weblog-model.lua +++ b/weblog-model.lua @@ -504,91 +504,98 @@ end -- ################################################################################ -- LOG FILE FUNCTIONS -local function parsesquidlog(line) - -- Format of squid log (space separated): - -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost - local words = {} - - for word in string.gmatch(line, "%S+") do - words[#words+1] = word - end - +local function checkwords(logentry) local goodwordloc="" local badwordloc="" - -- logme("value of word7 is "..words[7]) local wrdcnt=0 local isbad=0 local isdenied=0 local isbypass=0 local ignoreme=false - + --check for ignored records first for i,thisline in ipairs(ignorewords) do if not thisline then break end - _,instcnt = string.lower(words[7]):gsub(thisline, " ") + _,instcnt = string.lower(logentry.URL):gsub(thisline, " ") if instcnt ~= 0 then - ignoreme = true - break + ignoreme = true + break end end if ignoreme ~= true then - --proceed with record analysis - for i,thisline in ipairs(badwords) do - if not thisline then - break - end - - _,instcnt = string.lower(words[7]):gsub(thisline, " ") - --if string.find(words[7],thisline) ~= nil then - if instcnt ~= 0 then - -- logme("instcnt = "..instcnt) - isbad=1 - wrdcnt= wrdcnt + instcnt - if badwordloc ~= "" then - badwordloc = badwordloc.."|"..thisline - else - badwordloc=thisline - end - - end - - if string.find(words[7],"*DENIED*") then --- logme("*Denied*") - isdenied=1 - elseif string.find(words[7],"GBYPASS") then --- logme("GBYPASS") - isbypass=1 - elseif string.find(words[7],"*OVERRIDE*") then --- logme("*OVERRIDE*") - isbypass=1 - end - end - for i,goodline in ipairs(goodwords) do - if not goodline then - break + --proceed with record analysis + for i,thisline in ipairs(badwords) do + if not thisline then + break + end + + _,instcnt = string.lower(logentry.URL):gsub(thisline, " ") + --if string.find(logentry.URL,thisline) ~= nil then + if instcnt ~= 0 then + -- logme("instcnt = "..instcnt) + isbad=1 + wrdcnt= wrdcnt + instcnt + if badwordloc ~= "" then + badwordloc = badwordloc.."|"..thisline + else + badwordloc=thisline + end + end + + if string.find(logentry.URL,"*DENIED*") then + -- logme("*Denied*") + isdenied=1 + elseif string.find(logentry.URL,"GBYPASS") then + -- logme("GBYPASS") + isbypass=1 + elseif string.find(logentry.URL,"*OVERRIDE*") then + -- logme("*OVERRIDE*") + isbypass=1 + end end - _,instcnt = string.lower(words[7]):gsub(goodline, " ") - --if string.find(words[7],goodline) then - if instcnt ~= 0 then - if wrdcnt ~= 0 then - wrdcnt = wrdcnt - instcnt - if goodwordloc ~= "" then - goodwordloc = goodwordloc.."|"..goodline - else - goodwordloc = goodline - end - end + for i,goodline in ipairs(goodwords) do + if not goodline then + break + end + _,instcnt = string.lower(logentry.URL):gsub(goodline, " ") + --if string.find(logentry.URL,goodline) then + if instcnt ~= 0 then + if wrdcnt ~= 0 then + wrdcnt = wrdcnt - instcnt + if goodwordloc ~= "" then + goodwordloc = goodwordloc.."|"..goodline + else + goodwordloc = goodline + end + end + end end - end end -- Reset bad to reduce number of bad hits if score is zero -- if wrdcnt == 0 then -- isbad=0 -- end - + + logentry.score=wrdcnt + logentry.badyesno=isbad + logentry.deniedyesno=isdenied + logentry.bypassyesno=isbypass + logentry.wordloc=badwordloc + logentry.gwordloc=goodwordloc +end + +local function parsesquidlog(line) + -- Format of squid log (space separated): + -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost + local words = {} + + for word in string.gmatch(line, "%S+") do + words[#words+1] = word + end + local logentry = {logdatetime=words[1], elapsed=words[2], clientip=words[3], @@ -599,209 +606,22 @@ local function parsesquidlog(line) URL=words[7], clientuserid=words[8], peerstatus=string.match(words[9] or "", "^[^/]*"), - peerhost=string.match(words[9] or "", "[^/]*$"), - score=wrdcnt, - badyesno=isbad, - deniedyesno=isdenied, - bypassyesno=isbypass, - wordloc=badwordloc, - gwordloc=goodwordloc} + peerhost=string.match(words[9] or "", "[^/]*$")} + + checkwords(logentry) -- Don't care about local requests (from DG) (this check also removes blank lines) if logentry.clientip and logentry.clientip ~= "127.0.0.1" then - if logentry.clientuserid and logentry.clientuserid ~= "-" then - logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*") - return logentry - end - end - return nil -end - -local function parsesquarklog(line) - -- Format of squid log (space separated): - -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost - local words = {} - - for word in string.gmatch(line, "%S+") do - words[#words+1] = word - end - - local goodwordloc="" - local badwordloc="" - local wrdcnt=0 - local isbad=0 - local isdenied=0 - local isbypass=0 - local ignoreme=false - - --check for ignored records first - for i,thisline in ipairs(ignorewords) do - if not thisline then - break - end - _,instcnt = string.lower(words[5]):gsub(thisline, " ") - if instcnt ~= 0 then - ignoreme = true - break - end - end - - if ignoreme ~= true then - for i,thisline in ipairs(badwords) do - if not thisline then - -- logme("This line is apparently empty...") - break - end - - _,instcnt = string.lower(words[5]):gsub(thisline, " ") - -- if string.find(words[5],thisline) ~= nil then - --logme("checking "..thisline.." against "..words[5]) - if instcnt ~= 0 then - isbad=1 - wrdcnt = wrdcnt + instcnt - if badwordloc ~= "" then - badwordloc = badwordloc.."|"..thisline - else - badwordloc=thisline - end - - -- logme("bad "..badwordloc) - end - - if string.find(words[6],"*DENIED*") then - isdenied=1 - end - if string.find(words[6],"*OVERRIDE*") then - isbypass=1 + if logentry.clientuserid and logentry.clientuserid ~= "-" then + logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*") + return logentry end end - for i,goodline in ipairs(goodwords) do - if not goodline then - -- logme("This line is apparently empty...") - break - end - _,instcnt = string.lower(words[5]):gsub(goodline, " ") - --if string.find(words[4],goodline) then - if instcnt ~= 0 then - if wrdcnt ~= 0 then - wrdcnt = wrdcnt - instcnt - if goodwordloc ~= "" then - goodwordloc = goodwordloc.."|"..goodline - else - goodwordloc = goodline - end - end - end - end - end - - local words = format.string_to_table(line, "\t") - local logentry = {logdatetime=words[1], - clientuserid=words[2], - clientip=words[3], - URL=words[4], - reason=words[5], - method=words[6], - bytes=words[7], - shortreason=words[9], - score=wrdcnt, - badyesno=isbad, - deniedyesno=isdenied, - bypassyesno=isbypass, - wordloc=badwordloc, - gwordloc=goodwordloc} - - if logentry.reason and logentry.reason ~= "" then - if logentry.shortreason == "" then - logentry.shortreason = logentry.reason - end - return logentry - end return nil end local function parsedglog(line) - -- Format of squid log (space separated): - -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost - local words = {} - - for word in string.gmatch(line, "%S+") do - words[#words+1] = word - end - - local goodwordloc="" - local badwordloc="" - -- logme("value of word4 is "..words[4]) - local wrdcnt=0 - local isbad=0 - local isdenied=0 - local isbypass=0 - local ignoreme=false - - --check for ignored records first - for i,thisline in ipairs(ignorewords) do - if not thisline then - break - end - _,instcnt = string.lower(words[4]):gsub(thisline, " ") - if instcnt ~= 0 then - ignoreme = true - break - end - end - - if ignoreme ~= true then - for i,thisline in ipairs(badwords) do - if not thisline then - -- logme("This line is apparently empty...") - break - end - - _,instcnt = string.lower(words[4]):gsub(thisline, " ") - --if string.find(words[4],thisline) ~= nil then - if instcnt ~= 0 then - if wrdcnt ~= 0 then - isbad=1 - wrdcnt= wrdcnt + instcnt - if badwordloc ~= "" then - badwordloc = badwordloc.."|"..thisline - else - badwordloc=thisline - end - - -- logme("bad "..badwordloc) - end - - if string.find(words[5],"*DENIED*") then - isdenied=1 - elseif string.find(words[5],"GBYPASS") then - isdenied=1 - elseif string.find(words[5],"*OVERRIDE*") then - isbypass=1 - end - end - for i,goodline in ipairs(goodwords) do - if not goodline then - -- logme("This line is apparently empty...") - break - end - _,instcnt = string.lower(words[4]):gsub(goodline, " ") - --if string.find(words[4],goodline) then - - if instcnt ~= 0 then - wrdcnt = wrdcnt - instcnt - if goodwordloc ~= "" then - goodwordloc = goodwordloc.."|"..goodline - else - goodwordloc = goodline - end - end - end - end - - end - - local words = format.string_to_table(line, "\t") + local words = format.string_to_table(line, "\t") local logentry = {logdatetime=words[1], clientuserid=words[2], clientip=words[3], @@ -809,124 +629,21 @@ local function parsedglog(line) reason=words[5], method=words[6], bytes=words[7], - shortreason=words[9], - score=wrdcnt, - badyesno=isbad, - deniedyesno=isdenied, - bypassyesno=isbypass, - wordloc=badwordloc, - gwordloc=goodwordloc} - - if logentry.reason and logentry.reason ~= "" then - if logentry.shortreason == "" then - logentry.shortreason = logentry.reason - end - return logentry - end - return nil -end + shortreason=words[9]} -local function parsedumplog(line) - -- Format of squid log (space separated): - -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost - local words = {} - - for word in string.gmatch(line, "%S+") do - words[#words+1] = word - end - goodwordloc="" - badwordloc="" - wrdcnt=0 - isbad=0 - isdenied=0 - isbypass=0 - for i,thisline in ipairs(badwords) do - if not thisline then - logme("This line is apparently empty...") - break - end - _,instcnt = string.lower(words[5]):gsub(thisline, " ") - if instcnt ~= 0 then - isbad=1 - wrdcnt = wrdcnt + instcnt - if badwordloc ~= "" then - badwordloc = badwordloc.."|"..thisline - else - badwordloc=thisline - end - - -- logme("bad "..badwordloc) - end - if string.find(words[6],"*DENIED*") then - isdenied=1 - end - if string.find(words[5],"GBYPASS") then - isbypass=1 - elseif string.find(words[6],"*OVERRIDE*") then - isbypass=1 - end - end - for i,goodline in ipairs(goodwords) do - if not goodline then - -- logme("This line is apparently empty...") - break - end - _,instcnt = string.lower(words[4]):gsub(goodline, " ") - --if string.find(words[4],goodline) then - if instcnt ~= 0 then - if wrdcnt ~= 0 then - wrdcnt = wrdcnt - instcnt - if goodwordloc ~= "" then - goodwordloc = goodwordloc.."|"..goodline - else - goodwordloc = goodline - end - end - end - end - - local words = format.string_to_table(line, "\t") - local logentry = {logdatetime=words[1], - clientuserid=words[2], - clientip=words[3], - URL=words[4], - reason=words[6], - method=words[5], - bytes=words[7], - shortreason=words[9], - score=wrdcnt, - badyesno=isbad, - deniedyesno=isdenied, - bypassyesno=isbypass, - wordloc=badwordloc, - gwordloc=goodwordloc} + checkwords(logentry) if logentry.reason and logentry.reason ~= "" then if logentry.shortreason == "" then logentry.shortreason = logentry.reason end + logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ") + logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-") return logentry end return nil end ---local function parsedglog(line) --- local words = format.string_to_table(line, "\t") --- local logentry = { logdatetime=words[1], clientuserid=words[2], clientip=words[3], --- URL=words[4], reason=words[5], method=words[6], bytes=words[7], --- shortreason=words[9]} --- if logentry.reason and logentry.reason ~= "" then --- if logentry.shortreason == "" then --- logentry.shortreason = logentry.reason --- end --- logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ") --- logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-") --- return logentry --- end --- return nil ---end - - -- ################################################################################ -- DOWNLOAD FILE FUNCTIONS @@ -1266,18 +983,10 @@ function importlogs() count = count + 1 success = importlogfile(source, cookiesfile, file, parsedglog) and success end - if string.match(file, "squark/access%.log[%.%-]") then - count = count + 1 - success = importlogfile(source, cookiesfile, file, parsesquarklog) and success - end if string.match(file, "squid/access%.log[%.%-]") then count = count + 1 success = importlogfile(source, cookiesfile, file, parsesquidlog) and success end - if string.match(file, "dump/access%.log[%.%-]") then - count = count + 1 - success = importlogfile(source, cookiesfile, file, parsedumplog) and success - end end end os.remove(cookiesfile) |