summaryrefslogtreecommitdiffstats
path: root/weblog-model.lua
diff options
context:
space:
mode:
author Luke Stuart <lukestu@gmail.com> 2011-08-10 15:03:33 +0000
committer Ted Trask <ttrask01@yahoo.com> 2011-08-10 11:53:30 -0400
commit 28804449df1f28067b9eba8475452b5c97b2a1ac (patch)
tree b5e712dbc55b15c0ee1612f8a1a0bccabbf3d9fc /weblog-model.lua
parent 9377ebd8eef7b2e04d475a983369d8f9a903056b (diff)
download acf-weblog-28804449df1f28067b9eba8475452b5c97b2a1ac.tar.bz2
acf-weblog-28804449df1f28067b9eba8475452b5c97b2a1ac.tar.xz
Weblog Updates including Squark support and revamped analysis.
Diffstat (limited to 'weblog-model.lua')
-rw-r--r--weblog-model.lua650
1 files changed, 429 insertions, 221 deletions
diff --git a/weblog-model.lua b/weblog-model.lua
index 8db7490..be90d3d 100644
--- a/weblog-model.lua
+++ b/weblog-model.lua
@@ -6,6 +6,7 @@ require("fs")
require("format")
require("validator")
require("luasql.postgres")
+require("date")
local DatabaseName = "webproxylog"
local DatabaseOwner = "weblogowner"
@@ -14,36 +15,31 @@ local DatabaseUser = "webloguser"
local path = "PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin "
local env
local con
-local configfile = "/etc/weblog.conf"
+local configfile = "/etc/weblog/weblog.conf"
local configcontent = fs.read_file(configfile) or ""
local config = format.parse_ini_file(configcontent, "") or {}
+local goodwordslist = "/etc/weblog/goodwords"
+local badwordslist = "/etc/weblog/badwords"
+local baseurl = "/etc/weblog/"
local database_creation_script = {
"CREATE TABLE dbhistlog (logdatetime timestamp(3) without time zone NOT NULL, msgtext text)",
- "CREATE TABLE pubblocklog(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(3) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text)",
- "CREATE TABLE blocklog(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(0) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text)",
- "CREATE TABLE pubweblog(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(3) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text)",
- "CREATE TABLE weblog(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(3) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text)",
+ "CREATE TABLE pubweblog(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(3) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text, badyesno int, deniedyesno int, bypassyesno int, wordloc text, goodwordloc text, selected boolean, id serial)",
+ "CREATE TABLE pubweblog_history(sourcename character varying(40), clientip inet NOT NULL, clientuserid character varying(64) NOT NULL, logdatetime timestamp(3) without time zone NOT NULL, uri text NOT NULL, bytes bigint NOT NULL, reason text, score integer, shortreason text, badyesno int, deniedyesno int, bypassyesno int, wordloc text, goodwordloc text, selected boolean, id int)",
"CREATE TABLE source (sourcename character varying(40) NOT NULL, method character varying(100) NOT NULL, userid character varying(32), passwd character varying(255), source character varying(255) NOT NULL, tzislocal boolean, enabled boolean)",
- "CREATE TABLE usagestat (sourcename character varying(40) NOT NULL, date timestamp(0) without time zone NOT NULL, numrequest integer, numblock integer)",
- "CREATE TABLE watchlist (clientuserid character varying(64) NOT NULL, expiredatetime timestamp(0) without time zone NOT NULL)",
+ "CREATE TABLE usagestat(sourcename character varying(40) NOT NULL, date timestamp(0) without time zone NOT NULL, numrequest integer, numblock integer)",
+ "CREATE TABLE watchlist(clientuserid character varying(64) NOT NULL, expiredatetime timestamp(0) without time zone NOT NULL)",
"ALTER TABLE ONLY source ADD CONSTRAINT source_pkey PRIMARY KEY (sourcename)",
- "CREATE INDEX blocklogclientididx ON blocklog USING btree (clientuserid)",
- "CREATE INDEX blocklogclientidx ON blocklog USING btree (clientip, clientuserid)",
"CREATE INDEX dbhistlogdatetimeidx ON dbhistlog USING btree (logdatetime)",
- "CREATE INDEX pubblocklogclientididx ON pubblocklog USING btree (clientuserid)",
- "CREATE INDEX pubblocklogclientidx ON pubblocklog USING btree (clientip, clientuserid)",
"CREATE INDEX pubweblogclientdateidx ON pubweblog USING btree (logdatetime, clientuserid)",
- "CREATE INDEX pubweblogclientidx ON pubweblog USING btree (clientip, clientuserid)",
- "CREATE INDEX pubweblogclientipidx ON pubweblog USING btree (clientip)",
"CREATE INDEX pubweblogclientuserididx ON pubweblog USING btree (clientuserid)",
"CREATE INDEX pubwebloglogdatetimeidx ON pubweblog USING btree (logdatetime)",
- "CREATE INDEX weblogclientidx ON weblog USING btree (clientip, clientuserid)",
- "CREATE INDEX weblogclientipidx ON weblog USING btree (clientip)",
- "CREATE INDEX weblogclientuserididx ON weblog USING btree (clientuserid)",
+ "CREATE INDEX pubweblog_historyclientdateidx ON pubweblog_history USING btree (logdatetime, clientuserid)",
+ "CREATE INDEX pubweblog_historyclientuserididx ON pubweblog_history USING btree (clientuserid)",
+ "CREATE INDEX pubweblog_historylogdatetimeidx ON pubweblog_history USING btree (logdatetime)",
"GRANT SELECT ON dbhistlog TO "..DatabaseUser,
- "GRANT SELECT ON pubblocklog TO "..DatabaseUser,
"GRANT SELECT ON pubweblog TO "..DatabaseUser,
+ "GRANT SELECT ON pubweblog_history TO "..DatabaseUser,
"GRANT SELECT, UPDATE, INSERT, DELETE ON source TO "..DatabaseUser,
"GRANT SELECT ON usagestat TO "..DatabaseUser,
"GRANT SELECT, UPDATE, INSERT, DELETE ON watchlist TO "..DatabaseUser,
@@ -181,21 +177,21 @@ end
local importsquidlog = function(entry, sourcename)
if entry then
- local sql = string.format("INSERT INTO weblog VALUES ('%s', '%s', '%s', '%s', '%s', '%s')",
+ local sql = string.format("INSERT INTO pubweblog VALUES ('%s', '%s', '%s', '%s', '%s', '%s','%s','%s','%s','%s','%s','%s','%s')",
escape(sourcename), escape(entry.clientip), escape(entry.clientuserid, 64):lower(),
- escape(entry.logdatetime), escape(entry.URL), escape(entry.bytes))
+ escape(entry.logdatetime), escape(entry.URL), escape(entry.bytes), escape(entry.reason), escape(entry.score), escape(entry.shortreason), escape(entry.badyesno), escape(entry.deniedyesno), escape(entry.bypassyesno), escape(entry.wordloc), escape(entry.goodwordloc))
local res = assert (con:execute(sql))
end
end
local importdglog = function(entry, sourcename)
if entry then
- local sql = string.format("INSERT INTO blocklog VALUES ('%s', '0.0.0.0', '%s', '%s', '%s', '%s', '%s', '%s', '%s')",
- escape(sourcename), escape(entry.clientuserid:lower(), 64), escape(entry.logdatetime), escape(entry.URL),
- escape(entry.bytes), escape(entry.reason), escape(entry.score or "0"), escape(entry.shortreason))
- local res = assert (con:execute(sql))
+ local sql = string.format("INSERT INTO pubweblog VALUES ('%s', '%s', '%s', '%s', '%s', '%s','%s','%s','%s','%s','%s','%s','%s')",
+ escape(sourcename), escape(entry.clientip), escape(entry.clientuserid, 64):lower(),
+ escape(entry.logdatetime), escape(entry.URL), escape(entry.bytes), escape(entry.reason), escape(entry.score), escape(entry.shortreason), escape(entry.badyesno), escape(entry.deniedyesno), escape(entry.bypassyesno), escape(entry.wordloc), escape(entry.gwordloc))
+ local res = assert (con:execute(sql))
end
-end
+end
local listsourceentries = function(sourcename)
local sources = {}
@@ -245,7 +241,7 @@ local addtowatchlist = function()
local watchdays = config.watchdays or 14
local sql = "insert into watchlist select clientuserid, " ..
"(max(logdatetime) + INTERVAL '"..watchdays.." days') as expiredatetime " ..
- "from blocklog group by clientuserid"
+ "from pubweblog where bypassyesno > '0' group by clientuserid"
local res1 = assert (con:execute(sql))
sql = "delete from watchlist where exists " ..
"(select * from watchlist w where w.clientuserid = watchlist.clientuserid " ..
@@ -258,67 +254,11 @@ end
local updateusagestats = function()
-- update the usagestat table from weblog
-- (will result in multiple rows where logs rotated on partial hours)
- local sql = "insert into usagestat select weblog.sourcename, " ..
- "date_trunc('hour', weblog.logdatetime) as date, " ..
- "count(*), 0 from weblog group by sourcename,date"
+ local sql = "insert into usagestat select pubweblog.sourcename, " ..
+ "date_trunc('hour', pubweblog.logdatetime) as date, " ..
+ "count(*), SUM(deniedyesno) from pubweblog group by sourcename,date"
local res = assert (con:execute(sql))
- -- update the usagestat table from blocklog
- -- (will result in multiple rows where logs rotated on partial hours)
- -- (also, numrequest and numblock end up in different rows)
- sql= "insert into usagestat select blocklog.sourcename, " ..
- "date_trunc('hour', blocklog.logdatetime) as date, " ..
- "0, count(*) from blocklog group by sourcename,date"
- res = assert (con:execute(sql))
-end
-
--- Process weblog and blocklog, combine into pubweblog and pubblocklog
--- empties weblog and blocklog
-local importpubweblog = function()
- local sql = "ANALYZE"
- res = assert (con:execute(sql))
- -- Merge equal blocks into weblog
- sql = "update weblog set reason=blocklog.reason, " ..
- "score=blocklog.score, shortreason=blocklog.shortreason from blocklog where " ..
- "blocklog.logdatetime >= date_trunc('second', weblog.logdatetime) and " ..
- "blocklog.logdatetime < (weblog.logdatetime + INTERVAL '45' second) and " ..
- "blocklog.sourcename=weblog.sourcename and "..
- "blocklog.clientuserid=weblog.clientuserid and "..
- "weblog.uri like (blocklog.uri || '%')"
- res = assert (con:execute(sql))
- logme("importpubweblog merged " .. res .. " blocks with matching timestamps.")
-
- sql = "insert into weblog select * from blocklog where " ..
- " NOT EXISTS (select * from weblog where " ..
- "blocklog.logdatetime >= date_trunc('second', weblog.logdatetime) and " ..
- "blocklog.logdatetime < (weblog.logdatetime + INTERVAL '45' second) and " ..
- "blocklog.sourcename=weblog.sourcename and "..
- "blocklog.clientuserid=weblog.clientuserid and "..
- "weblog.uri like (blocklog.uri || '%') )"
- res = assert (con:execute(sql))
- if ( res > 0 ) then
- logme("NOTE: importpubweblog found and merged " .. res .. " unmatched blocks (source IP will not match)")
- end
-
- -- Move weblog to pubweblog
- sql= "insert into pubweblog select * from weblog"
- res = assert (con:execute(sql))
- logme("importpubweblog imported " .. res .. " new rows into database.")
-
- -- update pubblocklog table
- sql = "insert into pubblocklog select * from weblog where reason is not null"
- res = assert (con:execute(sql))
- logme("importpubweblog imported " .. res .. " new blocks into block table.")
-
- -- grab the bypass info
- sql = "insert into pubblocklog select * from weblog where uri like '%?GBYPASS%'"
- res = assert (con:execute(sql))
- logme("importpubweblog imported " .. res .. " dg bybass events into block table.")
-
- -- Truncate the staging tables
- assert (con:execute("truncate weblog"))
- assert (con:execute("truncate blocklog"))
- logme("truncated staging tables")
end
-- Delete people from the watchlist when they have expired out
@@ -344,41 +284,23 @@ end
-- Delete old junk from pub tables
local groompublogs = function()
local purgedays = config.purgedays or 30
- local watchdays = config.watchdays or 14
- local historydays = config.historydays or 14
+ --local watchdays = config.watchdays or 14
+ --local historydays = config.historydays or 14
+
local now = os.time()
local temp = os.date("%Y-%m-%d %H:%M:%S", now - purgedays*86400)
- logme("Purgedate is " .. temp .. ". Nothing will exist beyond purgedate.")
-
- -- purge anything older than purgedate
- sql = "delete from pubweblog where logdatetime < '" .. temp .."'"
- res = assert (con:execute(sql))
- logme("Purged " .. res .. " old records from pubweblog")
+ logme("Purgedate is " .. temp .. ". Nothing will exist in pubweblog beyond purgedate.")
- sql = "delete from pubblocklog where logdatetime < '" .. temp .. "'"
+ -- Move flagged records to histoy and then purge anything older than purgedate
+ sql = "Insert into pubweblog_history select * from pubweblog where logdatetime < '" .. temp .."' and (badyesno > 0 or deniedyesno > 0 or bypassyesno > 0 or selected = 'true')"
res = assert (con:execute(sql))
- logme("Purged " .. res .. " old records from pubblocklog")
-
- -- purge anything older than startddate+historydays+watchdays
- local temp = config.auditstart
- if not temp or temp == "" then temp = os.date("%Y-%m-%d %H:%M:%S") end
- logme("Purge date since last audit is " .. tostring(watchdays+historydays) .. " days before " .. temp .. ".")
-
- sql = "delete from pubweblog where logdatetime < (timestamp '"..temp.."' - INTERVAL '"..tostring(watchdays+historydays).." days')"
- res = assert (con:execute(sql))
- logme("removed " .. res .. " old pubweblog records that are older than history+watchdays")
-
- sql = "delete from pubblocklog where logdatetime < (timestamp '"..temp.."' - INTERVAL '"..tostring(watchdays+historydays).." days')"
+ logme("Moved " .. res .. " old records to history")
+
+ sql = "Delete from pubweblog where logdatetime < '" .. temp .."'"
res = assert (con:execute(sql))
- logme("removed " .. res .. " old pubblocklog records that are older than history+watchdays")
+ logme("Deleted " .. res .. " old records to from pubweblog")
- -- purge good people after historydays
- logme("The delete date for non-watchlist users is " .. tostring(historydays) .. " days before " .. temp .. ".")
-
- sql = "delete from pubweblog where logdatetime < (timestamp '".. temp.."' - INTERVAL '"..tostring(historydays).." days') and clientuserid NOT IN (select clientuserid from watchlist)"
- res = assert (con:execute(sql))
- logme("removed " .. res .. " records for users not on the watchlist.")
end
local listwatchlistentries = function()
@@ -408,11 +330,11 @@ local deletewatchlistentry = function(clientuserid)
return res
end
-local generatewhereclause = function(clientuserid, starttime, endtime, clientip)
+local generatewhereclause = function(clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, selected)
local sql = ""
local where = {}
if clientuserid and clientuserid ~= "" then
- where[#where+1] = "clientuserid = '"..escape(clientuserid).."'"
+ where[#where+1] = "clientuserid LIKE '%"..escape(clientuserid).."%'"
end
if starttime and starttime ~= "" then
where[#where+1] = "logdatetime >= '"..escape(starttime).."'"
@@ -423,25 +345,46 @@ local generatewhereclause = function(clientuserid, starttime, endtime, clientip)
if clientip and clientip ~= "" then
where[#where+1] = "clientip = '"..escape(clientip).."'"
end
+ if badyesno and badyesno ~= "" then
+ where[#where+1] = "badyesno = '"..escape(badyesno).."'"
+ end
+ if deniedyesno and deniedyesno ~= "" then
+ where[#where+1] = "deniedyesno = '"..escape(deniedyesno).."'"
+ end
+ if bypassyesno and bypassyesno ~= "" then
+ where[#where+1] = "bypassyesno = '"..escape(bypassyesno).."'"
+ end
+ if score and score ~= "" then
+ where[#where+1] = "score >= '"..escape(score).."'"
+ end
+ if urisearch and urisearch ~= "" then
+ where[#where+1] = "lower(uri) LIKE '%"..escape(urisearch).."%'"
+ end
+
+ if selected and selected == "true" then
+ where[#where+1] = "selected = 'true'"
+ end
+
if #where > 0 then
sql = " WHERE " .. table.concat(where, " AND ")
end
+
return sql
end
-local listlogentries = function(logname, clientuserid, starttime, endtime, clientip)
+local listlogentries = function(activelog, clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, sortby, selected)
local entries = {}
-- retrieve a cursor
- local sql = "SELECT * from "..logname
- sql = sql .. generatewhereclause(clientuserid, starttime, endtime, clientip)
- sql = sql .. " ORDER BY logdatetime"
+ local sql = "SELECT * FROM "..activelog
+ sql = sql .. generatewhereclause(clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, selected)
+ sql = sql .. " ORDER BY "..sortby
cur = assert (con:execute(sql))
row = cur:fetch ({}, "a")
while row do
if config.shorturi == "true" then
shorturi=string.gsub(row.uri, "[;?].*", "...")
end
- entries[#entries+1] = {sourcename=row.sourcename, clientip=row.clientip, clientuserid=row.clientuserid, logdatetime=row.logdatetime, uri=row.uri, shorturi=shorturi, bytes=row.bytes, reason=row.reason, score=row.score, shortreason=row.shortreason}
+ entries[#entries+1] = {sourcename=row.sourcename, clientip=row.clientip, clientuserid=row.clientuserid, logdatetime=row.logdatetime, uri=row.uri, shorturi=shorturi, bytes=row.bytes, reason=row.reason, score=row.score, shortreason=row.shortreason, badyesno=row.badyesno, deniedyesno=row.deniedyesno, bypassyesno=row.bypassyesno, wordloc=row.wordloc, id=row.id, selected=row.selected }
if (config.shortreason ~= "true") then
entries[#entries].shortreason = nil
end
@@ -452,31 +395,8 @@ local listlogentries = function(logname, clientuserid, starttime, endtime, clien
return entries
end
-local listpubblocklogentries = function(...)
- return listlogentries("pubblocklog", ...)
-end
-
local listpubweblogentries = function(...)
- return listlogentries("pubweblog", ...)
-end
-
-local grouppubblocklogentries = function(starttime, endtime, groupby)
- groupby = groupby or "clientuserid"
- local entries = {}
- -- retrieve a cursor
- local sql = "SELECT "..groupby..", count(*) AS numblock, max(score) AS maxscore FROM pubblocklog"
- sql = sql .. generatewhereclause(nil, starttime, endtime)
- sql = sql .. " GROUP BY "..groupby.. " ORDER BY numblock DESC"
- cur = assert (con:execute(sql))
- row = cur:fetch ({}, "a")
- while row do
- entries[#entries+1] = {numblock=row.numblock, maxscore=row.maxscore}
- entries[#entries][groupby] = row[groupby]
- row = cur:fetch (row, "a")
- end
- -- close everything
- cur:close()
- return entries
+ return listlogentries(...)
end
local listusagestats = function()
@@ -555,47 +475,227 @@ local printtableentries = function(tablename)
end
-- ################################################################################
+
-- LOG FILE FUNCTIONS
local function parsesquidlog(line)
-- Format of squid log (space separated):
-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
local words = {}
+
for word in string.gmatch(line, "%S+") do
words[#words+1] = word
end
+
+ local goodwordloc=""
+ local badwordloc=""
+ -- logme("value of word7 is "..words[7])
+ local wrdcnt=0
+ local isbad=0
+ local isdenied=0
+ local isbypass=0
+ local ignoreme=false
+
+ --check for ignored records first
+ for thisline in io.lines("/etc/weblog/ignorewords") do
+ if not thisline then
+ break
+ end
+ _,instcnt = string.lower(words[7]):gsub(format.escapespecialcharacters(thisline), " ")
+ --if string.find(words[7],thisline) ~= nil then
+ if instcnt ~= 0 then
+ ignoreme = true
+ break
+ end
+ end
+
+ if ignoreme ~= true then
+ --proceed with record analysis, badwords first
+ for thisline in io.lines("/etc/weblog/badwords") do
+ if not thisline then
+ break
+ end
+
+ _,instcnt = string.lower(words[7]):gsub(format.escapespecialcharacters(thisline), " ")
+ if instcnt ~= 0 then
+ isbad=1
+ wrdcnt= wrdcnt + instcnt
+ if badwordloc ~= "" then
+ badwordloc = badwordloc.."|"..thisline
+ else
+ badwordloc=thisline
+ end
+
+ end
+
+ if (words[11] and words[11]~= nil and string.find(words[11],"," )) then
+ --logme("squid says "..words[11])
+ prxarray = split(words[11],",")
+ for r,s in pairs(prxarray) do
+ if string.find(s,"blocked") then
+ isdenied=1
+ elseif string.find(s,"overridden") then
+ isbypass=1
+ end
+ end
+ end
+ end
+
+ --and now a good words search in mitigation of severity
+ for goodline in io.lines("/etc/weblog/goodwords") do
+ if not goodline then
+ break
+ end
+ _,instcnt = string.lower(words[7]):gsub(format.escapespecialcharacters(goodline), " ")
+ --if string.find(words[7],goodline) then
+ if instcnt ~= 0 then
+ if wrdcnt ~= 0 then
+ wrdcnt = wrdcnt - instcnt
+ if goodwordloc ~= "" then
+ goodwordloc = goodwordloc.."|"..goodline
+ else
+ goodwordloc = goodline
+ end
+ end
+ end
+ end
+ end
+
+ if (r and r~=nil) then
+ reason=r
+ else
+ reason=words[6]
+ end
local logentry = {logdatetime=words[1],
elapsed=words[2],
clientip=words[3],
code=string.match(words[4] or "", "^[^/]*"),
status=string.match(words[4] or "", "[^/]*$"),
bytes=words[5],
- method=words[6],
+ method=reason,
URL=words[7],
clientuserid=words[8],
peerstatus=string.match(words[9] or "", "^[^/]*"),
- peerhost=string.match(words[9] or "", "[^/]*$")}
+ peerhost=string.match(words[9] or "", "[^/]*$"),
+ score=wrdcnt,
+ badyesno=isbad,
+ deniedyesno=isdenied,
+ bypassyesno=isbypass,
+ wordloc=badwordloc,
+ gwordloc=goodwordloc}
-- Don't care about local requests (from DG) (this check also removes blank lines)
if logentry.clientip and logentry.clientip ~= "127.0.0.1" then
+ if logentry.clientuserid and logentry.clientuserid ~= "-" then
logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*")
return logentry
+ end
end
return nil
end
local function parsedglog(line)
- local words = format.string_to_table(line, "\t")
- local logentry = { logdatetime=words[1], clientuserid=words[2], clientip=words[3],
- URL=words[4], reason=words[5], method=words[6], bytes=words[7],
- shortreason=words[9]}
- if logentry.reason and logentry.reason ~= "" then
- if logentry.shortreason == "" then
- logentry.shortreason = logentry.reason
+ -- Format of squid log (space separated):
+ -- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost
+ local words = {}
+
+ for word in string.gmatch(line, "%S+") do
+ words[#words+1] = word
+ end
+
+ local goodwordloc=""
+ local badwordloc=""
+ -- logme("value of word4 is "..words[4])
+ local wrdcnt=0
+ local isbad=0
+ local isdenied=0
+ local isbypass=0
+ local ignoreme=false
+
+ --check for ignored records first
+ for thisline in io.lines("/etc/weblog/ignorewords") do
+ if not thisline then
+ break
end
- logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ")
- logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-")
- return logentry
+ _,instcnt = string.lower(words[4]):gsub(format.escapespecialcharacters(thisline), " ")
+ if instcnt ~= 0 then
+ ignoreme = true
+ break
+ end
+ end
+
+ if ignoreme ~= true then
+ for thisline in io.lines("/etc/weblog/badwords") do
+ if not thisline then
+ -- logme("This line is apparently empty...")
+ break
+ end
+
+ _,instcnt = string.lower(words[4]):gsub(thisline, " ")
+ --if string.find(words[4],thisline) ~= nil then
+ if instcnt ~= 0 then
+ if wrdcnt ~= 0 then
+ isbad=1
+ wrdcnt= wrdcnt + instcnt
+ if badwordloc ~= "" then
+ badwordloc = badwordloc.."|"..thisline
+ else
+ badwordloc=thisline
+ end
+
+ -- logme("bad "..badwordloc)
+ end
+
+ if string.find(words[5],"*DENIED*") then
+ isdenied=1
+ elseif string.find(words[5],"GBYPASS") then
+ isdenied=1
+ elseif string.find(words[5],"*OVERRIDE*") then
+ isbypass=1
+ end
+ end
+ for goodline in io.lines("/etc/weblog/goodwords") do
+ if not goodline then
+ -- logme("This line is apparently empty...")
+ break
+ end
+ _,instcnt = string.lower(words[4]):gsub(goodline, " ")
+ --if string.find(words[4],goodline) then
+
+ if instcnt ~= 0 then
+ wrdcnt = wrdcnt - instcnt
+ if goodwordloc ~= "" then
+ goodwordloc = goodwordloc.."|"..goodline
+ else
+ goodwordloc = goodline
+ end
+ end
+ end
+ end
+
+ end
+
+ local words = format.string_to_table(line, "\t")
+ local logentry = {logdatetime=words[1],
+ clientuserid=words[2],
+ clientip=words[3],
+ URL=words[4],
+ reason=words[5],
+ method=words[6],
+ bytes=words[7],
+ shortreason=words[9],
+ score=wrdcnt,
+ badyesno=isbad,
+ deniedyesno=isdenied,
+ bypassyesno=isbypass,
+ wordloc=badwordloc,
+ gwordloc=goodwordloc}
+
+ if logentry.reason and logentry.reason ~= "" then
+ if logentry.shortreason == "" then
+ logentry.shortreason = logentry.reason
+ end
+ return logentry
end
return nil
end
@@ -860,9 +960,9 @@ end
-- import either squid or dg log file.
-- delete logfile after
local function importlogfile(source, cookiesfile, file, parselog_func, importlog_func)
- logme("Getting " .. file )
+ --logme("Getting " .. file )
local loghandle = openlogfile(source, cookiesfile, file)
- logme("Processing " .. file )
+ --logme("Processing " .. file )
local res, err = pcall(function()
con:execute("START TRANSACTION")
for line in loghandle:lines() do
@@ -880,7 +980,7 @@ local function importlogfile(source, cookiesfile, file, parselog_func, importlog
end
pcall(function() logme("Exception on line:"..line) end)
if err2 then
- pcall(function() logme(err2) end)
+ pcall(function() logme("err2 "..err2) end)
end
if (config.stoponerror == "true") then
assert(res2, "Import halted on exception")
@@ -914,10 +1014,16 @@ function importlogs()
local res, err = pcall(function()
databaseconnect(DatabaseOwner, config.password)
- -- Determine sources
- local sources = listsourceentries(sourcename)
-- Download, parse, and import the logs
+ logme("Executing importlogs")
+ logme("Analyzing...")
+ local sql = "ANALYZE"
+ res = assert (con:execute(sql))
+
+ -- Determine sources
+ local sources = listsourceentries(sourcename)
+
for i,source in ipairs(sources) do
if source.enabled then
logme("Getting logs from source " .. source.sourcename)
@@ -928,7 +1034,8 @@ function importlogs()
if string.match(file, "dansguardian/access%.log[%.%-]") then
count = count + 1
success = importlogfile(source, cookiesfile, file, parsedglog, importdglog) and success
- elseif string.match(file, "squid/access%.log[%.%-]") then
+ end
+ if string.match(file, "squid/access%.log[%.%-]") then
count = count + 1
success = importlogfile(source, cookiesfile, file, parsesquidlog, importsquidlog) and success
end
@@ -942,7 +1049,6 @@ function importlogs()
if success then
addtowatchlist()
updateusagestats()
- importpubweblog()
groomwatchlist()
end
-- Purge old database entries
@@ -1112,55 +1218,116 @@ local validateparameters = function(params)
return success
end
-local handleparameters = function(clientuserid, starttime, endtime, clientip, focus)
+local handleparameters = function(activelog, clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, sortby, selected, focus)
local result = {}
+ result.activelog = cfe({ value=activelog or "pubweblog", label="Active Weblog" })
result.clientuserid = cfe({ value=clientuserid or "", label="User ID" })
result.starttime = cfe({ value=starttime or "", label="Start Time" })
result.endtime = cfe({ value=endtime or "", label="End Time" })
result.clientip = cfe({ value=clientip or "", label="Client IP" })
+ result.badyesno = cfe({ value=badyesno, label="Show Dodgy Records", descr="Limit search to Dodgy records"})
+ result.deniedyesno = cfe({ value=deniedyesno, label="Show Denied Records", descr="Limit search to Denied uri"})
+ result.bypassyesno = cfe({ value=bypassyesno, label="Show Bypass Actions", descr="Limit search to Bypass attempts"})
+ result.score = cfe({ value=score, label="Minimum Score", descr="Minimum score to search on"})
+ result.urisearch = cfe({ value=urisearch or "", label="URI Contains", descr="Retrieve records where the URI contains this word"})
+ result.sortby = cfe({ value=sortby, label="Sort By field", descr="Sort by this field when displaying records"})
+ result.selected = cfe({ value=selected, label="Show Selected Records", descr="Show only records that have been selected"})
result.window = cfe({ value=config.window or "5", label="Time Window" })
result.focus = cfe({ value=focus or "", label="Focus Time" })
return result
end
-function getweblog(clientuserid, starttime, endtime, clientip, focus)
+function getselected(csvdata)
+ local result = {}
+ result.id = cfe({ value=csvdata or "", label="Record ID", descr="Id of Record"})
+ result.log = cfe({ type="list", value={}, label="Weblog Access Log" })
+ local res, err = pcall(function()
+ databaseconnect(DatabaseUser)
+ local entries = {}
+ -- retrieve a cursor
+ local sql = "SELECT * FROM pubweblog where selected = true and logdatetime >= '" .. starttime .. "' and logdatetime <= '" .. endtime .. "'"
+ local idcnt = 0
+ sql = sql .. " ORDER BY logdatetime;"
+ cur = assert (con:execute(sql))
+ row = cur:fetch ({}, "a")
+ while row do
+ entries[#entries+1] = {sourcename=row.sourcename, clientip=row.clientip, clientuserid=row.clientuserid, logdatetime=row.logdatetime, uri=row.uri, shorturi=shorturi, bytes=row.bytes, reason=row.reason, core=row.score, shortreason=row.shortreason, badyesno=row.badyesno, deniedyesno=row.deniedyesno, bypassyesno=row.bypassyesno, wordloc=row.wordloc, selected=row.selected }
+ row = cur:fetch (row, "a")
+ end
+ -- close everything
+ cur:close()
+ result.log.value = entries or {}
+ --result.log.value = sql
+ databasedisconnect()
+ end)
+ return cfe({ type="group", value=result, errtxt=err, label="Weblog Selected" })
+end
+
+function editselected(chkdata)
+ keycnt = 0
+ sql = "UPDATE pubweblog SET selected = "
+ databaseconnect(DatabaseOwner)
+ idarray = split(chkdata,"|")
+ for key,x in pairs(idarray) do
+ keycnt = keycnt + 1
+ if keycnt == 1 then
+ sql = sql..x.." WHERE id = "
+ else
+ sql = sql..x
+ end
+ end
+ assert (con:execute(sql))
+ databasedisconnect()
+end
+
+function clearselected()
+ local retval = cfe({ label="Clear selected records", errtxt = "Failed to clear selected records - who knows why" })
+ sql = "UPDATE pubweblog SET selected = false WHERE selected = true"
+ databaseconnect(DatabaseOwner)
+ assert (con:execute(sql))
+ databasedisconnect()
+ retval.value = "Cleared"
+ retval.errtxt = nil
+ return retval
+end
+
+function getweblog(activelog, clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, sortby, selected, focus )
+
+ if (not activelog or activelog=="") then
+ activelog = "pubweblog"
+ end
+
if (not starttime or starttime=="") and (not endtime or endtime=="") and config.auditstart~="" and config.auditend~="" then
- starttime = config.auditstart
- endtime = config.auditend
+
+ starttime = config.auditstart
+ endtime = config.auditend
+
+ if config.badyesno=="true" then
+ badyesno = '1'
+ end
end
- local result = handleparameters(clientuserid, starttime, endtime, clientip, focus)
+ if (not score or score=="") and config.minimumscore~="" then
+ score = config.minimumscore
+ end
+
+ if (not sortby or sortby=="") and config.sortby~="" then
+ sortby = config.sortby
+ end
+
+ local result = handleparameters(activelog, clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, sortby, selected, focus)
result.log = cfe({ type="list", value={}, label="Weblog Access Log" })
local success = validateparameters(result)
if success then
local res, err = pcall(function()
databaseconnect(DatabaseUser)
- result.log.value = listpubweblogentries(clientuserid, starttime, endtime, clientip) or {}
+ result.log.value = listpubweblogentries(activelog, clientuserid, starttime, endtime, clientip, badyesno, deniedyesno, bypassyesno, score, urisearch, sortby, selected ) or {}
databasedisconnect()
end)
else
err = "Invalid search parameters"
end
return cfe({ type="group", value=result, errtxt=err, label="Weblog Access Log" })
-end
-function getblocklog(clientuserid, starttime, endtime, clientip, focus)
- if (not starttime or starttime=="") and (not endtime or endtime=="") and config.auditstart~="" and config.auditend~="" then
- starttime = config.auditstart
- endtime = config.auditend
- end
- local result = handleparameters(clientuserid, starttime, endtime, clientip, focus)
- result.log = cfe({ type="list", value={}, label="Weblog Block Log" })
- local success = validateparameters(result)
- if success then
- local res, err = pcall(function()
- databaseconnect(DatabaseUser)
- result.log.value = listpubblocklogentries(clientuserid, starttime, endtime, clientip) or {}
- databasedisconnect()
- end)
- else
- err = "Invalid search parameters"
- end
- return cfe({ type="group", value=result, errtxt=err, label="Weblog Block Log" })
end
function getusagestats()
@@ -1177,53 +1344,26 @@ function getusagestats()
return retval
end
-function getauditstats()
- local result = {}
- result.auditstart = cfe({ value=config.auditstart or "", label="Audit Start Time" })
- result.auditend = cfe({ value=config.auditend or "", label="Audit End Time" })
- result.groupby = cfe({ value=config.groupby or "clientuserid", label="Group By" })
- result.stats = cfe({ type="list", value={}, label="Audit Block Statistics" })
- local res, err = pcall(function()
- if config.auditstart ~= "" and config.auditend ~= "" then
- databaseconnect(DatabaseUser)
- result.stats.value = grouppubblocklogentries(config.auditstart, config.auditend, result.groupby.value) or {}
- databasedisconnect()
- end
- end)
- return cfe({ type="group", value=result, errtxt=err, label="Weblog Audit Statistics" })
-end
-
-function completeaudit(timestamp)
- local conf = getconfig()
- conf.value.auditstart.value = conf.value.auditend.value
- local now = os.time()
- conf.value.auditend.value = timestamp or os.date("%Y-%m-%d %H:%M:%S", now - now%86400 - 86400)
- conf = updateconfig(conf)
- local retval = cfe({ value="Audit completed", label="Complete Audit Result" })
- if conf.errtxt then
- retval.value = ""
- retval.errtxt = "Failed to complete audit\n"..conf.errtxt.."\n"..conf.value.auditend.errtxt
- end
- return retval
-end
+--- Build the "Weblog Config" group cfe from the parsed `config` table.
+-- Text settings fall back to a built-in default when the key is absent.
+-- Boolean settings are written by updateconfig with tostring(), i.e. as the
+-- text "true" or "false", so each boolean is decoded by comparing to "true".
+-- @return group cfe whose value holds one cfe per setting
 function getconfig()
     local result = {}
     result.auditstart = cfe({ value=config.auditstart or "", label="Audit Start Time" })
     result.auditend = cfe({ value=config.auditend or "", label="Audit End Time" })
+    -- Compare against "true" (not "1"): that is the text updateconfig stores,
+    -- otherwise a saved setting would always be shown as disabled.
+    result.badyesno = cfe({ type="boolean", value=(config.badyesno == "true"), label="Display Suspect Records", descr="Show only records flagged as suspect on initial display" })
+    result.minimumscore = cfe({ value=config.minimumscore or "0", label="Minimum Score", descr="Minimum Score to search for" })
+    result.sortby = cfe({ type="select", value=config.sortby or "logdatetime", label="Sort By field", option={"logdatetime", "logdatetime DESC", "clientuserid", "clientuserid DESC", "clientip", "clientip DESC", "bytes", "bytes DESC", "score", "score DESC", "reason"} })
     result.window = cfe({ value=config.window or "5", label="Time Window", descr="Minutes of activity to display before and after selected block" })
     result.watchdays = cfe({ value=config.watchdays or "14", label="Days to Watch", descr="Number of additional days to keep history for users in watchlist" })
     result.purgedays = cfe({ value=config.purgedays or "30", label="Days before Purge", descr="Days to keep history, regardless of audit" })
     result.historydays = cfe({ value=config.historydays or "14", label="Days to keep History", descr="Days beyond Audit Start Time to keep complete log history" })
-    result.groupby = cfe({ type="select", value=config.groupby or "clientuserid", label="Group results by", option={"clientuserid", "clientip"} })
     result.shorturi = cfe({ type="boolean", value=(config.shorturi == "true"), label="Truncate URLs", descr="You can limit the length of displayed URLs by enabling this option"})
-    result.shortreason = cfe({ type="boolean", value=(config.shortreason == "true"), label="Short Reason", descr="Display a short reason (without objectional words)"})
+    result.shortreason = cfe({ type="boolean", value=(config.shortreason == "true"), label="Short Reason", descr="Display a short reason (dansguardian only)"})
     result.stoponerror = cfe({ type="boolean", value=(config.stoponerror == "true"), label="Stop on Error", descr="Stop import of logs if an error is encountered"})
     return cfe({ type="group", value=result, label="Weblog Config" })
 end
local function validateconfig(newconfig)
- local success = modelfunctions.validateselect(newconfig.value.groupby)
+ local success = modelfunctions.validateselect(newconfig.value.sortby)
if newconfig.value.window.value == "" then
newconfig.value.window.errtxt = "Cannot be blank"
success = false
@@ -1273,11 +1413,14 @@ function updateconfig(newconfig)
configcontent = format.update_ini_file(configcontent, "", "watchdays", newconfig.value.watchdays.value)
configcontent = format.update_ini_file(configcontent, "", "purgedays", newconfig.value.purgedays.value)
configcontent = format.update_ini_file(configcontent, "", "historydays", newconfig.value.historydays.value)
- configcontent = format.update_ini_file(configcontent, "", "groupby", newconfig.value.groupby.value)
+ --configcontent = format.update_ini_file(configcontent, "", "groupby", newconfig.value.groupby.value)
configcontent = format.update_ini_file(configcontent, "", "shorturi", tostring(newconfig.value.shorturi.value))
configcontent = format.update_ini_file(configcontent, "", "shortreason", tostring(newconfig.value.shortreason.value))
configcontent = format.update_ini_file(configcontent, "", "stoponerror", tostring(newconfig.value.stoponerror.value))
-
+ configcontent = format.update_ini_file(configcontent, "", "badyesno", tostring(newconfig.value.badyesno.value))
+ configcontent = format.update_ini_file(configcontent, "", "minimumscore", tostring(newconfig.value.minimumscore.value))
+ configcontent = format.update_ini_file(configcontent, "", "sortby", tostring(newconfig.value.sortby.value))
+
fs.write_file(configfile, configcontent)
config = format.parse_ini_file(configcontent, "") or {}
else
@@ -1414,3 +1557,68 @@ function create_database(database)
return database
end
+
+--- List the regular files found directly inside `baseurl`.
+-- The directory is created first when it does not exist yet.
+-- @return list cfe whose value is a sorted array of paths (baseurl .. name)
+function listfiles()
+    local retval = cfe({ type="list", value={}, label="Weblog Files" })
+    local found = retval.value
+
+    if not fs.is_dir(baseurl) then
+        fs.create_directory(baseurl)
+    end
+
+    for entry in posix.files(baseurl) do
+        local fullpath = baseurl..entry
+        -- keep regular files only; anything fs.is_file rejects is skipped
+        if fs.is_file(fullpath) then
+            found[#found + 1] = fullpath
+        end
+    end
+
+    table.sort(found)
+    return retval
+end
+
+--- Build the empty form used to name a new weblog file.
+-- @return group cfe holding a single `filename` cfe (no value set)
+function getnewfile()
+    local fields = {
+        filename = cfe({ label="File Name", descr="Must be in "..baseurl }),
+    }
+    return cfe({ type="group", value=fields, label="Weblog File" })
+end
+
+--- Fetch the details of one weblog file.
+-- Delegates to modelfunctions.getfiledetails, handing it the current
+-- listfiles() result as second argument (presumably the set of files that
+-- may be read - confirm against modelfunctions).
+-- @param filename path of the file to read
+-- @return whatever modelfunctions.getfiledetails returns
+function readfile(filename)
+    local knownfiles = listfiles().value
+    return modelfunctions.getfiledetails(filename, knownfiles)
+end
+
+--- Store new details/content for a weblog file.
+-- Delegates to modelfunctions.setfiledetails, handing it the current
+-- listfiles() result as second argument (presumably the set of files that
+-- may be written - confirm against modelfunctions).
+-- @param filedetails cfe structure describing the file to update
+-- @return whatever modelfunctions.setfiledetails returns
+function updatefile(filedetails)
+    local knownfiles = listfiles().value
+    return modelfunctions.setfiledetails(filedetails, knownfiles)
+end
+
+--- Delete one of the files reported by listfiles().
+-- Only an exact match against the listfiles() result is removed; any other
+-- name leaves the "invalid filename" error in place.
+-- @param filename full path of the file to delete
+-- @return cfe whose value is "Deleted File" on success, otherwise errtxt is set
+function deletefile(filename)
+    local retval = cfe({ label="Delete Weblog File result", errtxt = "Failed to delete Weblog File - invalid filename" })
+    for _, file in ipairs(listfiles().value) do
+        if filename == file then
+            -- os.remove returns nil plus a message on failure; only report
+            -- success when the file was really removed.
+            local ok, err = os.remove(filename)
+            if ok then
+                retval.value = "Deleted File"
+                retval.errtxt = nil
+            else
+                retval.errtxt = "Failed to delete Weblog File - "..tostring(err)
+            end
+            break
+        end
+    end
+
+    return retval
+end
+
+--- Split a string into an array of pieces separated by a Lua pattern.
+-- A leading empty piece (string starts with the delimiter) is dropped;
+-- empty pieces between consecutive delimiters are kept.
+-- NOTE: when `pat` does not occur in `str` at all, the original string itself
+-- is returned (not a table); this is kept for backward compatibility.
+-- @param str string to split
+-- @param pat Lua pattern describing the delimiter (magic characters apply)
+-- @return array of pieces, or `str` unchanged when `pat` is not found
+function split(str, pat)
+    if string.find(str, pat) == nil then
+        return str
+    end
+    local t = {}
+    local fpat = "(.-)" .. pat
+    local last_end = 1
+    local s, e, cap = str:find(fpat, 1)
+    while s do
+        -- A delimiter pattern that can match the empty string yields a
+        -- zero-length match (e < s) that would never advance last_end and
+        -- loop forever; stop and treat the rest of the string as one piece.
+        if e < s then
+            break
+        end
+        if s ~= 1 or cap ~= "" then
+            table.insert(t, cap)
+        end
+        last_end = e + 1
+        s, e, cap = str:find(fpat, last_end)
+    end
+    -- whatever follows the last delimiter is the final piece
+    if last_end <= #str then
+        cap = str:sub(last_end)
+        table.insert(t, cap)
+    end
+    return t
+end
+
+