diff options
| -rw-r--r-- | weblog-model.lua | 449 | 
1 files changed, 79 insertions, 370 deletions
diff --git a/weblog-model.lua b/weblog-model.lua index a23a007..5fe4022 100644 --- a/weblog-model.lua +++ b/weblog-model.lua @@ -504,91 +504,98 @@ end  -- ################################################################################  -- LOG FILE FUNCTIONS -local function parsesquidlog(line) -	-- Format of squid log (space separated): -	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost -	local words = {} -	 -	for word in string.gmatch(line, "%S+") do -		words[#words+1] = word -	end -	 +local function checkwords(logentry)  	local goodwordloc=""  	local badwordloc="" -	-- logme("value of word7 is "..words[7])  	local wrdcnt=0  	local isbad=0  	local isdenied=0  	local isbypass=0  	local ignoreme=false -	 +  	--check for ignored records first  	for i,thisline in ipairs(ignorewords) do  		if not thisline then  			break  		end -		_,instcnt = string.lower(words[7]):gsub(thisline, " ") +		_,instcnt = string.lower(logentry.URL):gsub(thisline, " ")  		if instcnt ~= 0 then -		   ignoreme = true -		   break +			ignoreme = true +			break  		end  	end  	if ignoreme ~= true then -	 --proceed with record analysis -	  for i,thisline in ipairs(badwords) do -		if not thisline then -			break -		end -		 -		_,instcnt = string.lower(words[7]):gsub(thisline, " ")   -		--if string.find(words[7],thisline) ~= nil then -		if instcnt ~= 0 then -		 -- logme("instcnt =  "..instcnt) -		  isbad=1 -		  wrdcnt= wrdcnt + instcnt  -		  if badwordloc ~= "" then  -		    badwordloc = badwordloc.."|"..thisline -		  else -		    badwordloc=thisline -		  end -		  -	        end -		 -		if string.find(words[7],"*DENIED*") then ---		   logme("*Denied*") -		   isdenied=1 -		elseif string.find(words[7],"GBYPASS") then ---		   logme("GBYPASS") -		   isbypass=1 -		elseif string.find(words[7],"*OVERRIDE*") then ---		   logme("*OVERRIDE*") -		   isbypass=1 -		end -	  end -	  for i,goodline in ipairs(goodwords) do -		if not goodline then -		  break +		--proceed with record analysis +		for i,thisline in ipairs(badwords) do +			if not thisline then +				break +			end + +			_,instcnt = string.lower(logentry.URL):gsub(thisline, " ") +			--if string.find(logentry.URL,thisline) ~= nil then +			if instcnt ~= 0 then +				-- logme("instcnt =  "..instcnt) +				isbad=1 +				wrdcnt= wrdcnt + instcnt +				if badwordloc ~= "" then +					badwordloc = badwordloc.."|"..thisline +				else +					badwordloc=thisline +				end +			end + +			if string.find(logentry.URL,"*DENIED*") then +				-- logme("*Denied*") +				isdenied=1 +			elseif string.find(logentry.URL,"GBYPASS") then +				-- logme("GBYPASS") +				isbypass=1 +			elseif string.find(logentry.URL,"*OVERRIDE*") then +				-- logme("*OVERRIDE*") +				isbypass=1 +			end  		end -		_,instcnt = string.lower(words[7]):gsub(goodline, " ") -		--if string.find(words[7],goodline) then -		if instcnt ~= 0 then -		  if wrdcnt ~= 0 then -		     wrdcnt = wrdcnt - instcnt -		     if goodwordloc ~= "" then -		        goodwordloc = goodwordloc.."|"..goodline -		     else  -		        goodwordloc = goodline -		     end -		  end +		for i,goodline in ipairs(goodwords) do +			if not goodline then +				break +			end +			_,instcnt = string.lower(logentry.URL):gsub(goodline, " ") +			--if string.find(logentry.URL,goodline) then +			if instcnt ~= 0 then +				if wrdcnt ~= 0 then +					wrdcnt = wrdcnt - instcnt +					if goodwordloc ~= "" then +						goodwordloc = goodwordloc.."|"..goodline +					else +						goodwordloc = goodline +					end +				end +			end  		end -	  end	  	end  	-- Reset bad to reduce number of bad hits if score is zero  	-- if wrdcnt == 0 then  	-- isbad=0  	-- end -		 + +	logentry.score=wrdcnt +	logentry.badyesno=isbad +	logentry.deniedyesno=isdenied +	logentry.bypassyesno=isbypass +	logentry.wordloc=badwordloc +	logentry.gwordloc=goodwordloc +end + +local function parsesquidlog(line) +	-- Format of squid log (space separated): +	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost +	local words = {} + +	for word in string.gmatch(line, "%S+") do +		words[#words+1] = word +	end +  	local logentry = {logdatetime=words[1],  		elapsed=words[2],  		clientip=words[3], @@ -599,209 +606,22 @@ local function parsesquidlog(line)  		URL=words[7],  		clientuserid=words[8],  		peerstatus=string.match(words[9] or "", "^[^/]*"), -		peerhost=string.match(words[9] or "", "[^/]*$"), -		score=wrdcnt, -		badyesno=isbad, -		deniedyesno=isdenied, -		bypassyesno=isbypass,		 -		wordloc=badwordloc, -		gwordloc=goodwordloc}	 +		peerhost=string.match(words[9] or "", "[^/]*$")} + +	checkwords(logentry)  	-- Don't care about local requests (from DG) (this check also removes blank lines)  	if logentry.clientip and logentry.clientip ~= "127.0.0.1" then -	   if logentry.clientuserid and logentry.clientuserid ~= "-" then -		logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*") -		return logentry -           end -	end -	return nil -end - -local function parsesquarklog(line) -	-- Format of squid log (space separated): -	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost -	local words = {} -	 -	for word in string.gmatch(line, "%S+") do -		words[#words+1] = word -	end - -	local goodwordloc="" -	local badwordloc="" -	local wrdcnt=0 -	local isbad=0 -	local isdenied=0 -	local isbypass=0 -	local ignoreme=false -	 -	--check for ignored records first -	for i,thisline in ipairs(ignorewords) do -		if not thisline then -			break -		end -		_,instcnt = string.lower(words[5]):gsub(thisline, " ") -		if instcnt ~= 0 then -		   ignoreme = true -		   break -		end -	end - - 	if ignoreme ~= true then	 -	  for i,thisline in ipairs(badwords) do -		if not thisline then -			-- logme("This line is apparently empty...") -			break -		end -		 -		_,instcnt = string.lower(words[5]):gsub(thisline, " ") -	--	if string.find(words[5],thisline) ~= nil then -		--logme("checking "..thisline.." against "..words[5]) -		if instcnt ~= 0 then -		  isbad=1 -		  wrdcnt = wrdcnt + instcnt -		if badwordloc ~= "" then  -		    badwordloc = badwordloc.."|"..thisline -		else -		    badwordloc=thisline -		end -		  -		  -- logme("bad "..badwordloc) -	        end -	 -		if string.find(words[6],"*DENIED*") then	 -		   isdenied=1 -		end -		if string.find(words[6],"*OVERRIDE*") then -		   isbypass=1 +		if logentry.clientuserid and logentry.clientuserid ~= "-" then +			logentry.logdatetime = os.date("%Y-%m-%d %H:%M:%S", logentry.logdatetime)..string.match(logentry.logdatetime, "%..*") +			return logentry  		end  	end -	for i,goodline in ipairs(goodwords) do -		if not goodline then -		  -- logme("This line is apparently empty...") -		  break -		end -		_,instcnt = string.lower(words[5]):gsub(goodline, " ") -		--if string.find(words[4],goodline) then -		  if instcnt ~= 0 then -		   if wrdcnt ~= 0 then -		     wrdcnt = wrdcnt - instcnt -		     if goodwordloc ~= "" then -		        goodwordloc = goodwordloc.."|"..goodline -		     else  -		        goodwordloc = goodline -		     end -		  end -		end -	  end -	end -	 -	local words = format.string_to_table(line, "\t")	 -	local logentry = {logdatetime=words[1], -			clientuserid=words[2], -			clientip=words[3], -			URL=words[4], -			reason=words[5], -			method=words[6], -			bytes=words[7], -			shortreason=words[9], -			score=wrdcnt, -			badyesno=isbad, -			deniedyesno=isdenied, -			bypassyesno=isbypass,		 -			wordloc=badwordloc, -			gwordloc=goodwordloc}	 - -	if logentry.reason and logentry.reason ~= "" then -           	if logentry.shortreason == "" then -           		logentry.shortreason = logentry.reason -           	end -	   	return logentry -	end  	return nil  end  local function parsedglog(line) -	-- Format of squid log (space separated): -	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost -	local words = {} -	 -	for word in string.gmatch(line, "%S+") do -		words[#words+1] = word -	end - -	local goodwordloc="" -	local badwordloc="" -	-- logme("value of word4 is "..words[4]) -	local wrdcnt=0 -	local isbad=0 -	local isdenied=0 -	local isbypass=0 -	local ignoreme=false - -	--check for ignored records first -	for i,thisline in ipairs(ignorewords) do -		if not thisline then -			break -		end -		_,instcnt = string.lower(words[4]):gsub(thisline, " ") -		if instcnt ~= 0 then -		   ignoreme = true -		   break -		end -	end - -	if ignoreme ~= true then -	  for i,thisline in ipairs(badwords) do -		if not thisline then -			-- logme("This line is apparently empty...") -			break -		end -		 -		_,instcnt = string.lower(words[4]):gsub(thisline, " ") -		--if string.find(words[4],thisline) ~= nil then -		  if instcnt ~= 0 then -		   if wrdcnt ~= 0 then -		     isbad=1 -		     wrdcnt= wrdcnt + instcnt -		     if badwordloc ~= "" then  -		       badwordloc = badwordloc.."|"..thisline -		     else -		       badwordloc=thisline -		     end -		  -		  -- logme("bad "..badwordloc) -	        end -		 -		if string.find(words[5],"*DENIED*") then -		   isdenied=1 -		elseif string.find(words[5],"GBYPASS") then -		   isdenied=1  -		elseif string.find(words[5],"*OVERRIDE*") then -		   isbypass=1 -		end -	  end -	  for i,goodline in ipairs(goodwords) do -		if not goodline then -		  -- logme("This line is apparently empty...") -		  break -		end -		_,instcnt = string.lower(words[4]):gsub(goodline, " ") -		--if string.find(words[4],goodline) then -		   -		  if instcnt ~= 0 then -		     wrdcnt = wrdcnt - instcnt -		     if goodwordloc ~= "" then -		        goodwordloc = goodwordloc.."|"..goodline -		     else  -		        goodwordloc = goodline -		     end -		  end -		end -	  end	 -	 -	end -	 -	local words = format.string_to_table(line, "\t")	 +	local words = format.string_to_table(line, "\t")  	local logentry = {logdatetime=words[1],  			clientuserid=words[2],  			clientip=words[3], @@ -809,124 +629,21 @@ local function parsedglog(line)  			reason=words[5],  			method=words[6],  			bytes=words[7], -			shortreason=words[9], -			score=wrdcnt, -			badyesno=isbad, -			deniedyesno=isdenied, -			bypassyesno=isbypass,		 -			wordloc=badwordloc, -			gwordloc=goodwordloc}	 - -	if logentry.reason and logentry.reason ~= "" then -           	if logentry.shortreason == "" then -           		logentry.shortreason = logentry.reason -           	end -	   	return logentry -	end -	return nil -end +			shortreason=words[9]} -local function parsedumplog(line) -	-- Format of squid log (space separated): -	-- time elapsed remotehost code/status bytes method URL rfc931 peerstatus/peerhost -	local words = {} -	 -	for word in string.gmatch(line, "%S+") do -		words[#words+1] = word -	end -	goodwordloc="" -	badwordloc="" -	wrdcnt=0 -	isbad=0 -	isdenied=0 -	isbypass=0 -	for i,thisline in ipairs(badwords) do -		if not thisline then -			 logme("This line is apparently empty...") -			break -		end -		_,instcnt = string.lower(words[5]):gsub(thisline, " ") -		if instcnt ~= 0 then -		  isbad=1 -		  wrdcnt = wrdcnt + instcnt -		  if badwordloc ~= "" then  -		    badwordloc = badwordloc.."|"..thisline -		  else -		    badwordloc=thisline -		  end -		  -		   -- logme("bad "..badwordloc) -	        end -		if string.find(words[6],"*DENIED*") then	 -		   isdenied=1 -		end -		if string.find(words[5],"GBYPASS") then -		   isbypass=1 -		elseif string.find(words[6],"*OVERRIDE*") then -		   isbypass=1 -		end -	end -	for i,goodline in ipairs(goodwords) do -		if not goodline then -		  -- logme("This line is apparently empty...") -		  break -		end -		_,instcnt = string.lower(words[4]):gsub(goodline, " ") -		--if string.find(words[4],goodline) then -		  if instcnt ~= 0 then -		   if wrdcnt ~= 0 then -		     wrdcnt = wrdcnt - instcnt -		     if goodwordloc ~= "" then -		        goodwordloc = goodwordloc.."|"..goodline -		     else  -		        goodwordloc = goodline -		     end -		  end -		end -	end	 -	 -	local words = format.string_to_table(line, "\t")	 -	local logentry = {logdatetime=words[1], -			clientuserid=words[2], -			clientip=words[3], -			URL=words[4], -			reason=words[6], -			method=words[5], -			bytes=words[7], -			shortreason=words[9], -			score=wrdcnt, -			badyesno=isbad, -			deniedyesno=isdenied, -			bypassyesno=isbypass,		 -			wordloc=badwordloc, -			gwordloc=goodwordloc}	 +	checkwords(logentry)  	if logentry.reason and logentry.reason ~= "" then             	if logentry.shortreason == "" then             		logentry.shortreason = logentry.reason             	end +		logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ") +		logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-")  	   	return logentry  	end  	return nil  end ---local function parsedglog(line) ---	local words = format.string_to_table(line, "\t") ---	local logentry = { logdatetime=words[1], clientuserid=words[2], clientip=words[3], ---		URL=words[4], reason=words[5], method=words[6], bytes=words[7], ---		shortreason=words[9]} ---	if logentry.reason and logentry.reason ~= "" then ---		if logentry.shortreason == "" then ---			logentry.shortreason = logentry.reason ---		end ---		logentry.score = string.match(logentry.reason, "^.*: ([0-9]+) ") ---		logentry.logdatetime = string.gsub(logentry.logdatetime, "%.", "-") ---		return logentry ---	end ---	return nil ---end - -  -- ################################################################################  -- DOWNLOAD FILE FUNCTIONS @@ -1266,18 +983,10 @@ function importlogs()  							count = count + 1  							success = importlogfile(source, cookiesfile, file, parsedglog) and success  						end -						if string.match(file, "squark/access%.log[%.%-]") then -							count = count + 1 -							success = importlogfile(source, cookiesfile, file, parsesquarklog) and success -						end  						if string.match(file, "squid/access%.log[%.%-]") then  							count = count + 1  							success = importlogfile(source, cookiesfile, file, parsesquidlog) and success  						end -						if string.match(file, "dump/access%.log[%.%-]") then -							count = count + 1 -							success = importlogfile(source, cookiesfile, file, parsedumplog) and success -						end  					end  				end  				os.remove(cookiesfile)  | 
