summaryrefslogtreecommitdiffstats
path: root/squlogan
blob: 43b42dda79c7dac28210c863bcfc10f21395aea3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
#!/usr/bin/lua

-- Split one access.log line into named fields.
--
-- The line is cut on whitespace and fields are taken by position. When a
-- positional field is missing, code, status, peerstatus, peerhost and
-- squarkcategory come back as "" and every other key comes back as nil.
--
-- line:    string holding a single log line
-- returns: table with the keys logdatetime, elapsed, clientip, code, status,
--          bytes (number or nil), method, URL, site, clientuserid,
--          peerstatus, peerhost, squarkcategory and squarkaction
function parseline(line)
	local words = {}
	for word in string.gmatch(line, "%S+") do
		words[#words+1] = word
	end

	return {
		logdatetime=words[1],
		elapsed=words[2],
		clientip=words[3],
		-- field 4 is "code/status": text before and after the slash
		code=string.match(words[4] or "", "^[^/]*"),
		status=string.match(words[4] or "", "[^/]*$"),
		bytes=tonumber(words[5]),
		method=words[6],
		URL=words[7],
		-- scheme://host prefix of the URL. Short or blank lines have no
		-- field 7, so match against "" instead of handing nil to
		-- string.match (which raises).
		site = string.match(words[7] or "", "^(%a+://[^/]*)"),
		clientuserid=words[8],
		-- field 9 is "peerstatus/peerhost"
		peerstatus=string.match(words[9] or "", "^[^/]*"),
		peerhost=string.match(words[9] or "", "[^/]*$"),
		-- field 11 is "category,...,action": first and last comma-separated part
		squarkcategory=string.match(words[11] or "", "^[^,]*"),
		squarkaction=string.match(words[11] or "", ",([^,]+)$")
	}

end

-- Accumulate one entry into the counters stored under tbl[key].
--
-- tbl:     table mapping a key to a { lines, bytes, elapsed } counter table
-- key:     bucket to update; nil means there is nothing to record
-- entry:   record supplying bytes and elapsed (and optionally lines)
-- returns: tbl
function add_by_subkey(tbl, key, entry)
	if key == nil then
		return tbl
	end

	-- Values may arrive as numeric strings or be missing altogether (short
	-- or malformed log lines); treat anything non-numeric as 0 so that one
	-- bad line cannot abort the whole run.
	local bytes = tonumber(entry.bytes) or 0
	local elapsed = tonumber(entry.elapsed) or 0

	local bucket = tbl[key]
	if bucket == nil then
		tbl[key] = {
			-- The entry that creates the bucket is a line as well. Entries
			-- without a lines field of their own count as 1, otherwise the
			-- bucket would start at nil and stay one short forever.
			lines = entry.lines or 1,
			bytes = bytes,
			elapsed = elapsed,
		}
		return tbl
	end

	bucket.lines = (bucket.lines or 0) + 1
	bucket.bytes = bucket.bytes + bytes
	bucket.elapsed = bucket.elapsed + elapsed
	return tbl
end


-- Fold one entry into an aggregate.
--
-- For every name in subkeys that the entry has a value for, the matching
-- breakdown table total[name] is updated through add_by_subkey; after that
-- the aggregate's own elapsed, bytes and lines counters are advanced.
--
-- total:   aggregate with numeric elapsed, bytes and lines fields, expected
--          to hold a table under each name listed in subkeys
-- subkeys: list of entry field names to break the aggregate down by
-- entry:   record with elapsed and bytes plus the fields named in subkeys
-- returns: total
function add_stats(total, subkeys, entry)
	for _, field in pairs(subkeys) do
		local value = entry[field]
		if value ~= nil then
			add_by_subkey(total[field], value, entry)
		end
	end

	total.elapsed = total.elapsed + entry.elapsed
	total.bytes = total.bytes + entry.bytes
	total.lines = total.lines + 1

	return total
end

-- Build a fresh aggregate for one group.
--
-- groupname: value stored in the aggregate's group field
-- subkeys:   list of names; each one gets its own empty breakdown table
-- returns:   table with bytes, lines and elapsed set to 0, group set to
--            groupname, and an empty table under every name in subkeys
local function newstats(groupname, subkeys)
	local stats = {}
	stats.bytes, stats.lines, stats.elapsed = 0, 0, 0
	stats.group = groupname

	for _, name in pairs(subkeys) do
		stats[name] = {}
	end

	return stats
end

--		code = {}, category = {}, action = {},
-- Add an entry to the aggregate stored under tbl[key], creating that
-- aggregate with newstats on first use.
--
-- tbl:     table mapping a key to its aggregate
-- key:     group the entry belongs to; nil means the entry is not recorded
-- subkeys: breakdown names, passed on to newstats and add_stats
-- entry:   record to accumulate
-- returns: tbl
local function add_stats_by_key(tbl, key, subkeys, entry)
	if key ~= nil then
		local group = tbl[key]
		if group == nil then
			group = newstats(key, subkeys)
			tbl[key] = group
		end
		add_stats(group, subkeys, entry)
	end
	return tbl
end

-- Record one entry in the overall total and in every per-key grouping.
--
-- stats:   table holding the overall aggregate under total and one
--          grouping table under each name listed in keytbl
-- keytbl:  list of entry field names to group by
-- subkeys: breakdown names, passed on to the add_* helpers
-- entry:   record to accumulate
-- returns: stats
function collect_stats(stats, keytbl, subkeys, entry)
	-- overall figures first, then one update per grouping
	add_stats(stats.total, subkeys, entry)

	for _, field in pairs(keytbl) do
		local grouping = stats[field]
		add_stats_by_key(grouping, entry[field], subkeys, entry)
	end

	return stats
end

-- Read a log file and aggregate every line of it.
--
-- file:    path of the log file; an error is raised if it cannot be opened
-- keytbl:  list of entry field names to group by (default {"site"})
-- subkeys: list of entry field names to break each aggregate down by
--          (default {"code", "squarkcategory", "squarkaction"})
-- returns: table with the overall aggregate under total and one grouping
--          table under each name in keytbl
function parsefile(file, keytbl, subkeys)
	-- Resolve both defaults up front so the very same lists are used to
	-- create the buckets and to fill them; collect_stats iterates keytbl
	-- itself and would fail on nil.
	keytbl = keytbl or {"site"}
	subkeys = subkeys or {"code", "squarkcategory", "squarkaction" }

	local f = assert(io.open(file))

	local stats = {}
	stats.total = newstats("total", subkeys)
	for _, k in pairs(keytbl) do
		stats[k] = {}
	end

	for line in f:lines() do
		collect_stats(stats, keytbl, subkeys, parseline(line))
	end
	f:close()
	return stats
end

-- Print a heading followed by one tab-indented row per entry of tbl.
--
-- header: text written before the rows, followed by a colon
-- tbl:    table mapping a name to a record with lines, bytes and elapsed
--         fields; rows come out in pairs() order
function dump_tbl(header, tbl)
	io.write(header..":\n")
	for name, counters in pairs(tbl) do
		local label = "\t"..name..":"
		print(label, counters.lines, counters.bytes, counters.elapsed)
	end
end

-- Print every field of an aggregate under a "Totals for ..." heading.
--
-- Table-valued fields are handed to dump_tbl with the field name as the
-- heading; any other field is written as "name: value". A blank line closes
-- the report. Fields come out in pairs() order.
--
-- header: text inserted into the heading line
-- tbl:    aggregate to print
function print_totals(header, tbl)
	print("Totals for "..header..":")
	for name, value in pairs(tbl) do
		if type(value) ~= "table" then
			io.write(name..": "..value.."\n")
		else
			dump_tbl(name, value)
		end
	end
	io.write("\n")
end

-- Return the values of tbl as a new array ordered by func.
--
-- tbl:     any table; its keys are discarded and tbl itself is not modified
-- func:    comparison function handed to table.sort
-- returns: newly built, sorted array of the values
function sort_by_func(tbl, func)
	local values = {}
	for _, item in pairs(tbl) do
		values[#values + 1] = item
	end
	table.sort(values, func)
	return values
end

-- Return the records of tbl as an array ordered by one field, largest first.
--
-- tbl:     table whose values are records carrying the field named by key
-- key:     name of the field to compare
-- returns: whatever sort_by_func returns for that ordering
function sort_by_key(tbl, key)
	local function descending(a, b)
		return a[key] > b[key]
	end
	return sort_by_func(tbl, descending)
end

-- Invoke callback on the first n records of tbl in sortfunc order.
--
-- tbl:      table of records; keys are ignored
-- n:        maximum number of records to report; fewer are reported when
--           tbl holds fewer, none when n is 0 or negative
-- sortfunc: comparison function that defines the ranking
-- callback: called once per reported record, best-ranked first
function top_n_by_func(tbl, n, sortfunc, callback)
	-- keep the sorted copy local so it does not leak into the global table
	local sorted = sort_by_func(tbl, sortfunc)
	-- Bound the loop itself: testing the limit after the callback would
	-- report one record too many.
	for i = 1, math.min(n, #sorted) do
		callback(sorted[i])
	end
end

-- Print the leading records of tbl ranked by one numeric field.
--
-- Records are ranked largest-first on statskey and passed through
-- top_n_by_func, which decides how many are reported; each reported record
-- is printed as its group name followed by the value of that field.
--
-- tbl:      table of records carrying a group field and the statskey field
-- n:        limit handed to top_n_by_func
-- statskey: name of the field to rank and print
function top_n_by_key(tbl, n, statskey)
	local function higher_first(a, b)
		return a[statskey] > b[statskey]
	end

	local function show(rec)
		print(rec.group, rec[statskey])
	end

	top_n_by_func(tbl, n, higher_first, show)
end


-- Report driver: aggregate the log named on the command line (or the
-- default squid log) per site, broken down by result code, then print two
-- rankings.
stats = parsefile(arg[1] or "/var/log/squid/access.log",
	{"site"},
	{"code"})

-- Bytes recorded under the TCP_HIT code for one site record, or 0 when the
-- site has no TCP_HIT entries. Shared by the comparator and the printer so
-- both treat such sites the same way.
local function tcp_hit_bytes(rec)
	local hit = rec.code.TCP_HIT
	return hit and hit.bytes or 0
end


print("= Top 10 sites by hits =")
top_n_by_key(stats.site, 10, "lines")

print("= Top 10 sites by TCP_HIT bytes =")
top_n_by_func(stats.site, 10,
	function(a, b)
		return tcp_hit_bytes(a) > tcp_hit_bytes(b)
	end,
	function(rec)
		-- A site without TCP_HIT entries can still make the list when few
		-- sites were seen; print 0 for it instead of indexing nil.
		print(rec.group, tcp_hit_bytes(rec))
	end)