summaryrefslogtreecommitdiffstats
path: root/src/squark-filter.c
diff options
context:
space:
mode:
author Timo Teräs <timo.teras@iki.fi> 2010-11-07 00:47:39 +0200
committer Timo Teräs <timo.teras@iki.fi> 2010-11-07 00:47:39 +0200
commit 25593b5e6fea76ed7c08db586924032c0810c27e (patch)
tree b632534eb96978ad620fee1e5a9a5d280e0b191e /src/squark-filter.c
parent e0450bd60a30ca944c16f84ee195463fd4aab653 (diff)
downloadsquark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.bz2
squark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.xz
squark: reorganize sources to src directory
Diffstat (limited to 'src/squark-filter.c')
-rw-r--r--src/squark-filter.c431
1 files changed, 431 insertions, 0 deletions
diff --git a/src/squark-filter.c b/src/squark-filter.c
new file mode 100644
index 0000000..995da40
--- /dev/null
+++ b/src/squark-filter.c
@@ -0,0 +1,431 @@
+/* squark-filter.c - Squid User Authentication and Rating Kit
+ * An external redirector for Squid which analyzes the URL according
+ * to a database and can redirect to a block page.
+ *
+ * Copyright (C) 2010 Timo Teräs <timo.teras@iki.fi>
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation. See http://www.gnu.org/ for details.
+ */
+
+#include <time.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <cmph.h>
+
+#include "blob.h"
+#include "addr.h"
+#include "filterdb.h"
+#include "authdb.h"
+
+/* Seconds for which an override of a (non-hard) category block stays in
+ * effect, counted from the override_time of the user's authdb entry. */
+#define FILTER_OVERRIDE_TIMEOUT (15*60)
+
+/* URL classification database; opened and closed in main(). */
+static struct sqdb db;
+/* Authentication database and its configuration; opened in main(). */
+static struct authdb adb;
+static struct authdb_config adbc;
+
+/* Main loop flag; read_input() clears it when read() reports end of input. */
+static int running = 1;
+/* Constant blobs used as field separators. */
+static const blob_t dash = BLOB_STR_INIT("-");
+static const blob_t space = BLOB_STR_INIT(" ");
+static const blob_t slash = BLOB_STR_INIT("/");
+static const blob_t lf = BLOB_STR_INIT("\n");
+/* Current time, sampled by read_input() after each successful read(). */
+static time_t now;
+
+struct url_info { /* one URI split into its components by url_parse() */
+ blob_t protocol; /* word in front of "://"; else "https" if port is 443, otherwise "http" */
+ blob_t username; /* user part in front of '@', if any */
+ blob_t password; /* part between ':' and '@', if any */
+ blob_t host; /* host name or address; "localhost" when the URI has none */
+ blob_t significant_host; /* host, minus a leading "www<digits>." when host has more than one dot */
+ blob_t path; /* text after the host up to the first of ? & ; # -- "/" when empty */
+ blob_t query; /* text after the path up to the first '#' */
+ blob_t fragment; /* whatever remains after the query */
+ int port; /* number after "host:"; else 443 for "https", otherwise 80 */
+ int is_ipv4; /* non-zero when host is only digits and dots with exactly three dots */
+ int num_dots; /* number of '.' characters in host */
+};
+
+struct url_dns_part_data { /* result of one blob_pull_url_dns_part() call */
+ blob_t word; /* text in front of the separator, or all of the input if none was found */
+ int num_dots; /* number of '.' characters in word */
+ int numeric; /* 1 when word holds nothing but digits and dots, else 0 */
+};
+
+/* blob_pull_url_dns_part - cut the leading word off a URI blob.
+ *
+ * The word ends at the first ':', '@', '/' or '?'.  On return *b starts
+ * at that separator (the separator itself is not consumed); when no
+ * separator exists the whole input becomes the word and *b is set to
+ * BLOB_NULL.
+ *
+ * b:   in/out, the text still to be parsed
+ * udp: out, the word together with its dot count and a flag telling
+ *      whether it consists of digits and dots only
+ */
+void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp)
+{
+	blob_t in = *b;
+	int pos, ndots = 0, only_digits = 1;
+
+	for (pos = 0; pos < in.len; pos++) {
+		int ch = (unsigned char) in.ptr[pos];
+
+		if (ch == ':' || ch == '@' || ch == '/' || ch == '?') {
+			/* separator found: split right in front of it */
+			*b = BLOB_PTR_LEN(in.ptr + pos, in.len - pos);
+			udp->word = BLOB_PTR_LEN(in.ptr, pos);
+			goto done;
+		}
+
+		if (ch == '.')
+			ndots++;
+		else if (ch < '0' || ch > '9')
+			only_digits = 0;
+	}
+
+	/* ran off the end: everything belongs to the word */
+	*b = BLOB_NULL;
+	udp->word = in;
+
+done:
+	udp->num_dots = ndots;
+	udp->numeric = only_digits;
+}
+
+/* URI is generalized as:
+ * [proto://][user[:password]@]domain.name[:port][/[path/to][?p=a&q=b;r=c][#fragment]]
+ * Character literals used as separators are:
+ * : / @ ? & ; #
+ * Also URI escaping says to treat %XX as encoded hex value.
+ */
+
+/* url_parse - split a URI into the fields of *nfo.
+ *
+ * uri: the URI text; the blobs stored in *nfo are slices of it or
+ *      constant default strings
+ * nfo: zeroed first, then filled in
+ *
+ * Defaults applied when a component is absent: host "localhost",
+ * protocol "https" if the port is 443 and "http" otherwise, port 443 for
+ * protocol "https" and 80 otherwise, path "/".
+ *
+ * Returns 1 on success and 0 when an '@' is met after a username or
+ * password has already been recorded.
+ */
+static int url_parse(blob_t uri, struct url_info *nfo)
+{
+	struct url_dns_part_data pending, part;
+	int sep;
+
+	memset(&pending, 0, sizeof(pending));
+	memset(nfo, 0, sizeof(*nfo));
+
+	/* Protocol, user info, host and port: take one word at a time and
+	 * let the separator behind it decide what the word was.  Running
+	 * out of input counts as a '/'. */
+	do {
+		blob_pull_url_dns_part(&uri, &part);
+		sep = uri.len ? uri.ptr[0] : '/';
+
+		if (sep == ':') {
+			blob_pull_skip(&uri, 1);
+			if (blob_is_null(nfo->protocol) &&
+			    blob_pull_matching(&uri, BLOB_STR("//")))
+				nfo->protocol = part.word;
+			else
+				/* "user:" or "host:" -- the next separator tells */
+				pending = part;
+		} else if (sep == '@') {
+			blob_pull_skip(&uri, 1);
+			if (!blob_is_null(nfo->username) ||
+			    !blob_is_null(nfo->password))
+				return 0;
+			if (blob_is_null(pending.word)) {
+				nfo->username = part.word;
+			} else {
+				nfo->username = pending.word;
+				nfo->password = part.word;
+			}
+			memset(&pending, 0, sizeof(pending));
+		} else if (sep == '/' || sep == '?') {
+			if (blob_is_null(pending.word)) {
+				nfo->host = part.word;
+				nfo->num_dots = part.num_dots;
+				nfo->is_ipv4 = part.numeric && part.num_dots == 3;
+			} else {
+				/* "host:port" -- the current word is the port */
+				nfo->host = pending.word;
+				nfo->num_dots = pending.num_dots;
+				nfo->is_ipv4 = pending.numeric && pending.num_dots == 3;
+				nfo->port = blob_pull_uint(&part.word, 10);
+			}
+			if (blob_is_null(nfo->host))
+				nfo->host = BLOB_STR("localhost");
+		}
+	} while (blob_is_null(nfo->host) && !blob_is_null(uri));
+
+	/* Path, query and fragment are what is left. */
+	nfo->path = blob_pull_cspn(&uri, BLOB_STR("?&;#"));
+	nfo->query = blob_pull_cspn(&uri, BLOB_STR("#"));
+	nfo->fragment = uri;
+
+	/* Derive a missing protocol from the port, or a missing port from
+	 * the protocol. */
+	if (blob_is_null(nfo->protocol)) {
+		if (nfo->port == 443)
+			nfo->protocol = BLOB_STR("https");
+		else
+			nfo->protocol = BLOB_STR("http");
+		if (nfo->port == 0)
+			nfo->port = 80;
+	} else if (nfo->port == 0) {
+		nfo->port = blob_cmp(nfo->protocol, BLOB_STR("https")) == 0 ? 443 : 80;
+	}
+	if (blob_is_null(nfo->path))
+		nfo->path = BLOB_STR("/");
+
+	/* Significant host: drop a leading "www", optional digits and the
+	 * following dot, but only from names with more than one dot. */
+	nfo->significant_host = nfo->host;
+	if (nfo->num_dots > 1) {
+		blob_t rest = nfo->host;
+
+		if (blob_pull_matching(&rest, BLOB_STR("www"))) {
+			blob_pull_uint(&rest, 10);
+			if (blob_pull_matching(&rest, BLOB_STR(".")))
+				nfo->significant_host = rest;
+		}
+	}
+
+	return 1;
+}
+
+/* url_print - debugging aid: print a parsed URL on one line of stdout.
+ *
+ * Each non-null blob field is printed as " name{value}"; the port is
+ * always printed.  The line is flushed right away.  Nothing in this file
+ * calls this function.
+ */
+static void url_print(struct url_info *nfo)
+{
+/* The precision consumed by "%.*s" must be an int, hence the cast. */
+#define print_field(nfo, x) \
+	do { \
+		if (!blob_is_null((nfo)->x)) \
+			printf(" %s{%.*s}", #x, (int) (nfo)->x.len, (nfo)->x.ptr); \
+	} while (0)
+	print_field(nfo, protocol);
+	print_field(nfo, username);
+	print_field(nfo, password);
+	print_field(nfo, host);
+	printf(" port{%d}", nfo->port);
+	print_field(nfo, path);
+	print_field(nfo, query);
+	print_field(nfo, fragment);
+#undef print_field
+	printf("\n");
+	fflush(stdout);
+}
+
+/* url_classify - look up the category of a parsed URL.
+ *
+ * url: URL as filled in by url_parse()
+ * db:  filter database providing the hash function (INDEX_MPH section)
+ *      and the index entries (INDEX section)
+ *
+ * The lower-cased significant host is used as the lookup key.  An IPv4
+ * host is looked up as a whole.  A DNS name is looked up repeatedly with
+ * one more '.'-separated component added to the head of the key each
+ * round, starting with two components; every hit is verified against
+ * the parent link and component string stored in the index entry.  If
+ * the complete host matched and its entry has paths, the URL-decoded
+ * path is appended to the key and matched '/'-component-wise as well.
+ *
+ * Returns the category of the most specific verified entry, or 0 when
+ * nothing matched or the index sections are unavailable.
+ */
+static int url_classify(struct url_info *url, struct sqdb *db)
+{
+	unsigned char buffer[512];
+	blob_t key, got, tld, keybuf, keylimits;
+	void *cmph;
+	struct sqdb_index_entry *indx;
+	cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
+
+	cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
+	indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL);
+	if (cmph == NULL || indx == NULL)
+		return 0; /* nothing to search in */
+
+	keybuf = BLOB_BUF(buffer);
+	blob_push_lower(&keybuf, url->significant_host);
+	key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
+
+	/* search for most qualified domain match; do first lookup
+	 * with two domain components */
+	if (url->is_ipv4) {
+		i = cmph_search_packed(cmph, key.ptr, key.len);
+
+		/* the hash yields an index for any key, so verify the hit */
+		if (indx[i].parent != SQDB_PARENT_IPV4 ||
+		    indx[i].component != blob_inet_addr(url->host)) {
+			i = previ;
+			goto parent_dns_match;
+		}
+	} else {
+		/* start with an empty key at the end of the host name and
+		 * grow it towards the head; the first component taken is
+		 * the top level domain */
+		key = BLOB_PTR_LEN(key.ptr + key.len, 0);
+		tld = blob_expand_head(&key, keylimits, '.');
+
+		do {
+			/* add one more domain component */
+			got = blob_expand_head(&key, keylimits, '.');
+			if (blob_is_null(got))
+				break;
+
+			previ = i;
+			i = cmph_search_packed(cmph, key.ptr, key.len);
+			if (!blob_is_null(tld)) {
+				/* first lookup only: the parent of the hit
+				 * must be the root level entry of our tld */
+				int p = indx[i].parent;
+
+				if (p == SQDB_PARENT_ROOT ||
+				    p == SQDB_PARENT_IPV4 ||
+				    indx[p].parent != SQDB_PARENT_ROOT ||
+				    blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
+					/* top level domain did not match; use the
+					 * sentinel that is tested below so that
+					 * indx[] is never read with a bogus index */
+					i = SQDB_PARENT_ROOT;
+					goto parent_dns_match;
+				}
+				tld = BLOB_NULL;
+				previ = p;
+			}
+			if (indx[i].parent != previ ||
+			    blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+				/* the subdomain did no longer match, use
+				 * parents classification */
+				i = previ;
+				goto parent_dns_match;
+			}
+		} while (indx[i].has_subdomains);
+	}
+
+	/* No paths to match for: nothing matched, the entry has no paths,
+	 * or the key does not start at the beginning of the host name */
+	if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr)
+		goto parent_dns_match;
+
+	/* and then search for path matches -- construct hashing
+	 * string of url decoded path */
+	blob_push_urldecode(&keybuf, url->path);
+	/* NOTE(review): key is set to the complete host+path string here;
+	 * whether blob_expand_tail() can still extend it depends on that
+	 * helper -- confirm against blob.c */
+	key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
+
+	while (indx[i].has_paths) {
+		/* add one more path component */
+		got = blob_expand_tail(&key, keylimits, '/');
+		if (blob_is_null(got))
+			break;
+		previ = i;
+		i = cmph_search_packed(cmph, key.ptr, key.len);
+		if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
+			/* the path component no longer matched, use
+			 * parents classification */
+			i = previ;
+			goto parent_dns_match;
+		}
+	}
+
+parent_dns_match:
+	if (i == SQDB_PARENT_ROOT)
+		return 0; /* no category */
+
+	return indx[i].category;
+}
+
+/* get_category_name - translate a category id into its name.
+ *
+ * The categories section of db is read as an array of uint32_t values
+ * indexed by id; the selected value is handed to
+ * sqdb_get_string_literal().
+ *
+ * Returns BLOB_NULL when the section is missing, id is negative, or id
+ * lies beyond the section size.
+ */
+static blob_t get_category_name(struct sqdb *db, int id)
+{
+	uint32_t *names, nbytes;
+
+	names = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &nbytes);
+	if (names == NULL)
+		return BLOB_NULL;
+	if (id < 0)
+		return BLOB_NULL;
+	/* the section size is compared in bytes, not in entries */
+	if (id * sizeof(uint32_t) >= nbytes)
+		return BLOB_NULL;
+
+	return sqdb_get_string_literal(db, names[id]);
+}
+
+/* send_ok - answer a request without rewriting it.
+ *
+ * Writes the request tag followed by a newline to standard output in a
+ * single write() call; the result of write() is not examined.
+ */
+static void send_ok(blob_t tag)
+{
+	static char reply[64];
+	blob_t out = BLOB_BUF(reply);
+
+	/* compose "<tag>\n" */
+	blob_push(&out, tag);
+	blob_push(&out, lf);
+
+	/* turn the unused remainder into the part that was filled */
+	out = blob_pushed(BLOB_BUF(reply), out);
+	write(STDOUT_FILENO, out.ptr, out.len);
+}
+
+/* send_redirect - answer a request with a 302 redirect.
+ *
+ * The line written to standard output has the form
+ *   <tag> 302:<redirect_url_base><redirect_page>?REASON=<categ>&USER=<username>&DENIEDURL=<url>
+ * followed by a newline; categ, username and url are URL-encoded, and
+ * redirect_url_base comes from the authdb configuration.  The result of
+ * write() is not examined.
+ */
+static void send_redirect(blob_t redirect_page, blob_t tag, blob_t url, blob_t categ, blob_t username)
+{
+	static char reply[8*1024];
+	blob_t out = BLOB_BUF(reply);
+
+	/* "<tag> 302:" and the address of the page to show */
+	blob_push(&out, tag);
+	blob_push(&out, BLOB_STR(" 302:"));
+	blob_push(&out, adbc.redirect_url_base);
+	blob_push(&out, redirect_page);
+
+	/* query string handed to that page */
+	blob_push(&out, BLOB_STR("?REASON="));
+	blob_push_urlencode(&out, categ);
+	blob_push(&out, BLOB_STR("&USER="));
+	blob_push_urlencode(&out, username);
+	blob_push(&out, BLOB_STR("&DENIEDURL="));
+	blob_push_urlencode(&out, url);
+
+	blob_push(&out, lf);
+
+	/* turn the unused remainder into the part that was filled */
+	out = blob_pushed(BLOB_BUF(reply), out);
+	write(STDOUT_FILENO, out.ptr, out.len);
+}
+
+/* read_input - read once from standard input and answer every complete
+ * request line that is buffered.
+ *
+ * db: filter database used to classify the requested URLs
+ *
+ * A line is split on spaces into
+ *   id url client-address/fqdn username ...
+ * and any further fields are ignored.  For a line with a URL and a
+ * parsable client address the answer is
+ *   - a redirect to login.cgi when the login check fails,
+ *   - a redirect to warning.cgi when the URL's category is blocked for
+ *     the user and either the category is hard-blocked or "now" lies
+ *     outside the FILTER_OVERRIDE_TIMEOUT window that starts at the
+ *     entry's override_time,
+ *   - the plain id otherwise.
+ * No answer is written for other lines.
+ *
+ * An incomplete trailing line stays in the static buffer and is
+ * completed by a later call.  read() returning 0 clears "running"; a
+ * read() error just returns.
+ */
+static void read_input(struct sqdb *db)
+{
+	static char buffer[8 * 1024];
+	static blob_t left;	/* unused tail of buffer */
+
+	blob_t b, line, id, ipaddr, url, username;
+	struct url_info nfo;
+	int r, category, auth_ok;
+	sockaddr_any addr;
+	struct authdb_entry entry;
+	void *token;
+
+	if (blob_is_null(left))
+		left = BLOB_BUF(buffer);
+
+	r = read(STDIN_FILENO, left.ptr, left.len);
+	if (r < 0)
+		return;
+	if (r == 0) {
+		running = 0;
+		return;
+	}
+	left.ptr += r;
+	left.len -= r;
+
+	now = time(NULL);
+
+	/* b covers all buffered data, from the start of buffer up to left */
+	b = blob_pushed(BLOB_BUF(buffer), left);
+	do {
+		line = blob_pull_cspn(&b, lf);
+		if (!blob_pull_matching(&b, lf))
+			return; /* line not complete yet, wait for more data */
+
+		id = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		url = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		ipaddr = blob_pull_cspn(&line, slash); /* client addr */
+		blob_pull_cspn(&line, space); /* fqdn */
+		blob_pull_spn(&line, space);
+		username = blob_pull_cspn(&line, space);
+		/* http method */
+		/* urlgroup */
+		/* myaddr=xxx myport=xxx etc */
+
+		if (!blob_is_null(url) &&
+		    addr_parse(ipaddr, &addr)) {
+			/* valid request, handle it */
+			if (url_parse(url, &nfo))
+				category = url_classify(&nfo, db);
+			else
+				category = 0;
+
+			token = authdb_get(&adb, &addr, &entry, 1);
+			if (authdb_check_login(token, &entry, username, now)) {
+				auth_ok = 1;
+				username = BLOB_STRLEN(entry.p.login_name);
+			} else {
+				auth_ok = 0;
+			}
+
+			if (!auth_ok) {
+				send_redirect(BLOB_STR("login.cgi"), id, url, BLOB_STR("auth"), username);
+			} else if (((1ULL << category) & entry.p.block_categories) &&
+				   (now < entry.override_time ||
+				    now > entry.override_time + FILTER_OVERRIDE_TIMEOUT ||
+				    ((1ULL << category) & entry.p.hard_block_categories))) {
+				send_redirect(BLOB_STR("warning.cgi"), id, url, get_category_name(db, category), username);
+			} else
+				send_ok(id);
+		}
+
+		/* Move what is still unprocessed to the start of buffer.
+		 * Source and destination are parts of the same array and
+		 * may overlap, so this has to be memmove(), not memcpy(). */
+		if (b.len) {
+			memmove(buffer, b.ptr, b.len);
+			b.ptr = buffer;
+		}
+		left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len);
+	} while (b.len);
+}
+
+/* main - open the filter and authentication databases, serve requests
+ * from standard input until read_input() clears "running", then close
+ * the databases again.
+ *
+ * The command line arguments are not used.  Returns 0.
+ */
+int main(int argc, char **argv)
+{
+	sqdb_open(&db, "/var/lib/squark/squark.db");
+	authdb_open(&adb, &adbc, &db);
+
+	while (running)
+		read_input(&db);
+
+	sqdb_close(&db);
+	authdb_close(&adb);
+
+	/* explicit exit status instead of falling off the end of main() */
+	return 0;
+}