summary | refs | log | tree | commit | diff | stats
path: root/squark-filter.c
diff options
context:
space:
mode:
author: Timo Teräs <timo.teras@iki.fi> 2010-11-07 00:47:39 +0200
committer: Timo Teräs <timo.teras@iki.fi> 2010-11-07 00:47:39 +0200
commit: 25593b5e6fea76ed7c08db586924032c0810c27e (patch)
tree: b632534eb96978ad620fee1e5a9a5d280e0b191e /squark-filter.c
parent: e0450bd60a30ca944c16f84ee195463fd4aab653 (diff)
download: squark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.bz2
squark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.xz
squark: reorganize sources to src directory
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- squark-filter.c 431
1 files changed, 0 insertions, 431 deletions
diff --git a/squark-filter.c b/squark-filter.c
deleted file mode 100644
index 995da40..0000000
--- a/squark-filter.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/* squark-filter.c - Squid User Authentication and Rating Kit
- * An external redirector for Squid which analyzes the URL according
- * to a database and can redirect to a block page.
- *
- * Copyright (C) 2010 Timo Teräs <timo.teras@iki.fi>
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation. See http://www.gnu.org/ for details.
- */
-
-#include <time.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <cmph.h>
-
-#include "blob.h"
-#include "addr.h"
-#include "filterdb.h"
-#include "authdb.h"
-
-/* Length of the override window in seconds: read_input() treats an override
- * as expired once now > override_time + FILTER_OVERRIDE_TIMEOUT. */
-#define FILTER_OVERRIDE_TIMEOUT (15*60)
-
-/* Databases opened by main() and consulted while serving requests. */
-static struct sqdb db;
-static struct authdb adb;
-static struct authdb_config adbc;
-
-/* Main loop flag; read_input() clears it when stdin reaches end of file. */
-static int running = 1;
-
-/* Constant separator blobs used for tokenizing requests and building
- * replies.  (dash is not referenced by any function in this file.) */
-static const blob_t dash = BLOB_STR_INIT("-");
-static const blob_t space = BLOB_STR_INIT(" ");
-static const blob_t slash = BLOB_STR_INIT("/");
-static const blob_t lf = BLOB_STR_INIT("\n");
-
-/* Time sampled by read_input() after each successful read. */
-static time_t now;
-
-/* Components of a URL as filled in by url_parse(). The blobs are set from
- * parts of the parsed text or from string constants. */
-struct url_info {
- blob_t protocol; /* word before "://", else defaulted to "http" or "https" */
- blob_t username; /* from the "user[:password]@" part, if present */
- blob_t password; /* set only when both user and password were given */
- blob_t host; /* host word; "localhost" when it would otherwise be null */
- blob_t significant_host; /* host minus a leading "www", digits and "." when host has more than one dot */
- blob_t path; /* part split off at the characters ?&;# ; "/" when null */
- blob_t query; /* following part, split off at '#' */
- blob_t fragment; /* whatever remains after the query */
- int port; /* explicit port, else 443 for https and 80 otherwise */
- int is_ipv4; /* non-zero when host is only digits and dots with exactly three dots */
- int num_dots; /* number of '.' characters in host */
-};
-
-/* Result of one blob_pull_url_dns_part() call. */
-struct url_dns_part_data {
- blob_t word; /* text up to, but not including, the separator (or all of the input) */
- int num_dots; /* number of '.' characters in word */
- int numeric; /* 1 when word contains only digits and dots, else 0 */
-};
-
-/* Pull the leading word off *b, stopping at the first ':', '@', '/' or '?'.
- *
- * udp->word receives the word, udp->num_dots the number of '.' in it and
- * udp->numeric is 1 when the word holds nothing but digits and dots.
- * On return *b starts at the separator, or is BLOB_NULL when the input
- * contained no separator at all.
- */
-void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp)
-{
-	blob_t in = *b;
-	int pos, ndots = 0, only_digits = 1, hit_separator = 0;
-
-	for (pos = 0; pos < in.len; pos++) {
-		unsigned char ch = in.ptr[pos];
-
-		if (ch == ':' || ch == '@' || ch == '/' || ch == '?') {
-			hit_separator = 1;
-			break;
-		}
-		if (ch == '.')
-			ndots++;
-		else if (ch < '0' || ch > '9')
-			only_digits = 0;
-	}
-
-	if (hit_separator) {
-		/* leave the separator itself in *b for the caller to inspect */
-		*b = BLOB_PTR_LEN(in.ptr + pos, in.len - pos);
-		udp->word = BLOB_PTR_LEN(in.ptr, pos);
-	} else {
-		*b = BLOB_NULL;
-		udp->word = in;
-	}
-	udp->num_dots = ndots;
-	udp->numeric = only_digits;
-}
-
-/* URI is generalized as:
- * [proto://][user[:password]@]domain.name[:port][/[path/to][?p=a&q=b;r=c][#fragment]]
- * Character literals used as separators are:
- * : / @ ? & ; #
- * Also URI escaping says to treat %XX as encoded hex value.
- */
-
-/* Split uri into its components and store them in *nfo.
- *
- * Defaults are filled in for missing pieces: a null host becomes "localhost",
- * a null path becomes "/", and a missing protocol or port is derived from
- * the other one (https <-> 443, otherwise http / 80).
- *
- * Returns 1 on success, or 0 when an '@' is seen after a username or
- * password has already been recorded.
- */
-static int url_parse(blob_t uri, struct url_info *nfo)
-{
- struct url_dns_part_data prev, cur;
-
- /* prev remembers the word that preceded the latest ':' */
- memset(&prev, 0, sizeof(prev));
- memset(nfo, 0, sizeof(*nfo));
-
- /* parse protocol, username/password and domain name/port */
- do {
- blob_pull_url_dns_part(&uri, &cur);
-
- /* end of input is handled like a '/' separator */
- switch (uri.len ? uri.ptr[0] : '/') {
- case ':':
- /* either the "://" after the protocol, or the ':' of
- * user:password / host:port -- then keep the word in prev */
- blob_pull_skip(&uri, 1);
- if (blob_is_null(nfo->protocol) &&
- blob_pull_matching(&uri, BLOB_STR("//")))
- nfo->protocol = cur.word;
- else
- prev = cur;
- break;
- case '@':
- /* the word(s) collected so far were credentials */
- blob_pull_skip(&uri, 1);
- if (!blob_is_null(nfo->username) ||
- !blob_is_null(nfo->password))
- goto error;
- if (!blob_is_null(prev.word)) {
- nfo->username = prev.word;
- nfo->password = cur.word;
- } else
- nfo->username = cur.word;
- memset(&prev, 0, sizeof(prev));
- break;
- case '/':
- case '?':
- /* host part is complete; a pending prev means host:port */
- if (!blob_is_null(prev.word)) {
- nfo->host = prev.word;
- nfo->num_dots = prev.num_dots;
- nfo->is_ipv4 = prev.numeric && prev.num_dots == 3;
- nfo->port = blob_pull_uint(&cur.word, 10);
- } else {
- nfo->host = cur.word;
- nfo->num_dots = cur.num_dots;
- nfo->is_ipv4 = cur.numeric && cur.num_dots == 3;
- }
- if (blob_is_null(nfo->host))
- nfo->host = BLOB_STR("localhost");
- break;
- }
- } while (blob_is_null(nfo->host) && !blob_is_null(uri));
-
- /* rest of the components */
- nfo->path = blob_pull_cspn(&uri, BLOB_STR("?&;#"));
- nfo->query = blob_pull_cspn(&uri, BLOB_STR("#"));
- nfo->fragment = uri;
-
- /* fill in defaults if needed */
- if (blob_is_null(nfo->protocol)) {
- if (nfo->port == 443)
- nfo->protocol = BLOB_STR("https");
- else
- nfo->protocol = BLOB_STR("http");
- if (nfo->port == 0)
- nfo->port = 80;
- } else if (nfo->port == 0) {
- if (blob_cmp(nfo->protocol, BLOB_STR("https")) == 0)
- nfo->port = 443;
- else
- nfo->port = 80;
- }
- if (blob_is_null(nfo->path))
- nfo->path = BLOB_STR("/");
-
- /* significant host name */
- nfo->significant_host = nfo->host;
- if (nfo->num_dots > 1) {
- blob_t b = nfo->significant_host;
- /* drop a leading "www", any number after it, and the dot; the
- * comma expression discards the number and always yields 1 */
- if (blob_pull_matching(&b, BLOB_STR("www")) &&
- (blob_pull_uint(&b, 10), 1) &&
- blob_pull_matching(&b, BLOB_STR(".")))
- nfo->significant_host = b;
- }
- return 1;
-
-error:
- return 0;
-}
-
-/* Debugging aid: print the non-null fields of *nfo (and always the port) on
- * one line of stdout as " name{value}" pairs, then flush stdout.
- * Not called by any function in this file.
- */
-static void url_print(struct url_info *nfo)
-{
-/* Wrapped in do/while so the macro behaves as a single statement; the length
- * is converted because the "%.*s" precision argument must be an int. */
-#define print_field(nfo, x) \
-	do { \
-		if (!blob_is_null((nfo)->x)) \
-			printf(" %s{%.*s}", #x, (int) (nfo)->x.len, (nfo)->x.ptr); \
-	} while (0)
-	print_field(nfo, protocol);
-	print_field(nfo, username);
-	print_field(nfo, password);
-	print_field(nfo, host);
-	printf(" port{%d}", nfo->port);
-	print_field(nfo, path);
-	print_field(nfo, query);
-	print_field(nfo, fragment);
-#undef print_field
-	printf("\n");
-	fflush(stdout);
-}
-
-/* Classify a parsed URL against the filter database.
- *
- * The lower-cased significant host is looked up first: as a whole for IPv4
- * literals, otherwise one domain component at a time starting from the
- * top-level domain.  When the whole host matched and its entry has paths,
- * the URL-decoded path is appended to the key and matched one '/'-separated
- * component at a time.  Each hash lookup result is checked against the
- * stored index entry; the last entry that checked out supplies the result.
- *
- * Returns that entry's category, or 0 when nothing matched.
- */
-static int url_classify(struct url_info *url, struct sqdb *db)
-{
-	unsigned char buffer[512];
-	blob_t key, got, tld, keybuf, keylimits;
-	void *cmph;
-	struct sqdb_index_entry *indx;
-	cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT;
-
-	cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
-	indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL);
-
-	keybuf = BLOB_BUF(buffer);
-	blob_push_lower(&keybuf, url->significant_host);
-	key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
-
-	/* search for most qualified domain match; do first lookup
-	 * with two domain components */
-	if (url->is_ipv4) {
-		i = cmph_search_packed(cmph, key.ptr, key.len);
-
-		if (indx[i].parent != SQDB_PARENT_IPV4 ||
-		    indx[i].component != blob_inet_addr(url->host)) {
-			i = previ;
-			goto parent_dns_match;
-		}
-	} else {
-		/* start with an empty key at the end of the host name and
-		 * grow it towards the front, one component per step */
-		key = BLOB_PTR_LEN(key.ptr + key.len, 0);
-		tld = blob_expand_head(&key, keylimits, '.');
-
-		do {
-			/* add one more domain component */
-			got = blob_expand_head(&key, keylimits, '.');
-			if (blob_is_null(got))
-				break;
-
-			previ = i;
-			i = cmph_search_packed(cmph, key.ptr, key.len);
-			if (!blob_is_null(tld)) {
-				/* first lookup only: the entry's parent must be
-				 * a root-level entry naming our top level domain */
-				int p = indx[i].parent;
-
-				if (p == SQDB_PARENT_ROOT ||
-				    p == SQDB_PARENT_IPV4 ||
-				    indx[p].parent != SQDB_PARENT_ROOT ||
-				    blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) {
-					/* top level domain did not match; i is
-					 * unsigned, so use the root marker (not -1)
-					 * to make the exit path return "no category"
-					 * instead of indexing indx[] with a wrapped
-					 * value */
-					i = SQDB_PARENT_ROOT;
-					goto parent_dns_match;
-				}
-				tld = BLOB_NULL;
-				previ = p;
-			}
-			if (indx[i].parent != previ ||
-			    blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
-				/* the subdomain did no longer match, use
-				 * parents classification */
-				i = previ;
-				goto parent_dns_match;
-			}
-		} while (indx[i].has_subdomains);
-	}
-
-	/* No paths to match for (or only part of the host name matched) */
-	if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr)
-		goto parent_dns_match;
-
-	/* and then search for path matches -- construct hashing
-	 * string of url decoded path */
-	blob_push_urldecode(&keybuf, url->path);
-	key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf);
-
-	while (indx[i].has_paths) {
-		/* add one more path component */
-		got = blob_expand_tail(&key, keylimits, '/');
-		if (blob_is_null(got))
-			break;
-		previ = i;
-		i = cmph_search_packed(cmph, key.ptr, key.len);
-		if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) {
-			/* the path component did no longer match, use
-			 * parents classification */
-			i = previ;
-			goto parent_dns_match;
-		}
-	}
-
-parent_dns_match:
-	if (i == SQDB_PARENT_ROOT)
-		return 0; /* no category */
-
-	return indx[i].category;
-}
-
-/* Return the name of category number id from the database's category
- * section, or BLOB_NULL when the section is missing or id lies outside it. */
-static blob_t get_category_name(struct sqdb *db, int id)
-{
-	uint32_t section_len, *names;
-
-	names = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &section_len);
-	/* the section is an array of uint32_t string references; id must
-	 * address an element inside its section_len bytes */
-	if (names != NULL && id >= 0 && id * sizeof(uint32_t) < section_len)
-		return sqdb_get_string_literal(db, names[id]);
-
-	return BLOB_NULL;
-}
-
-/* Answer request `tag` without a redirect: write the tag followed by a
- * newline to stdout.  The result of write() is not checked. */
-static void send_ok(blob_t tag)
-{
-	static char buffer[64];
-	blob_t out = BLOB_BUF(buffer), reply;
-
-	blob_push(&out, tag);
-	blob_push(&out, lf);
-
-	/* reply = the part of buffer that was filled in above */
-	reply = blob_pushed(BLOB_BUF(buffer), out);
-	write(STDOUT_FILENO, reply.ptr, reply.len);
-}
-
-/* Answer request `tag` with a 302 redirect to redirect_page under
- * adbc.redirect_url_base.  The block reason (categ), the user name and the
- * originally requested url are passed along URL-encoded in the REASON, USER
- * and DENIEDURL query parameters.  The result of write() is not checked. */
-static void send_redirect(blob_t redirect_page, blob_t tag, blob_t url, blob_t categ, blob_t username)
-{
-	static char buffer[8*1024];
-	blob_t out = BLOB_BUF(buffer), reply;
-
-	/* "<tag> 302:<base><page>" */
-	blob_push(&out, tag);
-	blob_push(&out, BLOB_STR(" 302:"));
-	blob_push(&out, adbc.redirect_url_base);
-	blob_push(&out, redirect_page);
-
-	/* query string */
-	blob_push(&out, BLOB_STR("?REASON="));
-	blob_push_urlencode(&out, categ);
-	blob_push(&out, BLOB_STR("&USER="));
-	blob_push_urlencode(&out, username);
-	blob_push(&out, BLOB_STR("&DENIEDURL="));
-	blob_push_urlencode(&out, url);
-
-	blob_push(&out, lf);
-
-	/* reply = the part of buffer that was filled in above */
-	reply = blob_pushed(BLOB_BUF(buffer), out);
-	write(STDOUT_FILENO, reply.ptr, reply.len);
-}
-
-/* Read one chunk of request lines from stdin and answer the complete ones.
- *
- * Each line is tokenized as "id url client-addr/fqdn username ...".  For a
- * line with a url and a parsable client address the url is classified, the
- * client's login is checked, and one reply is written: a redirect to
- * login.cgi when the login check fails, a redirect to warning.cgi when the
- * category is blocked for the user, otherwise a plain ok.
- *
- * An incomplete trailing line is kept in the static buffer for the next
- * call.  End of file on stdin clears `running`; a read error just returns.
- */
-static void read_input(struct sqdb *db)
-{
-	static char buffer[8 * 1024];
-	static blob_t left;	/* unused tail of buffer, kept across calls */
-
-	blob_t b, line, id, ipaddr, url, username;
-	struct url_info nfo;
-	int r, category, auth_ok;
-	sockaddr_any addr;
-	struct authdb_entry entry;
-	void *token;
-
-	if (blob_is_null(left))
-		left = BLOB_BUF(buffer);
-
-	r = read(STDIN_FILENO, left.ptr, left.len);
-	if (r < 0)
-		return;
-	if (r == 0) {
-		running = 0;
-		return;
-	}
-	left.ptr += r;
-	left.len -= r;
-
-	now = time(NULL);
-
-	/* b = everything received so far and not yet consumed */
-	b = blob_pushed(BLOB_BUF(buffer), left);
-	do {
-		line = blob_pull_cspn(&b, lf);
-		if (!blob_pull_matching(&b, lf))
-			return;	/* no complete line yet; wait for more input */
-
-		id = blob_pull_cspn(&line, space);
-		blob_pull_spn(&line, space);
-		url = blob_pull_cspn(&line, space);
-		blob_pull_spn(&line, space);
-		ipaddr = blob_pull_cspn(&line, slash); /* client addr */
-		blob_pull_cspn(&line, space); /* fqdn */
-		blob_pull_spn(&line, space);
-		username = blob_pull_cspn(&line, space);
-		/* http method */
-		/* urlgroup */
-		/* myaddr=xxx myport=xxx etc */
-
-		/* NOTE(review): a line that fails this check gets no reply at
-		 * all -- confirm that the caller does not wait for one. */
-		if (!blob_is_null(url) &&
-		    addr_parse(ipaddr, &addr)) {
-			/* valid request, handle it */
-			if (url_parse(url, &nfo))
-				category = url_classify(&nfo, db);
-			else
-				category = 0;
-
-			token = authdb_get(&adb, &addr, &entry, 1);
-			if (authdb_check_login(token, &entry, username, now)) {
-				auth_ok = 1;
-				username = BLOB_STRLEN(entry.p.login_name);
-			} else {
-				auth_ok = 0;
-			}
-
-			if (!auth_ok) {
-				send_redirect(BLOB_STR("login.cgi"), id, url, BLOB_STR("auth"), username);
-			} else if (((1ULL << category) & entry.p.block_categories) &&
-				   (now < entry.override_time ||
-				    now > entry.override_time + FILTER_OVERRIDE_TIMEOUT ||
-				    ((1ULL << category) & entry.p.hard_block_categories))) {
-				/* blocked category, and either no override is
-				 * in effect or the category is hard-blocked */
-				send_redirect(BLOB_STR("warning.cgi"), id, url, get_category_name(db, category), username);
-			} else
-				send_ok(id);
-		}
-
-		if (b.len) {
-			/* move the unconsumed bytes to the front; source and
-			 * destination both lie inside buffer and may overlap,
-			 * so memmove() is required rather than memcpy() */
-			memmove(buffer, b.ptr, b.len);
-			b.ptr = buffer;
-		}
-		left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len);
-	} while (b.len);
-}
-
-/* Entry point: open the filter and authentication databases, serve requests
- * from stdin until read_input() clears `running`, then close both databases.
- * The command line arguments are not used and the open calls' results are
- * not checked. */
-int main(int argc, char **argv)
-{
-	sqdb_open(&db, "/var/lib/squark/squark.db");
-	authdb_open(&adb, &adbc, &db);
-
-	for (; running; )
-		read_input(&db);
-
-	sqdb_close(&db);
-	authdb_close(&adb);
-
-	return 0;
-}