From 25593b5e6fea76ed7c08db586924032c0810c27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Sun, 7 Nov 2010 00:47:39 +0200 Subject: squark: reorganize sources to src directory --- src/squark-filter.c | 431 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 431 insertions(+) create mode 100644 src/squark-filter.c (limited to 'src/squark-filter.c') diff --git a/src/squark-filter.c b/src/squark-filter.c new file mode 100644 index 0000000..995da40 --- /dev/null +++ b/src/squark-filter.c @@ -0,0 +1,431 @@ +/* squark-filter.c - Squid User Authentication and Rating Kit + * An external redirector for Squid which analyzes the URL according + * to a database and can redirect to a block page. + * + * Copyright (C) 2010 Timo Teräs + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. See http://www.gnu.org/ for details. + */ + +#include +#include +#include +#include +#include + +#include + +#include "blob.h" +#include "addr.h" +#include "filterdb.h" +#include "authdb.h" + +#define FILTER_OVERRIDE_TIMEOUT (15*60) + +static struct sqdb db; +static struct authdb adb; +static struct authdb_config adbc; + +static int running = 1; +static const blob_t dash = BLOB_STR_INIT("-"); +static const blob_t space = BLOB_STR_INIT(" "); +static const blob_t slash = BLOB_STR_INIT("/"); +static const blob_t lf = BLOB_STR_INIT("\n"); +static struct authdb adb; +static time_t now; + +struct url_info { + blob_t protocol; + blob_t username; + blob_t password; + blob_t host; + blob_t significant_host; + blob_t path; + blob_t query; + blob_t fragment; + int port; + int is_ipv4; + int num_dots; +}; + +struct url_dns_part_data { + blob_t word; + int num_dots; + int numeric; +}; + +void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp) +{ + blob_t t = *b; + int c, i, dots = 0, numeric = 1; + + for (i = 0; i < t.len; i++) { + c = (unsigned char) t.ptr[i]; + switch (c) { + case '.': + dots++; + break; + case ':': case '@': case '/': case '?': + *b = BLOB_PTR_LEN(t.ptr + i, t.len - i); + udp->word = BLOB_PTR_LEN(t.ptr, i); + udp->num_dots = dots; + udp->numeric = numeric; + return; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + default: + numeric = 0; + break; + } + } + + *b = BLOB_NULL; + udp->word = t; + udp->num_dots = dots; + udp->numeric = numeric; +} + +/* URI is generalized as: + * [proto://][user[:password]@]domain.name[:port][/[path/to][?p=a&q=b;r=c][#fragment]] + * Character literals used as separators are: + * : / @ ? & ; # + * Also URI escaping says to treat %XX as encoded hex value. + */ + +static int url_parse(blob_t uri, struct url_info *nfo) +{ + struct url_dns_part_data prev, cur; + + memset(&prev, 0, sizeof(prev)); + memset(nfo, 0, sizeof(*nfo)); + + /* parse protocol, username/password and domain name/port */ + do { + blob_pull_url_dns_part(&uri, &cur); + + switch (uri.len ? uri.ptr[0] : '/') { + case ':': + blob_pull_skip(&uri, 1); + if (blob_is_null(nfo->protocol) && + blob_pull_matching(&uri, BLOB_STR("//"))) + nfo->protocol = cur.word; + else + prev = cur; + break; + case '@': + blob_pull_skip(&uri, 1); + if (!blob_is_null(nfo->username) || + !blob_is_null(nfo->password)) + goto error; + if (!blob_is_null(prev.word)) { + nfo->username = prev.word; + nfo->password = cur.word; + } else + nfo->username = cur.word; + memset(&prev, 0, sizeof(prev)); + break; + case '/': + case '?': + if (!blob_is_null(prev.word)) { + nfo->host = prev.word; + nfo->num_dots = prev.num_dots; + nfo->is_ipv4 = prev.numeric && prev.num_dots == 3; + nfo->port = blob_pull_uint(&cur.word, 10); + } else { + nfo->host = cur.word; + nfo->num_dots = cur.num_dots; + nfo->is_ipv4 = cur.numeric && cur.num_dots == 3; + } + if (blob_is_null(nfo->host)) + nfo->host = BLOB_STR("localhost"); + break; + } + } while (blob_is_null(nfo->host) && !blob_is_null(uri)); + + /* rest of the components */ + nfo->path = blob_pull_cspn(&uri, BLOB_STR("?&;#")); + nfo->query = blob_pull_cspn(&uri, BLOB_STR("#")); + nfo->fragment = uri; + + /* fill in defaults if needed */ + if (blob_is_null(nfo->protocol)) { + if (nfo->port == 443) + nfo->protocol = BLOB_STR("https"); + else + nfo->protocol = BLOB_STR("http"); + if (nfo->port == 0) + nfo->port = 80; + } else if (nfo->port == 0) { + if (blob_cmp(nfo->protocol, BLOB_STR("https")) == 0) + nfo->port = 443; + else + nfo->port = 80; + } + if (blob_is_null(nfo->path)) + nfo->path = BLOB_STR("/"); + + /* significant host name */ + nfo->significant_host = nfo->host; + if (nfo->num_dots > 1) { + blob_t b = nfo->significant_host; + if (blob_pull_matching(&b, BLOB_STR("www")) && + (blob_pull_uint(&b, 10), 1) && + blob_pull_matching(&b, BLOB_STR("."))) + nfo->significant_host = b; + } + return 1; + +error: + return 0; +} + +static void url_print(struct url_info *nfo) +{ +#define print_field(nfo, x) if (!blob_is_null(nfo->x)) printf(" %s{%.*s}", #x, nfo->x.len, nfo->x.ptr) + print_field(nfo, protocol); + print_field(nfo, username); + print_field(nfo, password); + print_field(nfo, host); + printf(" port{%d}", nfo->port); + print_field(nfo, path); + print_field(nfo, query); + print_field(nfo, fragment); +#undef print_field + printf("\n"); + fflush(stdout); +} + +static int url_classify(struct url_info *url, struct sqdb *db) +{ + unsigned char buffer[512]; + blob_t key, got, tld, keybuf, keylimits; + void *cmph; + struct sqdb_index_entry *indx; + cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT; + int dots_done = 1; + + cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); + indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL); + + keybuf = BLOB_BUF(buffer); + blob_push_lower(&keybuf, url->significant_host); + key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); + + /* search for most qualified domain match; do first lookup + * with two domain components */ + if (url->is_ipv4) { + i = cmph_search_packed(cmph, key.ptr, key.len); + + if (indx[i].parent != SQDB_PARENT_IPV4 || + indx[i].component != blob_inet_addr(url->host)) { + i = previ; + goto parent_dns_match; + } + } else { + key = BLOB_PTR_LEN(key.ptr + key.len, 0); + tld = blob_expand_head(&key, keylimits, '.'); + + do { + /* add one more domain component */ + got = blob_expand_head(&key, keylimits, '.'); + if (blob_is_null(got)) + break; + + previ = i; + i = cmph_search_packed(cmph, key.ptr, key.len); + if (!blob_is_null(tld)) { + int p = indx[i].parent; + + if (p == SQDB_PARENT_ROOT || + p == SQDB_PARENT_IPV4 || + indx[p].parent != SQDB_PARENT_ROOT || + blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) { + /* top level domain did not match */ + i = -1; + goto parent_dns_match; + } + tld = BLOB_NULL; + previ = p; + } + if (indx[i].parent != previ || + blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { + /* the subdomain did no longer match, use + * parents classification */ + i = previ; + goto parent_dns_match; + } + dots_done++; + } while (indx[i].has_subdomains); + } + + /* No paths to match for */ + if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr) + goto parent_dns_match; + + /* and then search for path matches -- construct hashing + * string of url decoded path */ + blob_push_urldecode(&keybuf, url->path); + key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); + + while (indx[i].has_paths) { + /* add one more path component */ + got = blob_expand_tail(&key, keylimits, '/'); + if (blob_is_null(got)) + break; + previ = i; + i = cmph_search_packed(cmph, key.ptr, key.len); + tld = sqdb_get_string_literal(db, indx[i].component); + if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { + /* the subdomain did no longer match, use + * parents classification */ + i = previ; + goto parent_dns_match; + } + } + +parent_dns_match: + if (i == SQDB_PARENT_ROOT) + return 0; /* no category */ + + return indx[i].category; +} + +static blob_t get_category_name(struct sqdb *db, int id) +{ + uint32_t *c, clen; + + c = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &clen); + if (c == NULL || id < 0 || id * sizeof(uint32_t) >= clen) + return BLOB_NULL; + + return sqdb_get_string_literal(db, c[id]); +} + +static void send_ok(blob_t tag) +{ + static char buffer[64]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} + +static void send_redirect(blob_t redirect_page, blob_t tag, blob_t url, blob_t categ, blob_t username) +{ + static char buffer[8*1024]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, BLOB_STR(" 302:")); + blob_push(&b, adbc.redirect_url_base); + blob_push(&b, redirect_page); + blob_push(&b, BLOB_STR("?REASON=")); + blob_push_urlencode(&b, categ); + blob_push(&b, BLOB_STR("&USER=")); + blob_push_urlencode(&b, username); + blob_push(&b, BLOB_STR("&DENIEDURL=")); + blob_push_urlencode(&b, url); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} + +static void read_input(struct sqdb *db) +{ + static char buffer[8 * 1024]; + static blob_t left; + + blob_t b, line, id, ipaddr, url, username; + struct url_info nfo; + int r, category, auth_ok; + sockaddr_any addr; + struct authdb_entry entry; + void *token; + + if (blob_is_null(left)) + left = BLOB_BUF(buffer); + + r = read(STDIN_FILENO, left.ptr, left.len); + if (r < 0) + return; + if (r == 0) { + running = 0; + return; + } + left.ptr += r; + left.len -= r; + + now = time(NULL); + + b = blob_pushed(BLOB_BUF(buffer), left); + do { + line = blob_pull_cspn(&b, lf); + if (!blob_pull_matching(&b, lf)) + return; + + id = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + url = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + ipaddr = blob_pull_cspn(&line, slash); /* client addr */ + blob_pull_cspn(&line, space); /* fqdn */ + blob_pull_spn(&line, space); + username = blob_pull_cspn(&line, space); + /* http method */ + /* urlgroup */ + /* myaddr=xxx myport=xxx etc */ + + if (!blob_is_null(url) && + addr_parse(ipaddr, &addr)) { + /* valid request, handle it */ + if (url_parse(url, &nfo)) + category = url_classify(&nfo, db); + else + category = 0; + + token = authdb_get(&adb, &addr, &entry, 1); + if (authdb_check_login(token, &entry, username, now)) { + auth_ok = 1; + username = BLOB_STRLEN(entry.p.login_name); + } else { + auth_ok = 0; + } + + if (!auth_ok) { + send_redirect(BLOB_STR("login.cgi"), id, url, BLOB_STR("auth"), username); + } else if (((1ULL << category) & entry.p.block_categories) && + (now < entry.override_time || + now > entry.override_time + FILTER_OVERRIDE_TIMEOUT || + ((1ULL << category) & entry.p.hard_block_categories))) { + send_redirect(BLOB_STR("warning.cgi"), id, url, get_category_name(db, category), username); + } else + send_ok(id); + } + + if (b.len) { + memcpy(buffer, b.ptr, b.len); + b.ptr = buffer; + } + left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len); + } while (b.len); +} + +int main(int argc, char **argv) +{ + sqdb_open(&db, "/var/lib/squark/squark.db"); + authdb_open(&adb, &adbc, &db); + + while (running) + read_input(&db); + + sqdb_close(&db); + authdb_close(&adb); +} -- cgit v1.2.3