diff options
author | Timo Teräs <timo.teras@iki.fi> | 2010-11-07 00:47:39 +0200 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2010-11-07 00:47:39 +0200 |
commit | 25593b5e6fea76ed7c08db586924032c0810c27e (patch) | |
tree | b632534eb96978ad620fee1e5a9a5d280e0b191e /squark-filter.c | |
parent | e0450bd60a30ca944c16f84ee195463fd4aab653 (diff) | |
download | squark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.bz2 squark-25593b5e6fea76ed7c08db586924032c0810c27e.tar.xz |
squark: reorganize sources to src directory
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- | squark-filter.c | 431 |
1 files changed, 0 insertions, 431 deletions
diff --git a/squark-filter.c b/squark-filter.c deleted file mode 100644 index 995da40..0000000 --- a/squark-filter.c +++ /dev/null @@ -1,431 +0,0 @@ -/* squark-filter.c - Squid User Authentication and Rating Kit - * An external redirector for Squid which analyzes the URL according - * to a database and can redirect to a block page. - * - * Copyright (C) 2010 Timo Teräs <timo.teras@iki.fi> - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. See http://www.gnu.org/ for details. - */ - -#include <time.h> -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include <unistd.h> - -#include <cmph.h> - -#include "blob.h" -#include "addr.h" -#include "filterdb.h" -#include "authdb.h" - -#define FILTER_OVERRIDE_TIMEOUT (15*60) - -static struct sqdb db; -static struct authdb adb; -static struct authdb_config adbc; - -static int running = 1; -static const blob_t dash = BLOB_STR_INIT("-"); -static const blob_t space = BLOB_STR_INIT(" "); -static const blob_t slash = BLOB_STR_INIT("/"); -static const blob_t lf = BLOB_STR_INIT("\n"); -static struct authdb adb; -static time_t now; - -struct url_info { - blob_t protocol; - blob_t username; - blob_t password; - blob_t host; - blob_t significant_host; - blob_t path; - blob_t query; - blob_t fragment; - int port; - int is_ipv4; - int num_dots; -}; - -struct url_dns_part_data { - blob_t word; - int num_dots; - int numeric; -}; - -void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp) -{ - blob_t t = *b; - int c, i, dots = 0, numeric = 1; - - for (i = 0; i < t.len; i++) { - c = (unsigned char) t.ptr[i]; - switch (c) { - case '.': - dots++; - break; - case ':': case '@': case '/': case '?': - *b = BLOB_PTR_LEN(t.ptr + i, t.len - i); - udp->word = BLOB_PTR_LEN(t.ptr, i); - udp->num_dots = dots; - udp->numeric = numeric; - return; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - break; - default: - numeric = 0; - break; - } - } - - *b = BLOB_NULL; - udp->word = t; - udp->num_dots = dots; - udp->numeric = numeric; -} - -/* URI is generalized as: - * [proto://][user[:password]@]domain.name[:port][/[path/to][?p=a&q=b;r=c][#fragment]] - * Character literals used as separators are: - * : / @ ? & ; # - * Also URI escaping says to treat %XX as encoded hex value. - */ - -static int url_parse(blob_t uri, struct url_info *nfo) -{ - struct url_dns_part_data prev, cur; - - memset(&prev, 0, sizeof(prev)); - memset(nfo, 0, sizeof(*nfo)); - - /* parse protocol, username/password and domain name/port */ - do { - blob_pull_url_dns_part(&uri, &cur); - - switch (uri.len ? uri.ptr[0] : '/') { - case ':': - blob_pull_skip(&uri, 1); - if (blob_is_null(nfo->protocol) && - blob_pull_matching(&uri, BLOB_STR("//"))) - nfo->protocol = cur.word; - else - prev = cur; - break; - case '@': - blob_pull_skip(&uri, 1); - if (!blob_is_null(nfo->username) || - !blob_is_null(nfo->password)) - goto error; - if (!blob_is_null(prev.word)) { - nfo->username = prev.word; - nfo->password = cur.word; - } else - nfo->username = cur.word; - memset(&prev, 0, sizeof(prev)); - break; - case '/': - case '?': - if (!blob_is_null(prev.word)) { - nfo->host = prev.word; - nfo->num_dots = prev.num_dots; - nfo->is_ipv4 = prev.numeric && prev.num_dots == 3; - nfo->port = blob_pull_uint(&cur.word, 10); - } else { - nfo->host = cur.word; - nfo->num_dots = cur.num_dots; - nfo->is_ipv4 = cur.numeric && cur.num_dots == 3; - } - if (blob_is_null(nfo->host)) - nfo->host = BLOB_STR("localhost"); - break; - } - } while (blob_is_null(nfo->host) && !blob_is_null(uri)); - - /* rest of the components */ - nfo->path = blob_pull_cspn(&uri, BLOB_STR("?&;#")); - nfo->query = blob_pull_cspn(&uri, BLOB_STR("#")); - nfo->fragment = uri; - - /* fill in defaults if needed */ - if (blob_is_null(nfo->protocol)) { - if (nfo->port == 443) - nfo->protocol = BLOB_STR("https"); - else - nfo->protocol = BLOB_STR("http"); - if (nfo->port == 0) - nfo->port = 80; - } else if (nfo->port == 0) { - if (blob_cmp(nfo->protocol, BLOB_STR("https")) == 0) - nfo->port = 443; - else - nfo->port = 80; - } - if (blob_is_null(nfo->path)) - nfo->path = BLOB_STR("/"); - - /* significant host name */ - nfo->significant_host = nfo->host; - if (nfo->num_dots > 1) { - blob_t b = nfo->significant_host; - if (blob_pull_matching(&b, BLOB_STR("www")) && - (blob_pull_uint(&b, 10), 1) && - blob_pull_matching(&b, BLOB_STR("."))) - nfo->significant_host = b; - } - return 1; - -error: - return 0; -} - -static void url_print(struct url_info *nfo) -{ -#define print_field(nfo, x) if (!blob_is_null(nfo->x)) printf(" %s{%.*s}", #x, nfo->x.len, nfo->x.ptr) - print_field(nfo, protocol); - print_field(nfo, username); - print_field(nfo, password); - print_field(nfo, host); - printf(" port{%d}", nfo->port); - print_field(nfo, path); - print_field(nfo, query); - print_field(nfo, fragment); -#undef print_field - printf("\n"); - fflush(stdout); -} - -static int url_classify(struct url_info *url, struct sqdb *db) -{ - unsigned char buffer[512]; - blob_t key, got, tld, keybuf, keylimits; - void *cmph; - struct sqdb_index_entry *indx; - cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT; - int dots_done = 1; - - cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); - indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL); - - keybuf = BLOB_BUF(buffer); - blob_push_lower(&keybuf, url->significant_host); - key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); - - /* search for most qualified domain match; do first lookup - * with two domain components */ - if (url->is_ipv4) { - i = cmph_search_packed(cmph, key.ptr, key.len); - - if (indx[i].parent != SQDB_PARENT_IPV4 || - indx[i].component != blob_inet_addr(url->host)) { - i = previ; - goto parent_dns_match; - } - } else { - key = BLOB_PTR_LEN(key.ptr + key.len, 0); - tld = blob_expand_head(&key, keylimits, '.'); - - do { - /* add one more domain component */ - got = blob_expand_head(&key, keylimits, '.'); - if (blob_is_null(got)) - break; - - previ = i; - i = cmph_search_packed(cmph, key.ptr, key.len); - if (!blob_is_null(tld)) { - int p = indx[i].parent; - - if (p == SQDB_PARENT_ROOT || - p == SQDB_PARENT_IPV4 || - indx[p].parent != SQDB_PARENT_ROOT || - blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) { - /* top level domain did not match */ - i = -1; - goto parent_dns_match; - } - tld = BLOB_NULL; - previ = p; - } - if (indx[i].parent != previ || - blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { - /* the subdomain did no longer match, use - * parents classification */ - i = previ; - goto parent_dns_match; - } - dots_done++; - } while (indx[i].has_subdomains); - } - - /* No paths to match for */ - if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr) - goto parent_dns_match; - - /* and then search for path matches -- construct hashing - * string of url decoded path */ - blob_push_urldecode(&keybuf, url->path); - key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); - - while (indx[i].has_paths) { - /* add one more path component */ - got = blob_expand_tail(&key, keylimits, '/'); - if (blob_is_null(got)) - break; - previ = i; - i = cmph_search_packed(cmph, key.ptr, key.len); - tld = sqdb_get_string_literal(db, indx[i].component); - if (blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { - /* the subdomain did no longer match, use - * parents classification */ - i = previ; - goto parent_dns_match; - } - } - -parent_dns_match: - if (i == SQDB_PARENT_ROOT) - return 0; /* no category */ - - return indx[i].category; -} - -static blob_t get_category_name(struct sqdb *db, int id) -{ - uint32_t *c, clen; - - c = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &clen); - if (c == NULL || id < 0 || id * sizeof(uint32_t) >= clen) - return BLOB_NULL; - - return sqdb_get_string_literal(db, c[id]); -} - -static void send_ok(blob_t tag) -{ - static char buffer[64]; - blob_t b = BLOB_BUF(buffer); - - blob_push(&b, tag); - blob_push(&b, lf); - b = blob_pushed(BLOB_BUF(buffer), b); - - write(STDOUT_FILENO, b.ptr, b.len); -} - -static void send_redirect(blob_t redirect_page, blob_t tag, blob_t url, blob_t categ, blob_t username) -{ - static char buffer[8*1024]; - blob_t b = BLOB_BUF(buffer); - - blob_push(&b, tag); - blob_push(&b, BLOB_STR(" 302:")); - blob_push(&b, adbc.redirect_url_base); - blob_push(&b, redirect_page); - blob_push(&b, BLOB_STR("?REASON=")); - blob_push_urlencode(&b, categ); - blob_push(&b, BLOB_STR("&USER=")); - blob_push_urlencode(&b, username); - blob_push(&b, BLOB_STR("&DENIEDURL=")); - blob_push_urlencode(&b, url); - blob_push(&b, lf); - b = blob_pushed(BLOB_BUF(buffer), b); - - write(STDOUT_FILENO, b.ptr, b.len); -} - -static void read_input(struct sqdb *db) -{ - static char buffer[8 * 1024]; - static blob_t left; - - blob_t b, line, id, ipaddr, url, username; - struct url_info nfo; - int r, category, auth_ok; - sockaddr_any addr; - struct authdb_entry entry; - void *token; - - if (blob_is_null(left)) - left = BLOB_BUF(buffer); - - r = read(STDIN_FILENO, left.ptr, left.len); - if (r < 0) - return; - if (r == 0) { - running = 0; - return; - } - left.ptr += r; - left.len -= r; - - now = time(NULL); - - b = blob_pushed(BLOB_BUF(buffer), left); - do { - line = blob_pull_cspn(&b, lf); - if (!blob_pull_matching(&b, lf)) - return; - - id = blob_pull_cspn(&line, space); - blob_pull_spn(&line, space); - url = blob_pull_cspn(&line, space); - blob_pull_spn(&line, space); - ipaddr = blob_pull_cspn(&line, slash); /* client addr */ - blob_pull_cspn(&line, space); /* fqdn */ - blob_pull_spn(&line, space); - username = blob_pull_cspn(&line, space); - /* http method */ - /* urlgroup */ - /* myaddr=xxx myport=xxx etc */ - - if (!blob_is_null(url) && - addr_parse(ipaddr, &addr)) { - /* valid request, handle it */ - if (url_parse(url, &nfo)) - category = url_classify(&nfo, db); - else - category = 0; - - token = authdb_get(&adb, &addr, &entry, 1); - if (authdb_check_login(token, &entry, username, now)) { - auth_ok = 1; - username = BLOB_STRLEN(entry.p.login_name); - } else { - auth_ok = 0; - } - - if (!auth_ok) { - send_redirect(BLOB_STR("login.cgi"), id, url, BLOB_STR("auth"), username); - } else if (((1ULL << category) & entry.p.block_categories) && - (now < entry.override_time || - now > entry.override_time + FILTER_OVERRIDE_TIMEOUT || - ((1ULL << category) & entry.p.hard_block_categories))) { - send_redirect(BLOB_STR("warning.cgi"), id, url, get_category_name(db, category), username); - } else - send_ok(id); - } - - if (b.len) { - memcpy(buffer, b.ptr, b.len); - b.ptr = buffer; - } - left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len); - } while (b.len); -} - -int main(int argc, char **argv) -{ - sqdb_open(&db, "/var/lib/squark/squark.db"); - authdb_open(&adb, &adbc, &db); - - while (running) - read_input(&db); - - sqdb_close(&db); - authdb_close(&adb); -} |