/* squark-filter.c - Squid User Authentication and Rating Kit * An external redirector for Squid which analyzes the URL according * to a database and can redirect to a block page. * * Copyright (C) 2010 Timo Teräs * All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published * by the Free Software Foundation. See http://www.gnu.org/ for details. */ #include #include #include #include #include #include #include #include "config.h" #include "blob.h" #include "addr.h" #include "filterdb.h" #include "authdb.h" #include "reporting.h" #define FILTER_OVERRIDE_TIMEOUT (15*60) static struct sqdb db; static struct authdb adb; static struct authdb_config adbc; static int running = 1; static const blob_t dash = BLOB_STR_INIT("-"); static const blob_t space = BLOB_STR_INIT(" "); static const blob_t slash = BLOB_STR_INIT("/"); static const blob_t lf = BLOB_STR_INIT("\n"); static struct authdb adb; static time_t now; struct url_info { blob_t protocol; blob_t username; blob_t password; blob_t host; blob_t significant_host; blob_t path; blob_t query; blob_t fragment; int port; int is_ipv4; int num_dots; }; struct url_dns_part_data { blob_t word; int num_dots; int numeric; }; void blob_pull_url_dns_part(blob_t *b, struct url_dns_part_data *udp) { blob_t t = *b; int c, i, dots = 0, numeric = 1; for (i = 0; i < t.len; i++) { c = (unsigned char) t.ptr[i]; switch (c) { case '.': dots++; break; case ':': case '@': case '/': case '?': *b = BLOB_PTR_LEN(t.ptr + i, t.len - i); udp->word = BLOB_PTR_LEN(t.ptr, i); udp->num_dots = dots; udp->numeric = numeric; return; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': break; default: numeric = 0; break; } } *b = BLOB_NULL; udp->word = t; udp->num_dots = dots; udp->numeric = numeric; } /* URI is generalized as: * [proto://][user[:password]@]domain.name[:port][/[path/to][?p=a&q=b;r=c][#fragment]] * Character literals used as separators are: * : / @ ? & ; # * Also URI escaping says to treat %XX as encoded hex value. */ static int url_parse(blob_t uri, struct url_info *nfo) { struct url_dns_part_data prev, cur; memset(&prev, 0, sizeof(prev)); memset(nfo, 0, sizeof(*nfo)); /* parse protocol, username/password and domain name/port */ do { blob_pull_url_dns_part(&uri, &cur); switch (uri.len ? uri.ptr[0] : '/') { case ':': blob_pull_skip(&uri, 1); if (blob_is_null(nfo->protocol) && blob_pull_matching(&uri, BLOB_STR("//"))) nfo->protocol = cur.word; else prev = cur; break; case '@': blob_pull_skip(&uri, 1); if (!blob_is_null(nfo->username) || !blob_is_null(nfo->password)) goto error; if (!blob_is_null(prev.word)) { nfo->username = prev.word; nfo->password = cur.word; } else nfo->username = cur.word; memset(&prev, 0, sizeof(prev)); break; case '/': case '?': if (!blob_is_null(prev.word)) { nfo->host = prev.word; nfo->num_dots = prev.num_dots; nfo->is_ipv4 = prev.numeric && prev.num_dots == 3; nfo->port = blob_pull_uint(&cur.word, 10); } else { nfo->host = cur.word; nfo->num_dots = cur.num_dots; nfo->is_ipv4 = cur.numeric && cur.num_dots == 3; } if (blob_is_null(nfo->host)) nfo->host = BLOB_STR("localhost"); break; } } while (blob_is_null(nfo->host) && !blob_is_null(uri)); /* rest of the components */ nfo->path = blob_pull_cspn(&uri, BLOB_STR("?&;#")); nfo->query = blob_pull_cspn(&uri, BLOB_STR("#")); nfo->fragment = uri; /* fill in defaults if needed */ if (blob_is_null(nfo->protocol)) { if (nfo->port == 443) nfo->protocol = BLOB_STR("https"); else nfo->protocol = BLOB_STR("http"); if (nfo->port == 0) nfo->port = 80; } else if (nfo->port == 0) { if (blob_cmp(nfo->protocol, BLOB_STR("https")) == 0) nfo->port = 443; else nfo->port = 80; } if (blob_is_null(nfo->path)) nfo->path = BLOB_STR("/"); /* significant host name */ nfo->significant_host = nfo->host; if (nfo->num_dots > 1) { blob_t b = nfo->significant_host; if (blob_pull_matching(&b, BLOB_STR("www")) && (blob_pull_uint(&b, 10), 1) && blob_pull_matching(&b, BLOB_STR("."))) nfo->significant_host = b; } return 1; error: return 0; } #if 0 static void url_print(struct url_info *nfo) { #define print_field(nfo, x) if (!blob_is_null(nfo->x)) printf(" %s{%.*s}", #x, nfo->x.len, nfo->x.ptr) print_field(nfo, protocol); print_field(nfo, username); print_field(nfo, password); print_field(nfo, host); printf(" port{%d}", nfo->port); print_field(nfo, path); print_field(nfo, query); print_field(nfo, fragment); #undef print_field printf("\n"); fflush(stdout); } #endif static int url_classify(struct url_info *url, struct sqdb *db) { unsigned char buffer[512]; blob_t key, got, tld, keybuf, keylimits; void *cmph; struct sqdb_index_entry *indx; cmph_uint32 i = SQDB_PARENT_ROOT, previ = SQDB_PARENT_ROOT; int dots_done = 1; cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); indx = sqdb_section_get(db, SQDB_SECTION_INDEX, NULL); keybuf = BLOB_BUF(buffer); blob_push_lower(&keybuf, url->significant_host); key = keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); /* search for most qualified domain match; do first lookup * with two domain components */ if (url->is_ipv4) { i = cmph_search_packed(cmph, key.ptr, key.len); if (indx[i].parent != SQDB_PARENT_IPV4 || indx[i].component != blob_inet_addr(url->host)) { i = previ; goto parent_dns_match; } } else { key = BLOB_PTR_LEN(key.ptr + key.len, 0); tld = blob_expand_head(&key, keylimits, '.'); do { /* add one more domain component */ got = blob_expand_head(&key, keylimits, '.'); if (blob_is_null(got)) break; report_debug("Trying '%.*s'\n", key.len, key.ptr); previ = i; i = cmph_search_packed(cmph, key.ptr, key.len); if (!blob_is_null(tld)) { int p = indx[i].parent; if (p == SQDB_PARENT_ROOT || p == SQDB_PARENT_IPV4 || indx[p].parent != SQDB_PARENT_ROOT || blob_cmp(tld, sqdb_get_string_literal(db, indx[p].component)) != 0) { /* top level domain did not match */ i = SQDB_PARENT_ROOT; goto parent_dns_match; } tld = BLOB_NULL; previ = p; } if (indx[i].parent != previ || blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { /* the subdomain did no longer match, use * parents classification */ i = previ; goto parent_dns_match; } dots_done++; } while (indx[i].has_subdomains); } /* No paths to match for */ if (i == SQDB_PARENT_ROOT || !indx[i].has_paths || key.ptr != keylimits.ptr) goto parent_dns_match; /* and then search for path matches -- construct hashing * string of url decoded path */ blob_lowercase(url->path); /* filter DB stores all URLs in lowercase */ blob_push_urldecode(&keybuf, url->path); keylimits = blob_pushed(BLOB_BUF(buffer), keybuf); report_debug("Checking path components on '%.*s'\n", keylimits.len, keylimits.ptr); while (indx[i].has_paths) { /* add one more path component */ got = blob_expand_tail(&key, keylimits, '/'); if (blob_is_null(got)) break; report_debug("Trying with path '%.*s'\n", key.len, key.ptr); previ = i; i = cmph_search_packed(cmph, key.ptr, key.len); if (indx[i].parent != previ || blob_cmp(got, sqdb_get_string_literal(db, indx[i].component)) != 0) { /* the subdomain did no longer match, use * parents classification */ i = previ; goto parent_dns_match; } } parent_dns_match: if (i == SQDB_PARENT_ROOT) return 0; /* no category */ return indx[i].category; } static blob_t get_category_name(struct sqdb *db, int id) { uint32_t *c, clen; c = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &clen); if (c == NULL || id < 0 || id * sizeof(uint32_t) >= clen) return BLOB_NULL; return sqdb_get_string_literal(db, c[id]); } static void send_ok(blob_t tag, blob_t categ, int override) { static char buffer[64]; blob_t b = BLOB_BUF(buffer); blob_push(&b, tag); blob_push(&b, BLOB_STR(" !")); blob_push(&b, categ); if (override) blob_push(&b, BLOB_STR(",overridden")); blob_push(&b, BLOB_STR("!")); blob_push(&b, lf); b = blob_pushed(BLOB_BUF(buffer), b); if(write(STDOUT_FILENO, b.ptr, b.len) < 0) report_error("Error occurred while writing to stdout: %s\n", strerror(errno)); } static void send_redirect(blob_t redirect_page, blob_t tag, blob_t url, blob_t categ, blob_t username) { static char buffer[8*1024]; blob_t b = BLOB_BUF(buffer); blob_push(&b, tag); blob_push(&b, BLOB_STR(" !")); blob_push(&b, categ); blob_push(&b, BLOB_STR(",blocked!302:")); blob_push(&b, adbc.redirect_url_base); blob_push(&b, redirect_page); blob_push(&b, BLOB_STR("?REASON=")); blob_push_urlencode(&b, categ); blob_push(&b, BLOB_STR("&USER=")); blob_push_urlencode(&b, username); blob_push(&b, BLOB_STR("&DENIEDURL=")); blob_push_urlencode(&b, url); blob_push(&b, lf); b = blob_pushed(BLOB_BUF(buffer), b); if(write(STDOUT_FILENO, b.ptr, b.len) < 0) report_error("Error occurred while writing to stdout: %s\n", strerror(errno)); } static void read_input(struct sqdb *db) { static char buffer[8 * 1024]; static blob_t left; blob_t b, line, id, ipaddr, url, username, categ; struct url_info nfo; int r, category, auth_ok; sockaddr_any addr; struct authdb_entry entry; void *token; if (blob_is_null(left)) left = BLOB_BUF(buffer); r = read(STDIN_FILENO, left.ptr, left.len); if (r < 0) { report_error("Error occurred while reading from stdin: %s\n", strerror(errno)); return; } if (r == 0) { running = 0; return; } left.ptr += r; left.len -= r; now = time(NULL); adbc_refresh(&adbc, now); b = blob_pushed(BLOB_BUF(buffer), left); do { line = blob_pull_cspn(&b, lf); if (!blob_pull_matching(&b, lf)) return; id = blob_pull_cspn(&line, space); blob_pull_spn(&line, space); url = blob_pull_cspn(&line, space); blob_pull_spn(&line, space); ipaddr = blob_pull_cspn(&line, slash); /* client addr */ blob_pull_cspn(&line, space); /* fqdn */ blob_pull_spn(&line, space); username = blob_pull_cspn(&line, space); /* http method */ /* urlgroup */ /* myaddr=xxx myport=xxx etc */ if (!blob_is_null(url) && addr_parse(ipaddr, &addr)) { /* valid request, handle it */ if (url_parse(url, &nfo)) { category = url_classify(&nfo, db); if (verbosity_level >= REPORT_INFO) { if (category == 0) { report_info("Could not identify category of URL: '%.*s'\n", url.len, url.ptr); } else { categ = get_category_name(db, category); report_info("Identified '%.*s' as category %d (%.*s)\n", url.len, url.ptr, category, categ.len, categ.ptr); } } } else { category = 0; report_info("Couldn't parse URL: '%.*s'\n", url.len, url.ptr); } token = authdb_get(&adb, &addr, &entry, 1); if (token == NULL) { /* internal error */ send_redirect(BLOB_STR("internal-error.html"), id, url, BLOB_STR("internal_error"), username); } else { if (authdb_check_login(token, &entry, username, now, &adbc)) { auth_ok = 1; username = BLOB_STRLEN(entry.p.login_name); } else if ((!adbc.require_auth) || (!blob_is_null(username) && blob_cmp(username, dash) != 0)) { auth_ok = 1; authdb_clear_entry(&entry); memcpy(entry.p.login_name, username.ptr, username.len); authdb_commit_login(token, &entry, now, &adbc); } else { auth_ok = 0; } if (!auth_ok) { send_redirect(BLOB_STR("login.cgi"), id, url, BLOB_STR("auth"), username); } else if ((1ULL << category) & entry.p.hard_block_categories) { send_redirect(BLOB_STR("blocked.cgi"), id, url, get_category_name(db, category), username); } else if (((1ULL << category) & entry.p.block_categories) && (now < entry.override_time || now > entry.override_time + FILTER_OVERRIDE_TIMEOUT)) { send_redirect(BLOB_STR("warning.cgi"), id, url, get_category_name(db, category), username); } else send_ok(id, get_category_name(db, category), !!((1ULL << category) & entry.p.block_categories)); } } if (b.len) { memcpy(buffer, b.ptr, b.len); b.ptr = buffer; } left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len); } while (b.len); } int main(int argc, char **argv) { int rc = 1, opt; reporting_init("squark-filter"); while ((opt = getopt(argc, argv, "Vc:d:sqv::")) != -1) { switch (opt) { case 'V': fprintf(stderr, "squark-filter %s\n", squark_version); return 0; case 'c': squark_config = optarg; break; case 'd': squark_dbname = optarg; break; case 's': reporting_use_syslog(1); break; case 'q': reporting_verbosity(REPORT_ALERT); break; case 'v': if (optarg == 0) reporting_verbosity(REPORT_INFO); else if (*optarg == 'v') reporting_verbosity(REPORT_DEBUG); break; } } if (sqdb_open(&db, squark_dbname) < 0) { report_error("%s: failed to open squarkdb\n", squark_dbname); goto err_sqdb; } if (authdb_open(&adb, &adbc, &db) < 0) { report_error("Failed to initialize authdb\n"); goto err_adb; } while (running) read_input(&db); rc = 0; authdb_close(&adb); err_adb: sqdb_close(&db); err_sqdb: return rc; }