diff options
author | Timo Teräs <timo.teras@iki.fi> | 2010-08-14 20:21:59 +0300 |
---|---|---|
committer | Timo Teräs <timo.teras@iki.fi> | 2010-08-14 20:21:59 +0300 |
commit | 54581ca7fe116ab20d6fa7f15013efe1777e1380 (patch) | |
tree | dc22731105eaa0ff39920e461f6b4831dad46f47 | |
parent | 7daf2874969fb6773d480e9776cd8418eeb6353f (diff) | |
download | squark-54581ca7fe116ab20d6fa7f15013efe1777e1380.tar.bz2 squark-54581ca7fe116ab20d6fa7f15013efe1777e1380.tar.xz |
filter: squid interfacing code, basic minimum configuration
Implement the squid redirect protocol. It implements the "concurrent"
version even though the algorithm is non-blocking. Doing this can
reduce the number of read system calls on a busy system.
Add minimal command-line configuration for banning specific
categories and specifying the redirect site. Some sort of config
file system will probably have to be added later.
-rw-r--r-- | blob.c | 16 | ||||
-rw-r--r-- | blob.h | 5 | ||||
-rw-r--r-- | squark-filter.c | 183 |
3 files changed, 175 insertions, 29 deletions
@@ -189,6 +189,22 @@ unsigned int blob_pull_uint(blob_t *b, int radix) return val; } +blob_t blob_pull_spn(blob_t *b, const blob_t reject) +{ + blob_t t = *b; + int i; + + for (i = 0; i < t.len; i++) { + if (memchr(reject.ptr, t.ptr[i], reject.len) == NULL) { + *b = BLOB_PTR_LEN(t.ptr + i, t.len - i); + return BLOB_PTR_LEN(t.ptr, i); + } + } + + *b = BLOB_NULL; + return t; +} + blob_t blob_pull_cspn(blob_t *b, const blob_t reject) { blob_t t = *b; @@ -25,7 +25,9 @@ typedef struct blob { #define BLOB_PTR_LEN(ptr,len) (blob_t){(void*)(ptr), (len)} #define BLOB_PTR_PTR(beg,end) BLOB_PTR_LEN((beg),(end)-(beg)+1) #define BLOB_BUF(buf) (blob_t){(void*)(buf), sizeof(buf)} -#define BLOB_STR(str) (blob_t){(char*)(str), strlen(str)} +#define BLOB_STRLEN(str) (blob_t){(str), strlen(str)} +#define BLOB_STR_INIT(str) {(str), sizeof(str)-1} +#define BLOB_STR(str) (blob_t) BLOB_STR_INIT(str) extern const blob_t BLOB_NULL; @@ -46,6 +48,7 @@ blob_t blob_pull(blob_t *b, int len); void blob_pull_skip(blob_t *b, int len); int blob_pull_matching(blob_t *b, blob_t e); unsigned int blob_pull_uint(blob_t *b, int radix); +blob_t blob_pull_spn(blob_t *b, const blob_t spn); blob_t blob_pull_cspn(blob_t *b, const blob_t cspn); blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep); diff --git a/squark-filter.c b/squark-filter.c index f3a4aed..c0d66d9 100644 --- a/squark-filter.c +++ b/squark-filter.c @@ -1,10 +1,18 @@ #include <stdio.h> +#include <string.h> +#include <unistd.h> #include <cmph.h> #include "squarkdb.h" #include "blob.h" +static int running = 1; +static uint64_t banned_categories = 0; +static const blob_t space = BLOB_STR_INIT(" "); +static const blob_t lf = BLOB_STR_INIT("\n"); +static blob_t redirect_page; + struct url_info { blob_t protocol; blob_t username; @@ -106,13 +114,12 @@ static void url_print(struct url_info *nfo) #undef print_field } -static blob_t url_classify(struct url_info *url, struct sqdb *db) +static int url_classify(struct url_info *url, struct 
sqdb *db) { unsigned char buffer[1024]; blob_t b, key, got, tld, mkey; void *cmph; struct sqdb_index_entry *indx; - uint32_t *categories; cmph_uint32 i = -1, previ; cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); @@ -181,41 +188,161 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db) parent_dns_match: if (i == -1) - return BLOB_STR("unknown"); + return 0; /* no category */ + + return indx[i].category; +} + +static blob_t get_category_name(struct sqdb *db, int id) +{ + uint32_t *c, clen; + + c = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &clen); + if (c == NULL || id < 0 || id * sizeof(uint32_t) >= clen) + return BLOB_NULL; + + return sqdb_get_string_literal(db, c[id]); +} + +static int find_category_id(struct sqdb *db, blob_t cat) +{ + uint32_t size, *ptr; + int i; + + ptr = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &size); + if (ptr == NULL) + return -1; + + size /= sizeof(uint32_t); + for (i = 0; i < size; i++) + if (blob_cmp(cat, sqdb_get_string_literal(db, ptr[i])) == 0) + return i; + + return -1; +} + +static void send_ok(blob_t tag) +{ + static char buffer[64]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} + +static void send_redirect(struct sqdb *db, blob_t tag, blob_t url, int categ, blob_t username) +{ + static char buffer[8*1024]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, BLOB_STR(" 302:")); + blob_push(&b, redirect_page); + blob_push(&b, BLOB_STR("?REASON=")); + blob_push(&b, get_category_name(db, categ)); + blob_push(&b, BLOB_STR("&USER=")); + blob_push(&b, username); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} - categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL); - return sqdb_get_string_literal(db, categories[indx[i].category]); +static void read_input(struct sqdb *db) +{ + static char buffer[8 * 1024]; + 
static blob_t left; + + blob_t b, line, id, url, username; + struct url_info nfo; + int r, category; + + if (blob_is_null(left)) + left = BLOB_BUF(buffer); + + r = read(STDIN_FILENO, left.ptr, left.len); + if (r < 0) + return; + if (r == 0) { + running = 0; + return; + } + left.ptr += r; + left.len -= r; + + b = blob_pushed(BLOB_BUF(buffer), left); + do { + line = blob_pull_cspn(&b, lf); + if (!blob_pull_matching(&b, lf)) + return; + + id = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + url = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + blob_pull_cspn(&line, space); /* client addr / fqdn */ + blob_pull_spn(&line, space); + username = blob_pull_cspn(&line, space); + /* http method */ + /* urlgroup */ + /* myaddr=xxx myport=xxx etc */ + + if (!blob_is_null(username)) { + /* valid request, handle it */ + if (url_parse(url, &nfo)) + category = url_classify(&nfo, db); + else + category = 0; + + if ((1ULL << category) & banned_categories) + send_redirect(db, id, url, category, username); + else + send_ok(id); + } + + if (b.len) { + memcpy(buffer, b.ptr, b.len); + b.ptr = buffer; + } + left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len); + } while (b.len); +} + +static void ban_category(struct sqdb *db, blob_t c) +{ + int category; + + category = find_category_id(db, c); + if (category >= 0) + banned_categories |= 1ULL << category; + else + fprintf(stderr, "WARNING: unknown category '%.*s'\n", + c.len, c.ptr); } int main(int argc, char **argv) { - const char * const uri[] = { - "http://sex.com", - "http://facebook.com:1234/", - "https://slashdot.org/path/to/me", - "http://user:pass@paistortuga.com/~mocosoft", - "http://user:pass@paistortuga.com", - "user@weather.whenu.speedera.net", - "zedo1.speedera.net", - "foo.com/stuff?query;bar#frag", - "foo.com?query;bar#frag", - "aapracingandsports.com.au/racing/", - }; struct sqdb db; - struct url_info nfo; - blob_t cat; - int i; + int opt; sqdb_open(&db, "squark.db"); - for (i = 0; i < 
ARRAY_SIZE(uri); i++) { - if (url_parse(BLOB_STR(uri[i]), &nfo)) { - cat = url_classify(&nfo, &db); - printf("%s - %.*s -", uri[i], cat.len, cat.ptr); - url_print(&nfo); - printf("\n"); - } else { - printf("%s - BAD_URL\n", uri[i]); + + while ((opt = getopt(argc, argv, "r:b:")) != -1) { + switch (opt) { + case 'r': + redirect_page = BLOB_STRLEN(optarg); + break; + case 'b': + ban_category(&db, BLOB_STRLEN(optarg)); + break; } } + + while (running) + read_input(&db); + sqdb_close(&db); } |