diff options
-rw-r--r-- | blob.c | 16 | ||||
-rw-r--r-- | blob.h | 5 | ||||
-rw-r--r-- | squark-filter.c | 183 |
3 files changed, 175 insertions, 29 deletions
@@ -189,6 +189,22 @@ unsigned int blob_pull_uint(blob_t *b, int radix) return val; } +blob_t blob_pull_spn(blob_t *b, const blob_t reject) +{ + blob_t t = *b; + int i; + + for (i = 0; i < t.len; i++) { + if (memchr(reject.ptr, t.ptr[i], reject.len) == NULL) { + *b = BLOB_PTR_LEN(t.ptr + i, t.len - i); + return BLOB_PTR_LEN(t.ptr, i); + } + } + + *b = BLOB_NULL; + return t; +} + blob_t blob_pull_cspn(blob_t *b, const blob_t reject) { blob_t t = *b; @@ -25,7 +25,9 @@ typedef struct blob { #define BLOB_PTR_LEN(ptr,len) (blob_t){(void*)(ptr), (len)} #define BLOB_PTR_PTR(beg,end) BLOB_PTR_LEN((beg),(end)-(beg)+1) #define BLOB_BUF(buf) (blob_t){(void*)(buf), sizeof(buf)} -#define BLOB_STR(str) (blob_t){(char*)(str), strlen(str)} +#define BLOB_STRLEN(str) (blob_t){(str), strlen(str)} +#define BLOB_STR_INIT(str) {(str), sizeof(str)-1} +#define BLOB_STR(str) (blob_t) BLOB_STR_INIT(str) extern const blob_t BLOB_NULL; @@ -46,6 +48,7 @@ blob_t blob_pull(blob_t *b, int len); void blob_pull_skip(blob_t *b, int len); int blob_pull_matching(blob_t *b, blob_t e); unsigned int blob_pull_uint(blob_t *b, int radix); +blob_t blob_pull_spn(blob_t *b, const blob_t spn); blob_t blob_pull_cspn(blob_t *b, const blob_t cspn); blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep); diff --git a/squark-filter.c b/squark-filter.c index f3a4aed..c0d66d9 100644 --- a/squark-filter.c +++ b/squark-filter.c @@ -1,10 +1,18 @@ #include <stdio.h> +#include <string.h> +#include <unistd.h> #include <cmph.h> #include "squarkdb.h" #include "blob.h" +static int running = 1; +static uint64_t banned_categories = 0; +static const blob_t space = BLOB_STR_INIT(" "); +static const blob_t lf = BLOB_STR_INIT("\n"); +static blob_t redirect_page; + struct url_info { blob_t protocol; blob_t username; @@ -106,13 +114,12 @@ static void url_print(struct url_info *nfo) #undef print_field } -static blob_t url_classify(struct url_info *url, struct sqdb *db) +static int url_classify(struct url_info *url, struct sqdb *db) { unsigned char buffer[1024]; blob_t b, key, got, tld, mkey; void *cmph; struct sqdb_index_entry *indx; - uint32_t *categories; cmph_uint32 i = -1, previ; cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL); @@ -181,41 +188,161 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db) parent_dns_match: if (i == -1) - return BLOB_STR("unknown"); + return 0; /* no category */ + + return indx[i].category; +} + +static blob_t get_category_name(struct sqdb *db, int id) +{ + uint32_t *c, clen; + + c = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &clen); + if (c == NULL || id < 0 || id * sizeof(uint32_t) >= clen) + return BLOB_NULL; + + return sqdb_get_string_literal(db, c[id]); +} + +static int find_category_id(struct sqdb *db, blob_t cat) +{ + uint32_t size, *ptr; + int i; + + ptr = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &size); + if (ptr == NULL) + return -1; + + size /= sizeof(uint32_t); + for (i = 0; i < size; i++) + if (blob_cmp(cat, sqdb_get_string_literal(db, ptr[i])) == 0) + return i; + + return -1; +} + +static void send_ok(blob_t tag) +{ + static char buffer[64]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} + +static void send_redirect(struct sqdb *db, blob_t tag, blob_t url, int categ, blob_t username) +{ + static char buffer[8*1024]; + blob_t b = BLOB_BUF(buffer); + + blob_push(&b, tag); + blob_push(&b, BLOB_STR(" 302:")); + blob_push(&b, redirect_page); + blob_push(&b, BLOB_STR("?REASON=")); + blob_push(&b, get_category_name(db, categ)); + blob_push(&b, BLOB_STR("&USER=")); + blob_push(&b, username); + blob_push(&b, lf); + b = blob_pushed(BLOB_BUF(buffer), b); + + write(STDOUT_FILENO, b.ptr, b.len); +} - categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL); - return sqdb_get_string_literal(db, categories[indx[i].category]); +static void read_input(struct sqdb *db) +{ + static char buffer[8 * 1024]; + static blob_t left; + + blob_t b, line, id, url, username; + struct url_info nfo; + int r, category; + + if (blob_is_null(left)) + left = BLOB_BUF(buffer); + + r = read(STDIN_FILENO, left.ptr, left.len); + if (r < 0) + return; + if (r == 0) { + running = 0; + return; + } + left.ptr += r; + left.len -= r; + + b = blob_pushed(BLOB_BUF(buffer), left); + do { + line = blob_pull_cspn(&b, lf); + if (!blob_pull_matching(&b, lf)) + return; + + id = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + url = blob_pull_cspn(&line, space); + blob_pull_spn(&line, space); + blob_pull_cspn(&line, space); /* client addr / fqdn */ + blob_pull_spn(&line, space); + username = blob_pull_cspn(&line, space); + /* http method */ + /* urlgroup */ + /* myaddr=xxx myport=xxx etc */ + + if (!blob_is_null(username)) { + /* valid request, handle it */ + if (url_parse(url, &nfo)) + category = url_classify(&nfo, db); + else + category = 0; + + if ((1ULL << category) & banned_categories) + send_redirect(db, id, url, category, username); + else + send_ok(id); + } + + if (b.len) { + memcpy(buffer, b.ptr, b.len); + b.ptr = buffer; + } + left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len); + } while (b.len); +} + +static void ban_category(struct sqdb *db, blob_t c) +{ + int category; + + category = find_category_id(db, c); + if (category >= 0) + banned_categories |= 1ULL << category; + else + fprintf(stderr, "WARNING: unknown category '%.*s'\n", + c.len, c.ptr); } int main(int argc, char **argv) { - const char * const uri[] = { - "http://sex.com", - "http://facebook.com:1234/", - "https://slashdot.org/path/to/me", - "http://user:pass@paistortuga.com/~mocosoft", - "http://user:pass@paistortuga.com", - "user@weather.whenu.speedera.net", - "zedo1.speedera.net", - "foo.com/stuff?query;bar#frag", - "foo.com?query;bar#frag", - "aapracingandsports.com.au/racing/", - }; struct sqdb db; - struct url_info nfo; - blob_t cat; - int i; + int opt; sqdb_open(&db, "squark.db"); - for (i = 0; i < ARRAY_SIZE(uri); i++) { - if (url_parse(BLOB_STR(uri[i]), &nfo)) { - cat = url_classify(&nfo, &db); - printf("%s - %.*s -", uri[i], cat.len, cat.ptr); - url_print(&nfo); - printf("\n"); - } else { - printf("%s - BAD_URL\n", uri[i]); + + while ((opt = getopt(argc, argv, "r:b:")) != -1) { + switch (opt) { + case 'r': + redirect_page = BLOB_STRLEN(optarg); + break; + case 'b': + ban_category(&db, BLOB_STRLEN(optarg)); + break; } } + + while (running) + read_input(&db); + sqdb_close(&db); } |