summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTimo Teräs <timo.teras@iki.fi>2010-08-14 20:21:59 +0300
committerTimo Teräs <timo.teras@iki.fi>2010-08-14 20:21:59 +0300
commit54581ca7fe116ab20d6fa7f15013efe1777e1380 (patch)
treedc22731105eaa0ff39920e461f6b4831dad46f47
parent7daf2874969fb6773d480e9776cd8418eeb6353f (diff)
downloadsquark-54581ca7fe116ab20d6fa7f15013efe1777e1380.tar.bz2
squark-54581ca7fe116ab20d6fa7f15013efe1777e1380.tar.xz
filter: squid interfacing code, basic minimum configuration
Implement the squid redirect protocol. The "concurrent" variant is implemented even though the algorithm is non-blocking, because this can reduce the number of read system calls on a busy system. Add minimal command-line configuration for banning specific categories and specifying the redirect site. Some sort of config file system will probably have to be added later.
-rw-r--r--blob.c16
-rw-r--r--blob.h5
-rw-r--r--squark-filter.c183
3 files changed, 175 insertions, 29 deletions
diff --git a/blob.c b/blob.c
index 0c28877..5a3b226 100644
--- a/blob.c
+++ b/blob.c
@@ -189,6 +189,22 @@ unsigned int blob_pull_uint(blob_t *b, int radix)
return val;
}
+/*
+ * Split off the longest prefix of *b that consists solely of bytes found
+ * in 'reject' (despite its name, this parameter is the set of bytes to
+ * consume, as with strspn). The prefix is returned and *b is advanced to
+ * the first byte outside the set. When every byte of *b is in the set,
+ * the whole blob is returned and *b becomes BLOB_NULL.
+ */
+blob_t blob_pull_spn(blob_t *b, const blob_t reject)
+{
+	blob_t in = *b;
+	int n = 0;
+
+	/* advance over bytes that are members of the set */
+	while (n < in.len &&
+	       memchr(reject.ptr, in.ptr[n], reject.len) != NULL)
+		n++;
+
+	if (!(n < in.len)) {
+		/* ran off the end: nothing is left in *b */
+		*b = BLOB_NULL;
+		return in;
+	}
+
+	*b = BLOB_PTR_LEN(in.ptr + n, in.len - n);
+	return BLOB_PTR_LEN(in.ptr, n);
+}
+
blob_t blob_pull_cspn(blob_t *b, const blob_t reject)
{
blob_t t = *b;
diff --git a/blob.h b/blob.h
index 3d065ed..dba2f97 100644
--- a/blob.h
+++ b/blob.h
@@ -25,7 +25,9 @@ typedef struct blob {
#define BLOB_PTR_LEN(ptr,len) (blob_t){(void*)(ptr), (len)}
#define BLOB_PTR_PTR(beg,end) BLOB_PTR_LEN((beg),(end)-(beg)+1)
#define BLOB_BUF(buf) (blob_t){(void*)(buf), sizeof(buf)}
-#define BLOB_STR(str) (blob_t){(char*)(str), strlen(str)}
+#define BLOB_STRLEN(str) (blob_t){(str), strlen(str)} /* length measured at run time with strlen() */
+#define BLOB_STR_INIT(str) {(str), sizeof(str)-1} /* brace initializer; length comes from sizeof, so str must be a string literal or char array, not a pointer */
+#define BLOB_STR(str) (blob_t) BLOB_STR_INIT(str) /* compound-literal form of BLOB_STR_INIT */
extern const blob_t BLOB_NULL;
@@ -46,6 +48,7 @@ blob_t blob_pull(blob_t *b, int len);
void blob_pull_skip(blob_t *b, int len);
int blob_pull_matching(blob_t *b, blob_t e);
unsigned int blob_pull_uint(blob_t *b, int radix);
+blob_t blob_pull_spn(blob_t *b, const blob_t spn);
blob_t blob_pull_cspn(blob_t *b, const blob_t cspn);
blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep);
diff --git a/squark-filter.c b/squark-filter.c
index f3a4aed..c0d66d9 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -1,10 +1,18 @@
#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
#include <cmph.h>
#include "squarkdb.h"
#include "blob.h"
+static int running = 1; /* main loop flag; cleared when read() reports end of input */
+static uint64_t banned_categories = 0; /* bit N set => requests in category id N are redirected */
+static const blob_t space = BLOB_STR_INIT(" "); /* field separator within a request line */
+static const blob_t lf = BLOB_STR_INIT("\n"); /* terminator of request and response lines */
+static blob_t redirect_page; /* redirect target prefix, set by the -r option */
+
struct url_info {
blob_t protocol;
blob_t username;
@@ -106,13 +114,12 @@ static void url_print(struct url_info *nfo)
#undef print_field
}
-static blob_t url_classify(struct url_info *url, struct sqdb *db)
+static int url_classify(struct url_info *url, struct sqdb *db)
{
unsigned char buffer[1024];
blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
- uint32_t *categories;
cmph_uint32 i = -1, previ;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -181,41 +188,161 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
parent_dns_match:
if (i == -1)
- return BLOB_STR("unknown");
+ return 0; /* no category */
+
+ return indx[i].category;
+}
+
+/*
+ * Return the name of category 'id' as stored in the database's category
+ * section, or BLOB_NULL when the section is missing or 'id' lies outside
+ * the table.
+ */
+static blob_t get_category_name(struct sqdb *db, int id)
+{
+	uint32_t bytes;
+	uint32_t *table = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &bytes);
+
+	/* 'bytes' is the section size, so compare against the byte offset of the entry */
+	if (table != NULL && id >= 0 && id * sizeof(uint32_t) < bytes)
+		return sqdb_get_string_literal(db, table[id]);
+
+	return BLOB_NULL;
+}
+
+/*
+ * Search the database's category section for an entry whose name compares
+ * equal to 'cat' (blob_cmp() == 0). Returns the index of the first match,
+ * or -1 if the section is missing or no entry matches.
+ */
+static int find_category_id(struct sqdb *db, blob_t cat)
+{
+	uint32_t bytes, count, *table;
+	int idx;
+
+	table = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &bytes);
+	if (table == NULL)
+		return -1;
+
+	/* the section is an array of uint32_t entries */
+	count = bytes / sizeof(uint32_t);
+	for (idx = 0; idx < count; idx++) {
+		if (blob_cmp(cat, sqdb_get_string_literal(db, table[idx])) == 0)
+			return idx;
+	}
+
+	return -1;
+}
+
+/*
+ * Answer the request identified by 'tag' without rewriting it: the reply
+ * written to stdout is "<tag>\n".
+ *
+ * NOTE(review): assumes blob_push() refuses to write past the 64-byte
+ * buffer when 'tag' is long - confirm against blob.c.
+ */
+static void send_ok(blob_t tag)
+{
+	static char buffer[64];
+	blob_t b = BLOB_BUF(buffer);
+	ssize_t r;
+
+	blob_push(&b, tag);
+	blob_push(&b, lf);
+	b = blob_pushed(BLOB_BUF(buffer), b);
+
+	/*
+	 * write() may accept only part of the data; keep going until the
+	 * whole reply is out, and give up on error since there is no other
+	 * channel to report it on.
+	 */
+	while (b.len > 0) {
+		r = write(STDOUT_FILENO, b.ptr, b.len);
+		if (r <= 0)
+			break;
+		b.ptr += r;
+		b.len -= r;
+	}
+}
+
+/*
+ * Answer the request identified by 'tag' with a redirect. The reply
+ * written to stdout is
+ *   "<tag> 302:<redirect_page>?REASON=<category name>&USER=<username>\n"
+ * where the category name is looked up from 'categ' in 'db'.
+ *
+ * 'url' is currently unused. The category name and username are copied
+ * into the query string as-is, without URL escaping.
+ */
+static void send_redirect(struct sqdb *db, blob_t tag, blob_t url, int categ, blob_t username)
+{
+	static char buffer[8*1024];
+	blob_t b = BLOB_BUF(buffer);
+	ssize_t r;
+
+	blob_push(&b, tag);
+	blob_push(&b, BLOB_STR(" 302:"));
+	blob_push(&b, redirect_page);
+	blob_push(&b, BLOB_STR("?REASON="));
+	blob_push(&b, get_category_name(db, categ));
+	blob_push(&b, BLOB_STR("&USER="));
+	blob_push(&b, username);
+	blob_push(&b, lf);
+	b = blob_pushed(BLOB_BUF(buffer), b);
+
+	/*
+	 * write() may accept only part of the data; keep going until the
+	 * whole reply is out, and give up on error since there is no other
+	 * channel to report it on.
+	 */
+	while (b.len > 0) {
+		r = write(STDOUT_FILENO, b.ptr, b.len);
+		if (r <= 0)
+			break;
+		b.ptr += r;
+		b.len -= r;
+	}
+}
- categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL);
- return sqdb_get_string_literal(db, categories[indx[i].category]);
+/*
+ * Perform one read() on stdin and answer every complete request line that
+ * is now buffered. Each line is split on spaces into: id, url, client
+ * address/fqdn and username; later fields are ignored. Lines with a
+ * username are classified and answered with send_redirect() when their
+ * category is banned, otherwise with send_ok(). An incomplete trailing
+ * line stays in the static buffer for the next call. End of input
+ * (read() returning 0) clears 'running'.
+ *
+ * NOTE(review): if the buffer fills up without containing a newline,
+ * left.len reaches 0; depending on how blob_is_null() treats that, the
+ * next read() may return 0 and be taken for end of input - confirm.
+ */
+static void read_input(struct sqdb *db)
+{
+	static char buffer[8 * 1024];
+	static blob_t left;	/* unused tail of 'buffer', kept across calls */
+
+	blob_t b, line, id, url, username;
+	struct url_info nfo;
+	int r, category;
+
+	if (blob_is_null(left))
+		left = BLOB_BUF(buffer);
+
+	r = read(STDIN_FILENO, left.ptr, left.len);
+	if (r < 0)
+		return;
+	if (r == 0) {
+		running = 0;
+		return;
+	}
+	left.ptr += r;
+	left.len -= r;
+
+	/* b covers everything buffered so far */
+	b = blob_pushed(BLOB_BUF(buffer), left);
+	do {
+		line = blob_pull_cspn(&b, lf);
+		if (!blob_pull_matching(&b, lf))
+			return;	/* no complete line yet, wait for more data */
+
+		id = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		url = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		blob_pull_cspn(&line, space); /* client addr / fqdn */
+		blob_pull_spn(&line, space);
+		username = blob_pull_cspn(&line, space);
+		/* http method */
+		/* urlgroup */
+		/* myaddr=xxx myport=xxx etc */
+
+		if (!blob_is_null(username)) {
+			/* valid request, handle it */
+			if (url_parse(url, &nfo))
+				category = url_classify(&nfo, db);
+			else
+				category = 0;
+
+			/*
+			 * Only ids 0..63 fit the 64-bit mask; shifting by
+			 * anything else is undefined, and such ids can
+			 * never have been banned anyway.
+			 */
+			if (category >= 0 && category < 64 &&
+			    ((1ULL << category) & banned_categories))
+				send_redirect(db, id, url, category, username);
+			else
+				send_ok(id);
+		}
+
+		if (b.len) {
+			/* source and destination are both inside 'buffer' and may overlap */
+			memmove(buffer, b.ptr, b.len);
+			b.ptr = buffer;
+		}
+		left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len);
+	} while (b.len);
+}
+
+/*
+ * Mark the category named 'c' as banned by setting its bit in
+ * banned_categories. A warning is printed to stderr, and nothing is
+ * banned, when the name is not found in 'db' or when its id does not fit
+ * the 64-bit mask.
+ */
+static void ban_category(struct sqdb *db, blob_t c)
+{
+	int category;
+
+	category = find_category_id(db, c);
+	if (category < 0) {
+		fprintf(stderr, "WARNING: unknown category '%.*s'\n",
+			(int) c.len, c.ptr);
+		return;
+	}
+	if (category >= 64) {
+		/* shifting a 64-bit value by 64 or more is undefined */
+		fprintf(stderr, "WARNING: category '%.*s' has id %d, which does not fit the ban mask\n",
+			(int) c.len, c.ptr, category);
+		return;
+	}
+
+	banned_categories |= 1ULL << category;
}
int main(int argc, char **argv)
{
- const char * const uri[] = {
- "http://sex.com",
- "http://facebook.com:1234/",
- "https://slashdot.org/path/to/me",
- "http://user:pass@paistortuga.com/~mocosoft",
- "http://user:pass@paistortuga.com",
- "user@weather.whenu.speedera.net",
- "zedo1.speedera.net",
- "foo.com/stuff?query;bar#frag",
- "foo.com?query;bar#frag",
- "aapracingandsports.com.au/racing/",
- };
struct sqdb db;
- struct url_info nfo;
- blob_t cat;
- int i;
+ int opt;
sqdb_open(&db, "squark.db");
- for (i = 0; i < ARRAY_SIZE(uri); i++) {
- if (url_parse(BLOB_STR(uri[i]), &nfo)) {
- cat = url_classify(&nfo, &db);
- printf("%s - %.*s -", uri[i], cat.len, cat.ptr);
- url_print(&nfo);
- printf("\n");
- } else {
- printf("%s - BAD_URL\n", uri[i]);
+ /* Command line: -r sets the redirect page, -b bans one category by name. The database is opened first because ban_category() looks names up in it. NOTE(review): the result of sqdb_open() is not checked. */
+ while ((opt = getopt(argc, argv, "r:b:")) != -1) {
+ switch (opt) {
+ case 'r': /* the blob points at optarg, it is not copied */
+ redirect_page = BLOB_STRLEN(optarg);
+ break;
+ case 'b': /* an unknown name only produces a warning */
+ ban_category(&db, BLOB_STRLEN(optarg));
+ break;
+ }
}
+ /* Serve requests until read_input() clears 'running' at end of input. */
+ while (running)
+ read_input(&db);
+
sqdb_close(&db);
}