summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--blob.c16
-rw-r--r--blob.h5
-rw-r--r--squark-filter.c183
3 files changed, 175 insertions, 29 deletions
diff --git a/blob.c b/blob.c
index 0c28877..5a3b226 100644
--- a/blob.c
+++ b/blob.c
@@ -189,6 +189,22 @@ unsigned int blob_pull_uint(blob_t *b, int radix)
return val;
}
+/*
+ * Pull the longest leading run of bytes from *b in which every byte occurs
+ * in the set `reject` (despite its name, this parameter lists the bytes
+ * that are consumed).
+ *
+ * Returns the run that was pulled. *b is advanced to the first byte that is
+ * not in the set. If every byte of *b is in the set (including when *b is
+ * empty), *b becomes BLOB_NULL and the whole original blob is returned.
+ */
+blob_t blob_pull_spn(blob_t *b, const blob_t reject)
+{
+	blob_t src = *b;
+	int n = 0;
+
+	/* count the leading bytes that are members of the set */
+	while (n < src.len &&
+	       memchr(reject.ptr, src.ptr[n], reject.len) != NULL)
+		n++;
+
+	if (!(n < src.len)) {
+		/* ran off the end: nothing is left in *b */
+		*b = BLOB_NULL;
+		return src;
+	}
+
+	*b = BLOB_PTR_LEN(src.ptr + n, src.len - n);
+	return BLOB_PTR_LEN(src.ptr, n);
+}
+
blob_t blob_pull_cspn(blob_t *b, const blob_t reject)
{
blob_t t = *b;
diff --git a/blob.h b/blob.h
index 3d065ed..dba2f97 100644
--- a/blob.h
+++ b/blob.h
@@ -25,7 +25,9 @@ typedef struct blob {
#define BLOB_PTR_LEN(ptr,len) (blob_t){(void*)(ptr), (len)}
#define BLOB_PTR_PTR(beg,end) BLOB_PTR_LEN((beg),(end)-(beg)+1)
#define BLOB_BUF(buf) (blob_t){(void*)(buf), sizeof(buf)}
-#define BLOB_STR(str) (blob_t){(char*)(str), strlen(str)}
+#define BLOB_STRLEN(str) (blob_t){(str), strlen(str)} /* length measured at run time with strlen(); str is evaluated twice */
+#define BLOB_STR_INIT(str) {(str), sizeof(str)-1} /* brace initializer (usable for static objects); length is sizeof(str)-1, so str must be a string literal or char array, not a pointer */
+#define BLOB_STR(str) (blob_t) BLOB_STR_INIT(str) /* compound-literal form of BLOB_STR_INIT; same restriction on str */
extern const blob_t BLOB_NULL;
@@ -46,6 +48,7 @@ blob_t blob_pull(blob_t *b, int len);
void blob_pull_skip(blob_t *b, int len);
int blob_pull_matching(blob_t *b, blob_t e);
unsigned int blob_pull_uint(blob_t *b, int radix);
+blob_t blob_pull_spn(blob_t *b, const blob_t spn);
blob_t blob_pull_cspn(blob_t *b, const blob_t cspn);
blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep);
diff --git a/squark-filter.c b/squark-filter.c
index f3a4aed..c0d66d9 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -1,10 +1,18 @@
#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
#include <cmph.h>
#include "squarkdb.h"
#include "blob.h"
+static int running = 1; /* main loop flag; read_input() clears it when read() returns 0 */
+static uint64_t banned_categories = 0; /* bit N set => requests in category id N are redirected */
+static const blob_t space = BLOB_STR_INIT(" "); /* field separator within a request line */
+static const blob_t lf = BLOB_STR_INIT("\n"); /* terminator of request and reply lines */
+static blob_t redirect_page; /* redirect target given with -r; references optarg, not a copy */
+
struct url_info {
blob_t protocol;
blob_t username;
@@ -106,13 +114,12 @@ static void url_print(struct url_info *nfo)
#undef print_field
}
-static blob_t url_classify(struct url_info *url, struct sqdb *db)
+static int url_classify(struct url_info *url, struct sqdb *db)
{
unsigned char buffer[1024];
blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
- uint32_t *categories;
cmph_uint32 i = -1, previ;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -181,41 +188,161 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
parent_dns_match:
if (i == -1)
- return BLOB_STR("unknown");
+ return 0; /* no category */
+
+ return indx[i].category;
+}
+
+/*
+ * Look up the name of category `id` in the database's category section.
+ *
+ * Returns the string literal referenced by entry `id`, or BLOB_NULL when
+ * the section is missing, `id` is negative, or `id` lies beyond the end of
+ * the section.
+ */
+static blob_t get_category_name(struct sqdb *db, int id)
+{
+	uint32_t table_bytes;
+	uint32_t *table;
+
+	table = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &table_bytes);
+
+	/* table_bytes is a byte count, so compare against the byte offset */
+	if (table != NULL && id >= 0 && id * sizeof(uint32_t) < table_bytes)
+		return sqdb_get_string_literal(db, table[id]);
+
+	return BLOB_NULL;
+}
+
+/*
+ * Find the id of the category whose name equals `cat`.
+ *
+ * Walks the category section in order and returns the index of the first
+ * entry whose string literal compares equal to `cat`; returns -1 when the
+ * section is missing or no entry matches.
+ */
+static int find_category_id(struct sqdb *db, blob_t cat)
+{
+	uint32_t nbytes, count;
+	uint32_t *names;
+	int id = 0;
+
+	names = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &nbytes);
+	if (names == NULL)
+		return -1;
+
+	/* the section length is reported in bytes; convert to entries */
+	count = nbytes / sizeof(uint32_t);
+	while (id < count) {
+		if (blob_cmp(cat, sqdb_get_string_literal(db, names[id])) == 0)
+			return id;
+		id++;
+	}
+
+	return -1;
+}
+
+/*
+ * Answer request `tag` without rewriting it: emits "<tag>\n" on stdout.
+ *
+ * write() may accept fewer bytes than requested, so the reply is written
+ * in a loop until it is complete. On a write error the remainder of the
+ * reply is dropped, as there is no channel left to report it on.
+ */
+static void send_ok(blob_t tag)
+{
+	static char buffer[64];
+	blob_t b = BLOB_BUF(buffer);
+
+	blob_push(&b, tag);
+	blob_push(&b, lf);
+	/* NOTE(review): presumably yields the bytes pushed so far - confirm in blob.c */
+	b = blob_pushed(BLOB_BUF(buffer), b);
+
+	while (b.len > 0) {
+		ssize_t n = write(STDOUT_FILENO, b.ptr, b.len);
+
+		if (n <= 0)
+			break;
+		b.ptr += n;
+		b.len -= n;
+	}
+}
+
+/*
+ * Answer request `tag` with a redirect to the configured block page.
+ *
+ * Emits on stdout:
+ *   "<tag> 302:<redirect_page>?REASON=<category name>&USER=<username>\n"
+ * where the category name is looked up from `categ` in `db`.
+ *
+ * `url` is accepted for the caller's convenience but is not used here.
+ * NOTE(review): the category name and username are inserted verbatim, with
+ * no URL escaping - confirm the values are already safe for a query string.
+ *
+ * write() may accept fewer bytes than requested, so the reply is written
+ * in a loop until it is complete. On a write error the remainder of the
+ * reply is dropped.
+ */
+static void send_redirect(struct sqdb *db, blob_t tag, blob_t url, int categ, blob_t username)
+{
+	static char buffer[8*1024];
+	blob_t b = BLOB_BUF(buffer);
+
+	(void) url;
+
+	blob_push(&b, tag);
+	blob_push(&b, BLOB_STR(" 302:"));
+	blob_push(&b, redirect_page);
+	blob_push(&b, BLOB_STR("?REASON="));
+	blob_push(&b, get_category_name(db, categ));
+	blob_push(&b, BLOB_STR("&USER="));
+	blob_push(&b, username);
+	blob_push(&b, lf);
+	/* NOTE(review): presumably yields the bytes pushed so far - confirm in blob.c */
+	b = blob_pushed(BLOB_BUF(buffer), b);
+
+	while (b.len > 0) {
+		ssize_t n = write(STDOUT_FILENO, b.ptr, b.len);
+
+		if (n <= 0)
+			break;
+		b.ptr += n;
+		b.len -= n;
+	}
+}
- categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL);
- return sqdb_get_string_literal(db, categories[indx[i].category]);
+/*
+ * Read one chunk of requests from stdin and answer every complete line.
+ *
+ * Input accumulates in a static buffer across calls; `left` tracks the
+ * unused tail of that buffer. Each complete line is split on spaces into
+ * id, url, client address and username. A line that yields a username is
+ * classified and answered with send_redirect() when its category is
+ * banned, otherwise with send_ok(). An incomplete trailing line is kept
+ * for the next call.
+ *
+ * Clears `running` when read() returns 0. A failing read() is ignored, so
+ * the caller's loop simply retries.
+ */
+static void read_input(struct sqdb *db)
+{
+	static char buffer[8 * 1024];
+	static blob_t left;
+
+	/* only ids below this fit in the banned_categories mask */
+	const int max_categories = (int) (8 * sizeof(banned_categories));
+	blob_t b, line, id, url, username;
+	struct url_info nfo;
+	int r, category;
+
+	if (blob_is_null(left))
+		left = BLOB_BUF(buffer);
+
+	r = read(STDIN_FILENO, left.ptr, left.len);
+	if (r < 0)
+		return;
+	if (r == 0) {
+		running = 0;
+		return;
+	}
+	left.ptr += r;
+	left.len -= r;
+
+	b = blob_pushed(BLOB_BUF(buffer), left);
+	do {
+		line = blob_pull_cspn(&b, lf);
+		if (!blob_pull_matching(&b, lf))
+			return; /* no terminator yet: wait for more input */
+
+		id = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		url = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		blob_pull_cspn(&line, space); /* client addr / fqdn */
+		blob_pull_spn(&line, space);
+		username = blob_pull_cspn(&line, space);
+		/* http method */
+		/* urlgroup */
+		/* myaddr=xxx myport=xxx etc */
+
+		if (!blob_is_null(username)) {
+			/* valid request, handle it */
+			if (url_parse(url, &nfo))
+				category = url_classify(&nfo, db);
+			else
+				category = 0;
+
+			/*
+			 * Shifting by a negative amount or by the width of
+			 * the mask is undefined, so ids that cannot be in
+			 * the mask are treated as not banned.
+			 */
+			if (category >= 0 && category < max_categories &&
+			    ((1ULL << category) & banned_categories))
+				send_redirect(db, id, url, category, username);
+			else
+				send_ok(id);
+		}
+
+		if (b.len) {
+			/*
+			 * The unread tail lives in this same buffer, so the
+			 * regions may overlap: memmove, not memcpy.
+			 */
+			memmove(buffer, b.ptr, b.len);
+			b.ptr = buffer;
+		}
+		left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len);
+	} while (b.len);
+}
+
+/*
+ * Mark the category named `c` as banned.
+ *
+ * Resolves the name through find_category_id() and sets the matching bit
+ * in banned_categories. Prints a warning to stderr and changes nothing
+ * when the name is unknown, or when its id does not fit in the mask
+ * (shifting by the mask width or more would be undefined).
+ */
+static void ban_category(struct sqdb *db, blob_t c)
+{
+	const int max_categories = (int) (8 * sizeof(banned_categories));
+	int category;
+
+	category = find_category_id(db, c);
+	if (category < 0) {
+		/* the cast keeps the %.*s precision argument an int */
+		fprintf(stderr, "WARNING: unknown category '%.*s'\n",
+			(int) c.len, c.ptr);
+		return;
+	}
+	if (category >= max_categories) {
+		fprintf(stderr, "WARNING: category '%.*s' (id %d) cannot be banned, only ids below %d are supported\n",
+			(int) c.len, c.ptr, category, max_categories);
+		return;
+	}
+
+	banned_categories |= 1ULL << category;
+}
int main(int argc, char **argv)
{
- const char * const uri[] = {
- "http://sex.com",
- "http://facebook.com:1234/",
- "https://slashdot.org/path/to/me",
- "http://user:pass@paistortuga.com/~mocosoft",
- "http://user:pass@paistortuga.com",
- "user@weather.whenu.speedera.net",
- "zedo1.speedera.net",
- "foo.com/stuff?query;bar#frag",
- "foo.com?query;bar#frag",
- "aapracingandsports.com.au/racing/",
- };
struct sqdb db;
- struct url_info nfo;
- blob_t cat;
- int i;
+ int opt; /* option character returned by getopt() */
sqdb_open(&db, "squark.db");
- for (i = 0; i < ARRAY_SIZE(uri); i++) {
- if (url_parse(BLOB_STR(uri[i]), &nfo)) {
- cat = url_classify(&nfo, &db);
- printf("%s - %.*s -", uri[i], cat.len, cat.ptr);
- url_print(&nfo);
- printf("\n");
- } else {
- printf("%s - BAD_URL\n", uri[i]);
+
+ while ((opt = getopt(argc, argv, "r:b:")) != -1) { /* parsed after sqdb_open() because -b resolves names through the database */
+ switch (opt) {
+ case 'r': /* -r <page>: redirect target for banned requests */
+ redirect_page = BLOB_STRLEN(optarg); /* keeps a pointer to optarg, no copy is made */
+ break;
+ case 'b': /* -b <category>: ban a category by name */
+ ban_category(&db, BLOB_STRLEN(optarg)); /* sets the category's bit in banned_categories, or warns if the name is unknown */
+ break;
}
}
+
+ while (running) /* read_input() clears `running` once read() reports end of input */
+ read_input(&db);
+
sqdb_close(&db);
}