summary refs log tree commit diff stats
path: root/squark-filter.c
diff options
context:
space:
mode:
Diffstat (limited to 'squark-filter.c')
-rw-r--r-- squark-filter.c 183
1 files changed, 155 insertions, 28 deletions
diff --git a/squark-filter.c b/squark-filter.c
index f3a4aed..c0d66d9 100644
--- a/squark-filter.c
+++ b/squark-filter.c
@@ -1,10 +1,18 @@
#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
#include <cmph.h>
#include "squarkdb.h"
#include "blob.h"
+static int running = 1; /* main loop flag; cleared by read_input() when read() returns 0 */
+static uint64_t banned_categories = 0; /* bit N set => category id N is banned (set in ban_category()) */
+static const blob_t space = BLOB_STR_INIT(" "); /* separator between fields of an input line */
+static const blob_t lf = BLOB_STR_INIT("\n"); /* terminator of input lines and of replies */
+static blob_t redirect_page; /* value of the -r option; first part of the URL written by send_redirect() */
+
struct url_info {
blob_t protocol;
blob_t username;
@@ -106,13 +114,12 @@ static void url_print(struct url_info *nfo)
#undef print_field
}
-static blob_t url_classify(struct url_info *url, struct sqdb *db)
+static int url_classify(struct url_info *url, struct sqdb *db)
{
unsigned char buffer[1024];
blob_t b, key, got, tld, mkey;
void *cmph;
struct sqdb_index_entry *indx;
- uint32_t *categories;
cmph_uint32 i = -1, previ;
cmph = sqdb_section_get(db, SQDB_SECTION_INDEX_MPH, NULL);
@@ -181,41 +188,161 @@ static blob_t url_classify(struct url_info *url, struct sqdb *db)
parent_dns_match:
if (i == -1)
- return BLOB_STR("unknown");
+ return 0; /* no category */
+
+ return indx[i].category;
+}
+
+/*
+ * Return the name of category number `id`.
+ *
+ * The categories section is read as an array of uint32_t and its
+ * reported length is treated as a byte count.  Entry `id` is handed to
+ * sqdb_get_string_literal() to obtain the name.  BLOB_NULL is returned
+ * when the section is unavailable or `id` falls outside of it.
+ */
+static blob_t get_category_name(struct sqdb *db, int id)
+{
+	uint32_t section_len;
+	uint32_t *table;
+
+	table = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &section_len);
+	if (table != NULL && id >= 0 && id * sizeof(uint32_t) < section_len)
+		return sqdb_get_string_literal(db, table[id]);
+
+	return BLOB_NULL;
+}
+
+/*
+ * Find the index of the category whose name equals `cat`.
+ *
+ * Walks the categories section entry by entry and compares each
+ * entry's string literal with `cat` using blob_cmp().  Returns the
+ * index of the first match, or -1 when the section is unavailable or
+ * no entry matches.
+ */
+static int find_category_id(struct sqdb *db, blob_t cat)
+{
+	uint32_t nbytes, count;
+	uint32_t *table;
+	int idx = 0;
+
+	table = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, &nbytes);
+	if (table == NULL)
+		return -1;
+
+	/* the section length is in bytes; convert it to an entry count */
+	count = nbytes / sizeof(uint32_t);
+	while (idx < count) {
+		blob_t name = sqdb_get_string_literal(db, table[idx]);
+
+		if (blob_cmp(cat, name) == 0)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Answer the request identified by `tag` by writing "<tag>\n" to
+ * standard output.
+ */
+static void send_ok(blob_t tag)
+{
+	static char reply[64];
+	blob_t cursor = BLOB_BUF(reply);
+	blob_t out;
+
+	blob_push(&cursor, tag);
+	blob_push(&cursor, lf);
+
+	/* presumably the part of reply[] filled so far -- confirm in blob.h */
+	out = blob_pushed(BLOB_BUF(reply), cursor);
+	write(STDOUT_FILENO, out.ptr, out.len);
+}
+
+/*
+ * Write a redirect reply for request `tag` to standard output:
+ *
+ *   <tag> 302:<redirect_page>?REASON=<category name>&USER=<username>\n
+ *
+ * The category name is looked up with get_category_name(db, categ).
+ * `url` is accepted but does not appear in the reply.
+ */
+static void send_redirect(struct sqdb *db, blob_t tag, blob_t url, int categ, blob_t username)
+{
+	static char reply[8*1024];
+	blob_t cursor = BLOB_BUF(reply);
+	blob_t reason, out;
+
+	reason = get_category_name(db, categ);
+
+	/* <tag> 302:<redirect_page> */
+	blob_push(&cursor, tag);
+	blob_push(&cursor, BLOB_STR(" 302:"));
+	blob_push(&cursor, redirect_page);
+
+	/* ?REASON=<category name>&USER=<username> */
+	blob_push(&cursor, BLOB_STR("?REASON="));
+	blob_push(&cursor, reason);
+	blob_push(&cursor, BLOB_STR("&USER="));
+	blob_push(&cursor, username);
+	blob_push(&cursor, lf);
+
+	out = blob_pushed(BLOB_BUF(reply), cursor);
+	write(STDOUT_FILENO, out.ptr, out.len);
+}
- categories = sqdb_section_get(db, SQDB_SECTION_CATEGORIES, NULL);
- return sqdb_get_string_literal(db, categories[indx[i].category]);
+/*
+ * Read whatever is available on stdin and answer every complete
+ * request line found in it.
+ *
+ * Input accumulates in a static 8 KiB buffer; `left` tracks the unused
+ * tail of that buffer between calls, so a line split across two
+ * read()s is completed by a later call.  Each line is split on spaces
+ * as "<id> <url> <client> <username> ...".  Lines that yield a
+ * non-null username field are answered with send_redirect() when the
+ * URL's category is banned and with send_ok() otherwise.  A URL that
+ * url_parse() rejects is treated as category 0.
+ *
+ * A read() result of 0 clears `running`; a negative result is ignored
+ * and the function simply returns.
+ */
+static void read_input(struct sqdb *db)
+{
+	static char buffer[8 * 1024];
+	static blob_t left;
+
+	blob_t b, line, id, url, username;
+	struct url_info nfo;
+	int r, category;
+
+	if (blob_is_null(left))
+		left = BLOB_BUF(buffer);
+
+	/*
+	 * NOTE(review): when a line longer than the buffer arrives,
+	 * left.len reaches 0, read() is asked for 0 bytes and its 0 return
+	 * is taken as end of input -- confirm whether that is intended.
+	 */
+	r = read(STDIN_FILENO, left.ptr, left.len);
+	if (r < 0)
+		return;
+	if (r == 0) {
+		running = 0;
+		return;
+	}
+	left.ptr += r;
+	left.len -= r;
+
+	b = blob_pushed(BLOB_BUF(buffer), left);
+	do {
+		line = blob_pull_cspn(&b, lf);
+		if (!blob_pull_matching(&b, lf))
+			return; /* no terminator yet: wait for more input */
+
+		id = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		url = blob_pull_cspn(&line, space);
+		blob_pull_spn(&line, space);
+		blob_pull_cspn(&line, space); /* client addr / fqdn */
+		blob_pull_spn(&line, space);
+		username = blob_pull_cspn(&line, space);
+		/* http method */
+		/* urlgroup */
+		/* myaddr=xxx myport=xxx etc */
+
+		if (!blob_is_null(username)) {
+			/* valid request, handle it */
+			if (url_parse(url, &nfo))
+				category = url_classify(&nfo, db);
+			else
+				category = 0;
+
+			/*
+			 * Only ids that fit in the banned_categories mask can be
+			 * banned; shifting by a negative count or by the mask
+			 * width or more is undefined, so test the range first.
+			 */
+			if (category >= 0 &&
+			    category < (int) (8 * sizeof(banned_categories)) &&
+			    ((1ULL << category) & banned_categories))
+				send_redirect(db, id, url, category, username);
+			else
+				send_ok(id);
+		}
+
+		if (b.len) {
+			/*
+			 * Source and destination both lie inside buffer[] and
+			 * may overlap, which memcpy() does not allow.
+			 */
+			memmove(buffer, b.ptr, b.len);
+			b.ptr = buffer;
+		}
+		left = BLOB_PTR_LEN(buffer + b.len, sizeof(buffer) - b.len);
+	} while (b.len);
+}
+
+/*
+ * Mark the category named `c` as banned by setting its bit in
+ * banned_categories.
+ *
+ * A name that find_category_id() cannot resolve, or a category whose
+ * id does not fit in the bit mask, only produces a warning on stderr
+ * and leaves the mask untouched.
+ */
+static void ban_category(struct sqdb *db, blob_t c)
+{
+	int category;
+
+	category = find_category_id(db, c);
+	if (category < 0) {
+		/* the precision argument of %.*s must be an int */
+		fprintf(stderr, "WARNING: unknown category '%.*s'\n",
+			(int) c.len, c.ptr);
+		return;
+	}
+
+	/* shifting by the mask width or more is undefined behaviour */
+	if (category >= (int) (8 * sizeof(banned_categories))) {
+		fprintf(stderr, "WARNING: category '%.*s' (id %d) does not fit in the ban mask\n",
+			(int) c.len, c.ptr, category);
+		return;
+	}
+
+	banned_categories |= 1ULL << category;
}
int main(int argc, char **argv)
{
- const char * const uri[] = {
- "http://sex.com",
- "http://facebook.com:1234/",
- "https://slashdot.org/path/to/me",
- "http://user:pass@paistortuga.com/~mocosoft",
- "http://user:pass@paistortuga.com",
- "user@weather.whenu.speedera.net",
- "zedo1.speedera.net",
- "foo.com/stuff?query;bar#frag",
- "foo.com?query;bar#frag",
- "aapracingandsports.com.au/racing/",
- };
struct sqdb db;
- struct url_info nfo;
- blob_t cat;
- int i;
+ int opt;
sqdb_open(&db, "squark.db");
- for (i = 0; i < ARRAY_SIZE(uri); i++) {
- if (url_parse(BLOB_STR(uri[i]), &nfo)) {
- cat = url_classify(&nfo, &db);
- printf("%s - %.*s -", uri[i], cat.len, cat.ptr);
- url_print(&nfo);
- printf("\n");
- } else {
- printf("%s - BAD_URL\n", uri[i]);
+
+ while ((opt = getopt(argc, argv, "r:b:")) != -1) { /* -r <page>: redirect target, -b <name>: category to ban */
+ switch (opt) {
+ case 'r':
+ redirect_page = BLOB_STRLEN(optarg); /* used by send_redirect() as the start of the redirect URL */
+ break;
+ case 'b':
+ ban_category(&db, BLOB_STRLEN(optarg)); /* needs the database, which is opened above */
+ break;
}
}
+
+ while (running) /* read_input() clears `running` when read() returns 0 */
+ read_input(&db);
+
sqdb_close(&db);
}