From 7daf2874969fb6773d480e9776cd8418eeb6353f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timo=20Ter=C3=A4s?= Date: Fri, 13 Aug 2010 13:40:41 +0300 Subject: filter: fix db building issues and implement path component matching Fix the 'has sub domains/paths' hints so they are correct. Matching www as the first domain entry now checks that it won't remove second-level domain names. The filter code now also looks up path components from the db. --- blob.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) (limited to 'blob.c') diff --git a/blob.c b/blob.c index a417a0b..0c28877 100644 --- a/blob.c +++ b/blob.c @@ -111,6 +111,29 @@ void blob_push_hexdump(blob_t *to, blob_t binary) to->len -= binary.len * 2; } +void blob_push_urldecode(blob_t *to, blob_t url) +{ + blob_t b, orig = *to; + + do { + blob_pull_matching(&url, BLOB_STR("/")); + b = blob_pull_cspn(&url, BLOB_STR("/")); + if (blob_is_null(url) && blob_is_null(b)) + break; + + if (blob_is_null(b) || blob_cmp(b, BLOB_STR(".")) == 0) { + /* skip '.' 
or two consecutive / */ + } else if (blob_cmp(b, BLOB_STR("..")) == 0) { + /* go up one path component */ + blob_shrink_tail(to, blob_pushed(orig, b), '/'); + } else { + /* copy decoded; FIXME decode percent encoding */ + blob_push(to, BLOB_STR("/")); + blob_push(to, b); + } + } while (1); +} + blob_t blob_pull(blob_t *b, int len) { blob_t r; @@ -177,6 +200,7 @@ blob_t blob_pull_cspn(blob_t *b, const blob_t reject) return BLOB_PTR_LEN(t.ptr, i); } } + *b = BLOB_NULL; return t; } @@ -186,7 +210,7 @@ blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep) blob_t t = *b; blob_t r; - if (t.ptr <= limits.ptr || t.ptr+t.len > limits.ptr+limits.len) + if (t.ptr < limits.ptr || t.ptr+t.len > limits.ptr+limits.len) return BLOB_NULL; while (t.ptr > limits.ptr && t.ptr[-1] == sep) t.ptr--, t.len++; @@ -200,3 +224,43 @@ blob_t blob_expand_head(blob_t *b, blob_t limits, unsigned char sep) *b = t; return r; } + +blob_t blob_expand_tail(blob_t *b, blob_t limits, unsigned char sep) +{ + blob_t t = *b; + blob_t r; + + if (t.ptr < limits.ptr || t.ptr+t.len > limits.ptr+limits.len) + return BLOB_NULL; + while (t.ptr + t.len < limits.ptr + limits.len && t.ptr[t.len] == sep) + t.len++; + + r.ptr = t.ptr + t.len; + r.len = 0; + while (t.ptr + t.len < limits.ptr + limits.len && t.ptr[t.len] != sep) { + t.len++; + r.len++; + } + *b = t; + return r; +} + +blob_t blob_shrink_tail(blob_t *b, blob_t limits, unsigned char sep) +{ + blob_t t = *b; + blob_t r; + + if (t.ptr <= limits.ptr || t.ptr+t.len > limits.ptr+limits.len) + return BLOB_NULL; + while (t.len && t.ptr[t.len-1] == sep) + t.len--; + + r.ptr = t.ptr + t.len; /* anchor r at the tail: it walks back over the component being removed, not before the head of *b */ + r.len = 0; + while (t.len && t.ptr[t.len-1] != sep) { + t.len--; + r.ptr--, r.len++; + } + *b = t; + return r; +} -- cgit v1.2.3