separate adblock and surf-specific code - surf-adblock - Surf adblock web extension
(HTM) git clone git://git.codemadness.org/surf-adblock
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 08e747efa80a44603f80db0fdacb3f63ad210b8e
(DIR) parent c4841f33f64cd77b30def9c0b11d1ac9ece7f821
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 4 Jun 2017 14:15:27 +0200
separate adblock and surf-specific code
This will also be useful later on for writing test cases or for re-using
the code in a daemon.
Diffstat:
M Makefile | 2 +-
M TODO | 2 ++
A adblock.c | 956 +++++++++++++++++++++++++++++++
M surf-adblock.c | 944 +------------------------------
4 files changed, 973 insertions(+), 931 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
include config.mk
-SRC = surf-adblock.c
+SRC = surf-adblock.c adblock.c
OBJ = ${SRC:.c=.lo}
all: surf-adblock.la
(DIR) diff --git a/TODO b/TODO
@@ -1,3 +1,5 @@
+- optimize towupper for fnmatch? check < 128, see musl optimization.
+
- fix blocking of : ||ads.somesite.com^
- fix tweakers.net popup / rule.
(DIR) diff --git a/adblock.c b/adblock.c
@@ -0,0 +1,956 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include "adblock.h"
+
+/* String data / memory pool: a growable byte buffer that string_append()
+ * keeps NUL-terminated. A zeroed struct is a valid empty string. */
+typedef struct string {
+ char *data; /* data, NULL as long as nothing was appended */
+ size_t datasz; /* allocated size */
+ size_t len; /* current string length (excluding the NUL byte) */
+} String;
+
+/* One entry of a rule's singly linked domain list, see parsedomains(). */
+struct filterdomain {
+ char *domain; /* heap-allocated domain name */
+ int inverse; /* non-zero if the domain was prefixed with '~' */
+ struct filterdomain *next;
+};
+
+/* A single parsed filter line, see parserule(). Rules form a singly linked
+ * list headed by the file-scope `rules` pointer. */
+struct filterrule {
+ /* type: match mask, must be at least 32-bit, see FilterType enum */
+ unsigned long block;
+ int matchbegin; /* set by parserule() when the rule starts with "||" */
+ int matchend; /* set by parserule() when the rule starts with a single '|' */
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
+ int isexception;
+ char *css; /* if non-NULL is CSS rule / hide element rule */
+ char *uri; /* URI pattern of a request rule, NULL for CSS rules */
+ struct filterdomain *domains; /* domains the rule is limited to, or NULL */
+ struct filterrule *next;
+};
+
+/* FilterType: bit flags that are combined in filterrule.block and looked up
+ * through filtertype.type. */
+enum {
+ FilterTypeScript = 1 << 0,
+ FilterTypeImage = 1 << 1,
+ FilterTypeCSS = 1 << 2,
+ FilterTypeObject = 1 << 3,
+ FilterTypeXHR = 1 << 4,
+ FilterTypeObjectSub = 1 << 5,
+ FilterTypeSubDoc = 1 << 6,
+ FilterTypePing = 1 << 7,
+ FilterTypeDocument = 1 << 8,
+ FilterTypeElemHide = 1 << 9,
+ FilterTypeOther = 1 << 10,
+ FilterTypeGenericHide = 1 << 11,
+ FilterTypeGenericBlock = 1 << 12,
+ /* when set, matchrule() compares the URI case-sensitively */
+ FilterTypeMatchCase = 1 << 13,
+};
+
+/* Description of one rule option (the part after '$'), see filtertypes[]. */
+struct filtertype {
+ /* `type` must be at least 32-bit, see FilterType enum */
+ unsigned long type;
+ char *name; /* option name, used as the bsearch() key in parserule() */
+ size_t namelen; /* length of `name` without the NUL byte, see STRP() */
+ /* NOTE(review): the three flags below are not read by the code in this
+  * file; their meaning is presumably what the names say - confirm. */
+ int allowinverse;
+ int allownormal;
+ int onlyexception;
+ /* optional handler, called by parserule() with the text after '=' */
+ int (*fn)(struct filterrule *, char *);
+};
+
+/* forward declaration: referenced by the filtertypes[] table below */
+static int parsedomainsoption(struct filterrule *, char *);
+
+/* expands a string literal to two initializers: the string and its length */
+#define STRP(s) s,sizeof(s)-1
+
+/* Table of known rule options. parserule() searches it with bsearch() and
+ * filtertype_cmp() (strcmp() on the name), so the entries must stay sorted
+ * by name. */
+static struct filtertype filtertypes[] = {
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
+ * requests for now */
+ { 0, STRP("collapse"), 1, 1, 0, NULL },
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
+ { 0, STRP("domain"), 0, 1, 0,
+ /* domain=... */ &parsedomainsoption },
+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
+ { 0, STRP("font"), 1, 1, 0, NULL },
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
+ { 0, STRP("media"), 1, 1, 0, NULL },
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
+ { 0, STRP("popup"), 1, 1, 0, NULL },
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
+ { 0, STRP("third-party"), 1, 1, 0, NULL },
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
+ /* NOTE: site-key not supported */
+};
+
+/* CSS built by init() from the hide rules that have no domain list */
+static String globalcss;
+/* head of the list of all loaded rules, filled by loadrules() */
+static struct filterrule *rules;
+
+/* Print a printf-style warning to stderr, prefixed with the program name.
+ * No newline is added: the format string has to contain it. */
+static void
+weprintf(const char *fmt, ...)
+{
+	va_list args;
+
+	fputs("surf-adblock: ", stderr);
+
+	va_start(args, fmt);
+	vfprintf(stderr, fmt, args);
+	va_end(args);
+}
+
+/* calloc() wrapper: prints a warning on failure and returns NULL then. */
+static void *
+wecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem == NULL)
+		weprintf("calloc: %s\n", strerror(errno));
+
+	return mem;
+}
+
+/* strndup() wrapper: prints a warning on failure and returns NULL then. */
+static char *
+westrndup(const char *s, size_t n)
+{
+	char *copy = strndup(s, n);
+
+	if (copy == NULL)
+		weprintf("strndup: %s\n", strerror(errno));
+	return copy;
+}
+
+/* strdup() wrapper: prints a warning on failure and returns NULL then. */
+static char *
+westrdup(const char *s)
+{
+	char *copy = strdup(s);
+
+	if (copy == NULL)
+		weprintf("strdup: %s\n", strerror(errno));
+
+	return copy;
+}
+
+/*
+ * Release everything that init() loaded: the global CSS buffer and all
+ * rules with their domain lists.
+ *
+ * The globals are left in their initial (empty) state, so that a later
+ * getglobalcss() returns NULL instead of a pointer to freed memory and
+ * calling cleanup() twice is harmless.
+ */
+void
+cleanup(void)
+{
+	struct filterrule *r;
+	struct filterdomain *d;
+
+	free(globalcss.data);
+	/* also reset len/datasz: string_append() relies on them */
+	memset(&globalcss, 0, sizeof(globalcss));
+
+	/* always unlink the head before freeing it */
+	while ((r = rules)) {
+		while ((d = r->domains)) {
+			r->domains = d->next;
+			free(d->domain);
+			free(d);
+		}
+		free(r->css);
+		free(r->uri);
+		rules = r->next;
+		free(r);
+	}
+}
+
+/*
+ * Grow the buffer of `s` to the smallest power of two (at least 64) that
+ * is greater than `newsz`.
+ *
+ * Returns the allocated size afterwards. On failure the buffer is left
+ * untouched and the old size is returned, so the caller detects the error
+ * by comparing the result against the size it needs.
+ */
+static size_t
+string_buffer_realloc(String *s, size_t newsz)
+{
+	char *tmp;
+	size_t allocsz;
+
+	for (allocsz = 64; allocsz <= newsz; allocsz *= 2) {
+		/* doubling once more would wrap around to 0 and loop forever */
+		if (allocsz > (size_t)-1 / 2) {
+			weprintf("realloc: requested size too large\n");
+			return s->datasz;
+		}
+	}
+	if (!(tmp = realloc(s->data, allocsz))) {
+		weprintf("realloc: %s\n", strerror(errno));
+	} else {
+		s->data = tmp;
+		s->datasz = allocsz;
+	}
+
+	return s->datasz;
+}
+
+/*
+ * Append `len` bytes of `data` to `s` and keep the result NUL-terminated.
+ *
+ * Returns `len` on success and 0 on failure (allocation failure or size
+ * overflow); `s` is unchanged on failure. Appending 0 bytes is a no-op
+ * that returns 0, so callers compare the result with `< len`.
+ */
+static size_t
+string_append(String *s, const char *data, size_t len)
+{
+	size_t newlen;
+
+	if (!len)
+		return len;
+
+	/* s->len + len + 1 (for the NUL byte) must not wrap around */
+	if (len >= (size_t)-1 - s->len)
+		return 0;
+
+	newlen = s->len + len;
+	/* check if allocation is necessary, don't shrink buffer,
+	 * should be more than datasz of course. */
+	if (newlen >= s->datasz) {
+		if (string_buffer_realloc(s, newlen + 1) <= newlen)
+			return 0;
+	}
+	memcpy(s->data + s->len, data, len);
+	s->len = newlen;
+	s->data[s->len] = '\0';
+	return len;
+}
+
+/* Token values returned by pat_next() in addition to plain characters.
+ * They are zero or negative so they never collide with a character.
+ * NOTE(review): BRACKET is not referenced by the code visible here, it
+ * looks like a leftover from the fnmatch() code this is based on. */
+#define END 0
+#define UNMATCHABLE -2
+#define BRACKET -3
+#define CARET -4
+#define STAR -5
+
+/*
+ * Decode the next character of `str`, looking at no more than `n` bytes.
+ *
+ * The number of bytes consumed is stored in `*step`. Returns the character
+ * value, 0 if `n` is 0 (with a step of 0), or -1 for an invalid multibyte
+ * sequence (with a step of 1, so the caller can skip the byte).
+ */
+static int
+str_next(const char *str, size_t n, size_t *step)
+{
+ if (!n) {
+ *step = 0;
+ return 0;
+ }
+ /* comparing against an unsigned constant converts the byte to unsigned,
+  * so bytes with the high bit set take this path even when plain char is
+  * signed */
+ if (str[0] >= 128U) {
+ wchar_t wc;
+ int k = mbtowc(&wc, str, n);
+ if (k<0) {
+ *step = 1;
+ return -1;
+ }
+ *step = k;
+ return wc;
+ }
+ *step = 1;
+
+ return str[0];
+}
+
+/*
+ * Decode the next token of the pattern `pat`, which has `m` bytes left.
+ *
+ * The number of bytes consumed is stored in `*step`. Returns END at the
+ * end of the pattern, STAR for '*', CARET for '^', UNMATCHABLE for an
+ * invalid multibyte sequence, or the character value otherwise.
+ * A backslash followed by another byte makes that byte literal, so an
+ * escaped '*' or '^' is returned as a plain character.
+ */
+static int
+pat_next(const char *pat, size_t m, size_t *step)
+{
+ int esc = 0;
+
+ if (!m || !*pat) {
+ *step = 0;
+ return END;
+ }
+ *step = 1;
+ if (pat[0]=='\\' && pat[1]) {
+ *step = 2;
+ pat++;
+ esc = 1;
+ goto escaped;
+ }
+ if (pat[0]=='^')
+ return CARET;
+ if (pat[0] == '*')
+ return STAR;
+escaped:
+ /* see str_next(): true for bytes with the high bit set */
+ if (pat[0] >= 128U) {
+ wchar_t wc;
+ /* NOTE(review): after an escape `pat` was advanced by one but `m`
+  * was not reduced, so the limit passed here looks one too large -
+  * confirm. */
+ int k = mbtowc(&wc, pat, m);
+ if (k<0) {
+ *step = 0;
+ return UNMATCHABLE;
+ }
+ *step = k + esc;
+ return wc;
+ }
+ return pat[0];
+}
+
+/* Return the other-case variant of `k`: the upper-case form when there is
+ * one that differs from `k`, otherwise the lower-case form. */
+static int
+casefold(int k)
+{
+	int upper = towupper(k);
+
+	if (upper != k)
+		return upper;
+	return towlower(k);
+}
+
+/* match() based on musl-libc fnmatch:
+ https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c
+
+ Match the string `str` against the pattern `pat`. Returns 0 when the
+ whole string matches and 1 when it does not (the same sense as
+ fnmatch()). When `fcase` is non-zero a character also matches its
+ other-case variant, see casefold().
+
+ Pattern tokens come from pat_next(): '*' matches any run of characters,
+ a backslash escapes the next character, everything else must be equal.
+
+ The work is done in three phases: the part before the first '*' is
+ compared directly, then the part after the last '*' is compared against
+ the end of the string, and finally the components in between are
+ searched for from left to right.
+
+ NOTE(review): CARET is never equal to a string character in the
+ comparisons below and the first loop explicitly fails on '?', '/' or the
+ end of the string, so '^' does not seem to work as a separator
+ placeholder yet (see the TODO below) - confirm. */
+static int
+match(const char *pat, const char *str, int fcase)
+{
+ size_t m = -1, n = -1;
+ const char *p, *ptail, *endpat;
+ const char *s, *stail, *endstr;
+ size_t pinc, sinc, tailcnt=0;
+ int c, k, kfold;
+
+ /* Phase 1: match everything up to the first * */
+ for (;;) {
+ switch ((c = pat_next(pat, m, &pinc))) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ pat++;
+ m--;
+ break;
+ default:
+ k = str_next(str, n, &sinc);
+ /* TODO: write a test-case */
+ if (c == CARET && (k == '?' || k == '/' || k <= 0))
+ return 1;
+ if (k <= 0)
+ return (c==END) ? 0 : 1;
+ str += sinc;
+ n -= sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ pat+=pinc;
+ m-=pinc;
+ continue;
+ }
+ break;
+ }
+
+ /* Compute real pat length if it was initially unknown/-1 */
+ m = strnlen(pat, m);
+ endpat = pat + m;
+
+ /* Find the last * in pat and count chars needed after it */
+ for (p=ptail=pat; p<endpat; p+=pinc) {
+ switch (pat_next(p, endpat-p, &pinc)) {
+ case UNMATCHABLE:
+ return 1;
+ case STAR:
+ tailcnt=0;
+ ptail = p+1;
+ break;
+ default:
+ tailcnt++;
+ break;
+ }
+ }
+
+ /* Past this point we need not check for UNMATCHABLE in pat,
+ * because all of pat has already been parsed once. */
+
+ /* Compute real str length if it was initially unknown/-1 */
+ n = strnlen(str, n);
+ endstr = str + n;
+ if (n < tailcnt) return 1;
+
+ /* Find the final tailcnt chars of str, accounting for UTF-8.
+ * On illegal sequences we may get it wrong, but in that case
+ * we necessarily have a matching failure anyway. */
+ for (s=endstr; s>str && tailcnt; tailcnt--) {
+ if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+ else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+ }
+ if (tailcnt) return 1;
+ stail = s;
+
+ /* Check that the pat and str tails match */
+ p = ptail;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+ if (c != END) return 1;
+ break;
+ }
+ s += sinc;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ return 1;
+ }
+
+ /* We're all done with the tails now, so throw them out */
+ endstr = stail;
+ endpat = ptail;
+
+ /* Match pattern components until there are none left */
+ while (pat<endpat) {
+ p = pat;
+ s = str;
+ for (;;) {
+ c = pat_next(p, endpat-p, &pinc);
+ p += pinc;
+ /* Encountering * completes/commits a component */
+ if (c == STAR) {
+ pat = p;
+ str = s;
+ break;
+ }
+ k = str_next(s, endstr-s, &sinc);
+ if (!k)
+ return 1;
+ kfold = fcase ? casefold(k) : k;
+ if (k != c && kfold != c)
+ break;
+ s += sinc;
+ }
+ if (c == STAR) continue;
+ /* If we failed, advance str, by 1 char if it's a valid
+ * char, or past all invalid bytes otherwise. */
+ k = str_next(str, endstr-str, &sinc);
+ if (k > 0) str += sinc;
+ else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+ }
+
+ return 0;
+}
+
+/*
+ * Parse a list of domains separated by `sep` into a newly allocated list.
+ *
+ * Used for the domain=... option (separator '|') and for the domain prefix
+ * of element hiding rules (separator ','). A domain prefixed with '~' is
+ * stored with `inverse` set. Parsing stops at the first empty entry.
+ *
+ * The list is stored in `*head`. Returns 1 if at least one domain was
+ * parsed, 0 if there was none and -1 if an allocation failed; in that case
+ * everything allocated so far is freed again and `*head` is NULL.
+ */
+static int
+parsedomains(const char *s, int sep, struct filterdomain **head)
+{
+	struct filterdomain *d, *last = NULL;
+	const char *p;
+	int inverse;
+
+	*head = NULL;
+	do {
+		p = NULL;
+		inverse = 0;
+		if (*s == '~') {
+			inverse = 1;
+			s++;
+		}
+		if (!*s || *s == sep)
+			break;
+
+		if (!(d = wecalloc(1, sizeof(*d))))
+			goto err;
+		if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
+			d->domain = westrndup(s, p - s);
+			s = p + 1;
+		} else {
+			d->domain = westrdup(s);
+		}
+		if (!d->domain) {
+			/* not linked into the list yet: free it here */
+			free(d);
+			goto err;
+		}
+		d->inverse = inverse;
+
+		if (!*head)
+			*head = last = d;
+		else
+			last = last->next = d;
+	} while (p);
+
+	return (*head != NULL);
+
+err:
+	/* the callers drop the list on failure, so it must not leak */
+	while ((d = *head)) {
+		*head = d->next;
+		free(d->domain);
+		free(d);
+	}
+	return -1;
+}
+
+/* Parse the comma-separated domain prefix of an element hiding rule and
+ * append the domains to the list of `f`. Returns 1 if domains were added,
+ * 0 if there were none and -1 on failure. */
+static int
+parsedomainselement(struct filterrule *f, char *s)
+{
+	struct filterdomain *parsed, **tail;
+
+	/* find the link that terminates the current list */
+	for (tail = &f->domains; *tail; tail = &(*tail)->next)
+		;
+
+	if (parsedomains(s, ',', &parsed) < 0)
+		return -1;
+	*tail = parsed;
+
+	return (parsed != NULL);
+}
+
+/* Parse the value of a domain=... option ('|' separated) and append the
+ * domains to the list of `f`. Returns 1 if domains were added, 0 if there
+ * were none and -1 on failure. */
+static int
+parsedomainsoption(struct filterrule *f, char *s)
+{
+	struct filterdomain *parsed, **tail;
+
+	/* find the link that terminates the current list */
+	for (tail = &f->domains; *tail; tail = &(*tail)->next)
+		;
+
+	if (parsedomains(s, '|', &parsed) < 0)
+		return -1;
+	*tail = parsed;
+
+	return (parsed != NULL);
+}
+
+/* bsearch() comparison function: order filter types by their name. */
+static int
+filtertype_cmp(const void *a, const void *b)
+{
+	const struct filtertype *lhs = a, *rhs = b;
+
+	return strcmp(lhs->name, rhs->name);
+}
+
+/* Check if `domain` is the same domain as `s` or a subdomain of it.
+ * Returns 1 if so, 0 otherwise. The comparison is case-sensitive. */
+static int
+matchdomain(const char *s, const char *domain)
+{
+	size_t slen = strlen(s), dlen = strlen(domain);
+	const char *cmp = domain;
+
+	/* `domain` is too short to be `s` or one of its subdomains */
+	if (slen > dlen)
+		return 0;
+	/* subdomain: compare only the part after the separating dot */
+	if (dlen > slen && domain[dlen - slen - 1] == '.')
+		cmp = domain + (dlen - slen);
+
+	return strcmp(cmp, s) == 0;
+}
+
+/*
+ * Check if the rule `f` applies to a request.
+ *
+ * First the domain list of the rule is evaluated against `domain`; a rule
+ * without domains applies everywhere. For CSS rules (f->css set) the
+ * result of that check is returned directly and `uri` is not looked at.
+ * For request rules the URI pattern is wrapped in '*' (unless anchored by
+ * matchbegin / matchend) and matched against `uri`, case-sensitively only
+ * if FilterTypeMatchCase is set.
+ *
+ * Returns non-zero if the rule matches, 0 otherwise. Patterns that do not
+ * fit in the local buffer are ignored with a warning.
+ *
+ * NOTE(review): `type` is only referenced by disabled (#if 0) code, and
+ * f->isexception is not looked at here - confirm that the callers handle
+ * exception rules.
+ */
+static int
+matchrule(struct filterrule *f, const char *uri, const char *type,
+ const char *domain)
+{
+ /* NOTE: order matters, see FilterType enum values */
+ struct filterdomain *d;
+ char pat[1024];
+ int r, m;
+
+ /* r: does the rule apply to this domain? */
+ r = f->domains ? 0 : 1;
+ for (d = f->domains; d; d = d->next) {
+ if (matchdomain(d->domain, domain)) {
+ if (r && d->inverse)
+ r = 0;
+ else if (!r && !d->inverse)
+ r = 1;
+ } else if (r && !d->inverse) {
+ r = 0;
+ }
+ }
+ if (f->css) {
+ /* DEBUG */
+#if 0
+ if (f->isexception)
+ printf("DEBUG, exception rule, CSS: %s, match? %d\n",
+ f->css, r);
+#endif
+ return r;
+ }
+
+#if 1
+ /* skip allow rule, TODO: inverse? */
+ if (!r)
+ return 0;
+#endif
+
+#if 0
+ /* DEBUG: test, match if it is a simple pattern */
+ char *p;
+ p = strchr(f->uri, '*');
+ if (!p)
+ p = strchr(f->uri, '^');
+ if (!p) {
+ /* TODO: write a test-case */
+ if (f->block & FilterTypeMatchCase) {
+ if (f->matchbegin)
+ m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
+ else if (f->matchend)
+ m = strlen(f->uri) <= strlen(uri) &&
+ strcmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
+ else
+ m = strstr(uri, f->uri) ? 1 : 0;
+ } else {
+ if (f->matchbegin)
+ m = strncasecmp(uri, f->uri, strlen(f->uri)) == 0;
+ else if (f->matchend)
+ m = strlen(f->uri) <= strlen(uri) &&
+ strcasecmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
+ else
+ m = strcasestr(uri, f->uri) ? 1 : 0;
+ }
+ /*m = r ? !m : m;*/
+ return m;
+ }
+#endif
+
+ /* from here on `r` is reused for the snprintf() result */
+ r = snprintf(pat, sizeof(pat), "%s%s%s",
+ f->matchbegin ? "" : "*",
+ f->uri,
+ f->matchend ? "" : "*");
+ if (r == -1 || (size_t)r >= sizeof(pat)) {
+ fprintf(stderr, "warning: pattern too large, ignoring\n");
+ return 0;
+ }
+
+ m = 0;
+ /* match() returns 0 on a match */
+ if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
+#if 0
+ for (; *type; type++) {
+ for (i = 0; blockstr[i]; i++) {
+ if (blockstr[i] == *type &&
+ f->block & (1 << i))
+ printf("block type '%c'\n", blockstr[i]);
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+#endif
+ m = 1;
+ }
+ /*m = r ? !m : m;*/
+ return m;
+}
+
+/*
+ * Parse one line of a filter list into `f`.
+ *
+ * Handles comments ('!'), "[...]" header lines, element hiding rules
+ * ("domains##selector"), element hiding exceptions ("domains#@#selector")
+ * and request rules with the optional "@@", "|" and "||" prefixes and the
+ * optional "$option,option..." suffix. The line is modified temporarily
+ * while parsing and restored afterwards.
+ *
+ * Returns 1 if `f` holds a rule, 0 if the line has to be skipped (`f` is
+ * untouched then) and -1 on failure. On failure everything that was
+ * allocated for `f` is freed again and `f` is zeroed.
+ */
+static int
+parserule(struct filterrule *f, char *s)
+{
+	struct filtertype key, *ft;
+	struct filterdomain *d;
+	int inverse = 0, ret;
+	char *p, *values;
+
+	if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
+		return 0; /* skip comment or header line */
+	/* the cast is required: isspace() on a negative char value is
+	 * undefined behaviour */
+	for (; *s && isspace((unsigned char)*s); s++)
+		;
+	if (!*s)
+		return 0; /* line had only whitespace: skip */
+
+	memset(f, 0, sizeof(struct filterrule));
+
+	if ((p = strstr(s, "#@#"))) {
+		*p = '\0';
+		ret = parsedomainselement(f, s);
+		*p = '#';
+		if (ret < 0)
+			goto err;
+		if (!(f->css = westrdup(p + 3)))
+			goto err;
+		f->isexception = 1;
+		goto end; /* end of CSS rule */
+	}
+
+	/* element hiding rule, NOTE: no wildcards are supported,
+	   "Simplified element hiding syntax" is not supported. */
+	if ((p = strstr(s, "##"))) {
+		*p = '\0';
+		ret = parsedomainselement(f, s);
+		*p = '#';
+		if (ret < 0)
+			goto err;
+		if (!(f->css = westrdup(p + 2)))
+			goto err;
+		goto end; /* end of rule */
+	}
+
+	if (!strncmp(s, "@@", 2)) {
+		f->isexception = 1;
+		s += 2;
+	}
+	if (*s == '|') {
+		s++;
+		if (*s == '|') {
+			f->matchbegin = 1;
+			s++;
+		} else {
+			f->matchend = 1;
+		}
+	}
+
+	/* no options, use rest of line as uri. */
+	if (!(p = strrchr(s, '$'))) {
+		if (!(f->uri = westrdup(s)))
+			goto err;
+		goto end;
+	}
+
+	/* has options */
+	if (!(f->uri = westrndup(s, p - s)))
+		goto err;
+	s = ++p;
+
+	/* blockmask, has options? default: allow all options, case-sensitive
+	 * has no options? default: block all options, case-sensitive */
+	f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
+	do {
+		if ((p = strchr(s, ',')))
+			*p = '\0';
+		/* match option */
+		inverse = 0;
+		if (*s == '~') {
+			inverse = 1;
+			s++;
+		}
+		if ((values = strchr(s, '=')))
+			*(values) = '\0';
+		key.name = s;
+
+		ft = bsearch(&key, &filtertypes,
+		             sizeof(filtertypes) / sizeof(*filtertypes),
+		             sizeof(*filtertypes), filtertype_cmp);
+
+		/* restore the '=' that was replaced to look up the name */
+		if (values)
+			*(values++) = '=';
+
+		if (ft) {
+			if (inverse)
+				f->block &= ~(ft->type);
+			else
+				f->block |= ft->type;
+			/* a failed handler must not be ignored: the rule would
+			 * silently lose its domain restriction */
+			if (ft->fn && values && ft->fn(f, values) < 0) {
+				if (p)
+					*p = ',';
+				goto err;
+			}
+		} else {
+			/* DEBUG */
+			fprintf(stderr, "ignored: unknown option: '%s' "
+			        "in rule: %s\n", key.name, f->uri);
+		}
+
+		/* restore ',' */
+		if (p) {
+			*p = ',';
+			s = p + 1;
+		}
+	} while (p);
+end:
+
+	return 1;
+
+err:
+	/* `f` is only a temporary of the caller: free what it owns */
+	while ((d = f->domains)) {
+		f->domains = d->next;
+		free(d->domain);
+		free(d);
+	}
+	free(f->css);
+	free(f->uri);
+	memset(f, 0, sizeof(struct filterrule));
+
+	return -1;
+}
+
+#if 0
+/* DEBUG (disabled): print the fields of a parsed rule to stdout */
+static void
+debugrule(struct filterrule *r)
+{
+ printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
+ "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
+ r->isexception, r->block);
+}
+#endif
+
+/*
+ * Read filter rules line by line from `fp` and append them to the global
+ * `rules` list.
+ *
+ * Returns 1 if the list holds at least one rule afterwards, 0 if it is
+ * empty and -1 on failure (allocation, parse or read error). Rules that
+ * were appended before a failure stay in the list; cleanup() frees them.
+ */
+static int
+loadrules(FILE *fp)
+{
+	struct filterrule f, *r, *rn;
+	struct filterdomain *d;
+	char *line = NULL;
+	size_t linesiz = 0;
+	ssize_t n;
+	int ret = 0;
+
+	/* continue after rules that are already loaded, if any */
+	for (rn = rules; rn && rn->next; rn = rn->next)
+		;
+
+	/* load rules */
+	while ((n = getline(&line, &linesiz, fp)) > 0) {
+		if (line[n - 1] == '\n')
+			line[--n] = '\0';
+		if (n > 0 && line[n - 1] == '\r')
+			line[--n] = '\0';
+
+		/* keep the comparison out of the assignment: `ret` must hold
+		 * the real result so that errors (< 0) are seen */
+		ret = parserule(&f, line);
+		if (ret < 0)
+			break;
+		if (ret == 0)
+			continue; /* comment or empty line */
+
+		if (!(r = wecalloc(1, sizeof(*r)))) {
+			/* the parsed rule has no owner yet: free its fields */
+			while ((d = f.domains)) {
+				f.domains = d->next;
+				free(d->domain);
+				free(d);
+			}
+			free(f.css);
+			free(f.uri);
+			ret = -1;
+			break;
+		}
+		memcpy(r, &f, sizeof(*r));
+		if (rn)
+			rn->next = r;
+		else
+			rules = r;
+		rn = r;
+	}
+	if (ret >= 0 && ferror(fp)) {
+		weprintf("getline: %s\n", strerror(errno));
+		ret = -1;
+	}
+	free(line);
+
+	if (ret < 0)
+		return -1;
+	return (rules != NULL);
+}
+
+/* Return the CSS that applies to all sites as built by init(), or NULL if
+ * there is none. The buffer stays owned by this module. */
+char *
+getglobalcss(void)
+{
+	char *css = globalcss.data;
+
+	return css;
+}
+
+/*
+ * Build the site-specific element hiding CSS for the document at `uri`.
+ *
+ * The host is taken from between "://" and the next '/'. Every CSS rule
+ * that has a domain list matching this host adds its selector, followed
+ * by "{display:none;}" or, for exception rules, "{display:initial;}".
+ *
+ * Returns a heap-allocated string, or NULL if `uri` is NULL, has no
+ * "://", has a host that does not fit the local buffer, no rule matched
+ * or memory ran out. The buffer is not freed here; presumably the caller
+ * has to free() it.
+ */
+char *
+getdocumentcss(const char *uri)
+{
+	/* zeroed so the timing output is defined if clock_gettime() fails */
+	struct timespec tp_start = { 0, 0 }, tp_end = { 0, 0 }, tp_diff;
+	const char *s;
+	char domain[256];
+	String sitecss;
+	struct filterrule *r;
+	size_t len;
+
+	if (!uri)
+		return NULL;
+
+	if (!(s = strstr(uri, "://")))
+		return NULL;
+	s += sizeof("://") - 1;
+	len = strcspn(s, "/");
+	/* the host comes from the page: never copy more than fits */
+	if (len >= sizeof(domain)) {
+		weprintf("domain too long, ignoring: %s\n", uri);
+		return NULL;
+	}
+	memcpy(domain, s, len);
+	domain[len] = '\0';
+
+	printf("uri: %s\n", uri);
+	printf("domain: %s\n", domain);
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	/* site-specific CSS */
+	memset(&sitecss, 0, sizeof(sitecss));
+	for (r = rules; r; r = r->next) {
+		if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+			continue;
+
+		len = strlen(r->css);
+		if (string_append(&sitecss, r->css, len) < len)
+			goto err;
+
+		s = r->isexception ? "{display:initial;}" : "{display:none;}";
+		len = strlen(s);
+		if (string_append(&sitecss, s, len) < len)
+			goto err;
+	}
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+	if (tp_diff.tv_nsec < 0) {
+		tp_diff.tv_sec--;
+		tp_diff.tv_nsec += 1000000000L;
+	}
+
+	/* time_t is not size_t: convert explicitly to match the format */
+	printf("timing: %lld sec, %.3f ms\n",
+	       (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
+
+	if (globalcss.data)
+		printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
+	if (sitecss.data)
+		printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
+
+	return sitecss.data;
+
+err:
+	free(sitecss.data);
+	return NULL;
+}
+
+/*
+ * Decide if a request for `requri` made by the page at `uri` is allowed.
+ *
+ * The host of `uri` (between "://" and the next '/') is used for the
+ * domain restrictions of the rules; the request is blocked by the first
+ * request rule (no CSS) that matches.
+ *
+ * Returns 1 if the request is allowed and 0 if it is blocked. Requests
+ * that cannot be checked (NULL arguments, page URI without "://" or with
+ * a host that does not fit the local buffer) and requests for the page
+ * itself are allowed.
+ */
+int
+checkrequest(const char *uri, const char *requri)
+{
+	/* zeroed so the timing output is defined if clock_gettime() fails */
+	struct timespec tp_start = { 0, 0 }, tp_end = { 0, 0 }, tp_diff;
+	char domain[256];
+	struct filterrule *r;
+	const char *s;
+	size_t len;
+	int status = 1;
+
+	if (!uri || !requri || !strcmp(requri, uri))
+		return 1;
+
+	/* strstr() returns NULL for URIs without a scheme separator */
+	if (!(s = strstr(uri, "://")))
+		return 1;
+	s += sizeof("://") - 1;
+	len = strcspn(s, "/");
+	/* the host comes from the page: never copy more than fits */
+	if (len >= sizeof(domain)) {
+		weprintf("domain too long, ignoring: %s\n", uri);
+		return 1;
+	}
+	memcpy(domain, s, len);
+	domain[len] = '\0';
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	/* match rules */
+	for (r = rules; r; r = r->next) {
+		if (!r->css && matchrule(r, requri, "csio^", domain)) {
+			printf("requri: %s\n", requri);
+			printf("uri: %s\n", uri);
+			printf("domain: %s\n", domain);
+
+			fprintf(stderr, "blocked: %s, %s\n", domain, requri);
+
+			status = 0;
+			break;
+		}
+	}
+
+	/* DEBUG: timing */
+	if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
+		fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
+	}
+
+	tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
+	tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
+	if (tp_diff.tv_nsec < 0) {
+		tp_diff.tv_sec--;
+		tp_diff.tv_nsec += 1000000000L;
+	}
+
+	/* time_t is not size_t: convert explicitly to match the format */
+	printf("%s [%s] timing: %lld sec, %.3f ms\n",
+	       requri, uri, (long long)tp_diff.tv_sec,
+	       (float)tp_diff.tv_nsec / 1000000.0f);
+
+	return status;
+}
+
+/*
+ * Load the filter rules and build the global CSS.
+ *
+ * The rules are read from the file named by the environment variable
+ * SURF_ADBLOCK_FILE or, if it is not set, from $HOME/.surf/adblockrules.
+ * All CSS rules without a domain list are then collected in `globalcss`.
+ *
+ * Errors are reported on stderr. Nothing is returned: after a failure the
+ * rule list is empty or incomplete.
+ */
+void
+init(void)
+{
+	struct filterrule *r;
+	FILE *fp;
+	char filepath[PATH_MAX], *e;
+	const char *decl;
+	size_t len;
+	int n, saved_errno;
+
+	if ((e = getenv("SURF_ADBLOCK_FILE"))) {
+		n = snprintf(filepath, sizeof(filepath), "%s", e);
+	} else {
+		if (!(e = getenv("HOME")))
+			e = "";
+		n = snprintf(filepath, sizeof(filepath),
+		             "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
+	}
+	if (n < 0 || (size_t)n >= sizeof(filepath)) {
+		weprintf("fatal: rules file path too long\n");
+		return;
+	}
+
+	if (!(fp = fopen(filepath, "r"))) {
+		weprintf("fatal: cannot open rules file %s: %s\n",
+		         filepath, strerror(errno));
+		return;
+	}
+
+	n = loadrules(fp);
+	/* fclose() may change errno: keep the one that belongs to the error */
+	saved_errno = errno;
+	fclose(fp);
+	if (n < 1) {
+		if (n < 0) {
+			weprintf("fatal: cannot read rules from file %s: %s\n",
+			         filepath, strerror(saved_errno));
+		} else {
+			weprintf("fatal: cannot read any rule from file %s\n",
+			         filepath);
+		}
+		return;
+	}
+
+	/* general CSS rules: all sites */
+	for (r = rules; r; r = r->next) {
+		if (!r->css || r->domains)
+			continue;
+
+		len = strlen(r->css);
+		if (string_append(&globalcss, r->css, len) < len) {
+			weprintf("cannot load global css selectors "
+			         "in memory\n");
+			cleanup();
+			return;
+		}
+		/* same declarations as in getdocumentcss(): an exception
+		 * rule (#@#) must not hide the element */
+		decl = r->isexception ? "{display:initial;}" : "{display:none;}";
+		len = strlen(decl);
+		if (string_append(&globalcss, decl, len) < len) {
+			weprintf("cannot append css rule "
+			         "to global css selectors\n");
+			cleanup();
+			return;
+		}
+	}
+}
(DIR) diff --git a/surf-adblock.c b/surf-adblock.c
@@ -8,769 +8,30 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <wchar.h>
-#include <wctype.h>
#include <webkit2/webkit-web-extension.h>
#include <webkitdom/webkitdom.h>
-/* String data / memory pool */
-typedef struct string {
- char *data; /* data */
- size_t datasz; /* allocated size */
- size_t len; /* current string length */
-} String;
+#include "adblock.h"
typedef struct Page {
guint64 id;
WebKitWebPage *webpage;
- WebKitDOMDOMWindow *view;
+ /*WebKitDOMDOMWindow *view;*/
struct Page *next;
} Page;
-struct filterdomain {
- char *domain;
- int inverse;
- struct filterdomain *next;
-};
-
-struct filterrule {
- /* type: match mask, must be atleast 32-bit, see FilterType enum */
- unsigned long block;
- int matchbegin;
- int matchend;
- /* is exception rule: prefix @@ for ABP or #@# for CSS */
- int isexception;
- char *css; /* if non-NULL is CSS rule / hide element rule */
- char *uri;
- struct filterdomain *domains;
- struct filterrule *next;
-};
-
-enum {
- FilterTypeScript = 1 << 0,
- FilterTypeImage = 1 << 1,
- FilterTypeCSS = 1 << 2,
- FilterTypeObject = 1 << 3,
- FilterTypeXHR = 1 << 4,
- FilterTypeObjectSub = 1 << 5,
- FilterTypeSubDoc = 1 << 6,
- FilterTypePing = 1 << 7,
- FilterTypeDocument = 1 << 8,
- FilterTypeElemHide = 1 << 9,
- FilterTypeOther = 1 << 10,
- FilterTypeGenericHide = 1 << 11,
- FilterTypeGenericBlock = 1 << 12,
- FilterTypeMatchCase = 1 << 13,
-};
-
-struct filtertype {
- /* `type` must be atleast 32-bit, see FilterType enum */
- unsigned long type;
- char *name;
- size_t namelen;
- int allowinverse;
- int allownormal;
- int onlyexception;
- int (*fn)(struct filterrule *, char *);
-};
-
-static int parsedomainsoption(struct filterrule *, char *);
-
-#define STRP(s) s,sizeof(s)-1
-
-static struct filtertype filtertypes[] = {
- /* NOTE: options with 'type' = 0 are silently ignored and treated as
- * requests for now */
- { 0, STRP("collapse"), 1, 1, 0, NULL },
- { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
- { 0, STRP("domain"), 0, 1, 0,
- /* domain=... */ &parsedomainsoption },
- { 0, STRP("donottrack"), 1, 1, 0, NULL },
- { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
- { 0, STRP("font"), 1, 1, 0, NULL },
- { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
- { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
- { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
- { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
- { 0, STRP("media"), 1, 1, 0, NULL },
- { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
- { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
- { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
- { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
- { 0, STRP("popup"), 1, 1, 0, NULL },
- { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
- { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
- { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
- { 0, STRP("third-party"), 1, 1, 0, NULL },
- { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
- /* NOTE: site-key not supported */
-};
-
-static String globalcss;
static Page *pages;
-static struct filterrule *rules;
-
-static void
-cleanup(void)
-{
- struct filterrule *r;
- struct filterdomain *d;
-
- free(globalcss.data);
-
- for (r = rules; r; r = rules) {
- for (d = r->domains; d; d = r->domains) {
- free(d->domain);
- r->domains = d->next;
- free(d);
- }
- free(r->css);
- free(r->uri);
- rules = r->next;
- free(r);
- }
-}
-
-static void
-weprintf(const char *fmt, ...)
-{
- va_list ap;
-
- fprintf(stderr, "surf-adblock: ");
-
- va_start(ap, fmt);
- vfprintf(stderr, fmt, ap);
- va_end(ap);
-}
-
-static size_t
-string_buffer_realloc(String *s, size_t newsz)
-{
- char *tmp;
- size_t allocsz;
-
- for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
- ;
- if (!(tmp = realloc(s->data, allocsz))) {
- weprintf("realloc: %s\n", strerror(errno));
- } else {
- s->data = tmp;
- s->datasz = allocsz;
- }
-
- return s->datasz;
-}
-
-static size_t
-string_append(String *s, const char *data, size_t len)
-{
- size_t newlen;
-
- if (!len)
- return len;
-
- newlen = s->len + len;
- /* check if allocation is necesary, don't shrink buffer,
- * should be more than datasz ofcourse. */
- if (newlen >= s->datasz) {
- if (string_buffer_realloc(s, newlen + 1) <= newlen)
- return 0;
- }
- memcpy(s->data + s->len, data, len);
- s->len = newlen;
- s->data[s->len] = '\0';
- return len;
-}
-
-static void *
-wecalloc(size_t nmemb, size_t size)
-{
- void *p;
-
- if (!(p = calloc(nmemb, size)))
- weprintf("calloc: %s\n", strerror(errno));
-
- return p;
-}
-
-static char *
-westrndup(const char *s, size_t n)
-{
- char *p;
-
- if (!(p = strndup(s, n)))
- weprintf("strndup: %s\n", strerror(errno));
- return p;
-}
-
-static char *
-westrdup(const char *s)
-{
- char *p;
-
- if (!(p = strdup(s)))
- weprintf("strdup: %s\n", strerror(errno));
-
- return p;
-}
-
-#define END 0
-#define UNMATCHABLE -2
-#define BRACKET -3
-#define CARET -4
-#define STAR -5
-
-static int
-str_next(const char *str, size_t n, size_t *step)
-{
- if (!n) {
- *step = 0;
- return 0;
- }
- if (str[0] >= 128U) {
- wchar_t wc;
- int k = mbtowc(&wc, str, n);
- if (k<0) {
- *step = 1;
- return -1;
- }
- *step = k;
- return wc;
- }
- *step = 1;
-
- return str[0];
-}
-
-static int
-pat_next(const char *pat, size_t m, size_t *step)
-{
- int esc = 0;
-
- if (!m || !*pat) {
- *step = 0;
- return END;
- }
- *step = 1;
- if (pat[0]=='\\' && pat[1]) {
- *step = 2;
- pat++;
- esc = 1;
- goto escaped;
- }
- if (pat[0]=='^')
- return CARET;
- if (pat[0] == '*')
- return STAR;
-escaped:
- if (pat[0] >= 128U) {
- wchar_t wc;
- int k = mbtowc(&wc, pat, m);
- if (k<0) {
- *step = 0;
- return UNMATCHABLE;
- }
- *step = k + esc;
- return wc;
- }
- return pat[0];
-}
-
-static int
-casefold(int k)
-{
- int c = towupper(k);
- return c == k ? towlower(k) : c;
-}
-
-/* match() based on musl-libc fnmatch:
- https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
-static int
-match(const char *pat, const char *str, int fcase)
-{
- size_t m = -1, n = -1;
- const char *p, *ptail, *endpat;
- const char *s, *stail, *endstr;
- size_t pinc, sinc, tailcnt=0;
- int c, k, kfold;
-
- for (;;) {
- switch ((c = pat_next(pat, m, &pinc))) {
- case UNMATCHABLE:
- return 1;
- case STAR:
- pat++;
- m--;
- break;
- default:
- k = str_next(str, n, &sinc);
- /* TODO: write a test-case */
- if (c == CARET && (k == '?' || k == '/' || k <= 0))
- return 1;
- if (k <= 0)
- return (c==END) ? 0 : 1;
- str += sinc;
- n -= sinc;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- return 1;
- pat+=pinc;
- m-=pinc;
- continue;
- }
- break;
- }
-
- /* Compute real pat length if it was initially unknown/-1 */
- m = strnlen(pat, m);
- endpat = pat + m;
-
- /* Find the last * in pat and count chars needed after it */
- for (p=ptail=pat; p<endpat; p+=pinc) {
- switch (pat_next(p, endpat-p, &pinc)) {
- case UNMATCHABLE:
- return 1;
- case STAR:
- tailcnt=0;
- ptail = p+1;
- break;
- default:
- tailcnt++;
- break;
- }
- }
-
- /* Past this point we need not check for UNMATCHABLE in pat,
- * because all of pat has already been parsed once. */
-
- /* Compute real str length if it was initially unknown/-1 */
- n = strnlen(str, n);
- endstr = str + n;
- if (n < tailcnt) return 1;
-
- /* Find the final tailcnt chars of str, accounting for UTF-8.
- * On illegal sequences we may get it wrong, but in that case
- * we necessarily have a matching failure anyway. */
- for (s=endstr; s>str && tailcnt; tailcnt--) {
- if (s[-1] < 128U || MB_CUR_MAX==1) s--;
- else while ((unsigned char)*--s-0x80U<0x40 && s>str);
- }
- if (tailcnt) return 1;
- stail = s;
-
- /* Check that the pat and str tails match */
- p = ptail;
- for (;;) {
- c = pat_next(p, endpat-p, &pinc);
- p += pinc;
- if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
- if (c != END) return 1;
- break;
- }
- s += sinc;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- return 1;
- }
-
- /* We're all done with the tails now, so throw them out */
- endstr = stail;
- endpat = ptail;
-
- /* Match pattern components until there are none left */
- while (pat<endpat) {
- p = pat;
- s = str;
- for (;;) {
- c = pat_next(p, endpat-p, &pinc);
- p += pinc;
- /* Encountering * completes/commits a component */
- if (c == STAR) {
- pat = p;
- str = s;
- break;
- }
- k = str_next(s, endstr-s, &sinc);
- if (!k)
- return 1;
- kfold = fcase ? casefold(k) : k;
- if (k != c && kfold != c)
- break;
- s += sinc;
- }
- if (c == STAR) continue;
- /* If we failed, advance str, by 1 char if it's a valid
- * char, or past all invalid bytes otherwise. */
- k = str_next(str, endstr-str, &sinc);
- if (k > 0) str += sinc;
- else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
- }
-
- return 0;
-}
-
-/*
-domain=... if domain is prefixed with ~, ignore.
-multiple domains can be separated with |
-*/
-static int
-parsedomains(const char *s, int sep, struct filterdomain **head)
-{
- struct filterdomain *d, *last = *head = NULL;
- char *p;
- int inverse;
-
- do {
- inverse = 0;
- if (*s == '~') {
- inverse = !inverse;
- s++;
- }
- if (!*s || *s == sep)
- break;
-
- if (!(d = wecalloc(1, sizeof(struct filterdomain))))
- return -1;
- if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
- d->domain = westrndup(s, p - s);
- s = p + 1;
- } else {
- d->domain = westrdup(s);
- }
- if (!d->domain)
- return -1;
- d->inverse = inverse;
-
- if (!*head)
- *head = last = d;
- else
- last = last->next = d;
- } while (p);
-
- return (*head != NULL);
-}
-
-static int
-parsedomainselement(struct filterrule *f, char *s)
-{
- struct filterdomain *d, *last;
-
- for (last = f->domains; last && last->next; last = last->next)
- ;
-
- if (parsedomains(s, ',', &d) < 0)
- return -1;
- if (last)
- last->next = d;
- else
- f->domains = d;
-
- return (d != NULL);
-}
-
-static int
-parsedomainsoption(struct filterrule *f, char *s)
-{
- struct filterdomain *d, *last;
-
- for (last = f->domains; last && last->next; last = last->next)
- ;
-
- if (parsedomains(s, '|', &d) < 0)
- return -1;
- if (last)
- last->next = d;
- else
- f->domains = d;
-
- return (d != NULL);
-}
-
-static int
-filtertype_cmp(const void *a, const void *b)
-{
- return strcmp(((struct filtertype *)a)->name,
- ((struct filtertype *)b)->name);
-}
-
-/* check if domain is the same domain or a subdomain of `s` */
-static int
-matchdomain(const char *s, const char *domain)
-{
- size_t l1, l2;
-
- l1 = strlen(s);
- l2 = strlen(domain);
-
- /* subdomain-specific (longer) or other domain */
- if (l1 > l2)
- return 0;
- /* subdomain */
- if (l2 > l1 && domain[l2 - l1 - 1] == '.')
- return !strcmp(&domain[l2 - l1], s);
-
- return !strcmp(s, domain);
-}
-
-static int
-matchrule(struct filterrule *f, const char *uri, const char *type,
- const char *domain)
-{
- /* NOTE: order matters, see FilterType enum values */
- struct filterdomain *d;
- char pat[1024];
- int r, m;
-
- r = f->domains ? 0 : 1;
- for (d = f->domains; d; d = d->next) {
- if (matchdomain(d->domain, domain)) {
- if (r && d->inverse)
- r = 0;
- else if (!r && !d->inverse)
- r = 1;
- } else if (r && !d->inverse) {
- r = 0;
- }
- }
- if (f->css) {
- /* DEBUG */
-#if 0
- if (f->isexception)
- printf("DEBUG, exception rule, CSS: %s, match? %d\n",
- f->css, r);
-#endif
- return r;
- }
-
-#if 1
- /* skip allow rule, TODO: inverse? */
- if (!r)
- return 0;
-#endif
-
-#if 0
- /* DEBUG: test, match if it is a simple pattern */
- char *p;
- p = strchr(f->uri, '*');
- if (!p)
- p = strchr(f->uri, '^');
- if (!p) {
- /* TODO: write a test-case */
- if (f->block & FilterTypeMatchCase) {
- if (f->matchbegin)
- m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
- else if (f->matchend)
- m = strlen(f->uri) <= strlen(uri) &&
- strcmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
- else
- m = strstr(uri, f->uri) ? 1 : 0;
- } else {
- if (f->matchbegin)
- m = strncasecmp(uri, f->uri, strlen(f->uri)) == 0;
- else if (f->matchend)
- m = strlen(f->uri) <= strlen(uri) &&
- strcasecmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
- else
- m = strcasestr(uri, f->uri) ? 1 : 0;
- }
- /*m = r ? !m : m;*/
- return m;
- }
-#endif
-
- r = snprintf(pat, sizeof(pat), "%s%s%s",
- f->matchbegin ? "" : "*",
- f->uri,
- f->matchend ? "" : "*");
- if (r == -1 || (size_t)r >= sizeof(pat)) {
- fprintf(stderr, "warning: pattern too large, ignoring\n");
- return 0;
- }
-
- m = 0;
- if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
-#if 0
- for (; *type; type++) {
- for (i = 0; blockstr[i]; i++) {
- if (blockstr[i] == *type &&
- f->block & (1 << i))
- printf("block type '%c'\n", blockstr[i]);
- return 1;
- }
- }
- }
-
- return 0;
-#endif
- m = 1;
- }
- /*m = r ? !m : m;*/
- return m;
-}
-
-static int
-parserule(struct filterrule *f, char *s)
-{
- struct filtertype key, *ft;
- int inverse = 0;
- char *p, *values;
-
- if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
- return 0; /* skip comment or empty line */
- for (; *s && isspace(*s); s++)
- ;
- if (!*s)
- return 0; /* line had only whitespace: skip */
-
- memset(f, 0, sizeof(struct filterrule));
-
- if ((p = strstr(s, "#@#"))) {
- *p = '\0';
- if (parsedomainselement(f, s) < 0)
- return -1;
- *p = '#';
- if (!(f->css = westrdup(p + 3)))
- return -1;
- f->isexception = 1;
- goto end; /* end of CSS rule */
- }
-
- /* element hiding rule, NOTE: no wildcards are supported,
- "Simplified element hiding syntax" is not supported. */
- if ((p = strstr(s, "##"))) {
- *p = '\0';
- if (parsedomainselement(f, s) < 0)
- return -1;
- *p = '#';
- if (!(f->css = westrdup(p + 2)))
- return -1;
- goto end; /* end of rule */
- }
-
- if (!strncmp(s, "@@", 2)) {
- f->isexception = 1;
- s += 2;
- }
- if (*s == '|') {
- s++;
- if (*s == '|') {
- f->matchbegin = 1;
- s++;
- } else {
- f->matchend = 1;
- }
- }
-
- /* no options, use rest of line as uri. */
- if (!(p = strrchr(s, '$'))) {
- if (!(f->uri = westrdup(s)))
- return -1;
- goto end;
- }
-
- /* has options */
- if (!(f->uri = westrndup(s, p - s)))
- return -1;
- s = ++p;
-
- /* blockmask, has options? default: allow all options, case-sensitive
- * has no options? default: block all options, case-sensitive */
- f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
- do {
- if ((p = strchr(s, ',')))
- *p = '\0';
- /* match option */
- inverse = 0;
- if (*s == '~') {
- inverse = 1;
- s++;
- }
- if ((values = strchr(s, '=')))
- *(values) = '\0';
- key.name = s;
-
- ft = bsearch(&key, &filtertypes,
- sizeof(filtertypes) / sizeof(*filtertypes),
- sizeof(*filtertypes), filtertype_cmp);
-
- /* restore NUL-terminator for domain= option */
- if (values)
- *(values++) = '=';
-
- if (ft) {
- if (inverse)
- f->block &= ~(ft->type);
- else
- f->block |= ft->type;
- if (ft->fn && values)
- ft->fn(f, values);
- } else {
- /* DEBUG */
- fprintf(stderr, "ignored: unknown option: '%s' "
- "in rule: %s\n", key.name, f->uri);
- }
-
- /* restore ',' */
- if (p) {
- *p = ',';
- s = p + 1;
- }
- } while (p);
-end:
-
- return 1;
-}
-
-#if 0
-static void
-debugrule(struct filterrule *r)
-{
- printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
- "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
- r->isexception, r->block);
-}
-#endif
-
-static int
-loadrules(FILE *fp)
-{
- struct filterrule f, *r, *rn = NULL;
- char *line = NULL;
- size_t linesiz = 0;
- ssize_t n;
- int ret;
-
- /* TODO: handle ferror() */
- /* load rules */
- while ((n = getline(&line, &linesiz, fp)) > 0) {
- if (line[n - 1] == '\n')
- line[--n] = '\0';
- if (n > 0 && line[n - 1] == '\r')
- line[--n] = '\0';
-
- if ((ret = parserule(&f, line) > 0)) {
- if (!(r = wecalloc(1, sizeof(struct filterrule))))
- return -1;
- if (!rules)
- rules = rn = r;
- else
- rn = rn->next = r;
- memcpy(rn, &f, sizeof(struct filterrule));
- } else if (ret < 0) {
- return -1;
- }
- }
- return (rules != NULL);
-}
static Page *
newpage(WebKitWebPage *page)
{
Page *p;
- if (!(p = wecalloc(1, sizeof(Page))))
+ if (!(p = calloc(1, sizeof(Page)))) {
+ fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno));
return NULL;
+ }
p->next = pages;
pages = p;
@@ -783,146 +44,32 @@ newpage(WebKitWebPage *page)
static void
documentloaded(WebKitWebPage *wp, Page *p)
{
- char domain[256];
WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
WebKitDOMElement *el;
- String sitecss;
- struct filterrule *r;
- const char *s, *uri = webkit_web_page_get_uri(p->webpage);
- size_t len;
-
- if (!uri)
- return;
-
- s = strstr(uri, "://") + sizeof("://") - 1;
- len = strcspn(s, "/");
- memcpy(domain, s, len);
- domain[len] = '\0';
-
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
-
- /* DEBUG: timing */
- struct timespec tp_start, tp_end, tp_diff;
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- /* site-specific CSS */
- memset(&sitecss, 0, sizeof(sitecss));
- for (r = rules; r; r = r->next) {
- if (!r->css || !r->domains || !matchrule(r, "", "", domain))
- continue;
-
- len = strlen(r->css);
- if (string_append(&sitecss, r->css, len) < len)
- return;
-
- s = r->isexception ? "{display:initial;}" : "{display:none;}";
- len = strlen(s);
- if (string_append(&sitecss, s, len) < len)
- return;
- }
-/* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
-
- /* DEBUG: timing */
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
- if (tp_diff.tv_nsec < 0) {
- tp_diff.tv_sec--;
- tp_diff.tv_nsec += 1000000000L;
- }
-
- printf("timing: %zu sec, %.3f ms\n",
- tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
+ const char *uri = webkit_web_page_get_uri(p->webpage);
+ char *css, *globalcss;
- if (globalcss.data)
- printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
- if (sitecss.data)
- printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
+ /*p->view = webkit_dom_document_get_default_view(doc);*/
- p->view = webkit_dom_document_get_default_view(doc);
-
- if (globalcss.data) {
+ if ((globalcss = getglobalcss())) {
el = webkit_dom_document_create_element(doc, "style", NULL);
webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
- webkit_dom_element_set_inner_html(el, globalcss.data, NULL);
+ webkit_dom_element_set_inner_html(el, globalcss, NULL);
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
WEBKIT_DOM_NODE(el), NULL);
}
- if (sitecss.data) {
+ if ((css = getdocumentcss(uri))) {
el = webkit_dom_document_create_element(doc, "style", NULL);
webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
- webkit_dom_element_set_inner_html(el, sitecss.data, NULL);
+ webkit_dom_element_set_inner_html(el, css, NULL);
webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
WEBKIT_DOM_NODE(el), NULL);
}
- free(sitecss.data);
-}
-
-int
-checkrequest(const char *uri, const char *requri)
-{
- char domain[256];
- struct filterrule *r;
- const char *s;
- size_t len;
- int status = 1;
-
- if (!uri || !strcmp(requri, uri))
- return 1;
-
- s = strstr(uri, "://") + sizeof("://") - 1;
- len = strcspn(s, "/");
- memcpy(domain, s, len);
- domain[len] = '\0';
-
- /* DEBUG: timing */
- struct timespec tp_start, tp_end, tp_diff;
- if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- /* match rules */
- for (r = rules; r; r = r->next) {
- if (!r->css && matchrule(r, requri, "csio^", domain)) {
- printf("requri: %s\n", requri);
- printf("uri: %s\n", uri);
- printf("domain: %s\n", domain);
-
- fprintf(stderr, "blocked: %s, %s\n", domain, requri);
-
- /* DEBUG: for showing the timing */
- status = 0;
- goto end;
- /*return 1;*/
- }
- }
-
-end:
- /* DEBUG: timing */
- if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
- fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
- }
-
- tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
- tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
- if (tp_diff.tv_nsec < 0) {
- tp_diff.tv_sec--;
- tp_diff.tv_nsec += 1000000000L;
- }
-
- printf("%s [%s] timing: %zu sec, %.3f ms\n",
- requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
-
- return status;
+ free(css);
+ /* NOTE: globalcss free'd at cleanup() */
}
static gboolean
@@ -941,69 +88,6 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
return status;
}
-void
-init(void)
-{
- struct filterrule *r;
- FILE *fp;
- char filepath[PATH_MAX], *e;
- size_t len;
- int n;
-
- if ((e = getenv("SURF_ADBLOCK_FILE"))) {
- n = snprintf(filepath, sizeof(filepath), "%s", e);
- } else {
- if (!(e = getenv("HOME")))
- e = "";
- n = snprintf(filepath, sizeof(filepath),
- "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
- }
- if (n < 0 || (size_t)n >= sizeof(filepath)) {
- weprintf("fatal: rules file path too long");
- return;
- }
-
- if (!(fp = fopen(filepath, "r"))) {
- weprintf("fatal: cannot open rules file %s: %s\n",
- filepath, strerror(errno));
- return;
- }
-
- n = loadrules(fp);
- fclose(fp);
- if (n < 1) {
- if (n < 0) {
- weprintf("fatal: cannot read rules from file %s: %s\n",
- filepath, strerror(errno));
- } else {
- weprintf("fatal: cannot read any rule from file %s\n",
- filepath);
- }
- return;
- }
-
- /* general CSS rules: all sites */
- for (r = rules; r; r = r->next) {
- if (!r->css || r->domains)
- continue;
-
- len = strlen(r->css);
- if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
- weprintf("cannot load global css selectors "
- "in memory\n");
- cleanup();
- return;
- }
- len = sizeof("{display:none;}") - 1;
- if (string_append(&globalcss, "{display:none;}", len) < len) {
- weprintf("cannot append css rule "
- "to global css selectors\n");
- cleanup();
- return;
- }
- }
-}
-
static void
webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
{