initial insertion (twss) - surf-adblock - Surf adblock web extension
(HTM) git clone git://git.codemadness.org/surf-adblock
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 888e9c8a2237bf9c374e04737f103e91cdc30267
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 14 Jul 2016 22:18:31 +0200
initial insertion (twss)
Diffstat:
A LICENSE | 16 ++++++++++++++++
A Makefile | 35 +++++++++++++++++++++++++++++++
A README | 1 +
A TODO | 11 +++++++++++
A config.mk | 32 +++++++++++++++++++++++++++++++
A dl.sh | 20 ++++++++++++++++++++
A surf-adblock.c | 781 +++++++++++++++++++++++++++++++
7 files changed, 896 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,16 @@
+ISC License
+
+Copyright (c) 2016 Hiltjo Posthuma <hiltjo@codemadness.org>
+Copyright (c) 2016 Quentin Rameau <quinq@fifth.space>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
(DIR) diff --git a/Makefile b/Makefile
@@ -0,0 +1,35 @@
+# Build and install the surf-adblock web extension as a libtool module.
+include config.mk
+
+SRC = surf-adblock.c
+OBJ = ${SRC:.c=.lo}
+
+all: surf-adblock.la
+
+.SUFFIXES: .la .lo .o .c
+
+.c.o:
+	${CC} -c ${CFLAGS} $<
+
+.c.lo:
+	${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $<
+
+${OBJ}: config.mk
+
+# -rpath names the final installation directory. DESTDIR is only a staging
+# prefix used while packaging and must not be recorded in the library.
+surf-adblock.la: ${OBJ}
+	${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \
+		${OBJ} -rpath ${LIBPREFIX}
+
+clean:
+	rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o}
+
+install: all
+	mkdir -p ${DESTDIR}${LIBPREFIX}
+	${LIBTOOL} --mode install install -c surf-adblock.la \
+		${DESTDIR}${LIBPREFIX}/surf-adblock.la
+
+# rmdir only removes the directory when it is empty; failure is not an error.
+uninstall:
+	${LIBTOOL} --mode uninstall rm -f \
+		${DESTDIR}${LIBPREFIX}/surf-adblock.la
+	rmdir ${DESTDIR}${LIBPREFIX} 2>/dev/null || true
+
+.PHONY: all clean install uninstall
(DIR) diff --git a/README b/README
@@ -0,0 +1 @@
+adblock parser (WIP)
(DIR) diff --git a/TODO b/TODO
@@ -0,0 +1,11 @@
+Docs:
+- https://adblockplus.org/en/filter-cheatsheet
+- https://adblockplus.org/filters
+
+- separate between site-specific and global block rules.
+- optimize matching.
+- optimize memory allocation.
+- optimize: pregenerate one global stylesheet that applies to all sites?
+? support exception rules #@#
+
+
(DIR) diff --git a/config.mk b/config.mk
@@ -0,0 +1,32 @@
+VERSION = 0.1
+
+# Customize below to fit your system
+
+# paths
+PREFIX = /usr/local
+MANPREFIX = ${PREFIX}/share/man
+LIBPREFIX = ${PREFIX}/lib/surf
+
+X11INC = /usr/X11R6/include
+X11LIB = /usr/X11R6/lib
+
+GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0`
+GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0`
+WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
+WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
+
+# includes and libs
+INCS = -I. -I/usr/include -I${X11INC} ${GTKINC}
+LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0
+
+# flags
+CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SOURCE
+CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS}
+LDFLAGS = -s ${LIBS}
+# The extension is compiled with LIBCFLAGS (see the .c.lo rule). With -std=c99
+# it needs -D_DEFAULT_SOURCE so that strdup(), getline() and strnlen() are
+# declared by the C library headers.
+LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API -D_DEFAULT_SOURCE
+LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS}
+LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined
+
+# compiler and linker
+CC = cc
+LIBTOOL = libtool --quiet
(DIR) diff --git a/dl.sh b/dl.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# download adblock lists
+
+set -e
+
+(while read -r l; do
+ curl -L "$l"
+ echo "" # not all lists have line at EOF.
+done <<!
+https://easylist.github.io/easylist/easylist.txt
+https://easylist.github.io/easylist/easyprivacy.txt
+https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
+https://easylist-downloads.adblockplus.org/easylistdutch.txt
+https://easylist.github.io/easylistgermany/easylistgermany.txt
+https://easylist-downloads.adblockplus.org/liste_fr.txt
+https://easylist.github.io/easylist/fanboy-annoyance.txt
+https://easylist.github.io/easylist/fanboy-social.txt
+!
+) | awk '{if(!x[$0]++){print $0;}}' > list
+# remove duplicate lines but keep the order.
(DIR) diff --git a/surf-adblock.c b/surf-adblock.c
@@ -0,0 +1,781 @@
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+
+#include <webkit2/webkit-web-extension.h>
+#include <webkitdom/webkitdom.h>
+#include <webkitdom/WebKitDOMDOMWindowUnstable.h>
+
+typedef struct Page { /* per-WebKitWebPage bookkeeping, kept in the singly linked `pages` list */
+ guint64 id; /* value of webkit_web_page_get_id(), set in newpage() */
+ WebKitWebPage *webpage; /* the page this entry belongs to, set in newpage() */
+ WebKitDOMDOMWindow *view; /* default view of the document, set in documentloaded() */
+ struct Page *next; /* next entry in the list, NULL at the end */
+} Page;
+
+struct filterdomain { /* one entry of a rule's domain list (singly linked) */
+ char *domain; /* heap copy of the domain name, made by parsedomains() */
+ int inverse; /* non-zero when the entry was written with a leading '~' */
+ struct filterdomain *next; /* next entry, NULL at the end */
+};
+
+struct filterrule { /* one parsed filter list line; nodes form a singly linked list */
+ char *line; /* DEBUG: heap copy of the raw input line, set in loadrules() */
+ /* type: match mask, must be at least 32-bit, see FilterType enum */
+ unsigned long block;
+ int matchbegin; /* non-zero: matchrule() adds no leading '*' to the pattern */
+ int matchend; /* non-zero: matchrule() adds no trailing '*' to the pattern */
+ /* is exception rule: prefix @@ for ABP or #@# for CSS */
+ int isexception;
+ char *css; /* if non-NULL is CSS rule / hide element rule */
+ char *uri; /* URI pattern of a request rule, NULL for CSS rules */
+ struct filterdomain *domains; /* domain restrictions, NULL when there are none */
+ struct filterrule *next; /* next rule, NULL at the end */
+};
+
+enum { /* bit flags used in filterrule.block and filtertype.type */
+ FilterTypeScript = 1 << 0, /* matchrule() maps bit positions to the characters of its `blockstr` */
+ FilterTypeImage = 1 << 1,
+ FilterTypeCSS = 1 << 2,
+ FilterTypeObject = 1 << 3,
+ FilterTypeXHR = 1 << 4,
+ FilterTypeObjectSub = 1 << 5,
+ FilterTypeSubDoc = 1 << 6,
+ FilterTypePing = 1 << 7,
+ FilterTypeDocument = 1 << 8,
+ FilterTypeElemHide = 1 << 9,
+ FilterTypeOther = 1 << 10,
+ FilterTypeGenericHide = 1 << 11,
+ FilterTypeGenericBlock = 1 << 12,
+ FilterTypeMatchCase = 1 << 13, /* when set, matchrule() matches case-sensitively */
+};
+
+struct filtertype { /* description of one `$option` name accepted by parserule() */
+ /* `type` must be at least 32-bit, see FilterType enum */
+ unsigned long type; /* FilterType bit toggled in filterrule.block, 0 for none */
+ char *name; /* option name; the lookup key used by filtertype_cmp() */
+ size_t namelen; /* strlen(name), filled in by the STRP() macro */
+ int allowinverse; /* NOTE(review): not read anywhere in the visible code */
+ int allownormal; /* NOTE(review): not read anywhere in the visible code */
+ int onlyexception; /* NOTE(review): not read anywhere in the visible code */
+ void (*fn)(struct filterrule *, char *); /* optional handler for `name=value`, called by parserule() */
+};
+
+static void parsedomainsoption(struct filterrule *, char *);
+
+#define STRP(s) s,sizeof(s)-1
+
+struct filtertype filtertypes[] = { /* must stay sorted by name (strcmp order): parserule() uses bsearch() on it */
+ /* NOTE: options with 'type' = 0 are silently ignored and treated as
+ * requests for now */
+ { 0, STRP("collapse"), 1, 1, 0, NULL }, /* columns: type, name, namelen, allowinverse, allownormal, onlyexception, fn */
+ { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
+ { 0, STRP("domain"), 0, 1, 0, &parsedomainsoption }, /* domain=... */
+ { 0, STRP("donottrack"), 1, 1, 0, NULL },
+ { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
+ { 0, STRP("font"), 1, 1, 0, NULL },
+ { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
+ { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
+ { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
+ { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
+ { 0, STRP("media"), 1, 1, 0, NULL },
+ { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
+ { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
+ { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
+ { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
+ { 0, STRP("popup"), 1, 1, 0, NULL },
+ { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
+ { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
+ { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
+ { 0, STRP("third-party"), 1, 1, 0, NULL },
+ { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
+ /* NOTE: site-key not supported */
+};
+
+static Page *pages;
+static char globalcss[5000000]; /* TEST: dynamic allocate later */
+static struct filterrule *rules;
+
+/* calloc() wrapper: returns zeroed memory for `nmemb` objects of `size`
+ * bytes, or prints the error to stderr and exits with status 1. */
+void *
+ecalloc(size_t nmemb, size_t size)
+{
+	void *mem = calloc(nmemb, size);
+
+	if (mem != NULL)
+		return mem;
+
+	fprintf(stderr, "calloc: %s\n", strerror(errno));
+	exit(1);
+}
+
+char *
+estrdup(const char *s)
+{
+ char *p;
+
+ if (!(p = strdup(s))) {
+ fprintf(stderr, "strdup: %s\n", strerror(errno));
+ exit(1);
+ }
+
+ return p;
+}
+
+#define END 0
+#define UNMATCHABLE -2
+#define BRACKET -3
+#define CARET -4
+#define STAR -5
+
+/* Decode the next character of `str`, looking at no more than `n` bytes.
+ * Stores the number of bytes to advance in `*step`.
+ * Returns the character, 0 when n is 0 or at the NUL byte, or -1 when
+ * mbtowc() rejects the byte sequence (then `*step` is 1). */
+static int
+str_next(const char *str, size_t n, size_t *step)
+{
+	wchar_t wc;
+	int len;
+
+	if (n == 0) {
+		*step = 0;
+		return 0;
+	}
+
+	/* plain ASCII byte (including NUL): no decoding needed */
+	if ((unsigned char)str[0] < 0x80) {
+		*step = 1;
+		return str[0];
+	}
+
+	len = mbtowc(&wc, str, n);
+	if (len < 0) {
+		*step = 1;
+		return -1;
+	}
+	*step = len;
+
+	return wc;
+}
+
+/* Decode the next token of pattern `pat`, looking at no more than `m` bytes.
+ * Stores the number of pattern bytes consumed in `*step`.
+ * Returns END at the end of the pattern, CARET for an unescaped '^', STAR
+ * for an unescaped '*', UNMATCHABLE when mbtowc() rejects the byte sequence,
+ * and otherwise the (possibly backslash-escaped) character itself. */
+static int
+pat_next(const char *pat, size_t m, size_t *step)
+{
+	wchar_t wc;
+	int escaped, len;
+
+	if (m == 0 || *pat == '\0') {
+		*step = 0;
+		return END;
+	}
+
+	/* a backslash followed by anything makes the next character literal */
+	escaped = (pat[0] == '\\' && pat[1] != '\0');
+	if (escaped) {
+		pat++;
+	} else if (*pat == '^') {
+		*step = 1;
+		return CARET;
+	} else if (*pat == '*') {
+		*step = 1;
+		return STAR;
+	}
+
+	*step = 1 + escaped;
+	if ((unsigned char)*pat >= 0x80) {
+		len = mbtowc(&wc, pat, m);
+		if (len < 0) {
+			*step = 0;
+			return UNMATCHABLE;
+		}
+		*step = len + escaped;
+		return wc;
+	}
+
+	return *pat;
+}
+
+/* Return `k` with its case swapped: the upper-case form when that differs
+ * from `k`, otherwise the lower-case form. */
+static int
+casefold(int k)
+{
+	int upper = towupper(k);
+
+	if (upper != k)
+		return upper;
+
+	return towlower(k);
+}
+
+/* match() based on musl-libc fnmatch:
+   https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
+
+/* Return non-zero when pattern token `c` accepts the subject character `k`.
+ * CARET accepts one separator character: anything that is not a letter, a
+ * digit or one of "_-.%". Any other token must equal `k`, or its
+ * case-swapped form when `fcase` is non-zero. */
+static int
+tokmatch(int c, int k, int fcase)
+{
+	if (k <= 0)
+		return 0; /* end of string or invalid byte sequence */
+	if (c == CARET) {
+		if (iswdigit(k) || iswalpha(k))
+			return 0;
+		/* strchr() compares bytes: only ask it about ASCII values */
+		return !(k < 128 && strchr("_-.%", k));
+	}
+	if (k == c)
+		return 1;
+
+	return fcase && casefold(k) == c;
+}
+
+/*
+ * Match the string `str` against the pattern `pat`.
+ *
+ * Pattern syntax: '*' matches any run of characters (including none), '^'
+ * matches exactly one separator character, a backslash makes the following
+ * character literal; everything else matches itself. When `fcase` is
+ * non-zero letters match regardless of case.
+ *
+ * NOTE: '^' always consumes one character here; matching '^' against the
+ * end of the address is not implemented.
+ *
+ * Returns 0 when the whole string matches, 1 otherwise (fnmatch convention).
+ */
+int
+match(const char *pat, const char *str, int fcase)
+{
+	size_t m = -1, n = -1;
+	const char *p, *ptail, *endpat;
+	const char *s, *stail, *endstr;
+	size_t pinc, sinc, tailcnt=0;
+	int c, k;
+
+	/* Match the literal prefix of pat, up to the first '*' */
+	for (;;) {
+		switch ((c = pat_next(pat, m, &pinc))) {
+		case UNMATCHABLE:
+			return 1;
+		case STAR:
+			pat++;
+			m--;
+			break;
+		default:
+			k = str_next(str, n, &sinc);
+			if (k <= 0)
+				return (c==END) ? 0 : 1;
+			if (!tokmatch(c, k, fcase))
+				return 1;
+			str += sinc;
+			n -= sinc;
+			pat+=pinc;
+			m-=pinc;
+			continue;
+		}
+		break;
+	}
+
+	/* Compute real pat length if it was initially unknown/-1 */
+	m = strnlen(pat, m);
+	endpat = pat + m;
+
+	/* Find the last * in pat and count chars needed after it */
+	for (p=ptail=pat; p<endpat; p+=pinc) {
+		switch (pat_next(p, endpat-p, &pinc)) {
+		case UNMATCHABLE:
+			return 1;
+		case STAR:
+			tailcnt=0;
+			ptail = p+1;
+			break;
+		default:
+			tailcnt++;
+			break;
+		}
+	}
+
+	/* Past this point we need not check for UNMATCHABLE in pat,
+	 * because all of pat has already been parsed once. */
+
+	/* Compute real str length if it was initially unknown/-1 */
+	n = strnlen(str, n);
+	endstr = str + n;
+	if (n < tailcnt) return 1;
+
+	/* Find the final tailcnt chars of str, accounting for UTF-8.
+	 * On illegal sequences we may get it wrong, but in that case
+	 * we necessarily have a matching failure anyway. */
+	for (s=endstr; s>str && tailcnt; tailcnt--) {
+		if (s[-1] < 128U || MB_CUR_MAX==1) s--;
+		else while ((unsigned char)*--s-0x80U<0x40 && s>str);
+	}
+	if (tailcnt) return 1;
+	stail = s;
+
+	/* Check that the pat and str tails match */
+	p = ptail;
+	for (;;) {
+		c = pat_next(p, endpat-p, &pinc);
+		p += pinc;
+		if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
+			if (c != END) return 1;
+			break;
+		}
+		s += sinc;
+		if (!tokmatch(c, k, fcase))
+			return 1;
+	}
+
+	/* We're all done with the tails now, so throw them out */
+	endstr = stail;
+	endpat = ptail;
+
+	/* Match pattern components until there are none left */
+	while (pat<endpat) {
+		p = pat;
+		s = str;
+		for (;;) {
+			c = pat_next(p, endpat-p, &pinc);
+			p += pinc;
+			/* Encountering * completes/commits a component */
+			if (c == STAR) {
+				pat = p;
+				str = s;
+				break;
+			}
+			k = str_next(s, endstr-s, &sinc);
+			if (!k)
+				return 1;
+			if (!tokmatch(c, k, fcase))
+				break;
+			s += sinc;
+		}
+		if (c == STAR) continue;
+		/* If we failed, advance str, by 1 char if it's a valid
+		 * char, or past all invalid bytes otherwise. */
+		k = str_next(str, endstr-str, &sinc);
+		if (k > 0) str += sinc;
+		else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Parse a list of domains in `s`, separated by the character `sep`.
+ * An entry prefixed with '~' is stored with its `inverse` flag set.
+ * Parsing stops at the first empty entry. `s` is modified temporarily but
+ * restored before returning.
+ * Returns the head of a newly allocated list, or NULL if nothing was parsed.
+ */
+struct filterdomain *
+parsedomains(char *s, int sep)
+{
+	struct filterdomain *head = NULL, *tail = NULL, *node;
+	char *end;
+	int inverse;
+
+	for (;;) {
+		inverse = (*s == '~');
+		if (inverse)
+			s++;
+		if (*s == '\0' || *s == sep)
+			break;
+
+		/* terminate the current entry for the duration of the copy */
+		end = strchr(s, sep); /* TODO: should not contain , */
+		if (end)
+			*end = '\0';
+
+		node = ecalloc(1, sizeof(struct filterdomain));
+		node->inverse = inverse;
+		node->domain = estrdup(s);
+
+		if (tail)
+			tail->next = node;
+		else
+			head = node;
+		tail = node;
+
+		if (!end)
+			break;
+		*end = sep;
+		s = end + 1;
+	}
+
+	return head;
+}
+
+/* Parse the comma-separated domain list `s` of an element hiding rule and
+ * append the result to the end of f->domains. */
+void
+parsedomainselement(struct filterrule *f, char *s)
+{
+	struct filterdomain **tailp = &f->domains;
+
+	/* walk to the NULL link that terminates the existing list */
+	while (*tailp)
+		tailp = &(*tailp)->next;
+
+	*tailp = parsedomains(s, ',');
+}
+
+/* Parse the '|'-separated value `s` of a `domain=` option and append the
+ * result to the end of f->domains. */
+void
+parsedomainsoption(struct filterrule *f, char *s)
+{
+	struct filterdomain **tailp = &f->domains;
+
+	/* walk to the NULL link that terminates the existing list */
+	while (*tailp)
+		tailp = &(*tailp)->next;
+
+	*tailp = parsedomains(s, '|');
+}
+
+/* bsearch() comparison callback: orders two struct filtertype by name. */
+int
+filtertype_cmp(const void *a, const void *b)
+{
+	const struct filtertype *lhs = a;
+	const struct filtertype *rhs = b;
+
+	return strcmp(lhs->name, rhs->name);
+}
+
+/* Check whether `domain` equals `s` or is a subdomain of `s`, i.e. ends in
+ * "." followed by `s`. The comparison is case-sensitive.
+ * Returns 1 on match, 0 otherwise. */
+int
+matchdomain(const char *s, const char *domain)
+{
+	size_t slen = strlen(s);
+	size_t dlen = strlen(domain);
+	const char *suffix;
+
+	/* `s` is more specific than `domain`, or a different domain */
+	if (slen > dlen)
+		return 0;
+	if (slen == dlen)
+		return strcmp(s, domain) == 0;
+
+	/* `domain` is longer: the part before the suffix must end a label */
+	suffix = domain + (dlen - slen);
+	if (suffix[-1] != '.')
+		return 0;
+
+	return strcmp(suffix, s) == 0;
+}
+
+/* Decide whether a rule restricted to the list `domains` applies on the page
+ * domain `domain`. The most specific (longest) list entry that matches
+ * decides: a plain entry enables the rule, a '~' entry disables it. When no
+ * entry matches, the rule applies only if the list has no plain entries
+ * (empty list, or exclusions only). The result does not depend on the order
+ * of the entries. */
+static int
+domainsapply(const struct filterdomain *domains, const char *domain)
+{
+	const struct filterdomain *d;
+	size_t len, bestlen = 0;
+	int matched = 0, haspositive = 0, r = 1;
+
+	for (d = domains; d; d = d->next) {
+		if (!d->inverse)
+			haspositive = 1;
+		if (!matchdomain(d->domain, domain))
+			continue;
+		len = strlen(d->domain);
+		if (!matched || len > bestlen) {
+			matched = 1;
+			bestlen = len;
+			r = !d->inverse;
+		}
+	}
+
+	return matched ? r : !haspositive;
+}
+
+/*
+ * Check whether rule `f` applies.
+ *
+ * f:      the rule to test
+ * uri:    URI of the request (not used for CSS rules)
+ * type:   string of request type characters; currently only used for the
+ *         debug output, it does not influence the result
+ * domain: domain of the page
+ *
+ * For a CSS rule the result depends on the domain list only. For a request
+ * rule the domain list must allow the page and the URI must match the rule
+ * pattern. Returns 1 if the rule applies, 0 otherwise.
+ */
+int
+matchrule(struct filterrule *f, const char *uri, const char *type, const char *domain)
+{
+	/* NOTE: order matters, see FilterType enum values */
+	const char *blockstr = "sicoxOSpde^";
+	char pat[1024];
+	int r, i;
+
+	/* ignore exception rules for now, these are usually paid
+	 * for by sites to allow advertisements. */
+	if (f->isexception)
+		return 0;
+
+	/* cheap check first: no pattern is built for rules of other sites */
+	if (!domainsapply(f->domains, domain))
+		return 0;
+	if (f->css)
+		return 1;
+
+	r = snprintf(pat, sizeof(pat), "%s%s%s",
+		f->matchbegin ? "" : "*",
+		f->uri,
+		f->matchend ? "" : "*");
+	if (r < 0 || (size_t)r >= sizeof(pat)) {
+		fprintf(stderr, "warning: pattern too large, ignoring\n");
+		return 0;
+	}
+
+	/* match() returns 0 on success */
+	if (match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1))
+		return 0;
+
+	/* DEBUG: report which of the requested types the rule mask covers */
+	for (; *type; type++) {
+		for (i = 0; blockstr[i]; i++) {
+			if (blockstr[i] == *type &&
+			    f->block & (1UL << i)) {
+				printf("block type '%c'\n", blockstr[i]);
+			}
+		}
+	}
+
+	return 1;
+}
+
+/*
+ * Parse one filter list line `s` into `f`.
+ *
+ * Recognized forms:
+ *   domains#@#selector    element hiding exception (CSS rule)
+ *   domains##selector     element hiding rule (CSS rule)
+ *   [@@][|]pattern[|][$option,option,...]   request rule
+ *
+ * `s` is modified temporarily while parsing but restored before returning.
+ * The strings stored in `f` are heap copies.
+ *
+ * Returns 1 when `f` was filled in, 0 when the line holds no rule (empty
+ * line, '!' comment, or a line enclosed in '[' ... ']'); `f` is left
+ * untouched in that case.
+ */
+int
+parserule(struct filterrule *f, char *s)
+{
+	struct filtertype key, *ft;
+	int inverse;
+	char *p, *values, *opts;
+	size_t len;
+
+	/* skip leading whitespace first so indented comments are recognized;
+	 * the cast avoids undefined behaviour for bytes >= 0x80 */
+	for (; *s && isspace((unsigned char)*s); s++)
+		;
+	if (!*s)
+		return 0; /* line had only whitespace: skip */
+	if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
+		return 0; /* skip comment or bracketed header line */
+
+	memset(f, 0, sizeof(*f));
+
+	if ((p = strstr(s, "#@#"))) {
+		*p = '\0';
+		parsedomainselement(f, s);
+		*p = '#';
+		f->css = estrdup(p + 3);
+		f->isexception = 1;
+		return 1; /* end of CSS rule */
+	}
+
+	/* element hiding rule, NOTE: no wildcards are supported,
+	   "Simplified element hiding syntax" is not supported. */
+	if ((p = strstr(s, "##"))) {
+		*p = '\0';
+		parsedomainselement(f, s);
+		*p = '#';
+		f->css = estrdup(p + 2);
+		return 1; /* end of rule */
+	}
+
+	if (!strncmp(s, "@@", 2)) {
+		f->isexception = 1;
+		s += 2;
+	}
+
+	/* a leading '|' anchors the pattern at the start of the address.
+	 * NOTE(review): "||" should anchor at the start of a domain name; it
+	 * is still treated like a plain start anchor here, so such rules
+	 * cannot match a full URI until the matcher supports domain anchors. */
+	if (*s == '|') {
+		f->matchbegin = 1;
+		s++;
+		if (*s == '|')
+			s++;
+	}
+
+	/* split off the option list, if there is one */
+	if ((opts = strrchr(s, '$')))
+		*opts = '\0';
+
+	/* a trailing '|' anchors the pattern at the end of the address */
+	len = strlen(s);
+	if (len && s[len - 1] == '|') {
+		f->matchend = 1;
+		s[len - 1] = '\0';
+		f->uri = estrdup(s);
+		s[len - 1] = '|';
+	} else {
+		f->uri = estrdup(s);
+	}
+
+	/* no options: the rest of the line was the uri. */
+	if (!opts)
+		return 1;
+	*opts = '$';
+	s = opts + 1;
+
+	/* blockmask, has options? default: allow all options, case-sensitive
+	 * has no options? default: block all options, case-sensitive */
+	f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
+	memset(&key, 0, sizeof(key));
+	do {
+		if ((p = strchr(s, ',')))
+			*p = '\0';
+		/* match option */
+		inverse = 0;
+		if (*s == '~') {
+			inverse = 1;
+			s++;
+		}
+		if ((values = strchr(s, '=')))
+			*(values) = '\0';
+		key.name = s;
+
+		ft = bsearch(&key, filtertypes,
+		             sizeof(filtertypes) / sizeof(*filtertypes),
+		             sizeof(*filtertypes), filtertype_cmp);
+
+		/* put the '=' back; `values` then points at the option value */
+		if (values)
+			*(values++) = '=';
+
+		if (ft) {
+			if (inverse)
+				f->block &= ~(ft->type);
+			else
+				f->block |= ft->type;
+			if (ft->fn && values)
+				ft->fn(f, values);
+		} else {
+			/* DEBUG */
+			fprintf(stderr, "unknown option: '%s' in rule: %s\n",
+			        key.name, f->uri);
+		}
+
+		/* restore ',' */
+		if (p) {
+			*p = ',';
+			s = p + 1;
+		}
+	} while (p);
+
+	return 1;
+}
+
+/* DEBUG: print the uri, css, exception flag and block mask of rule `r` to
+ * stdout. Missing strings are printed as empty. */
+void
+debugrule(struct filterrule *r)
+{
+	const char *uri = r->uri ? r->uri : "";
+	const char *css = r->css ? r->css : "";
+
+	printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===\n",
+	       uri, css, r->isexception, r->block);
+}
+
+/*
+ * Read filter rules from `fp`, one per line, and return them as a newly
+ * allocated singly linked list in file order. Lines that parserule()
+ * rejects are skipped. A trailing "\n" or "\r\n" is stripped from each line.
+ *
+ * Returns NULL when no rule was loaded. A read error is reported on stderr;
+ * the rules parsed up to that point are still returned.
+ */
+struct filterrule *
+loadrules(FILE *fp)
+{
+	struct filterrule f, *r, *head = NULL, *tail = NULL;
+	char *line = NULL;
+	size_t linesiz = 0;
+	ssize_t n;
+
+	while ((n = getline(&line, &linesiz, fp)) > 0) {
+		if (line[n - 1] == '\n')
+			line[--n] = '\0';
+		if (n > 0 && line[n - 1] == '\r')
+			line[--n] = '\0';
+
+		if (!parserule(&f, line))
+			continue;
+
+		r = ecalloc(1, sizeof(*r));
+		*r = f; /* f.next is NULL: parserule() zeroes the rule */
+		r->line = estrdup(line); /* DEBUG */
+
+		if (tail)
+			tail->next = r;
+		else
+			head = r;
+		tail = r;
+	}
+	if (ferror(fp))
+		fprintf(stderr, "error reading rules: %s\n", strerror(errno));
+
+	/* getline() allocates the buffer; it is ours to release */
+	free(line);
+
+	return head;
+}
+
+/* Allocate a Page entry for `page`, record its id and pointer, and push the
+ * entry onto the front of the global `pages` list. Returns the new entry. */
+Page *
+newpage(WebKitWebPage *page)
+{
+	Page *entry = ecalloc(1, sizeof(Page));
+
+	entry->id = webkit_web_page_get_id(page);
+	entry->webpage = page;
+
+	/* link in as the new list head */
+	entry->next = pages;
+	pages = entry;
+
+	return entry;
+}
+
+/*
+ * "document-loaded" signal handler: hides elements by appending two <style>
+ * elements to the document body, one holding the global CSS rules and one
+ * holding the rules whose domain list applies to the page domain.
+ *
+ * Does nothing when the page has no URI, no document or no body, since
+ * there is nothing to append the styles to.
+ */
+static void
+documentloaded(WebKitWebPage *wp, Page *p)
+{
+	WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
+	WebKitDOMHTMLElement *body;
+	WebKitDOMElement *el;
+	char sitecss[1000000] = ""; /* TODO: dynamic allocate */
+	struct filterrule *r;
+	const char *pageuri = webkit_web_page_get_uri(p->webpage);
+	char *uri, *domain, *s;
+
+	if (!doc || !pageuri)
+		return;
+
+	p->view = webkit_dom_document_get_default_view(doc);
+
+	if (!(body = webkit_dom_document_get_body(doc)))
+		return;
+
+	uri = estrdup(pageuri);
+
+	/* TODO: improve, hacky */
+	if ((domain = strstr(uri, "://"))) {
+		domain += sizeof("://") - 1;
+	} else {
+		domain = uri;
+	}
+	/* cut the copy at the first '/' so `domain` holds only the host part */
+	if ((s = strchr(domain, '/')))
+		*s = '\0';
+
+	printf("uri: %s\n", uri);
+	printf("domain: %s\n", domain);
+
+	/* site-specific CSS */
+	for (r = rules; r; r = r->next) {
+		if (!r->css || !r->domains || !matchrule(r, "", "", domain))
+			continue;
+		strlcat(sitecss, r->css, sizeof(sitecss));
+		strlcat(sitecss, "{display:none;}", sizeof(sitecss));
+	}
+	printf("sitecss: %s\n", sitecss);
+
+	el = webkit_dom_document_create_element(doc, "style", NULL);
+	webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
+	webkit_dom_element_set_inner_html(el, globalcss, NULL);
+	webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el), NULL);
+
+	el = webkit_dom_document_create_element(doc, "style", NULL);
+	webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
+	webkit_dom_element_set_inner_html(el, sitecss, NULL);
+	webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el), NULL);
+
+	free(uri);
+}
+
+/*
+ * "send-request" signal handler: tests the request URI against every
+ * non-CSS rule, using the domain of the page that issues the request.
+ *
+ * Returns TRUE when a rule matches (the request is reported as blocked on
+ * stderr), FALSE otherwise. Also returns FALSE, without testing any rule,
+ * when the page or the request has no URI.
+ */
+static gboolean
+sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
+            WebKitURIResponse *res, Page *p)
+{
+	struct filterrule *r;
+	const char *pageuri = webkit_web_page_get_uri(p->webpage);
+	const char *rawreq = webkit_uri_request_get_uri(req);
+	char *uri, *requri, *domain, *s;
+	gboolean blocked = FALSE;
+
+	if (!pageuri || !rawreq)
+		return FALSE;
+
+	uri = estrdup(pageuri);
+	requri = estrdup(rawreq);
+
+	/* TODO: improve, hacky */
+	if ((domain = strstr(uri, "://"))) {
+		domain += sizeof("://") - 1;
+	} else {
+		domain = uri;
+	}
+
+	/* cut the copy at the first '/' so `domain` holds only the host part */
+	if ((s = strchr(domain, '/')))
+		*s = '\0';
+
+	/* match rules */
+	for (r = rules; r; r = r->next) {
+		if (r->css || !matchrule(r, requri, "csio^", domain))
+			continue;
+
+		printf("requri: %s\n", requri);
+		printf("uri: %s\n", uri);
+		printf("domain: %s\n", domain);
+
+		fprintf(stderr, "blocked: %s, %s\n", domain, requri);
+		blocked = TRUE;
+		break;
+	}
+
+	free(uri);
+	free(requri);
+
+	return blocked;
+}
+
+static void
+objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp, WebKitFrame *f, Page *p)
+{ /* intentionally empty: placeholder handler that webpagecreated() connects to "window-object-cleared" */
+}
+
+/* "page-created" signal handler: creates the Page entry for the new web page
+ * and connects the request, script world and document handlers, passing the
+ * entry as their user data. */
+static void
+webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
+{
+	Page *entry = newpage(p);
+	WebKitScriptWorld *world = webkit_script_world_get_default();
+
+	g_signal_connect(p, "send-request", G_CALLBACK(sendrequest), entry);
+	g_signal_connect(world, "window-object-cleared", G_CALLBACK(objectcleared), entry);
+	g_signal_connect(p, "document-loaded", G_CALLBACK(documentloaded), entry);
+}
+
+/*
+ * Web extension entry point: loads the filter rules, builds the global CSS
+ * that hides elements on all sites, and registers the "page-created"
+ * handler. When the rules cannot be read or the file contains no rules, an
+ * error is printed and no handler is registered.
+ *
+ * NOTE(review): the rules path is hard-coded to one user's home directory.
+ */
+G_MODULE_EXPORT void
+webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant *gv)
+{
+	struct filterrule *r;
+	FILE *fp;
+	const char *filename = "/home/hiltjo/adblock_rules";
+
+	if (!(fp = fopen(filename, "r"))) {
+		fprintf(stderr, "cannot read rules from file: %s\n", filename);
+		return;
+	}
+	rules = loadrules(fp);
+	/* close the file on every path, also when no rules were loaded */
+	fclose(fp);
+	if (!rules) {
+		fprintf(stderr, "cannot load adblock rules\n");
+		return;
+	}
+
+	/* general rules: all sites */
+	for (r = rules; r; r = r->next) {
+		if (!r->css || r->domains)
+			continue;
+		strlcat(globalcss, r->css, sizeof(globalcss));
+		strlcat(globalcss, "{display:none;}", sizeof(globalcss));
+	}
+
+	g_signal_connect(e, "page-created",
+	                 G_CALLBACK(webpagecreated), NULL);
+}