codemadness.org

       initial insertion (twss) - surf-adblock - Surf adblock web extension
 (HTM) git clone git://git.codemadness.org/surf-adblock
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 888e9c8a2237bf9c374e04737f103e91cdc30267
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Thu, 14 Jul 2016 22:18:31 +0200
       
       initial insertion (twss)
       
       Diffstat:
         A LICENSE                             |      16 ++++++++++++++++
         A Makefile                            |      35 +++++++++++++++++++++++++++++++
         A README                              |       1 +
         A TODO                                |      11 +++++++++++
         A config.mk                           |      32 +++++++++++++++++++++++++++++++
         A dl.sh                               |      20 ++++++++++++++++++++
         A surf-adblock.c                      |     781 +++++++++++++++++++++++++++++++
       
       7 files changed, 896 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,16 @@
       +ISC License
       +
       +Copyright (c) 2016 Hiltjo Posthuma <hiltjo@codemadness.org>
       +Copyright (c) 2016 Quentin Rameau <quinq@fifth.space>
       +
       +Permission to use, copy, modify, and/or distribute this software for any
       +purpose with or without fee is hereby granted, provided that the above
       +copyright notice and this permission notice appear in all copies.
       +
       +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 (DIR) diff --git a/Makefile b/Makefile
       @@ -0,0 +1,35 @@
       +# Build rules for the surf-adblock web extension (a libtool module).
       +# Paths, flags, CC and LIBTOOL are defined in config.mk.
       +include config.mk
       +
       +SRC = surf-adblock.c
       +OBJ = ${SRC:.c=.lo}
       +
       +all: surf-adblock.la
       +
       +.SUFFIXES: .la .lo .o .c
       +
       +.c.o:
       +        ${CC} -c ${CFLAGS} $<
       +
       +.c.lo:
       +        ${LIBTOOL} --mode compile --tag CC ${CC} ${LIBCFLAGS} -c $<
       +
       +# rebuild the objects whenever the configuration changes
       +${OBJ}: config.mk
       +
       +# -rpath names the final installation directory. DESTDIR is only a
       +# staging prefix used at install time and must not be baked into the
       +# module, otherwise a staged (packaged) build records the wrong libdir.
       +surf-adblock.la: ${OBJ}
       +        ${LIBTOOL} --mode link --tag CC ${CC} ${LIB} ${LIBLDFLAGS} -o $@ \
       +                ${OBJ} -rpath ${LIBPREFIX}
       +
       +clean:
       +        rm -rf surf-adblock.la .libs ${OBJ} ${OBJ:.lo=.o}
       +
       +# "all" already builds surf-adblock.la
       +install: all
       +        mkdir -p ${DESTDIR}${LIBPREFIX}
       +        ${LIBTOOL} --mode install install -c surf-adblock.la \
       +                ${DESTDIR}${LIBPREFIX}/surf-adblock.la
       +
       +uninstall:
       +        ${LIBTOOL} --mode uninstall rm -f \
       +                ${DESTDIR}${LIBPREFIX}/surf-adblock.la
       +        rm -df ${DESTDIR}${LIBPREFIX} || true
       +
       +.PHONY: all clean install uninstall
 (DIR) diff --git a/README b/README
       @@ -0,0 +1 @@
       +adblock parser (WIP)
 (DIR) diff --git a/TODO b/TODO
       @@ -0,0 +1,11 @@
       +Docs:
       +- https://adblockplus.org/en/filter-cheatsheet
       +- https://adblockplus.org/filters
       +
       +- separate between site-specific and global block rules.
       +- optimize matching.
       +- optimize memory allocation.
       +- optimize: pregenerate one global stylesheet that applies to all sites?
       +? support exception rules #@#
       +
       +
 (DIR) diff --git a/config.mk b/config.mk
       @@ -0,0 +1,32 @@
       +# Build configuration for surf-adblock; included by the Makefile.
       +VERSION = 0.1
       +
       +# Customize below to fit your system
       +
       +# paths
       +PREFIX = /usr/local
       +MANPREFIX = ${PREFIX}/share/man
       +# LIBPREFIX: directory the Makefile installs surf-adblock.la into
       +LIBPREFIX = ${PREFIX}/lib/surf
       +
       +X11INC = /usr/X11R6/include
       +X11LIB = /usr/X11R6/lib
       +
       +# compiler/linker flags are queried from pkg-config when make runs a recipe
       +GTKINC = `pkg-config --cflags gtk+-3.0 webkit2gtk-4.0`
       +GTKLIB = `pkg-config --libs gtk+-3.0 webkit2gtk-4.0`
       +WEBEXTINC = `pkg-config --cflags webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
       +WEBEXTLIB = `pkg-config --libs webkit2gtk-4.0 webkit2gtk-web-extension-4.0`
       +
       +# includes and libs
       +INCS = -I. -I/usr/include -I${X11INC} ${GTKINC}
       +LIBS = -L/usr/lib -lc -L${X11LIB} -lX11 ${GTKLIB} -lgthread-2.0
       +
       +# flags
       +# CFLAGS is used by the Makefile's .c.o rule; the LIB* variants are used
       +# by the libtool compile (.c.lo) and link rules that build the module.
       +CPPFLAGS = -DVERSION=\"${VERSION}\" -DWEBEXTDIR=\"${LIBPREFIX}\" -D_DEFAULT_SOURCE
       +CFLAGS = -std=c99 -pedantic -Wall -Os ${INCS} ${CPPFLAGS}
       +LDFLAGS = -s ${LIBS}
       +LIBCPPFLAGS = -DWEBEXTDIR=\"${LIBPREFIX}\" -DWEBKIT_DOM_USE_UNSTABLE_API
       +LIBCFLAGS = -std=c99 -pedantic -Wall -Os ${WEBEXTINC} ${LIBCPPFLAGS}
       +LIBLDFLAGS = -s ${WEBEXTLIB} -module -avoid-version -no-undefined
       +
       +# compiler and linker
       +CC = cc
       +LIBTOOL = libtool --quiet
 (DIR) diff --git a/dl.sh b/dl.sh
       @@ -0,0 +1,20 @@
       +#!/bin/sh
       +# Download the adblock filter lists named in the here-document below and
       +# write them, concatenated and without duplicate lines, to the file "list".
       +
       +set -e
       +
       +(while read -r l; do
       +        curl -L "$l"
       +        echo "" # not all lists end with a newline.
       +done <<!
       +https://easylist.github.io/easylist/easylist.txt
       +https://easylist.github.io/easylist/easyprivacy.txt
       +https://easylist-downloads.adblockplus.org/antiadblockfilters.txt
       +https://easylist-downloads.adblockplus.org/easylistdutch.txt
       +https://easylist.github.io/easylistgermany/easylistgermany.txt
       +https://easylist-downloads.adblockplus.org/liste_fr.txt
       +https://easylist.github.io/easylist/fanboy-annoyance.txt
       +https://easylist.github.io/easylist/fanboy-social.txt
       +!
       +) | awk '{if(!x[$0]++){print $0;}}' > list
       +# the awk filter removes duplicate lines but keeps the original order.
 (DIR) diff --git a/surf-adblock.c b/surf-adblock.c
       @@ -0,0 +1,781 @@
       +#include <sys/stat.h>
       +#include <sys/types.h>
       +
       +#include <ctype.h>
       +#include <errno.h>
       +#include <fcntl.h>
       +#include <limits.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <wchar.h>
       +#include <wctype.h>
       +
       +#include <webkit2/webkit-web-extension.h>
       +#include <webkitdom/webkitdom.h>
       +#include <webkitdom/WebKitDOMDOMWindowUnstable.h>
       +
       +/* Per-page state; entries form a singly linked list headed by `pages`
       + * (new entries are pushed at the front by newpage()). */
       +typedef struct Page {
       +        guint64 id; /* value of webkit_web_page_get_id() for `webpage` */
       +        WebKitWebPage *webpage;
       +        WebKitDOMDOMWindow *view; /* assigned in documentloaded() */
       +        struct Page *next;
       +} Page;
       +
       +/* One entry of a rule's domain list (singly linked). */
       +struct filterdomain {
       +        char *domain; /* heap copy of the domain name, without a leading '~' */
       +        int inverse; /* non-zero if the entry was written as ~domain */
       +        struct filterdomain *next;
       +};
       +
       +/* A parsed filter rule: either a request-blocking rule (`uri` set) or an
       + * element hiding rule (`css` set). Rules are chained through `next`. */
       +struct filterrule {
       +        char *line; /* DEBUG */
       +        /* type: match mask, must be at least 32-bit, see FilterType enum */
       +        unsigned long block;
       +        /* when set, matchrule() adds no implicit leading "*" to the pattern */
       +        int matchbegin;
       +        /* when set, matchrule() adds no implicit trailing "*" to the pattern */
       +        int matchend;
       +        /* is exception rule: prefix @@ for ABP or #@# for CSS */
       +        int isexception;
       +        char *css; /* if non-NULL is CSS rule / hide element rule */
       +        char *uri; /* pattern text of a blocking rule, without options */
       +        struct filterdomain *domains; /* domains the rule is restricted to, or NULL */
       +        struct filterrule *next;
       +};
       +
       +/* Bit flags stored in filterrule.block and filtertype.type.
       + * matchrule() maps bit i to character i of its `blockstr` string, so the
       + * order of the leading entries must stay in sync with that string. */
       +enum {
       +        FilterTypeScript       = 1 << 0,
       +        FilterTypeImage        = 1 << 1,
       +        FilterTypeCSS          = 1 << 2,
       +        FilterTypeObject       = 1 << 3,
       +        FilterTypeXHR          = 1 << 4,
       +        FilterTypeObjectSub    = 1 << 5,
       +        FilterTypeSubDoc       = 1 << 6,
       +        FilterTypePing         = 1 << 7,
       +        FilterTypeDocument     = 1 << 8,
       +        FilterTypeElemHide     = 1 << 9,
       +        FilterTypeOther        = 1 << 10,
       +        FilterTypeGenericHide  = 1 << 11,
       +        FilterTypeGenericBlock = 1 << 12,
       +        FilterTypeMatchCase    = 1 << 13,
       +};
       +
       +/* Description of one rule option (the part after '$' in a rule). */
       +struct filtertype {
       +        /* `type` must be at least 32-bit, see FilterType enum */
       +        unsigned long type;
       +        char *name; /* option name; lookup key used by filtertype_cmp() */
       +        size_t namelen; /* length of `name` without the NUL terminator */
       +        /* NOTE(review): the three flags below are not read anywhere in the
       +         * visible part of this file; their exact meaning is inferred from
       +         * the names only. */
       +        int allowinverse;
       +        int allownormal;
       +        int onlyexception;
       +        /* optional handler, called by parserule() with the rule and the
       +         * text following '=' in the option */
       +        void (*fn)(struct filterrule *, char *);
       +};
       +
       +/* forward declaration: referenced by the `domain` entry of the table below */
       +static void parsedomainsoption(struct filterrule *, char *);
       +
       +/* expands to a string literal followed by its length (name, namelen) */
       +#define STRP(s) s,sizeof(s)-1
       +
       +/* Known rule options. parserule() looks options up with bsearch() and
       + * filtertype_cmp() (strcmp on `name`), so the entries MUST stay sorted by
       + * name. Columns: type, name, namelen, allowinverse, allownormal,
       + * onlyexception, fn. */
       +struct filtertype filtertypes[] = {
       +        /* NOTE: options with 'type' = 0 are silently ignored and treated as
       +         *       requests for now */
       +        { 0,                      STRP("collapse"),          1, 1, 0, NULL },
       +        { FilterTypeDocument,     STRP("document"),          1, 0, 1, NULL },
       +        { 0,                      STRP("domain"),            0, 1, 0, &parsedomainsoption }, /* domain=... */
       +        { 0,                      STRP("donottrack"),        1, 1, 0, NULL },
       +        { FilterTypeElemHide,     STRP("elemhide"),          0, 0, 1, NULL },
       +        { 0,                      STRP("font"),              1, 1, 0, NULL },
       +        { FilterTypeGenericBlock, STRP("genericblock"),      1, 1, 1, NULL },
       +        { FilterTypeGenericHide,  STRP("generichide"),       1, 1, 1, NULL },
       +        { FilterTypeImage,        STRP("image"),             1, 1, 0, NULL },
       +        { FilterTypeMatchCase,    STRP("match-case"),        1, 1, 0, NULL },
       +        { 0,                      STRP("media"),             1, 1, 0, NULL },
       +        { FilterTypeObject,       STRP("object"),            1, 1, 0, NULL },
       +        { FilterTypeObjectSub,    STRP("object-subrequest"), 1, 1, 0, NULL },
       +        { FilterTypeOther,        STRP("other"),             1, 1, 0, NULL },
       +        { FilterTypePing,         STRP("ping"),              1, 1, 0, NULL },
       +        { 0,                      STRP("popup"),             1, 1, 0, NULL },
       +        { FilterTypeScript,       STRP("script"),            1, 1, 0, NULL },
       +        { FilterTypeCSS,          STRP("stylesheet"),        1, 1, 0, NULL },
       +        { FilterTypeSubDoc,       STRP("subdocument"),       1, 1, 0, NULL },
       +        { 0,                      STRP("third-party"),       1, 1, 0, NULL },
       +        { FilterTypeXHR,          STRP("xmlhttprequest"),    1, 1, 0, NULL },
       +        /* NOTE: site-key not supported */
       +};
       +
       +/* head of the list of known pages; newpage() pushes entries at the front */
       +static Page *pages;
       +/* stylesheet text that documentloaded() injects into every document */
       +static char globalcss[5000000]; /* TEST: dynamic allocate later */
       +/* head of the loaded filter rule list */
       +static struct filterrule *rules;
       +
       +/* calloc() wrapper that never returns NULL: when the allocation fails the
       + * error is printed to stderr and the process exits with status 1. */
       +void *
       +ecalloc(size_t nmemb, size_t size)
       +{
       +        void *mem = calloc(nmemb, size);
       +
       +        if (mem)
       +                return mem;
       +
       +        fprintf(stderr, "calloc: %s\n", strerror(errno));
       +        exit(1);
       +}
       +
       +/* strdup() wrapper that never returns NULL: when the copy cannot be
       + * allocated the error is printed to stderr and the process exits with
       + * status 1. The caller owns the returned string. */
       +char *
       +estrdup(const char *s)
       +{
       +        char *copy = strdup(s);
       +
       +        if (copy)
       +                return copy;
       +
       +        fprintf(stderr, "strdup: %s\n", strerror(errno));
       +        exit(1);
       +}
       +
       +/* Special token values returned by pat_next(); ordinary pattern characters
       + * are returned as their (positive) character code. */
       +#define END          0 /* end of pattern */
       +#define UNMATCHABLE -2 /* invalid multibyte sequence in the pattern */
       +#define BRACKET     -3 /* not produced by pat_next() in this file */
       +#define CARET       -4 /* unescaped '^' */
       +#define STAR        -5 /* unescaped '*' */
       +
       +/* Decode the next character of `str`, looking at no more than `n` bytes.
       + * Stores the number of bytes consumed in *step.
       + * Returns the character, 0 when n is 0 (step 0), or -1 for an invalid
       + * multibyte sequence (step 1, so callers can skip the bad byte). */
       +static int
       +str_next(const char *str, size_t n, size_t *step)
       +{
       +        wchar_t wc;
       +        int len;
       +
       +        if (n == 0) {
       +                *step = 0;
       +                return 0;
       +        }
       +
       +        /* single-byte (ASCII) fast path */
       +        if (str[0] < 128U) {
       +                *step = 1;
       +                return str[0];
       +        }
       +
       +        len = mbtowc(&wc, str, n);
       +        if (len < 0) {
       +                *step = 1;
       +                return -1;
       +        }
       +        *step = len;
       +
       +        return wc;
       +}
       +
       +/* Decode the next token of pattern `pat`, looking at no more than `m`
       + * bytes, and store the number of pattern bytes consumed in *step.
       + * Returns END at the end of the pattern, CARET / STAR for an unescaped
       + * '^' / '*', UNMATCHABLE (step 0) for an invalid multibyte sequence and
       + * the character code otherwise. A backslash followed by another byte
       + * makes that byte literal. */
       +static int
       +pat_next(const char *pat, size_t m, size_t *step)
       +{
       +        int escaped = 0;
       +        wchar_t wc;
       +        int len;
       +
       +        if (m == 0 || pat[0] == '\0') {
       +                *step = 0;
       +                return END;
       +        }
       +
       +        if (pat[0] == '\\' && pat[1] != '\0') {
       +                /* skip the backslash, the next character is literal */
       +                escaped = 1;
       +                pat++;
       +        } else if (pat[0] == '^') {
       +                *step = 1;
       +                return CARET;
       +        } else if (pat[0] == '*') {
       +                *step = 1;
       +                return STAR;
       +        }
       +
       +        *step = 1 + escaped;
       +        if (pat[0] < 128U)
       +                return pat[0];
       +
       +        len = mbtowc(&wc, pat, m);
       +        if (len < 0) {
       +                *step = 0;
       +                return UNMATCHABLE;
       +        }
       +        *step = len + escaped;
       +
       +        return wc;
       +}
       +
       +/* Return the other-case form of `k`: the upper case form if that differs
       + * from `k`, otherwise the lower case form (which is `k` itself for
       + * characters without case). */
       +static int
       +casefold(int k)
       +{
       +        const int upper = towupper(k);
       +
       +        if (upper != k)
       +                return upper;
       +        return towlower(k);
       +}
       +
       +/* Return non-zero if pattern token `c` accepts the string character `k`.
       + * CARET is the ABP separator placeholder: it accepts any valid character
       + * that is not a letter, a digit or one of "_-.%". Every other token must
       + * equal `k`, or its other-case form when `fcase` is set. */
       +static int
       +tokmatch(int c, int k, int fcase)
       +{
       +        if (c == CARET)
       +                return k > 0 && !iswdigit(k) && !iswalpha(k) &&
       +                       !(k < 128 && strchr("_-.%", k));
       +        return k == c || (fcase && casefold(k) == c);
       +}
       +
       +/* match() based on musl-libc fnmatch:
       +   https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c
       +
       +   Match the whole of `str` against `pat`. In the pattern '*' matches any
       +   run of characters, '^' matches a single separator character and a
       +   backslash makes the following character literal. When `fcase` is
       +   non-zero the comparison ignores case.
       +
       +   Returns 0 on a match and 1 otherwise (fnmatch convention).
       +
       +   NOTE: '^' always consumes one character; matching the end of the
       +   address with '^' is not implemented. */
       +int
       +match(const char *pat, const char *str, int fcase)
       +{
       +        size_t m = -1, n = -1;
       +        const char *p, *ptail, *endpat;
       +        const char *s, *stail, *endstr;
       +        size_t pinc, sinc, tailcnt=0;
       +        int c, k;
       +
       +        /* Match the literal prefix up to the first '*' */
       +        for (;;) {
       +                switch ((c = pat_next(pat, m, &pinc))) {
       +                case UNMATCHABLE:
       +                        return 1;
       +                case STAR:
       +                        pat++;
       +                        m--;
       +                        break;
       +                default:
       +                        k = str_next(str, n, &sinc);
       +                        if (k <= 0)
       +                                return (c==END) ? 0 : 1;
       +                        if (!tokmatch(c, k, fcase))
       +                                return 1;
       +                        str += sinc;
       +                        n -= sinc;
       +                        pat+=pinc;
       +                        m-=pinc;
       +                        continue;
       +                }
       +                break;
       +        }
       +
       +        /* Compute real pat length if it was initially unknown/-1 */
       +        m = strnlen(pat, m);
       +        endpat = pat + m;
       +
       +        /* Find the last * in pat and count chars needed after it */
       +        for (p=ptail=pat; p<endpat; p+=pinc) {
       +                switch (pat_next(p, endpat-p, &pinc)) {
       +                case UNMATCHABLE:
       +                        return 1;
       +                case STAR:
       +                        tailcnt=0;
       +                        ptail = p+1;
       +                        break;
       +                default:
       +                        tailcnt++;
       +                        break;
       +                }
       +        }
       +
       +        /* Past this point we need not check for UNMATCHABLE in pat,
       +         * because all of pat has already been parsed once. */
       +
       +        /* Compute real str length if it was initially unknown/-1 */
       +        n = strnlen(str, n);
       +        endstr = str + n;
       +        if (n < tailcnt) return 1;
       +
       +        /* Find the final tailcnt chars of str, accounting for UTF-8.
       +         * On illegal sequences we may get it wrong, but in that case
       +         * we necessarily have a matching failure anyway. */
       +        for (s=endstr; s>str && tailcnt; tailcnt--) {
       +                if (s[-1] < 128U || MB_CUR_MAX==1) s--;
       +                else while ((unsigned char)*--s-0x80U<0x40 && s>str);
       +        }
       +        if (tailcnt) return 1;
       +        stail = s;
       +
       +        /* Check that the pat and str tails match */
       +        p = ptail;
       +        for (;;) {
       +                c = pat_next(p, endpat-p, &pinc);
       +                p += pinc;
       +                if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
       +                        if (c != END) return 1;
       +                        break;
       +                }
       +                s += sinc;
       +                if (!tokmatch(c, k, fcase))
       +                        return 1;
       +        }
       +
       +        /* We're all done with the tails now, so throw them out */
       +        endstr = stail;
       +        endpat = ptail;
       +
       +        /* Match pattern components until there are none left */
       +        while (pat<endpat) {
       +                p = pat;
       +                s = str;
       +                for (;;) {
       +                        c = pat_next(p, endpat-p, &pinc);
       +                        p += pinc;
       +                        /* Encountering * completes/commits a component */
       +                        if (c == STAR) {
       +                                pat = p;
       +                                str = s;
       +                                break;
       +                        }
       +                        k = str_next(s, endstr-s, &sinc);
       +                        if (!k)
       +                                return 1;
       +                        if (!tokmatch(c, k, fcase))
       +                                break;
       +                        s += sinc;
       +                }
       +                if (c == STAR) continue;
       +                /* If we failed, advance str, by 1 char if it's a valid
       +                 * char, or past all invalid bytes otherwise. */
       +                k = str_next(str, endstr-str, &sinc);
       +                if (k > 0) str += sinc;
       +                else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
       +        }
       +
       +        return 0;
       +}
       +
       +
       +/*
       +Parse a list of domains separated by `sep` (',' for element hiding rules,
       +'|' for the domain= option) into a newly allocated linked list.
       +A domain prefixed with ~ is stored with its `inverse` flag set.
       +Empty entries ("a,,b", leading or trailing separator) are skipped.
       +`s` is modified temporarily while parsing but restored before returning.
       +Returns the head of the list, or NULL if no domain was found.
       +*/
       +struct filterdomain *
       +parsedomains(char *s, int sep)
       +{
       +        struct filterdomain *head = NULL, *d, *last = NULL;
       +        char *p;
       +        int inverse;
       +
       +        do {
       +                inverse = 0;
       +                if (*s == '~') {
       +                        inverse = 1;
       +                        s++;
       +                }
       +                if (!*s)
       +                        break;
       +
       +                p = strchr(s, sep); /* TODO: should not contain , */
       +                if (p == s) {
       +                        /* empty entry: skip it instead of dropping the
       +                         * remaining domains of the list */
       +                        s++;
       +                        continue;
       +                }
       +                if (p)
       +                        *p = '\0';
       +
       +                d = ecalloc(1, sizeof(struct filterdomain));
       +                d->inverse = inverse;
       +                d->domain = estrdup(s);
       +
       +                if (!head)
       +                        head = last = d;
       +                else
       +                        last = last->next = d;
       +
       +                if (p) {
       +                        *p = sep;
       +                        s = p + 1;
       +                }
       +        } while (p);
       +
       +        return head;
       +}
       +
       +/* Parse the comma-separated domain list `s` of an element hiding rule and
       + * append the resulting entries to the end of f->domains. */
       +void
       +parsedomainselement(struct filterrule *f, char *s)
       +{
       +        struct filterdomain **tail = &f->domains;
       +
       +        /* advance to the terminating NULL link of the existing list */
       +        while (*tail)
       +                tail = &(*tail)->next;
       +
       +        *tail = parsedomains(s, ',');
       +}
       +
       +/* Handler for the domain= rule option: parse the '|'-separated domain
       + * list `s` and append the resulting entries to the end of f->domains. */
       +void
       +parsedomainsoption(struct filterrule *f, char *s)
       +{
       +        struct filterdomain **tail = &f->domains;
       +
       +        /* advance to the terminating NULL link of the existing list */
       +        while (*tail)
       +                tail = &(*tail)->next;
       +
       +        *tail = parsedomains(s, '|');
       +}
       +
       +/* bsearch() comparison callback: orders two struct filtertype entries by
       + * their `name` using strcmp(). */
       +int
       +filtertype_cmp(const void *a, const void *b)
       +{
       +        const struct filtertype *lhs = a;
       +        const struct filtertype *rhs = b;
       +
       +        return strcmp(lhs->name, rhs->name);
       +}
       +
       +/* Check if `domain` is the same domain as `s` or a subdomain of it.
       + * The comparison is case-sensitive. Returns 1 if so, 0 otherwise. */
       +int
       +matchdomain(const char *s, const char *domain)
       +{
       +        const size_t slen = strlen(s);
       +        const size_t dlen = strlen(domain);
       +        const char *suffix;
       +
       +        /* `domain` is shorter than `s`: it can be neither equal to it nor
       +         * a subdomain of it */
       +        if (slen > dlen)
       +                return 0;
       +        if (slen == dlen)
       +                return strcmp(s, domain) == 0;
       +
       +        /* `domain` is longer: it must end in "." followed by `s` */
       +        suffix = domain + (dlen - slen);
       +        if (suffix[-1] != '.')
       +                return 0;
       +
       +        return strcmp(suffix, s) == 0;
       +}
       +
       +/* Decide whether a rule restricted to the list `domains` applies on
       + * `domain`. A rule applies when `domain` matches no negated (~) entry and
       + * either matches a regular entry or the list has no regular entries.
       + * An empty list applies everywhere. Returns 1 if it applies, else 0. */
       +static int
       +domainsapply(const struct filterdomain *domains, const char *domain)
       +{
       +        const struct filterdomain *d;
       +        int haspositive = 0, included = 0;
       +
       +        for (d = domains; d; d = d->next) {
       +                if (!d->inverse)
       +                        haspositive = 1;
       +                if (!matchdomain(d->domain, domain))
       +                        continue;
       +                if (d->inverse)
       +                        return 0; /* explicitly excluded */
       +                included = 1;
       +        }
       +
       +        return included || !haspositive;
       +}
       +
       +/* Check whether rule `f` matches.
       + *
       + * For element hiding rules (f->css set) only the domain restriction is
       + * evaluated. For blocking rules the domain restriction must hold and the
       + * rule pattern must match `uri`; `type` is a string of request type
       + * characters and is only used for debug output.
       + *
       + * Returns 1 if the rule matches and 0 otherwise. Exception rules never
       + * match. */
       +int
       +matchrule(struct filterrule *f, const char *uri, const char *type, const char *domain)
       +{
       +        /* NOTE: order matters, see FilterType enum values */
       +        const char *blockstr = "sicoxOSpde^";
       +        char pat[1024];
       +        int r, i;
       +
       +        /* ignore exception rules for now, these are usually paid
       +         * for by sites to allow advertisements. */
       +        if (f->isexception)
       +                return 0;
       +
       +        if (f->css)
       +                return domainsapply(f->domains, domain);
       +
       +        /* add the implicit wildcards unless the rule is anchored */
       +        r = snprintf(pat, sizeof(pat), "%s%s%s",
       +                f->matchbegin ? "" : "*",
       +                f->uri,
       +                f->matchend ? "" : "*");
       +        if (r < 0 || (size_t)r >= sizeof(pat)) {
       +                fprintf(stderr, "warning: pattern too large, ignoring\n");
       +                return 0;
       +        }
       +
       +        if (!domainsapply(f->domains, domain))
       +                return 0;
       +        /* match() returns 0 on a match */
       +        if (match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1))
       +                return 0;
       +
       +        /* DEBUG: report which of the requested types the rule covers */
       +        for (; *type; type++) {
       +                for (i = 0; blockstr[i]; i++) {
       +                        if (blockstr[i] == *type &&
       +                            f->block & (1UL << i)) {
       +                                printf("block type '%c'\n", blockstr[i]);
       +                        }
       +                }
       +        }
       +
       +        return 1;
       +}
       +
       +/* Strip an unescaped trailing '|' (end-of-address anchor) from f->uri and
       + * record it in f->matchend. */
       +static void
       +stripendanchor(struct filterrule *f)
       +{
       +        size_t len = strlen(f->uri);
       +
       +        if (len && f->uri[len - 1] == '|' &&
       +            (len < 2 || f->uri[len - 2] != '\\')) {
       +                f->uri[len - 1] = '\0';
       +                f->matchend = 1;
       +        }
       +}
       +
       +/* Parse one line `s` of a filter list into `f`.
       + *
       + * Comments ('!' prefix), "[...]" header lines and blank lines are skipped.
       + * Element hiding rules ("##", "#@#") fill f->css and f->domains; all other
       + * rules fill f->uri, the anchor flags, f->isexception and, when options
       + * follow a '$', f->block and f->domains.
       + *
       + * `s` is modified temporarily while parsing but restored before
       + * returning. Strings stored in `f` are heap copies.
       + *
       + * Returns 1 if a rule was parsed into `f`, 0 if the line was skipped (in
       + * which case `f` is left untouched).
       + *
       + * NOTE: "||" (domain anchor) is treated like a plain start anchor. */
       +int
       +parserule(struct filterrule *f, char *s)
       +{
       +        struct filtertype key, *ft;
       +        int inverse = 0;
       +        char *p, *values;
       +
       +        if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
       +                return 0; /* skip comment or empty line */
       +        /* cast: isspace() is undefined for negative char values */
       +        for (; *s && isspace((unsigned char)*s); s++)
       +                ;
       +        if (!*s)
       +                return 0; /* line had only whitespace: skip */
       +
       +        memset(f, 0, sizeof(struct filterrule));
       +
       +        if ((p = strstr(s, "#@#"))) {
       +                *p = '\0';
       +                parsedomainselement(f, s);
       +                *p = '#';
       +                f->css = estrdup(p + 3);
       +                f->isexception = 1;
       +                goto end; /* end of CSS rule */
       +        }
       +
       +        /* element hiding rule, NOTE: no wildcards are supported,
       +        "Simplified element hiding syntax" is not supported. */
       +        if ((p = strstr(s, "##"))) {
       +                *p = '\0';
       +                parsedomainselement(f, s);
       +                *p = '#';
       +                f->css = estrdup(p + 2);
       +                goto end; /* end of rule */
       +        }
       +
       +        if (!strncmp(s, "@@", 2)) {
       +                f->isexception = 1;
       +                s += 2;
       +        }
       +        if (*s == '|') {
       +                /* a leading "|" anchors the pattern at the start of the
       +                 * address ("||": at the start of the domain name) */
       +                f->matchbegin = 1;
       +                s++;
       +                if (*s == '|')
       +                        s++;
       +        }
       +
       +        /* no options, use rest of line as uri. */
       +        if (!(p = strrchr(s, '$'))) {
       +                f->uri = estrdup(s);
       +                stripendanchor(f);
       +                goto end;
       +        }
       +
       +        /* has options */
       +        *p = '\0';
       +        f->uri = estrdup(s);
       +        stripendanchor(f);
       +        *p = '$';
       +        s = ++p;
       +
       +        /* blockmask, has options? default: allow all options, case-sensitive
       +         * has no options? default: block all options, case-sensitive  */
       +        f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
       +        do {
       +                if ((p = strchr(s, ',')))
       +                        *p = '\0';
       +                /* match option */
       +                inverse = 0;
       +                if (*s == '~') {
       +                        inverse = 1;
       +                        s++;
       +                }
       +                if ((values = strchr(s, '=')))
       +                        *(values) = '\0';
       +                key.name = s;
       +
       +                /* filtertypes is sorted by name, see filtertype_cmp() */
       +                ft = bsearch(&key, &filtertypes,
       +                    sizeof(filtertypes) / sizeof(*filtertypes), sizeof(*filtertypes),
       +                    filtertype_cmp);
       +
       +                /* restore the '=' and point `values` at the option value */
       +                if (values)
       +                        *(values++) = '=';
       +
       +                if (ft) {
       +                        if (inverse)
       +                                f->block &= ~(ft->type);
       +                        else
       +                                f->block |= ft->type;
       +                        if (ft->fn && values)
       +                                ft->fn(f, values);
       +                } else {
       +                        /* DEBUG */
       +                        fprintf(stderr, "unknown option: '%s' in rule: %s\n",
       +                                key.name, f->uri);
       +                }
       +
       +                /* restore ',' */
       +                if (p) {
       +                        *p = ',';
       +                        s = p + 1;
       +                }
       +        } while (p);
       +end:
       +
       +        return 1;
       +}
       +
       +/* Print the uri, css, exception flag and block mask of rule `r` to
       + * stdout; missing strings are printed as empty. */
       +void
       +debugrule(struct filterrule *r)
       +{
       +        const char *uri = r->uri ? r->uri : "";
       +        const char *css = r->css ? r->css : "";
       +
       +        printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: %lu\n===\n",
       +                uri, css, r->isexception, r->block);
       +}
       +
       +/* Read filter rules from `fp`, one per line, and return them as a newly
       + * allocated linked list in file order (NULL if no rule was parsed).
       + * A trailing "\n" or "\r\n" is stripped from each line; lines rejected by
       + * parserule() are skipped. */
       +struct filterrule *
       +loadrules(FILE *fp)
       +{
       +        char *line = NULL;
       +        size_t linesiz = 0;
       +        ssize_t n;
       +        struct filterrule f, *r, *rn = NULL, *head = NULL;
       +
       +        /* TODO: handle ferror() */
       +        /* load rules */
       +        while ((n = getline(&line, &linesiz, fp)) > 0) {
       +                if (line[n - 1] == '\n')
       +                        line[--n] = '\0';
       +                if (n > 0 && line[n - 1] == '\r')
       +                        line[--n] = '\0';
       +
       +                if (parserule(&f, line)) {
       +                        r = ecalloc(1, sizeof(struct filterrule));
       +                        if (!head)
       +                                head = rn = r;
       +                        else
       +                                rn = rn->next = r;
       +                        memcpy(rn, &f, sizeof(struct filterrule));
       +                        r->line = estrdup(line); /* DEBUG */
       +                }
       +        }
       +        /* getline() allocates the buffer, even when it fails */
       +        free(line);
       +
       +        return head;
       +}
       +
       +/* Allocate the state for `page`, record its id and handle and push it
       + * onto the front of the global `pages` list. Returns the new entry. */
       +Page *
       +newpage(WebKitWebPage *page)
       +{
       +        Page *entry = ecalloc(1, sizeof(Page));
       +
       +        entry->id = webkit_web_page_get_id(page);
       +        entry->webpage = page;
       +
       +        entry->next = pages;
       +        pages = entry;
       +
       +        return entry;
       +}
       +
       +/* Create a <style type="text/css"> element holding `css` and append it to
       + * `body`. */
       +static void
       +appendstyle(WebKitDOMDocument *doc, WebKitDOMHTMLElement *body, const char *css)
       +{
       +        WebKitDOMElement *el;
       +
       +        el = webkit_dom_document_create_element(doc, "style", NULL);
       +        webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
       +        webkit_dom_element_set_inner_html(el, css, NULL);
       +        webkit_dom_node_append_child(WEBKIT_DOM_NODE(body), WEBKIT_DOM_NODE(el), NULL);
       +}
       +
       +/* Handler called with the page `wp` and its state `p` once the document
       + * is loaded: collects the CSS of all element hiding rules that are
       + * restricted to the page's domain and appends two style elements to the
       + * document body, one with `globalcss` and one with the site-specific
       + * rules. Also stores the document's default view in p->view. */
       +static void
       +documentloaded(WebKitWebPage *wp, Page *p)
       +{
       +        WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
       +        WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
       +        char sitecss[1000000] = ""; /* TODO: dynamic allocate */
       +        struct filterrule *r;
       +        char *uri, *domain, *s;
       +
       +        uri = estrdup((char *)webkit_web_page_get_uri(p->webpage));
       +
       +        /* TODO: improve, hacky */
       +        /* domain: text between "://" (if present) and the next '/' */
       +        domain = strstr(uri, "://");
       +        domain = domain ? domain + (sizeof("://") - 1) : uri;
       +        s = strchr(domain, '/');
       +        if (s)
       +                *s = '\0';
       +
       +        printf("uri: %s\n", uri);
       +        printf("domain: %s\n", domain);
       +
       +        /* site-specific CSS */
       +        for (r = rules; r; r = r->next) {
       +                if (r->css && r->domains && matchrule(r, "", "", domain)) {
       +                        strlcat(sitecss, r->css, sizeof(sitecss));
       +                        strlcat(sitecss, "{display:none;}", sizeof(sitecss));
       +                }
       +        }
       +        printf("sitecss: %s\n", sitecss);
       +
       +        p->view = webkit_dom_document_get_default_view(doc);
       +
       +        appendstyle(doc, body, globalcss);
       +        appendstyle(doc, body, sitecss);
       +
       +        free(uri);
       +}
       +
       +/*
       + * "send-request" signal handler: decides whether an outgoing request
       + * is blocked by checking it against every non-CSS rule.
       + *
       + * wp:  page emitting the signal (unused; p->webpage is queried instead).
       + * req: the request about to be sent; its URI is what the rules match.
       + * res: redirect response supplied by the signal (unused).
       + * p:   per-page record passed as signal user data.
       + *
       + * Returns TRUE when a rule matched (the request is reported as blocked
       + * on stderr), FALSE otherwise.
       + */
       +static gboolean
       +sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
       +                   WebKitURIResponse *res, Page *p)
       +{
       +        struct filterrule *r;
       +        const char *pageuri = webkit_web_page_get_uri(p->webpage);
       +        const char *rawreq = webkit_uri_request_get_uri(req);
       +        char *uri, *requri, *domain, *s;
       +        gboolean blocked = FALSE;
       +
       +        /* without a request URI there is nothing to match: let it pass */
       +        if (!rawreq)
       +                return FALSE;
       +
       +        /* defensive: treat a missing page URI as an empty string */
       +        uri = estrdup((char *)(pageuri ? pageuri : ""));
       +        requri = estrdup((char *)rawreq);
       +
       +        /* TODO: improve, hacky */
       +        /* domain = text after "://" (or the whole URI), cut at first '/' */
       +        if ((domain = strstr(uri, "://"))) {
       +                domain += sizeof("://") - 1;
       +        } else {
       +                domain = uri;
       +        }
       +
       +        if ((s = strchr(domain, '/')))
       +                *s = '\0';
       +
       +        /* match rules: only rules without CSS take part in blocking */
       +        for (r = rules; r; r = r->next) {
       +                if (r->css || !matchrule(r, requri, "csio^", domain))
       +                        continue;
       +
       +                printf("requri: %s\n", requri);
       +                printf("uri:    %s\n", uri);
       +                printf("domain: %s\n", domain);
       +
       +                fprintf(stderr, "blocked: %s, %s\n", domain, requri);
       +                blocked = TRUE;
       +                break;
       +        }
       +
       +        /* single exit so both copies are always released */
       +        free(uri);
       +        free(requri);
       +
       +        return blocked;
       +}
       +
       +/*
       + * "window-object-cleared" signal handler. Currently a no-op: none of the
       + * arguments are used.
       + */
       +static void
       +objectcleared(WebKitScriptWorld *w, WebKitWebPage *wp,
       +              WebKitFrame *f, Page *p)
       +{
       +        /* nothing to do */
       +}
       +
       +/*
       + * "page-created" signal handler: registers a record for the new page
       + * and hooks up the per-page callbacks, passing that record as user data.
       + *
       + * e:      the web extension emitting the signal (unused).
       + * p:      the newly created web page.
       + * unused: signal user data, ignored.
       + */
       +static void
       +webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
       +{
       +        WebKitScriptWorld *world;
       +        Page *pg;
       +
       +        pg = newpage(p);
       +        world = webkit_script_world_get_default();
       +
       +        /* request filtering for this page */
       +        g_signal_connect(p, "send-request", G_CALLBACK(sendrequest), pg);
       +        /* emitted by the default script world, not by the page itself */
       +        g_signal_connect(world, "window-object-cleared", G_CALLBACK(objectcleared), pg);
       +        /* CSS injection once the document is available */
       +        g_signal_connect(p, "document-loaded", G_CALLBACK(documentloaded), pg);
       +}
       +
       +/*
       + * Web extension entry point: loads the filter rules, builds the global
       + * CSS from every CSS rule that has no domain list, and subscribes to
       + * the "page-created" signal.
       + *
       + * e:  the web extension instance.
       + * gv: user data supplied with the extension (unused).
       + *
       + * If the rules file cannot be opened or yields no rules, a message is
       + * written to stderr and no signal handler is installed.
       + */
       +G_MODULE_EXPORT void
       +webkit_web_extension_initialize_with_user_data(WebKitWebExtension *e, GVariant *gv)
       +{
       +        struct filterrule *r;
       +        FILE *fp;
       +        /* TODO: hard-coded absolute path; should be made configurable */
       +        const char *filename = "/home/hiltjo/adblock_rules";
       +
       +        if (!(fp = fopen(filename, "r"))) {
       +                fprintf(stderr, "cannot read rules from file: %s\n", filename);
       +                return;
       +        }
       +        rules = loadrules(fp);
       +        /* close before checking the result so the stream is not leaked */
       +        /* when loading fails */
       +        fclose(fp);
       +        if (!rules) {
       +                fprintf(stderr, "cannot load adblock rules\n");
       +                return;
       +        }
       +
       +        /* general rules: all sites */
       +        for (r = rules; r; r = r->next) {
       +                if (!r->css || r->domains)
       +                        continue;
       +                strlcat(globalcss, r->css, sizeof(globalcss));
       +                strlcat(globalcss, "{display:none;}", sizeof(globalcss));
       +        }
       +
       +        g_signal_connect(e, "page-created",
       +                         G_CALLBACK(webpagecreated), NULL);
       +}