codemadness.org

       separate adblock and surf-specific code - surf-adblock - Surf adblock web extension
 (HTM) git clone git://git.codemadness.org/surf-adblock
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 08e747efa80a44603f80db0fdacb3f63ad210b8e
 (DIR) parent c4841f33f64cd77b30def9c0b11d1ac9ece7f821
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sun,  4 Jun 2017 14:15:27 +0200
       
       separate adblock and surf-specific code
       
       this will also be useful later on for writing test-cases or for
       re-using the code in a daemon.
       
       Diffstat:
         M Makefile                            |       2 +-
         M TODO                                |       2 ++
         A adblock.c                           |     956 +++++++++++++++++++++++++++++++
         M surf-adblock.c                      |     944 +------------------------------
       
       4 files changed, 973 insertions(+), 931 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -1,6 +1,6 @@
        include config.mk
        
       -SRC = surf-adblock.c
       +SRC = surf-adblock.c adblock.c
        OBJ = ${SRC:.c=.lo}
        
        all: surf-adblock.la
 (DIR) diff --git a/TODO b/TODO
       @@ -1,3 +1,5 @@
       +- optimize towupper for fnmatch? check < 128, see musl optimization.
       +
        - fix blocking of : ||ads.somesite.com^
        
        - fix tweakers.net popup / rule.
 (DIR) diff --git a/adblock.c b/adblock.c
       @@ -0,0 +1,956 @@
       +#include <sys/stat.h>
       +#include <sys/types.h>
       +
       +#include <ctype.h>
       +#include <errno.h>
       +#include <fcntl.h>
       +#include <limits.h>
       +#include <stdarg.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +#include <wchar.h>
       +#include <wctype.h>
       +
       +#include "adblock.h"
       +
       +/* String data / memory pool: a growable byte buffer; string_append()
       + * keeps `data` NUL-terminated after every successful append. */
       +typedef struct string {
       +        char   *data;   /* heap buffer, grown with realloc() */
       +        size_t  datasz; /* allocated size */
       +        size_t  len;    /* current string length, excluding the NUL byte */
       +} String;
       +
       +struct filterdomain {
       +        char *domain; /* domain name, heap-allocated by parsedomains() */
       +        int inverse; /* non-zero when the entry was prefixed with '~' */
       +        struct filterdomain *next; /* next entry of the singly-linked list */
       +};
       +
       +struct filterrule {
       +        /* type: match mask, must be at least 32-bit, see FilterType enum */
       +        unsigned long block;
       +        int matchbegin; /* non-zero: matchrule() adds no leading '*' */
       +        int matchend; /* non-zero: matchrule() adds no trailing '*' */
       +        /* is exception rule: prefix @@ for ABP or #@# for CSS */
       +        int isexception;
       +        char *css; /* if non-NULL is CSS rule / hide element rule */
       +        char *uri; /* URI pattern; left NULL by parserule() for CSS rules */
       +        struct filterdomain *domains; /* domain restrictions, NULL if none */
       +        struct filterrule *next; /* next rule of the global `rules` list */
       +};
       +
       +enum { /* FilterType bit flags, combined in filterrule.block */
       +        FilterTypeScript       = 1 << 0,
       +        FilterTypeImage        = 1 << 1,
       +        FilterTypeCSS          = 1 << 2,
       +        FilterTypeObject       = 1 << 3,
       +        FilterTypeXHR          = 1 << 4,
       +        FilterTypeObjectSub    = 1 << 5,
       +        FilterTypeSubDoc       = 1 << 6,
       +        FilterTypePing         = 1 << 7,
       +        FilterTypeDocument     = 1 << 8,
       +        FilterTypeElemHide     = 1 << 9,
       +        FilterTypeOther        = 1 << 10,
       +        FilterTypeGenericHide  = 1 << 11,
       +        FilterTypeGenericBlock = 1 << 12,
       +        FilterTypeMatchCase    = 1 << 13, /* selects case-sensitive matching in matchrule() */
       +};
       +
       +struct filtertype {
       +        /* `type` must be at least 32-bit, see FilterType enum */
       +        unsigned long type;
       +        char *name; /* option name as written after '$' in a rule */
       +        size_t namelen; /* length of `name`, filled in by STRP() */
       +        int allowinverse; /* NOTE(review): allowinverse, allownormal and
       +                           * onlyexception are not read by the code visible
       +                           * here; confirm their meaning before relying on it */
       +        int allownormal;
       +        int onlyexception;
       +        int (*fn)(struct filterrule *, char *); /* optional parser for the text after '=' */
       +};
       +
       +static int parsedomainsoption(struct filterrule *, char *);
       +
       +#define STRP(s) s,sizeof(s)-1 /* expands to: string literal, its length */
       +
       +static struct filtertype filtertypes[] = {
       +        /* NOTE: options with 'type' = 0 are silently ignored and treated as
       +         *       requests for now.
       +         * Entries must stay sorted by name (strcmp() order): parserule()
       +         * looks options up with bsearch() and filtertype_cmp(). */
       +        { 0,                      STRP("collapse"),          1, 1, 0, NULL },
       +        { FilterTypeDocument,     STRP("document"),          1, 0, 1, NULL },
       +        { 0,                      STRP("domain"),            0, 1, 0,
       +                                     /* domain=... */  &parsedomainsoption },
       +        { 0,                      STRP("donottrack"),        1, 1, 0, NULL },
       +        { FilterTypeElemHide,     STRP("elemhide"),          0, 0, 1, NULL },
       +        { 0,                      STRP("font"),              1, 1, 0, NULL },
       +        { FilterTypeGenericBlock, STRP("genericblock"),      1, 1, 1, NULL },
       +        { FilterTypeGenericHide,  STRP("generichide"),       1, 1, 1, NULL },
       +        { FilterTypeImage,        STRP("image"),             1, 1, 0, NULL },
       +        { FilterTypeMatchCase,    STRP("match-case"),        1, 1, 0, NULL },
       +        { 0,                      STRP("media"),             1, 1, 0, NULL },
       +        { FilterTypeObject,       STRP("object"),            1, 1, 0, NULL },
       +        { FilterTypeObjectSub,    STRP("object-subrequest"), 1, 1, 0, NULL },
       +        { FilterTypeOther,        STRP("other"),             1, 1, 0, NULL },
       +        { FilterTypePing,         STRP("ping"),              1, 1, 0, NULL },
       +        { 0,                      STRP("popup"),             1, 1, 0, NULL },
       +        { FilterTypeScript,       STRP("script"),            1, 1, 0, NULL },
       +        { FilterTypeCSS,          STRP("stylesheet"),        1, 1, 0, NULL },
       +        { FilterTypeSubDoc,       STRP("subdocument"),       1, 1, 0, NULL },
       +        { 0,                      STRP("third-party"),       1, 1, 0, NULL },
       +        { FilterTypeXHR,          STRP("xmlhttprequest"),    1, 1, 0, NULL },
       +        /* NOTE: site-key not supported */
       +};
       +
       +static String globalcss; /* returned by getglobalcss(), freed by cleanup() */
       +static struct filterrule *rules; /* head of the list of loaded rules */
       +
       +/* Print a printf-style warning to stderr, prefixed with the program name. */
       +static void
       +weprintf(const char *fmt, ...)
       +{
       +        va_list args;
       +
       +        fputs("surf-adblock: ", stderr);
       +
       +        va_start(args, fmt);
       +        vfprintf(stderr, fmt, args);
       +        va_end(args);
       +}
       +
       +/* calloc() wrapper: logs a warning on failure and returns NULL. */
       +static void *
       +wecalloc(size_t nmemb, size_t size)
       +{
       +        void *mem = calloc(nmemb, size);
       +
       +        if (mem == NULL)
       +                weprintf("calloc: %s\n", strerror(errno));
       +
       +        return mem;
       +}
       +
       +/* strndup() wrapper: logs a warning on failure and returns NULL. */
       +static char *
       +westrndup(const char *s, size_t n)
       +{
       +        char *copy = strndup(s, n);
       +
       +        if (copy == NULL)
       +                weprintf("strndup: %s\n", strerror(errno));
       +        return copy;
       +}
       +
       +/* strdup() wrapper: logs a warning on failure and returns NULL. */
       +static char *
       +westrdup(const char *s)
       +{
       +        char *copy = strdup(s);
       +
       +        if (copy == NULL)
       +                weprintf("strdup: %s\n", strerror(errno));
       +
       +        return copy;
       +}
       +
       +/* Free all loaded rules and the global CSS buffer.
       + * Both globals are reset afterwards, so calling cleanup() a second time or
       + * appending to the global CSS later does not touch freed memory. */
       +void
       +cleanup(void)
       +{
       +        struct filterrule *r, *rnext;
       +        struct filterdomain *d, *dnext;
       +
       +        free(globalcss.data);
       +        /* also clears len/datasz, which would otherwise describe freed memory */
       +        memset(&globalcss, 0, sizeof(globalcss));
       +
       +        for (r = rules; r; r = rnext) {
       +                rnext = r->next;
       +                for (d = r->domains; d; d = dnext) {
       +                        dnext = d->next;
       +                        free(d->domain);
       +                        free(d);
       +                }
       +                free(r->css);
       +                free(r->uri);
       +                free(r);
       +        }
       +        rules = NULL;
       +}
       +
       +/* Grow the buffer of `s` so that it can hold more than `newsz` bytes.
       + * The capacity is a power of two, at least 64.
       + * Returns the resulting capacity. On failure the buffer is left untouched
       + * and the old capacity is returned, so callers detect failure by comparing
       + * the result against the size they need. */
       +static size_t
       +string_buffer_realloc(String *s, size_t newsz)
       +{
       +        char *tmp;
       +        size_t allocsz;
       +
       +        for (allocsz = 64; allocsz <= newsz; allocsz *= 2) {
       +                /* doubling once more would wrap around to 0 and never end */
       +                if (allocsz > (size_t)-1 / 2) {
       +                        weprintf("realloc: %s\n", strerror(ENOMEM));
       +                        return s->datasz;
       +                }
       +        }
       +        if (!(tmp = realloc(s->data, allocsz))) {
       +                weprintf("realloc: %s\n", strerror(errno));
       +        } else {
       +                s->data   = tmp;
       +                s->datasz = allocsz;
       +        }
       +
       +        return s->datasz;
       +}
       +
       +/* Append `len` bytes of `data` to `s`, keeping it NUL-terminated.
       + * Returns `len` on success and 0 when the buffer could not be grown; an
       + * empty append is a no-op that also returns 0. */
       +static size_t
       +string_append(String *s, const char *data, size_t len)
       +{
       +        size_t total;
       +
       +        if (len == 0)
       +                return 0;
       +
       +        total = s->len + len;
       +        /* grow only when the new length plus its NUL byte does not fit;
       +         * the buffer is never shrunk */
       +        if (total >= s->datasz &&
       +            string_buffer_realloc(s, total + 1) <= total)
       +                return 0;
       +
       +        memcpy(&s->data[s->len], data, len);
       +        s->data[total] = '\0';
       +        s->len = total;
       +        return len;
       +}
       +
       +#define END          0 /* pat_next(): end of pattern */
       +#define UNMATCHABLE -2 /* pat_next(): invalid multibyte sequence in pattern */
       +#define BRACKET     -3 /* not used by the code visible in this part of the file */
       +#define CARET       -4 /* pat_next(): unescaped '^' */
       +#define STAR        -5 /* pat_next(): unescaped '*' */
       +
       +/* Decode the next character of `str`, looking at no more than `n` bytes.
       + * The number of bytes consumed is stored in `*step`.
       + * Returns the character (0 when `n` is 0 or at a NUL byte), or -1 for an
       + * invalid multibyte sequence, in which case one byte is consumed. */
       +static int
       +str_next(const char *str, size_t n, size_t *step)
       +{
       +        wchar_t wc;
       +        int nbytes;
       +
       +        if (n == 0) {
       +                *step = 0;
       +                return 0;
       +        }
       +        /* plain 7-bit character: no decoding needed */
       +        if (!(str[0] >= 128U)) {
       +                *step = 1;
       +                return str[0];
       +        }
       +        nbytes = mbtowc(&wc, str, n);
       +        if (nbytes < 0) {
       +                *step = 1;
       +                return -1;
       +        }
       +        *step = nbytes;
       +        return wc;
       +}
       +
       +/* Decode the next element of pattern `pat`, looking at most `m` bytes ahead.
       + * The number of pattern bytes consumed is stored in `*step`.
       + * Returns END at the end of the pattern, CARET or STAR for an unescaped
       + * '^' or '*', UNMATCHABLE for an invalid multibyte sequence, otherwise the
       + * (possibly backslash-escaped) literal character. */
       +static int
       +pat_next(const char *pat, size_t m, size_t *step)
       +{
       +        wchar_t wc;
       +        int nbytes, escaped;
       +
       +        if (m == 0 || pat[0] == '\0') {
       +                *step = 0;
       +                return END;
       +        }
       +
       +        /* a backslash followed by another byte makes that byte literal */
       +        escaped = (pat[0] == '\\' && pat[1] != '\0');
       +        *step = 1 + escaped;
       +        if (escaped)
       +                pat++;
       +        else if (pat[0] == '^')
       +                return CARET;
       +        else if (pat[0] == '*')
       +                return STAR;
       +
       +        if (!(pat[0] >= 128U))
       +                return pat[0];
       +
       +        nbytes = mbtowc(&wc, pat, m);
       +        if (nbytes < 0) {
       +                *step = 0;
       +                return UNMATCHABLE;
       +        }
       +        *step = nbytes + escaped;
       +        return wc;
       +}
       +
       +/* Return `k` with its letter case swapped: the upper-case form when that
       + * differs from `k`, otherwise the lower-case form. */
       +static int
       +casefold(int k)
       +{
       +        int upper = towupper(k);
       +
       +        if (upper != k)
       +                return upper;
       +        return towlower(k);
       +}
       +
       +/* match() based on musl-libc fnmatch:
       +   https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c
       +   Match the NUL-terminated string `str` against pattern `pat`, in which
       +   '*' matches any run of characters and '\' escapes the next character.
       +   A non-zero `fcase` also accepts a character whose casefold() equals the
       +   pattern character.
       +   Returns 0 on a match and 1 otherwise. */
       +static int
       +match(const char *pat, const char *str, int fcase)
       +{
       +        size_t m = -1, n = -1;
       +        const char *p, *ptail, *endpat;
       +        const char *s, *stail, *endstr;
       +        size_t pinc, sinc, tailcnt=0;
       +        int c, k, kfold;
       +
       +        for (;;) {
       +                switch ((c = pat_next(pat, m, &pinc))) {
       +                case UNMATCHABLE:
       +                        return 1;
       +                case STAR:
       +                        pat++;
       +                        m--;
       +                        break;
       +                default:
       +                        k = str_next(str, n, &sinc);
       +                        /* TODO: write a test-case
       +                         * NOTE(review): CARET is negative and is only ever
       +                         * compared against characters from str_next(), so
       +                         * as written an unescaped '^' appears never to
       +                         * match; confirm before relying on '^' rules. */
       +                        if (c == CARET && (k == '?' || k == '/' || k <= 0))
       +                                return 1;
       +                        if (k <= 0)
       +                                return (c==END) ? 0 : 1;
       +                        str += sinc;
       +                        n -= sinc;
       +                        kfold = fcase ? casefold(k) : k;
       +                        if (k != c && kfold != c)
       +                                return 1;
       +                        pat+=pinc;
       +                        m-=pinc;
       +                        continue;
       +                }
       +                break;
       +        }
       +
       +        /* Compute real pat length if it was initially unknown/-1 */
       +        m = strnlen(pat, m);
       +        endpat = pat + m;
       +
       +        /* Find the last * in pat and count chars needed after it */
       +        for (p=ptail=pat; p<endpat; p+=pinc) {
       +                switch (pat_next(p, endpat-p, &pinc)) {
       +                case UNMATCHABLE:
       +                        return 1;
       +                case STAR:
       +                        tailcnt=0;
       +                        ptail = p+1;
       +                        break;
       +                default:
       +                        tailcnt++;
       +                        break;
       +                }
       +        }
       +
       +        /* Past this point we need not check for UNMATCHABLE in pat,
       +         * because all of pat has already been parsed once. */
       +
       +        /* Compute real str length if it was initially unknown/-1 */
       +        n = strnlen(str, n);
       +        endstr = str + n;
       +        if (n < tailcnt) return 1;
       +
       +        /* Find the final tailcnt chars of str, accounting for UTF-8.
       +         * On illegal sequences we may get it wrong, but in that case
       +         * we necessarily have a matching failure anyway. */
       +        for (s=endstr; s>str && tailcnt; tailcnt--) {
       +                if (s[-1] < 128U || MB_CUR_MAX==1) s--;
       +                else while ((unsigned char)*--s-0x80U<0x40 && s>str);
       +        }
       +        if (tailcnt) return 1;
       +        stail = s;
       +
       +        /* Check that the pat and str tails match */
       +        p = ptail;
       +        for (;;) {
       +                c = pat_next(p, endpat-p, &pinc);
       +                p += pinc;
       +                if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
       +                        if (c != END) return 1;
       +                        break;
       +                }
       +                s += sinc;
       +                kfold = fcase ? casefold(k) : k;
       +                if (k != c && kfold != c)
       +                        return 1;
       +        }
       +
       +        /* We're all done with the tails now, so throw them out */
       +        endstr = stail;
       +        endpat = ptail;
       +
       +        /* Match pattern components until there are none left */
       +        while (pat<endpat) {
       +                p = pat;
       +                s = str;
       +                for (;;) {
       +                        c = pat_next(p, endpat-p, &pinc);
       +                        p += pinc;
       +                        /* Encountering * completes/commits a component */
       +                        if (c == STAR) {
       +                                pat = p;
       +                                str = s;
       +                                break;
       +                        }
       +                        k = str_next(s, endstr-s, &sinc);
       +                        if (!k)
       +                                return 1;
       +                        kfold = fcase ? casefold(k) : k;
       +                        if (k != c && kfold != c)
       +                                break;
       +                        s += sinc;
       +                }
       +                if (c == STAR) continue;
       +                /* If we failed, advance str, by 1 char if it's a valid
       +                 * char, or past all invalid bytes otherwise. */
       +                k = str_next(str, endstr-str, &sinc);
       +                if (k > 0) str += sinc;
       +                else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
       +        }
       +
       +        return 0;
       +}
       +
       +/*
       +Parse a list of domains separated by `sep` into a newly allocated list.
       +A domain prefixed with ~ is stored with its `inverse` flag set.
       +Parsing stops at the first empty entry.
       +
       +On success *head is the list (NULL if no domain was parsed) and the return
       +value is 1 if at least one domain was parsed, else 0.
       +On allocation failure everything allocated so far is freed, *head is set
       +to NULL and -1 is returned.
       +*/
       +static int
       +parsedomains(const char *s, int sep, struct filterdomain **head)
       +{
       +        struct filterdomain *d, *last = NULL;
       +        char *p;
       +        int inverse;
       +
       +        *head = NULL;
       +        do {
       +                inverse = 0;
       +                if (*s == '~') {
       +                        inverse = 1;
       +                        s++;
       +                }
       +                if (!*s || *s == sep)
       +                        break;
       +
       +                if (!(d = wecalloc(1, sizeof(*d))))
       +                        goto err;
       +                if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
       +                        d->domain = westrndup(s, p - s);
       +                        s = p + 1;
       +                } else {
       +                        d->domain = westrdup(s);
       +                }
       +                if (!d->domain) {
       +                        /* `d` is not linked into the list yet */
       +                        free(d);
       +                        goto err;
       +                }
       +                d->inverse = inverse;
       +
       +                if (!*head)
       +                        *head = last = d;
       +                else
       +                        last = last->next = d;
       +        } while (p);
       +
       +        return (*head != NULL);
       +
       +err:
       +        /* callers discard the list on error, so release it here */
       +        while ((d = *head)) {
       +                *head = d->next;
       +                free(d->domain);
       +                free(d);
       +        }
       +        return -1;
       +}
       +
       +/* Parse the comma-separated domain list `s` of an element hiding rule and
       + * append the result to f->domains.
       + * Returns 1 if domains were added, 0 if none, -1 on error. */
       +static int
       +parsedomainselement(struct filterrule *f, char *s)
       +{
       +        struct filterdomain *parsed, **tail;
       +
       +        /* locate the link that terminates the current list */
       +        tail = &f->domains;
       +        while (*tail)
       +                tail = &(*tail)->next;
       +
       +        if (parsedomains(s, ',', &parsed) < 0)
       +                return -1;
       +        *tail = parsed;
       +
       +        return parsed ? 1 : 0;
       +}
       +
       +/* Parse the '|'-separated value `s` of a domain= option and append the
       + * result to f->domains.
       + * Returns 1 if domains were added, 0 if none, -1 on error. */
       +static int
       +parsedomainsoption(struct filterrule *f, char *s)
       +{
       +        struct filterdomain *parsed, **tail;
       +
       +        /* locate the link that terminates the current list */
       +        tail = &f->domains;
       +        while (*tail)
       +                tail = &(*tail)->next;
       +
       +        if (parsedomains(s, '|', &parsed) < 0)
       +                return -1;
       +        *tail = parsed;
       +
       +        return parsed ? 1 : 0;
       +}
       +
       +/* bsearch() comparison callback: order two struct filtertype by name. */
       +static int
       +filtertype_cmp(const void *a, const void *b)
       +{
       +        const struct filtertype *lhs = a, *rhs = b;
       +
       +        return strcmp(lhs->name, rhs->name);
       +}
       +
       +/* check if domain is the same domain or a subdomain of `s` */
       +static int
       +matchdomain(const char *s, const char *domain)
       +{
       +        size_t slen = strlen(s), dlen = strlen(domain);
       +
       +        /* `s` is longer: a more specific subdomain or an unrelated domain */
       +        if (slen > dlen)
       +                return 0;
       +        /* same length: must be the very same name */
       +        if (slen == dlen)
       +                return strcmp(s, domain) == 0;
       +        /* `domain` is longer: it has to end in ".<s>" */
       +        if (domain[dlen - slen - 1] != '.')
       +                return 0;
       +        return strcmp(domain + (dlen - slen), s) == 0;
       +}
       +
       +static int
       +matchrule(struct filterrule *f, const char *uri, const char *type,
       +          const char *domain)
       +{
       +        /* NOTE: order matters, see FilterType enum values
       +         * Decide whether rule `f` applies to `uri` requested from `domain`;
       +         * returns 1 when it applies and 0 otherwise. For CSS rules only
       +         * the domain list is evaluated. `type` is not used by the enabled
       +         * code. */
       +        struct filterdomain *d;
       +        char pat[1024];
       +        int r, m;
       +
       +        r = f->domains ? 0 : 1;
       +        for (d = f->domains; d; d = d->next) {
       +                if (matchdomain(d->domain, domain)) {
       +                        if (r && d->inverse)
       +                                r = 0;
       +                        else if (!r && !d->inverse)
       +                                r = 1;
       +                } else if (r && !d->inverse) {
       +                        r = 0;
       +                }
       +        }
       +        if (f->css) {
       +                /* DEBUG */
       +#if 0
       +                if (f->isexception)
       +                        printf("DEBUG, exception rule, CSS: %s, match? %d\n",
       +                        f->css, r);
       +#endif
       +                return r;
       +        }
       +
       +#if 1
       +        /* skip allow rule, TODO: inverse? */
       +        if (!r)
       +                return 0;
       +#endif
       +
       +#if 0
       +        /* DEBUG: test, match if it is a simple pattern */
       +        char *p;
       +        p = strchr(f->uri, '*');
       +        if (!p)
       +                p = strchr(f->uri, '^');
       +        if (!p) {
       +                /* TODO: write a test-case */
       +                if (f->block & FilterTypeMatchCase) {
       +                        if (f->matchbegin)
       +                                m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
       +                        else if (f->matchend)
       +                                m = strlen(f->uri) <= strlen(uri) &&
       +                                        strcmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       +                        else
       +                                m = strstr(uri, f->uri) ? 1 : 0;
       +                } else {
       +                        if (f->matchbegin)
       +                                m = strncasecmp(uri, f->uri, strlen(f->uri)) == 0;
       +                        else if (f->matchend)
       +                                m = strlen(f->uri) <= strlen(uri) &&
       +                                        strcasecmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       +                        else
       +                                m = strcasestr(uri, f->uri) ? 1 : 0;
       +                }
       +                /*m = r ? !m : m;*/
       +                return m;
       +        }
       +#endif
       +
       +        r = snprintf(pat, sizeof(pat), "%s%s%s",
       +                f->matchbegin ? "" : "*",
       +                f->uri,
       +                f->matchend ? "" : "*");
       +        if (r == -1 || (size_t)r >= sizeof(pat)) {
       +                fprintf(stderr, "warning: pattern too large, ignoring\n");
       +                return 0;
       +        }
       +
       +        m = 0;
       +        if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
       +#if 0
       +                for (; *type; type++) {
       +                        for (i = 0; blockstr[i]; i++) {
       +                                if (blockstr[i] == *type &&
       +                                    f->block & (1 << i))
       +                                        printf("block type '%c'\n", blockstr[i]);
       +                                        return 1;
       +                                }
       +                        }
       +                }
       +
       +                return 0;
       +#endif
       +                m = 1;
       +        }
       +        /*m = r ? !m : m;*/
       +        return m;
       +}
       +
       +/* Parse one filter list line `s` into `f`.
       + * `s` is modified temporarily while parsing and is restored on success.
       + * Returns 1 when a rule was stored in `f`, 0 when the line holds no rule
       + * (comment, section header or blank; `f` is not touched) and -1 on
       + * allocation failure, in which case nothing is left allocated in `f`. */
       +static int
       +parserule(struct filterrule *f, char *s)
       +{
       +        struct filtertype key, *ft;
       +        struct filterdomain *d;
       +        int inverse = 0;
       +        char *p, *values;
       +
       +        if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
       +                return 0; /* skip comment or section header */
       +        /* the cast matters: isspace() on a negative char is undefined */
       +        for (; *s && isspace((unsigned char)*s); s++)
       +                ;
       +        if (!*s)
       +                return 0; /* line was empty or had only whitespace: skip */
       +
       +        memset(f, 0, sizeof(struct filterrule));
       +
       +        if ((p = strstr(s, "#@#"))) {
       +                *p = '\0';
       +                if (parsedomainselement(f, s) < 0)
       +                        goto err;
       +                *p = '#';
       +                if (!(f->css = westrdup(p + 3)))
       +                        goto err;
       +                f->isexception = 1;
       +                goto end; /* end of CSS rule */
       +        }
       +
       +        /* element hiding rule, NOTE: no wildcards are supported,
       +        "Simplified element hiding syntax" is not supported. */
       +        if ((p = strstr(s, "##"))) {
       +                *p = '\0';
       +                if (parsedomainselement(f, s) < 0)
       +                        goto err;
       +                *p = '#';
       +                if (!(f->css = westrdup(p + 2)))
       +                        goto err;
       +                goto end; /* end of rule */
       +        }
       +
       +        if (!strncmp(s, "@@", 2)) {
       +                f->isexception = 1;
       +                s += 2;
       +        }
       +        if (*s == '|') {
       +                s++;
       +                if (*s == '|') {
       +                        f->matchbegin = 1;
       +                        s++;
       +                } else {
       +                        f->matchend = 1;
       +                }
       +        }
       +
       +        /* no options, use rest of line as uri. */
       +        if (!(p = strrchr(s, '$'))) {
       +                if (!(f->uri = westrdup(s)))
       +                        goto err;
       +                goto end;
       +        }
       +
       +        /* has options */
       +        if (!(f->uri = westrndup(s, p - s)))
       +                goto err;
       +        s = ++p;
       +
       +        /* blockmask, has options? default: allow all options, case-sensitive
       +         * has no options? default: block all options, case-sensitive  */
       +        f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
       +        do {
       +                if ((p = strchr(s, ',')))
       +                        *p = '\0';
       +                /* match option */
       +                inverse = 0;
       +                if (*s == '~') {
       +                        inverse = 1;
       +                        s++;
       +                }
       +                /* cut "name=value" at '=' so only the name is looked up */
       +                if ((values = strchr(s, '=')))
       +                        *(values) = '\0';
       +                key.name = s;
       +
       +                ft = bsearch(&key, &filtertypes,
       +                             sizeof(filtertypes) / sizeof(*filtertypes),
       +                             sizeof(*filtertypes), filtertype_cmp);
       +
       +                /* restore the '=' and step to the option value */
       +                if (values)
       +                        *(values++) = '=';
       +
       +                if (ft) {
       +                        if (inverse)
       +                                f->block &= ~(ft->type);
       +                        else
       +                                f->block |= ft->type;
       +                        /* the value parser allocates: do not ignore failure */
       +                        if (ft->fn && values && ft->fn(f, values) < 0)
       +                                goto err;
       +                } else {
       +                        /* DEBUG */
       +                        fprintf(stderr, "ignored: unknown option: '%s' "
       +                                "in rule: %s\n", key.name, f->uri);
       +                }
       +
       +                /* restore ',' */
       +                if (p) {
       +                        *p = ',';
       +                        s = p + 1;
       +                }
       +        } while (p);
       +end:
       +
       +        return 1;
       +
       +err:
       +        /* release whatever was allocated for the half-parsed rule */
       +        while ((d = f->domains)) {
       +                f->domains = d->next;
       +                free(d->domain);
       +                free(d);
       +        }
       +        free(f->css);
       +        free(f->uri);
       +        memset(f, 0, sizeof(struct filterrule));
       +        return -1;
       +}
       +
       +#if 0 /* DEBUG helper, compiled out: prints the fields of one rule */
       +static void
       +debugrule(struct filterrule *r)
       +{
       +        printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
       +               "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
       +               r->isexception, r->block);
       +}
       +#endif
       +
       +/* Read filter rules from `fp`, one per line, and append them to the global
       + * `rules` list.
       + * Returns 1 if the list holds at least one rule afterwards, 0 if it is
       + * empty and -1 on allocation failure. */
       +static int
       +loadrules(FILE *fp)
       +{
       +        struct filterrule f, *r, *rn;
       +        struct filterdomain *d;
       +        char *line = NULL;
       +        size_t linesiz = 0;
       +        ssize_t n;
       +        int ret = 0;
       +
       +        /* continue after the rules loaded by an earlier call, if any */
       +        for (rn = rules; rn && rn->next; rn = rn->next)
       +                ;
       +
       +        /* TODO: handle ferror() */
       +        /* load rules */
       +        while ((n = getline(&line, &linesiz, fp)) > 0) {
       +                if (line[n - 1] == '\n')
       +                        line[--n] = '\0';
       +                if (n > 0 && line[n - 1] == '\r')
       +                        line[--n] = '\0';
       +
       +                /* parenthesized so that `ret` keeps the -1 error value
       +                 * instead of the result of the comparison */
       +                if ((ret = parserule(&f, line)) > 0) {
       +                        if (!(r = wecalloc(1, sizeof(struct filterrule)))) {
       +                                /* the parsed rule has no owner: free it */
       +                                while ((d = f.domains)) {
       +                                        f.domains = d->next;
       +                                        free(d->domain);
       +                                        free(d);
       +                                }
       +                                free(f.css);
       +                                free(f.uri);
       +                                ret = -1;
       +                                break;
       +                        }
       +                        memcpy(r, &f, sizeof(struct filterrule));
       +                        if (rn)
       +                                rn->next = r;
       +                        else
       +                                rules = r;
       +                        rn = r;
       +                } else if (ret < 0) {
       +                        break;
       +                }
       +        }
       +        /* getline() allocated the buffer, even when it read nothing */
       +        free(line);
       +
       +        if (ret < 0)
       +                return -1;
       +        return (rules != NULL);
       +}
       +
       +/* Return the global CSS text, or NULL when there is none.
       + * The buffer stays owned by this file; cleanup() frees it. */
       +char *
       +getglobalcss(void)
       +{
       +        char *css = globalcss.data;
       +
       +        return css;
       +}
       +
       +/* Build the site-specific element hiding CSS for the page at `uri`.
       + * Returns a heap-allocated string that the caller must free(), or NULL when
       + * `uri` is NULL, contains no "://", has a host part that does not fit the
       + * domain buffer, when no rule applies, or when an allocation fails. */
       +char *
       +getdocumentcss(const char *uri)
       +{
       +        const char *s;
       +        char domain[256];
       +        String sitecss;
       +        struct filterrule *r;
       +        size_t len;
       +
       +        if (!uri)
       +                return NULL;
       +
       +        if (!(s = strstr(uri, "://")))
       +                return NULL;
       +        s += sizeof("://") - 1;
       +        len = strcspn(s, "/");
       +        /* the host comes from the page URI: never trust its length */
       +        if (len >= sizeof(domain)) {
       +                fprintf(stderr, "warning: domain too large, ignoring\n");
       +                return NULL;
       +        }
       +        memcpy(domain, s, len);
       +        domain[len] = '\0';
       +
       +        printf("uri: %s\n", uri);
       +        printf("domain: %s\n", domain);
       +
       +        /* DEBUG: timing; zeroed so a failed clock_gettime() is harmless */
       +        struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff;
       +        if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
       +                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       +        }
       +
       +        /* site-specific CSS */
       +        memset(&sitecss, 0, sizeof(sitecss));
       +        for (r = rules; r; r = r->next) {
       +                if (!r->css || !r->domains || !matchrule(r, "", "", domain))
       +                        continue;
       +
       +                len = strlen(r->css);
       +                if (string_append(&sitecss, r->css, len) < len)
       +                        goto err;
       +
       +                s = r->isexception ? "{display:initial;}" : "{display:none;}";
       +                len = strlen(s);
       +                if (string_append(&sitecss, s, len) < len)
       +                        goto err;
       +        }
       +/*        printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
       +
       +        /* DEBUG: timing */
       +        if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
       +                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       +        }
       +
       +        tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
       +        tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
       +        if (tp_diff.tv_nsec < 0) {
       +                tp_diff.tv_sec--;
       +                tp_diff.tv_nsec += 1000000000L;
       +        }
       +
       +        /* time_t is not size_t: print it through a type printf knows */
       +        printf("timing: %lld sec, %.3f ms\n",
       +                (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
       +
       +        if (globalcss.data)
       +                printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
       +        if (sitecss.data)
       +                printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
       +
       +        return sitecss.data;
       +
       +err:
       +        free(sitecss.data);
       +        return NULL;
       +}
       +
       +/*
       + * Decide whether the request for `requri`, issued by the page at `uri`,
       + * may proceed.
       + *
       + * Returns 0 when a non-CSS rule matches `requri` for the page's domain
       + * (the request is reported as blocked on stderr), 1 otherwise. The
       + * request is also allowed (1) without consulting the rules when:
       + *  - `uri` or `requri` is NULL, or both are the same string,
       + *  - `uri` contains no "://" (no authority part to extract),
       + *  - the authority part does not fit in the local domain buffer.
       + *
       + * NOTE(review): r->isexception is not checked in this loop, so unless
       + * matchrule() handles it, a matching exception rule also yields 0 --
       + * confirm whether that is intended.
       + */
       +int
       +checkrequest(const char *uri, const char *requri)
       +{
       +        char domain[256];
       +        struct filterrule *r;
       +        /* DEBUG: timing, zeroed so a failed clock_gettime() cannot make us
       +         * read indeterminate values below */
       +        struct timespec tp_start = { 0 }, tp_end = { 0 }, tp_diff;
       +        const char *s;
       +        size_t len;
       +        int status = 1;
       +
       +        if (!uri || !requri || !strcmp(requri, uri))
       +                return 1;
       +
       +        /* URIs without "://" have no authority part: strstr() returns NULL
       +         * and must not be used for pointer arithmetic */
       +        if (!(s = strstr(uri, "://")))
       +                return 1;
       +        s += sizeof("://") - 1;
       +        len = strcspn(s, "/");
       +        /* the authority part is copied into a fixed-size buffer: refuse
       +         * anything that would not fit together with its NUL terminator */
       +        if (len >= sizeof(domain))
       +                return 1;
       +        memcpy(domain, s, len);
       +        domain[len] = '\0';
       +
       +        /* DEBUG: timing */
       +        if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
       +                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       +        }
       +
       +        /* match rules */
       +        for (r = rules; r; r = r->next) {
       +                if (!r->css && matchrule(r, requri, "csio^", domain)) {
       +                        printf("requri: %s\n", requri);
       +                        printf("uri:    %s\n", uri);
       +                        printf("domain: %s\n", domain);
       +
       +                        fprintf(stderr, "blocked: %s, %s\n", domain, requri);
       +
       +                        /* DEBUG: for showing the timing */
       +                        status = 0;
       +                        goto end;
       +                        /*return 1;*/
       +                }
       +        }
       +
       +end:
       +        /* DEBUG: timing */
       +        if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
       +                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       +        }
       +
       +        tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
       +        tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
       +        if (tp_diff.tv_nsec < 0) {
       +                tp_diff.tv_sec--;
       +                tp_diff.tv_nsec += 1000000000L;
       +        }
       +
       +        /* tv_sec is a time_t, not a size_t: cast to a type with a matching
       +         * conversion specifier */
       +        printf("%s [%s] timing: %lld sec, %.3f ms\n",
       +                requri, uri, (long long)tp_diff.tv_sec,
       +                (float)tp_diff.tv_nsec / 1000000.0f);
       +
       +        return status;
       +}
       +
       +/*
       + * Load the filter rules and precompute the global (all sites) CSS.
       + *
       + * The rules file is taken from $SURF_ADBLOCK_FILE when set, otherwise from
       + * "$HOME/.surf/adblockrules" (".surf/adblockrules" when $HOME is unset).
       + * Every CSS rule without a domain list is appended to `globalcss`,
       + * followed by "{display:none;}".
       + *
       + * Nothing is returned: on any failure a message is written through
       + * weprintf() and the function returns early.
       + */
       +void
       +init(void)
       +{
       +        struct filterrule *r;
       +        FILE *fp;
       +        char filepath[PATH_MAX], *e;
       +        size_t len;
       +        int n, saved_errno;
       +
       +        if ((e = getenv("SURF_ADBLOCK_FILE"))) {
       +                n = snprintf(filepath, sizeof(filepath), "%s", e);
       +        } else {
       +                if (!(e = getenv("HOME")))
       +                        e = "";
       +                n = snprintf(filepath, sizeof(filepath),
       +                             "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
       +        }
       +        if (n < 0 || (size_t)n >= sizeof(filepath)) {
       +                /* newline-terminated like the other messages in this function */
       +                weprintf("fatal: rules file path too long\n");
       +                return;
       +        }
       +
       +        if (!(fp = fopen(filepath, "r"))) {
       +                weprintf("fatal: cannot open rules file %s: %s\n",
       +                         filepath, strerror(errno));
       +                return;
       +        }
       +
       +        n = loadrules(fp);
       +        /* fclose() may overwrite errno: keep the value left by loadrules()
       +         * for the error message below */
       +        saved_errno = errno;
       +        fclose(fp);
       +        if (n < 1) {
       +                if (n < 0) {
       +                        weprintf("fatal: cannot read rules from file %s: %s\n",
       +                                 filepath, strerror(saved_errno));
       +                } else  {
       +                        weprintf("fatal: cannot read any rule from file %s\n",
       +                                 filepath);
       +                }
       +                return;
       +        }
       +
       +        /* general CSS rules: all sites */
       +        for (r = rules; r; r = r->next) {
       +                if (!r->css || r->domains)
       +                        continue;
       +
       +                /* NOTE(review): r->isexception is not checked here, so a
       +                 * domain-less exception rule also gets "{display:none;}"
       +                 * -- confirm whether that is intended. */
       +                len = strlen(r->css);
       +                if (string_append(&globalcss, r->css, len) < len) {
       +                        weprintf("cannot load global css selectors "
       +                                 "in memory\n");
       +                        goto fail;
       +                }
       +                len = sizeof("{display:none;}") - 1;
       +                if (string_append(&globalcss, "{display:none;}", len) < len) {
       +                        weprintf("cannot append css rule "
       +                                 "to global css selectors\n");
       +                        goto fail;
       +                }
       +        }
       +        return;
       +
       +fail:
       +        cleanup();
       +        /* cleanup() is not visible from here; reset the buffer defensively
       +         * so getglobalcss() cannot hand out a pointer that was freed */
       +        memset(&globalcss, 0, sizeof(globalcss));
       +}
 (DIR) diff --git a/surf-adblock.c b/surf-adblock.c
       @@ -8,769 +8,30 @@
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
       -#include <wchar.h>
       -#include <wctype.h>
        
        #include <webkit2/webkit-web-extension.h>
        #include <webkitdom/webkitdom.h>
        
       -/* String data / memory pool */
       -typedef struct string {
       -        char   *data;   /* data */
       -        size_t  datasz; /* allocated size */
       -        size_t  len;    /* current string length */
       -} String;
       +#include "adblock.h"
        
        typedef struct Page {
                guint64 id;
                WebKitWebPage *webpage;
       -        WebKitDOMDOMWindow *view;
       +        /*WebKitDOMDOMWindow *view;*/
                struct Page *next;
        } Page;
        
       -struct filterdomain {
       -        char *domain;
       -        int inverse;
       -        struct filterdomain *next;
       -};
       -
       -struct filterrule {
       -        /* type: match mask, must be atleast 32-bit, see FilterType enum */
       -        unsigned long block;
       -        int matchbegin;
       -        int matchend;
       -        /* is exception rule: prefix @@ for ABP or #@# for CSS */
       -        int isexception;
       -        char *css; /* if non-NULL is CSS rule / hide element rule */
       -        char *uri;
       -        struct filterdomain *domains;
       -        struct filterrule *next;
       -};
       -
       -enum {
       -        FilterTypeScript       = 1 << 0,
       -        FilterTypeImage        = 1 << 1,
       -        FilterTypeCSS          = 1 << 2,
       -        FilterTypeObject       = 1 << 3,
       -        FilterTypeXHR          = 1 << 4,
       -        FilterTypeObjectSub    = 1 << 5,
       -        FilterTypeSubDoc       = 1 << 6,
       -        FilterTypePing         = 1 << 7,
       -        FilterTypeDocument     = 1 << 8,
       -        FilterTypeElemHide     = 1 << 9,
       -        FilterTypeOther        = 1 << 10,
       -        FilterTypeGenericHide  = 1 << 11,
       -        FilterTypeGenericBlock = 1 << 12,
       -        FilterTypeMatchCase    = 1 << 13,
       -};
       -
       -struct filtertype {
       -        /* `type` must be atleast 32-bit, see FilterType enum */
       -        unsigned long type;
       -        char *name;
       -        size_t namelen;
       -        int allowinverse;
       -        int allownormal;
       -        int onlyexception;
       -        int (*fn)(struct filterrule *, char *);
       -};
       -
       -static int parsedomainsoption(struct filterrule *, char *);
       -
       -#define STRP(s) s,sizeof(s)-1
       -
       -static struct filtertype filtertypes[] = {
       -        /* NOTE: options with 'type' = 0 are silently ignored and treated as
       -         *       requests for now */
       -        { 0,                      STRP("collapse"),          1, 1, 0, NULL },
       -        { FilterTypeDocument,     STRP("document"),          1, 0, 1, NULL },
       -        { 0,                      STRP("domain"),            0, 1, 0,
       -                                     /* domain=... */  &parsedomainsoption },
       -        { 0,                      STRP("donottrack"),        1, 1, 0, NULL },
       -        { FilterTypeElemHide,     STRP("elemhide"),          0, 0, 1, NULL },
       -        { 0,                      STRP("font"),              1, 1, 0, NULL },
       -        { FilterTypeGenericBlock, STRP("genericblock"),      1, 1, 1, NULL },
       -        { FilterTypeGenericHide,  STRP("generichide"),       1, 1, 1, NULL },
       -        { FilterTypeImage,        STRP("image"),             1, 1, 0, NULL },
       -        { FilterTypeMatchCase,    STRP("match-case"),        1, 1, 0, NULL },
       -        { 0,                      STRP("media"),             1, 1, 0, NULL },
       -        { FilterTypeObject,       STRP("object"),            1, 1, 0, NULL },
       -        { FilterTypeObjectSub,    STRP("object-subrequest"), 1, 1, 0, NULL },
       -        { FilterTypeOther,        STRP("other"),             1, 1, 0, NULL },
       -        { FilterTypePing,         STRP("ping"),              1, 1, 0, NULL },
       -        { 0,                      STRP("popup"),             1, 1, 0, NULL },
       -        { FilterTypeScript,       STRP("script"),            1, 1, 0, NULL },
       -        { FilterTypeCSS,          STRP("stylesheet"),        1, 1, 0, NULL },
       -        { FilterTypeSubDoc,       STRP("subdocument"),       1, 1, 0, NULL },
       -        { 0,                      STRP("third-party"),       1, 1, 0, NULL },
       -        { FilterTypeXHR,          STRP("xmlhttprequest"),    1, 1, 0, NULL },
       -        /* NOTE: site-key not supported */
       -};
       -
       -static String globalcss;
        static Page *pages;
       -static struct filterrule *rules;
       -
       -static void
       -cleanup(void)
       -{
       -        struct filterrule *r;
       -        struct filterdomain *d;
       -
       -        free(globalcss.data);
       -
       -        for (r = rules; r; r = rules) {
       -                for (d = r->domains; d; d = r->domains) {
       -                        free(d->domain);
       -                        r->domains = d->next;
       -                        free(d);
       -                }
       -                free(r->css);
       -                free(r->uri);
       -                rules = r->next;
       -                free(r);
       -        }
       -}
       -
       -static void
       -weprintf(const char *fmt, ...)
       -{
       -        va_list ap;
       -
       -        fprintf(stderr, "surf-adblock: ");
       -
       -        va_start(ap, fmt);
       -        vfprintf(stderr, fmt, ap);
       -        va_end(ap);
       -}
       -
       -static size_t
       -string_buffer_realloc(String *s, size_t newsz)
       -{
       -        char *tmp;
       -        size_t allocsz;
       -
       -        for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
       -                ;
       -        if (!(tmp = realloc(s->data, allocsz))) {
       -                weprintf("realloc: %s\n", strerror(errno));
       -        } else {
       -                s->data   = tmp;
       -                s->datasz = allocsz;
       -        }
       -
       -        return s->datasz;
       -}
       -
       -static size_t
       -string_append(String *s, const char *data, size_t len)
       -{
       -        size_t newlen;
       -
       -        if (!len)
       -                return len;
       -
       -        newlen = s->len + len;
       -        /* check if allocation is necesary, don't shrink buffer,
       -         * should be more than datasz ofcourse. */
       -        if (newlen >= s->datasz) {
       -                if (string_buffer_realloc(s, newlen + 1) <= newlen)
       -                        return 0;
       -        }
       -        memcpy(s->data + s->len, data, len);
       -        s->len = newlen;
       -        s->data[s->len] = '\0';
       -        return len;
       -}
       -
       -static void *
       -wecalloc(size_t nmemb, size_t size)
       -{
       -        void *p;
       -
       -        if (!(p = calloc(nmemb, size)))
       -                weprintf("calloc: %s\n", strerror(errno));
       -
       -        return p;
       -}
       -
       -static char *
       -westrndup(const char *s, size_t n)
       -{
       -        char *p;
       -
       -        if (!(p = strndup(s, n)))
       -                weprintf("strndup: %s\n", strerror(errno));
       -        return p;
       -}
       -
       -static char *
       -westrdup(const char *s)
       -{
       -        char *p;
       -
       -        if (!(p = strdup(s)))
       -                weprintf("strdup: %s\n", strerror(errno));
       -
       -        return p;
       -}
       -
       -#define END          0
       -#define UNMATCHABLE -2
       -#define BRACKET     -3
       -#define CARET       -4
       -#define STAR        -5
       -
       -static int
       -str_next(const char *str, size_t n, size_t *step)
       -{
       -        if (!n) {
       -                *step = 0;
       -                return 0;
       -        }
       -        if (str[0] >= 128U) {
       -                wchar_t wc;
       -                int k = mbtowc(&wc, str, n);
       -                if (k<0) {
       -                        *step = 1;
       -                        return -1;
       -                }
       -                *step = k;
       -                return wc;
       -        }
       -        *step = 1;
       -
       -        return str[0];
       -}
       -
       -static int
       -pat_next(const char *pat, size_t m, size_t *step)
       -{
       -        int esc = 0;
       -
       -        if (!m || !*pat) {
       -                *step = 0;
       -                return END;
       -        }
       -        *step = 1;
       -        if (pat[0]=='\\' && pat[1]) {
       -                *step = 2;
       -                pat++;
       -                esc = 1;
       -                goto escaped;
       -        }
       -        if (pat[0]=='^')
       -                return CARET;
       -        if (pat[0] == '*')
       -                return STAR;
       -escaped:
       -        if (pat[0] >= 128U) {
       -                wchar_t wc;
       -                int k = mbtowc(&wc, pat, m);
       -                if (k<0) {
       -                        *step = 0;
       -                        return UNMATCHABLE;
       -                }
       -                *step = k + esc;
       -                return wc;
       -        }
       -        return pat[0];
       -}
       -
       -static int
       -casefold(int k)
       -{
       -        int c = towupper(k);
       -        return c == k ? towlower(k) : c;
       -}
       -
       -/* match() based on musl-libc fnmatch:
       -   https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
       -static int
       -match(const char *pat, const char *str, int fcase)
       -{
       -        size_t m = -1, n = -1;
       -        const char *p, *ptail, *endpat;
       -        const char *s, *stail, *endstr;
       -        size_t pinc, sinc, tailcnt=0;
       -        int c, k, kfold;
       -
       -        for (;;) {
       -                switch ((c = pat_next(pat, m, &pinc))) {
       -                case UNMATCHABLE:
       -                        return 1;
       -                case STAR:
       -                        pat++;
       -                        m--;
       -                        break;
       -                default:
       -                        k = str_next(str, n, &sinc);
       -                        /* TODO: write a test-case */
       -                        if (c == CARET && (k == '?' || k == '/' || k <= 0))
       -                                return 1;
       -                        if (k <= 0)
       -                                return (c==END) ? 0 : 1;
       -                        str += sinc;
       -                        n -= sinc;
       -                        kfold = fcase ? casefold(k) : k;
       -                        if (k != c && kfold != c)
       -                                return 1;
       -                        pat+=pinc;
       -                        m-=pinc;
       -                        continue;
       -                }
       -                break;
       -        }
       -
       -        /* Compute real pat length if it was initially unknown/-1 */
       -        m = strnlen(pat, m);
       -        endpat = pat + m;
       -
       -        /* Find the last * in pat and count chars needed after it */
       -        for (p=ptail=pat; p<endpat; p+=pinc) {
       -                switch (pat_next(p, endpat-p, &pinc)) {
       -                case UNMATCHABLE:
       -                        return 1;
       -                case STAR:
       -                        tailcnt=0;
       -                        ptail = p+1;
       -                        break;
       -                default:
       -                        tailcnt++;
       -                        break;
       -                }
       -        }
       -
       -        /* Past this point we need not check for UNMATCHABLE in pat,
       -         * because all of pat has already been parsed once. */
       -
       -        /* Compute real str length if it was initially unknown/-1 */
       -        n = strnlen(str, n);
       -        endstr = str + n;
       -        if (n < tailcnt) return 1;
       -
       -        /* Find the final tailcnt chars of str, accounting for UTF-8.
       -         * On illegal sequences we may get it wrong, but in that case
       -         * we necessarily have a matching failure anyway. */
       -        for (s=endstr; s>str && tailcnt; tailcnt--) {
       -                if (s[-1] < 128U || MB_CUR_MAX==1) s--;
       -                else while ((unsigned char)*--s-0x80U<0x40 && s>str);
       -        }
       -        if (tailcnt) return 1;
       -        stail = s;
       -
       -        /* Check that the pat and str tails match */
       -        p = ptail;
       -        for (;;) {
       -                c = pat_next(p, endpat-p, &pinc);
       -                p += pinc;
       -                if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
       -                        if (c != END) return 1;
       -                        break;
       -                }
       -                s += sinc;
       -                kfold = fcase ? casefold(k) : k;
       -                if (k != c && kfold != c)
       -                        return 1;
       -        }
       -
       -        /* We're all done with the tails now, so throw them out */
       -        endstr = stail;
       -        endpat = ptail;
       -
       -        /* Match pattern components until there are none left */
       -        while (pat<endpat) {
       -                p = pat;
       -                s = str;
       -                for (;;) {
       -                        c = pat_next(p, endpat-p, &pinc);
       -                        p += pinc;
       -                        /* Encountering * completes/commits a component */
       -                        if (c == STAR) {
       -                                pat = p;
       -                                str = s;
       -                                break;
       -                        }
       -                        k = str_next(s, endstr-s, &sinc);
       -                        if (!k)
       -                                return 1;
       -                        kfold = fcase ? casefold(k) : k;
       -                        if (k != c && kfold != c)
       -                                break;
       -                        s += sinc;
       -                }
       -                if (c == STAR) continue;
       -                /* If we failed, advance str, by 1 char if it's a valid
       -                 * char, or past all invalid bytes otherwise. */
       -                k = str_next(str, endstr-str, &sinc);
       -                if (k > 0) str += sinc;
       -                else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
       -        }
       -
       -        return 0;
       -}
       -
       -/*
       -domain=...   if domain is prefixed with ~, ignore.
       -multiple domains can be separated with |
       -*/
       -static int
       -parsedomains(const char *s, int sep, struct filterdomain **head)
       -{
       -        struct filterdomain *d, *last = *head = NULL;
       -        char *p;
       -        int inverse;
       -
       -        do {
       -                inverse = 0;
       -                if (*s == '~') {
       -                        inverse = !inverse;
       -                        s++;
       -                }
       -                if (!*s || *s == sep)
       -                        break;
       -
       -                if (!(d = wecalloc(1, sizeof(struct filterdomain))))
       -                        return -1;
       -                if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
       -                        d->domain = westrndup(s, p - s);
       -                        s = p + 1;
       -                } else {
       -                        d->domain = westrdup(s);
       -                }
       -                if (!d->domain)
       -                        return -1;
       -                d->inverse = inverse;
       -
       -                if (!*head)
       -                        *head = last = d;
       -                else
       -                        last = last->next = d;
       -        } while (p);
       -
       -        return (*head != NULL);
       -}
       -
       -static int
       -parsedomainselement(struct filterrule *f, char *s)
       -{
       -        struct filterdomain *d, *last;
       -
       -        for (last = f->domains; last && last->next; last = last->next)
       -                ;
       -
       -        if (parsedomains(s, ',', &d) < 0)
       -                return -1;
       -        if (last)
       -                last->next = d;
       -        else
       -                f->domains = d;
       -
       -        return (d != NULL);
       -}
       -
       -static int
       -parsedomainsoption(struct filterrule *f, char *s)
       -{
       -        struct filterdomain *d, *last;
       -
       -        for (last = f->domains; last && last->next; last = last->next)
       -                ;
       -
       -        if (parsedomains(s, '|', &d) < 0)
       -                return -1;
       -        if (last)
       -                last->next = d;
       -        else
       -                f->domains = d;
       -
       -        return (d != NULL);
       -}
       -
       -static int
       -filtertype_cmp(const void *a, const void *b)
       -{
       -        return strcmp(((struct filtertype *)a)->name,
       -                      ((struct filtertype *)b)->name);
       -}
       -
       -/* check if domain is the same domain or a subdomain of `s` */
       -static int
       -matchdomain(const char *s, const char *domain)
       -{
       -        size_t l1, l2;
       -
       -        l1 = strlen(s);
       -        l2 = strlen(domain);
       -
       -        /* subdomain-specific (longer) or other domain */
       -        if (l1 > l2)
       -                return 0;
       -        /* subdomain */
       -        if (l2 > l1 && domain[l2 - l1 - 1] == '.')
       -                return !strcmp(&domain[l2 - l1], s);
       -
       -        return !strcmp(s, domain);
       -}
       -
       -static int
       -matchrule(struct filterrule *f, const char *uri, const char *type,
       -          const char *domain)
       -{
       -        /* NOTE: order matters, see FilterType enum values */
       -        struct filterdomain *d;
       -        char pat[1024];
       -        int r, m;
       -
       -        r = f->domains ? 0 : 1;
       -        for (d = f->domains; d; d = d->next) {
       -                if (matchdomain(d->domain, domain)) {
       -                        if (r && d->inverse)
       -                                r = 0;
       -                        else if (!r && !d->inverse)
       -                                r = 1;
       -                } else if (r && !d->inverse) {
       -                        r = 0;
       -                }
       -        }
       -        if (f->css) {
       -                /* DEBUG */
       -#if 0
       -                if (f->isexception)
       -                        printf("DEBUG, exception rule, CSS: %s, match? %d\n",
       -                        f->css, r);
       -#endif
       -                return r;
       -        }
       -
       -#if 1
       -        /* skip allow rule, TODO: inverse? */
       -        if (!r)
       -                return 0;
       -#endif
       -
       -#if 0
       -        /* DEBUG: test, match if it is a simple pattern */
       -        char *p;
       -        p = strchr(f->uri, '*');
       -        if (!p)
       -                p = strchr(f->uri, '^');
       -        if (!p) {
       -                /* TODO: write a test-case */
       -                if (f->block & FilterTypeMatchCase) {
       -                        if (f->matchbegin)
       -                                m = strncmp(uri, f->uri, strlen(f->uri)) == 0;
       -                        else if (f->matchend)
       -                                m = strlen(f->uri) <= strlen(uri) &&
       -                                        strcmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       -                        else
       -                                m = strstr(uri, f->uri) ? 1 : 0;
       -                } else {
       -                        if (f->matchbegin)
       -                                m = strncasecmp(uri, f->uri, strlen(f->uri)) == 0;
       -                        else if (f->matchend)
       -                                m = strlen(f->uri) <= strlen(uri) &&
       -                                        strcasecmp(&uri[strlen(uri) - strlen(f->uri)], f->uri) == 0;
       -                        else
       -                                m = strcasestr(uri, f->uri) ? 1 : 0;
       -                }
       -                /*m = r ? !m : m;*/
       -                return m;
       -        }
       -#endif
       -
       -        r = snprintf(pat, sizeof(pat), "%s%s%s",
       -                f->matchbegin ? "" : "*",
       -                f->uri,
       -                f->matchend ? "" : "*");
       -        if (r == -1 || (size_t)r >= sizeof(pat)) {
       -                fprintf(stderr, "warning: pattern too large, ignoring\n");
       -                return 0;
       -        }
       -
       -        m = 0;
       -        if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
       -#if 0
       -                for (; *type; type++) {
       -                        for (i = 0; blockstr[i]; i++) {
       -                                if (blockstr[i] == *type &&
       -                                    f->block & (1 << i))
       -                                        printf("block type '%c'\n", blockstr[i]);
       -                                        return 1;
       -                                }
       -                        }
       -                }
       -
       -                return 0;
       -#endif
       -                m = 1;
       -        }
       -        /*m = r ? !m : m;*/
       -        return m;
       -}
       -
       -static int
       -parserule(struct filterrule *f, char *s)
       -{
       -        struct filtertype key, *ft;
       -        int inverse = 0;
       -        char *p, *values;
       -
       -        if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
       -                return 0; /* skip comment or empty line */
       -        for (; *s && isspace(*s); s++)
       -                ;
       -        if (!*s)
       -                return 0; /* line had only whitespace: skip */
       -
       -        memset(f, 0, sizeof(struct filterrule));
       -
       -        if ((p = strstr(s, "#@#"))) {
       -                *p = '\0';
       -                if (parsedomainselement(f, s) < 0)
       -                        return -1;
       -                *p = '#';
       -                if (!(f->css = westrdup(p + 3)))
       -                        return -1;
       -                f->isexception = 1;
       -                goto end; /* end of CSS rule */
       -        }
       -
       -        /* element hiding rule, NOTE: no wildcards are supported,
       -        "Simplified element hiding syntax" is not supported. */
       -        if ((p = strstr(s, "##"))) {
       -                *p = '\0';
       -                if (parsedomainselement(f, s) < 0)
       -                        return -1;
       -                *p = '#';
       -                if (!(f->css = westrdup(p + 2)))
       -                        return -1;
       -                goto end; /* end of rule */
       -        }
       -
       -        if (!strncmp(s, "@@", 2)) {
       -                f->isexception = 1;
       -                s += 2;
       -        }
       -        if (*s == '|') {
       -                s++;
       -                if (*s == '|') {
       -                        f->matchbegin = 1;
       -                        s++;
       -                } else {
       -                        f->matchend = 1;
       -                }
       -        }
       -
       -        /* no options, use rest of line as uri. */
       -        if (!(p = strrchr(s, '$'))) {
       -                if (!(f->uri = westrdup(s)))
       -                        return -1;
       -                goto end;
       -        }
       -
       -        /* has options */
       -        if (!(f->uri = westrndup(s, p - s)))
       -                return -1;
       -        s = ++p;
       -
       -        /* blockmask, has options? default: allow all options, case-sensitive
       -         * has no options? default: block all options, case-sensitive  */
       -        f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
       -        do {
       -                if ((p = strchr(s, ',')))
       -                        *p = '\0';
       -                /* match option */
       -                inverse = 0;
       -                if (*s == '~') {
       -                        inverse = 1;
       -                        s++;
       -                }
       -                if ((values = strchr(s, '=')))
       -                        *(values) = '\0';
       -                key.name = s;
       -
       -                ft = bsearch(&key, &filtertypes,
       -                             sizeof(filtertypes) / sizeof(*filtertypes),
       -                             sizeof(*filtertypes), filtertype_cmp);
       -
       -                /* restore NUL-terminator for domain= option */
       -                if (values)
       -                        *(values++) = '=';
       -
       -                if (ft) {
       -                        if (inverse)
       -                                f->block &= ~(ft->type);
       -                        else
       -                                f->block |= ft->type;
       -                        if (ft->fn && values)
       -                                ft->fn(f, values);
       -                } else {
       -                        /* DEBUG */
       -                        fprintf(stderr, "ignored: unknown option: '%s' "
       -                                "in rule: %s\n", key.name, f->uri);
       -                }
       -
       -                /* restore ',' */
       -                if (p) {
       -                        *p = ',';
       -                        s = p + 1;
       -                }
       -        } while (p);
       -end:
       -
       -        return 1;
       -}
       -
       -#if 0
       -static void
       -debugrule(struct filterrule *r)
       -{
       -        printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
       -               "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
       -               r->isexception, r->block);
       -}
       -#endif
       -
       -static int
       -loadrules(FILE *fp)
       -{
       -        struct filterrule f, *r, *rn = NULL;
       -        char *line = NULL;
       -        size_t linesiz = 0;
       -        ssize_t n;
       -        int ret;
       -
       -        /* TODO: handle ferror() */
       -        /* load rules */
       -        while ((n = getline(&line, &linesiz, fp)) > 0) {
       -                if (line[n - 1] == '\n')
       -                        line[--n] = '\0';
       -                if (n > 0 && line[n - 1] == '\r')
       -                        line[--n] = '\0';
       -
       -                if ((ret = parserule(&f, line) > 0)) {
       -                        if (!(r = wecalloc(1, sizeof(struct filterrule))))
       -                                return -1;
       -                        if (!rules)
       -                                rules = rn = r;
       -                        else
       -                                rn = rn->next = r;
       -                        memcpy(rn, &f, sizeof(struct filterrule));
       -                } else if (ret < 0) {
       -                        return -1;
       -                }
       -        }
       -        return (rules != NULL);
       -}
        
        static Page *
        newpage(WebKitWebPage *page)
        {
                Page *p;
        
       -        if (!(p = wecalloc(1, sizeof(Page))))
       +        if (!(p = calloc(1, sizeof(Page)))) {
       +                fprintf(stderr, "surf-adblock: calloc: %s\n", strerror(errno));
                        return NULL;
       +        }
                p->next = pages;
                pages = p;
        
       @@ -783,146 +44,32 @@ newpage(WebKitWebPage *page)
        static void
        documentloaded(WebKitWebPage *wp, Page *p)
        {
       -        char domain[256];
                WebKitDOMDocument *doc = webkit_web_page_get_dom_document(wp);
                WebKitDOMHTMLElement *body = webkit_dom_document_get_body(doc);
                WebKitDOMElement *el;
       -        String sitecss;
       -        struct filterrule *r;
       -        const char *s, *uri = webkit_web_page_get_uri(p->webpage);
       -        size_t len;
       -
       -        if (!uri)
       -                return;
       -
       -        s = strstr(uri, "://") + sizeof("://") - 1;
       -        len = strcspn(s, "/");
       -        memcpy(domain, s, len);
       -        domain[len] = '\0';
       -
       -        printf("uri: %s\n", uri);
       -        printf("domain: %s\n", domain);
       -
       -        /* DEBUG: timing */
       -        struct timespec tp_start, tp_end, tp_diff;
       -        if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
       -                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       -        }
       -
       -        /* site-specific CSS */
       -        memset(&sitecss, 0, sizeof(sitecss));
       -        for (r = rules; r; r = r->next) {
       -                if (!r->css || !r->domains || !matchrule(r, "", "", domain))
       -                        continue;
       -
       -                len = strlen(r->css);
       -                if (string_append(&sitecss, r->css, len) < len)
       -                        return;
       -
       -                s = r->isexception ? "{display:initial;}" : "{display:none;}";
       -                len = strlen(s);
       -                if (string_append(&sitecss, s, len) < len)
       -                        return;
       -        }
       -/*        printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
       -
       -        /* DEBUG: timing */
       -        if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
       -                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       -        }
       -
       -        tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
       -        tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
       -        if (tp_diff.tv_nsec < 0) {
       -                tp_diff.tv_sec--;
       -                tp_diff.tv_nsec += 1000000000L;
       -        }
       -
       -        printf("timing: %zu sec, %.3f ms\n",
       -                tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
       +        const char *uri = webkit_web_page_get_uri(p->webpage);
       +        char *css, *globalcss;
        
       -        if (globalcss.data)
       -                printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
       -        if (sitecss.data)
       -                printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
       +        /*p->view = webkit_dom_document_get_default_view(doc);*/
        
       -        p->view = webkit_dom_document_get_default_view(doc);
       -
       -        if (globalcss.data) {
       +        if ((globalcss = getglobalcss())) {
                        el = webkit_dom_document_create_element(doc, "style", NULL);
                        webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
       -                webkit_dom_element_set_inner_html(el, globalcss.data, NULL);
       +                webkit_dom_element_set_inner_html(el, globalcss, NULL);
                        webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
                                                     WEBKIT_DOM_NODE(el), NULL);
                }
        
       -        if (sitecss.data) {
       +        if ((css = getdocumentcss(uri))) {
                        el = webkit_dom_document_create_element(doc, "style", NULL);
                        webkit_dom_element_set_attribute(el, "type", "text/css", NULL);
       -                webkit_dom_element_set_inner_html(el, sitecss.data, NULL);
       +                webkit_dom_element_set_inner_html(el, css, NULL);
                        webkit_dom_node_append_child(WEBKIT_DOM_NODE(body),
                                                     WEBKIT_DOM_NODE(el), NULL);
                }
        
       -        free(sitecss.data);
       -}
       -
       -int
       -checkrequest(const char *uri, const char *requri)
       -{
       -        char domain[256];
       -        struct filterrule *r;
       -        const char *s;
       -        size_t len;
       -        int status = 1;
       -
       -        if (!uri || !strcmp(requri, uri))
       -                return 1;
       -
       -        s = strstr(uri, "://") + sizeof("://") - 1;
       -        len = strcspn(s, "/");
       -        memcpy(domain, s, len);
       -        domain[len] = '\0';
       -
       -        /* DEBUG: timing */
       -        struct timespec tp_start, tp_end, tp_diff;
       -        if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
       -                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       -        }
       -
       -        /* match rules */
       -        for (r = rules; r; r = r->next) {
       -                if (!r->css && matchrule(r, requri, "csio^", domain)) {
       -                        printf("requri: %s\n", requri);
       -                        printf("uri:    %s\n", uri);
       -                        printf("domain: %s\n", domain);
       -
       -                        fprintf(stderr, "blocked: %s, %s\n", domain, requri);
       -
       -                        /* DEBUG: for showing the timing */
       -                        status = 0;
       -                        goto end;
       -                        /*return 1;*/
       -                }
       -        }
       -
       -end:
       -        /* DEBUG: timing */
       -        if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
       -                fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
       -        }
       -
       -        tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
       -        tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
       -        if (tp_diff.tv_nsec < 0) {
       -                tp_diff.tv_sec--;
       -                tp_diff.tv_nsec += 1000000000L;
       -        }
       -
       -        printf("%s [%s] timing: %zu sec, %.3f ms\n",
       -                requri, uri, tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
       -
       -        return status;
       +        free(css);
       +        /* NOTE: globalcss free'd at cleanup() */
        }
        
        static gboolean
       @@ -941,69 +88,6 @@ sendrequest(WebKitWebPage *wp, WebKitURIRequest *req,
                return status;
        }
        
       -void
       -init(void)
       -{
       -        struct filterrule *r;
       -        FILE *fp;
       -        char filepath[PATH_MAX], *e;
       -        size_t len;
       -        int n;
       -
       -        if ((e = getenv("SURF_ADBLOCK_FILE"))) {
       -                n = snprintf(filepath, sizeof(filepath), "%s", e);
       -        } else {
       -                if (!(e = getenv("HOME")))
       -                        e = "";
       -                n = snprintf(filepath, sizeof(filepath),
       -                             "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
       -        }
       -        if (n < 0 || (size_t)n >= sizeof(filepath)) {
       -                weprintf("fatal: rules file path too long");
       -                return;
       -        }
       -
       -        if (!(fp = fopen(filepath, "r"))) {
       -                weprintf("fatal: cannot open rules file %s: %s\n",
       -                         filepath, strerror(errno));
       -                return;
       -        }
       -
       -        n = loadrules(fp);
       -        fclose(fp);
       -        if (n < 1) {
       -                if (n < 0) {
       -                        weprintf("fatal: cannot read rules from file %s: %s\n",
       -                                 filepath, strerror(errno));
       -                } else  {
       -                        weprintf("fatal: cannot read any rule from file %s\n",
       -                                 filepath);
       -                }
       -                return;
       -        }
       -
       -        /* general CSS rules: all sites */
       -        for (r = rules; r; r = r->next) {
       -                if (!r->css || r->domains)
       -                        continue;
       -
       -                len = strlen(r->css);
       -                if (string_append(&globalcss, r->css, strlen(r->css)) < len) {
       -                        weprintf("cannot load global css selectors "
       -                                 "in memory\n");
       -                        cleanup();
       -                        return;
       -                }
       -                len = sizeof("{display:none;}") - 1;
       -                if (string_append(&globalcss, "{display:none;}", len) < len) {
       -                        weprintf("cannot append css rule "
       -                                 "to global css selectors\n");
       -                        cleanup();
       -                        return;
       -                }
       -        }
       -}
       -
        static void
        webpagecreated(WebKitWebExtension *e, WebKitWebPage *p, gpointer unused)
        {