adblock.c - surf-adblock - Surf adblock web extension
 (HTM) git clone git://git.codemadness.org/surf-adblock
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       adblock.c (22106B)
       ---
            1 #include <sys/stat.h>
            2 #include <sys/types.h>
            3 
            4 #include <ctype.h>
            5 #include <errno.h>
            6 #include <fcntl.h>
            7 #include <limits.h>
            8 #include <stdarg.h>
            9 #include <stdio.h>
           10 #include <stdlib.h>
           11 #include <string.h>
           12 #include <time.h> /* DEBUG: used for clock_gettime, remove later */
           13 #include <wchar.h>
           14 #include <wctype.h>
           15 
           16 #include "adblock.h"
           17 
/* String data / memory pool: a growable byte buffer.
 * string_append() keeps `data` NUL-terminated and string_buffer_realloc()
 * grows the allocation. */
typedef struct string {
        char   *data;   /* data (buffer obtained from realloc()) */
        size_t  datasz; /* allocated size */
        size_t  len;    /* current string length, excluding the NUL byte */
} String;
           24 
/* One entry of a rule's domain list (singly linked list). */
struct filterdomain {
        char *domain; /* domain name; allocated copy made by parsedomains() */
        int inverse;  /* non-zero when the entry was prefixed with '~' */
        struct filterdomain *next; /* next entry, NULL at the end of the list */
};
           30 
/* A single parsed filter rule, filled in by parserule(). */
struct filterrule {
        /* type: match mask, must be at least 32-bit, see FilterType enum */
        unsigned long block;
        int matchbegin; /* set by parserule() when the rule starts with "||" */
        int matchend;   /* set by parserule() when the rule starts with a single '|' */
        /* is exception rule: prefix @@ for ABP or #@# for CSS */
        int isexception;
        const char *css; /* if non-NULL is CSS rule / hide element rule */
        const char *uri; /* URI pattern text, without prefix markers and options */
        struct filterdomain *domains; /* domain restrictions, NULL when there are none */
        struct filterrule *next; /* next rule in the list */
};
           43 
/* FilterType bits: stored in `filterrule.block` and in `filtertype.type`.
 * Each value occupies its own bit so they can be OR-ed into one mask. */
enum {
        FilterTypeScript       = 1 << 0,
        FilterTypeImage        = 1 << 1,
        FilterTypeCSS          = 1 << 2,
        FilterTypeObject       = 1 << 3,
        FilterTypeXHR          = 1 << 4,
        FilterTypeObjectSub    = 1 << 5,
        FilterTypeSubDoc       = 1 << 6,
        FilterTypePing         = 1 << 7,
        FilterTypeDocument     = 1 << 8,
        FilterTypeElemHide     = 1 << 9,
        FilterTypeOther        = 1 << 10,
        FilterTypeGenericHide  = 1 << 11,
        FilterTypeGenericBlock = 1 << 12,
        FilterTypeMatchCase    = 1 << 13, /* when set, matchrule() matches case-sensitively */
};
           60 
/* Description of one rule option (the part after '$' in a rule); see the
 * filtertypes[] table. */
struct filtertype {
        /* `type` must be at least 32-bit, see FilterType enum */
        unsigned long type;
        char *name;        /* option name; the key compared by filtertype_cmp() */
        size_t namelen;    /* length of `name`, filled in by the STRP() macro */
        /* NOTE(review): the three flags below are not read by the code in this
         * part of the file; their exact meaning should be confirmed. */
        int allowinverse;
        int allownormal;
        int onlyexception;
        /* optional handler, called by parserule() with the text after '=' */
        int (*fn)(struct filterrule *, char *);
};
           71 
/* forward declaration: referenced by the filtertypes[] table below */
static int parsedomainsoption(struct filterrule *, char *);

/* expands to two initializers: the string literal and its length without
 * the terminating NUL byte */
#define STRP(s) s,sizeof(s)-1

/* Known rule options. parserule() looks entries up with bsearch() using
 * filtertype_cmp() (strcmp() on the name), so the entries MUST stay sorted
 * by name. */
static struct filtertype filtertypes[] = {
        /* NOTE: options with 'type' = 0 are silently ignored and treated as
         *       requests for now */
        { 0,                      STRP("collapse"),          1, 1, 0, NULL },
        { FilterTypeDocument,     STRP("document"),          1, 0, 1, NULL },
        { 0,                      STRP("domain"),            0, 1, 0,
                                     /* domain=... */  &parsedomainsoption },
        { 0,                      STRP("donottrack"),        1, 1, 0, NULL },
        { FilterTypeElemHide,     STRP("elemhide"),          0, 0, 1, NULL },
        { 0,                      STRP("font"),              1, 1, 0, NULL },
        { FilterTypeGenericBlock, STRP("genericblock"),      1, 1, 1, NULL },
        { FilterTypeGenericHide,  STRP("generichide"),       1, 1, 1, NULL },
        { FilterTypeImage,        STRP("image"),             1, 1, 0, NULL },
        { FilterTypeMatchCase,    STRP("match-case"),        1, 1, 0, NULL },
        { 0,                      STRP("media"),             1, 1, 0, NULL },
        { FilterTypeObject,       STRP("object"),            1, 1, 0, NULL },
        { FilterTypeObjectSub,    STRP("object-subrequest"), 1, 1, 0, NULL },
        { FilterTypeOther,        STRP("other"),             1, 1, 0, NULL },
        { FilterTypePing,         STRP("ping"),              1, 1, 0, NULL },
        { 0,                      STRP("popup"),             1, 1, 0, NULL },
        { FilterTypeScript,       STRP("script"),            1, 1, 0, NULL },
        { FilterTypeCSS,          STRP("stylesheet"),        1, 1, 0, NULL },
        { FilterTypeSubDoc,       STRP("subdocument"),       1, 1, 0, NULL },
        { 0,                      STRP("third-party"),       1, 1, 0, NULL },
        { FilterTypeXHR,          STRP("xmlhttprequest"),    1, 1, 0, NULL },
        /* NOTE: site-key not supported */
};
          103 
/* NOTE(review): presumably collects CSS for globally applied hide rules;
 * it is not used in this part of the file - confirm against its users. */
static String globalcss;
/* head of the linked list of loaded rules, built by loadrules() */
static struct filterrule *rules;
          106 
          107 static void
          108 weprintf(const char *fmt, ...)
          109 {
          110         va_list ap;
          111 
          112         fprintf(stderr, "surf-adblock: ");
          113 
          114         va_start(ap, fmt);
          115         vfprintf(stderr, fmt, ap);
          116         va_end(ap);
          117 }
          118 
          119 static void *
          120 wecalloc(size_t nmemb, size_t size)
          121 {
          122         void *p;
          123 
          124         if (!(p = calloc(nmemb, size)))
          125                 weprintf("calloc: %s\n", strerror(errno));
          126 
          127         return p;
          128 }
          129 
          130 static char *
          131 westrndup(const char *s, size_t n)
          132 {
          133         char *p;
          134 
          135         if (!(p = strndup(s, n)))
          136                 weprintf("strndup: %s\n", strerror(errno));
          137         return p;
          138 }
          139 
          140 static char *
          141 westrdup(const char *s)
          142 {
          143         char *p;
          144 
          145         if (!(p = strdup(s)))
          146                 weprintf("strdup: %s\n", strerror(errno));
          147 
          148         return p;
          149 }
          150 
          151 static size_t
          152 string_buffer_realloc(String *s, size_t newsz)
          153 {
          154         char *tmp;
          155         size_t allocsz;
          156 
          157         for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
          158                 ;
          159         if (!(tmp = realloc(s->data, allocsz))) {
          160                 weprintf("realloc: %s\n", strerror(errno));
          161         } else {
          162                 s->data   = tmp;
          163                 s->datasz = allocsz;
          164         }
          165 
          166         return s->datasz;
          167 }
          168 
          169 static size_t
          170 string_append(String *s, const char *data, size_t len)
          171 {
          172         size_t newlen;
          173 
          174         if (!len)
          175                 return len;
          176 
          177         newlen = s->len + len;
          178         /* check if allocation is necesary, don't shrink buffer,
          179          * should be more than datasz ofcourse. */
          180         if (newlen >= s->datasz) {
          181                 if (string_buffer_realloc(s, newlen + 1) <= newlen)
          182                         return 0;
          183         }
          184         memcpy(s->data + s->len, data, len);
          185         s->len = newlen;
          186         s->data[s->len] = '\0';
          187 
          188         return len;
          189 }
          190 
/* Special values returned by pat_next(). The non-zero ones are negative, so
 * they cannot be confused with a character value; -1 is left out because
 * str_next() returns it for an invalid multibyte sequence. */
#define END          0  /* end of the pattern */
#define UNMATCHABLE -2  /* invalid multibyte sequence in the pattern */
#define CARET       -3  /* '^' in the pattern */
#define STAR        -4  /* '*' in the pattern */
          195 
          196 static int
          197 str_next(const char *str, size_t n, size_t *step)
          198 {
          199         if (!n) {
          200                 *step = 0;
          201                 return 0;
          202         }
          203         if (str[0] >= 128U) {
          204                 wchar_t wc;
          205                 int k = mbtowc(&wc, str, n);
          206                 if (k<0) {
          207                         *step = 1;
          208                         return -1;
          209                 }
          210                 *step = k;
          211                 return wc;
          212         }
          213         *step = 1;
          214 
          215         return str[0];
          216 }
          217 
          218 static int
          219 pat_next(const char *pat, size_t m, size_t *step)
          220 {
          221         int esc = 0;
          222 
          223         if (!m || !*pat) {
          224                 *step = 0;
          225                 return END;
          226         }
          227         *step = 1;
          228         if (pat[0]=='\\' && pat[1]) {
          229                 *step = 2;
          230                 pat++;
          231                 esc = 1;
          232                 goto escaped;
          233         }
          234         if (pat[0]=='^')
          235                 return CARET;
          236         if (pat[0] == '*')
          237                 return STAR;
          238 escaped:
          239         if (pat[0] >= 128U) {
          240                 wchar_t wc;
          241                 int k = mbtowc(&wc, pat, m);
          242                 if (k<0) {
          243                         *step = 0;
          244                         return UNMATCHABLE;
          245                 }
          246                 *step = k + esc;
          247                 return wc;
          248         }
          249         return pat[0];
          250 }
          251 
          252 static int
          253 casefold(int k)
          254 {
          255         int c;
          256 
          257         /* optimization: -2% last measured.
          258         if ((unsigned)k < 128) {
          259                 c = toupper(k);
          260                 return c == k ? tolower(k) : c;
          261         }*/
          262         c = towupper(k);
          263         return c == k ? towlower(k) : c;
          264 }
          265 
          266 /* match() based on musl-libc fnmatch:
          267    https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
          268 static int
          269 match(const char *pat, const char *str, int fcase)
          270 {
          271         size_t m = -1, n = -1;
          272         const char *p, *ptail, *endpat;
          273         const char *s, *stail, *endstr;
          274         size_t pinc, sinc, tailcnt=0;
          275         int c, k, kfold;
          276 
          277         for (;;) {
          278                 switch ((c = pat_next(pat, m, &pinc))) {
          279                 case UNMATCHABLE:
          280                         return 1;
          281                 case STAR:
          282                         pat++;
          283                         m--;
          284                         break;
          285                 case CARET:
          286                         k = str_next(str, n, &sinc);
          287                         if (k <= 0)
          288                                 return (c==END) ? 0 : 1;
          289                         str += sinc;
          290                         n -= sinc;
          291                         if (k != '?' && k != '/')
          292                                 return 1;
          293                         pat++;
          294                         m--;
          295                         break;
          296                 default:
          297                         k = str_next(str, n, &sinc);
          298                         if (k <= 0)
          299                                 return (c==END) ? 0 : 1;
          300                         str += sinc;
          301                         n -= sinc;
          302                         kfold = fcase ? casefold(k) : k;
          303                         if (k != c && kfold != c)
          304                                 return 1;
          305                         pat+=pinc;
          306                         m-=pinc;
          307                         continue;
          308                 }
          309                 break;
          310         }
          311 
          312         /* Compute real pat length if it was initially unknown/-1 */
          313         m = strnlen(pat, m);
          314         endpat = pat + m;
          315 
          316         /* Find the last * in pat and count chars needed after it */
          317         for (p=ptail=pat; p<endpat; p+=pinc) {
          318                 switch (pat_next(p, endpat-p, &pinc)) {
          319                 case UNMATCHABLE:
          320                         return 1;
          321                 case STAR:
          322                         tailcnt=0;
          323                         ptail = p+1;
          324                         break;
          325                 default:
          326                         tailcnt++;
          327                         break;
          328                 }
          329         }
          330 
          331         /* Past this point we need not check for UNMATCHABLE in pat,
          332          * because all of pat has already been parsed once. */
          333 
          334         /* Compute real str length if it was initially unknown/-1 */
          335         n = strnlen(str, n);
          336         endstr = str + n;
          337         if (n < tailcnt) return 1;
          338 
          339         /* Find the final tailcnt chars of str, accounting for UTF-8.
          340          * On illegal sequences we may get it wrong, but in that case
          341          * we necessarily have a matching failure anyway. */
          342         for (s=endstr; s>str && tailcnt; tailcnt--) {
          343                 if (s[-1] < 128U || MB_CUR_MAX==1) s--;
          344                 else while ((unsigned char)*--s-0x80U<0x40 && s>str);
          345         }
          346         if (tailcnt) return 1;
          347         stail = s;
          348 
          349         /* Check that the pat and str tails match */
          350         p = ptail;
          351         for (;;) {
          352                 c = pat_next(p, endpat-p, &pinc);
          353                 p += pinc;
          354                 if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
          355                         if (c != END) return 1;
          356                         break;
          357                 }
          358                 s += sinc;
          359                 if (c == CARET) {
          360                         if  (k != '/' && k != '?')
          361                                 return 1;
          362                 } else {
          363                         kfold = fcase ? casefold(k) : k;
          364                         if (k != c && kfold != c)
          365                                 return 1;
          366                 }
          367         }
          368 
          369         /* We're all done with the tails now, so throw them out */
          370         endstr = stail;
          371         endpat = ptail;
          372 
          373         /* Match pattern components until there are none left */
          374         while (pat<endpat) {
          375                 p = pat;
          376                 s = str;
          377                 for (;;) {
          378                         c = pat_next(p, endpat-p, &pinc);
          379                         p += pinc;
          380                         /* Encountering * completes/commits a component */
          381                         if (c == STAR) {
          382                                 pat = p;
          383                                 str = s;
          384                                 break;
          385                         }
          386                         k = str_next(s, endstr-s, &sinc);
          387                         if (!k)
          388                                 return 1;
          389                         s += sinc;
          390                         if (c == CARET) {
          391                                 if (k != '/' && k != '?')
          392                                         break;
          393                         } else {
          394                                 kfold = fcase ? casefold(k) : k;
          395                                 if (k != c && kfold != c)
          396                                         break;
          397                         }
          398 
          399                 }
          400                 if (c == STAR) continue;
          401                 /* If we failed, advance str, by 1 char if it's a valid
          402                  * char, or past all invalid bytes otherwise. */
          403                 k = str_next(str, endstr-str, &sinc);
          404                 if (k > 0) str += sinc;
          405                 else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
          406         }
          407 
          408         return 0;
          409 }
          410 
          411 /*
          412 domain=...   if domain is prefixed with ~, ignore.
          413 multiple domains can be separated with |
          414 */
          415 static int
          416 parsedomains(const char *s, int sep, struct filterdomain **head)
          417 {
          418         struct filterdomain *d, *last = *head = NULL;
          419         char *p;
          420         int inverse;
          421 
          422         do {
          423                 inverse = 0;
          424                 if (*s == '~') {
          425                         inverse = !inverse;
          426                         s++;
          427                 }
          428                 if (!*s || *s == sep)
          429                         break;
          430 
          431                 if (!(d = wecalloc(1, sizeof(struct filterdomain))))
          432                         return -1;
          433                 if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
          434                         d->domain = westrndup(s, p - s);
          435                         s = p + 1;
          436                 } else {
          437                         d->domain = westrdup(s);
          438                 }
          439                 if (!d->domain)
          440                         return -1;
          441                 d->inverse = inverse;
          442 
          443                 if (!*head)
          444                         *head = last = d;
          445                 else
          446                         last = last->next = d;
          447         } while (p);
          448 
          449         return (*head != NULL);
          450 }
          451 
          452 static int
          453 parsedomainselement(struct filterrule *f, char *s)
          454 {
          455         struct filterdomain *d, *last;
          456 
          457         for (last = f->domains; last && last->next; last = last->next)
          458                 ;
          459 
          460         if (parsedomains(s, ',', &d) < 0)
          461                 return -1;
          462         if (last)
          463                 last->next = d;
          464         else
          465                 f->domains = d;
          466 
          467         return (d != NULL);
          468 }
          469 
          470 static int
          471 parsedomainsoption(struct filterrule *f, char *s)
          472 {
          473         struct filterdomain *d, *last;
          474 
          475         for (last = f->domains; last && last->next; last = last->next)
          476                 ;
          477 
          478         if (parsedomains(s, '|', &d) < 0)
          479                 return -1;
          480         if (last)
          481                 last->next = d;
          482         else
          483                 f->domains = d;
          484 
          485         return (d != NULL);
          486 }
          487 
          488 static int
          489 filtertype_cmp(const void *a, const void *b)
          490 {
          491         return strcmp(((struct filtertype *)a)->name,
          492                       ((struct filtertype *)b)->name);
          493 }
          494 
          495 /* check if domain is the same domain or a subdomain of `s` */
          496 static int
          497 matchdomain(const char *s, const char *domain)
          498 {
          499         size_t l1, l2;
          500 
          501         l1 = strlen(s);
          502         l2 = strlen(domain);
          503 
          504         /* subdomain-specific (longer) or other domain */
          505         if (l1 > l2)
          506                 return 0;
          507         /* subdomain */
          508         if (l2 > l1 && domain[l2 - l1 - 1] == '.')
          509                 return !strcmp(&domain[l2 - l1], s);
          510 
          511         return !strcmp(s, domain);
          512 }
          513 
          514 static int
          515 matchrule(struct filterrule *f, const char *fromuri, const char *fromdomain,
          516         const char *fromrel,
          517         const char *requri, const char *reqdomain, const char *reqrel,
          518         const char *type)
          519 {
          520         /* NOTE: order matters, see FilterType enum values */
          521         struct filterdomain *d;
          522         char pat[1024];
          523         const char *uri;
          524         int len, r;
          525 
          526         r = f->domains ? 0 : 1;
          527         for (d = f->domains; d; d = d->next) {
          528                 if (matchdomain(d->domain, fromdomain)) {
          529                         if (r && d->inverse)
          530                                 r = 0;
          531                         else if (!r && !d->inverse)
          532                                 r = 1;
          533                 } else if (r && !d->inverse) {
          534                         r = 0;
          535                 }
          536         }
          537         if (f->css) {
          538                 /* DEBUG */
          539 #if 0
          540                 if (f->isexception)
          541                         printf("DEBUG, exception rule, CSS: %s, match? %d\n",
          542                         f->css, r);
          543 #endif
          544                 return r;
          545         }
          546 
          547 #if 1
          548         /* skip allow rule, TODO: inverse? */
          549         if (!r)
          550                 return 0;
          551 #endif
          552 
          553         /* match begin including domain */
          554         if (f->matchbegin) {
          555                 /* TODO: match domain part of pattern */
          556                 /* TODO: preprocess pattern if it is matchbegin? */
          557 
          558                 len = strcspn(f->uri, "^/");
          559 
          560                 /* match domain without dot */
          561                 r = snprintf(pat, sizeof(pat), "%.*s",
          562                         len, f->uri);
          563                 if (r == -1 || (size_t)r >= sizeof(pat)) {
          564                         fprintf(stderr, "warning: pattern too large, ignoring\n");
          565                         return 0;
          566                 }
          567 
          568                 /* TODO: block type mask */
          569                 if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
          570                         /* match domain with dot */
          571                         r = snprintf(pat, sizeof(pat), "*.%.*s",
          572                                 len, f->uri);
          573                         if (r == -1 || (size_t)r >= sizeof(pat)) {
          574                                 fprintf(stderr, "warning: pattern too large, ignoring\n");
          575                                 return 0;
          576                         }
          577 
          578                         /* TODO: block type mask */
          579                         if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1))
          580                                 return 0;
          581                 }
          582 
          583                 /* match on path */
          584                 r = snprintf(pat, sizeof(pat), "*%s%s",
          585                         f->uri + len,
          586                         f->matchend ? "" : "*");
          587                 uri = reqrel;
          588         } else {
          589                 r = snprintf(pat, sizeof(pat), "*%s%s",
          590                         f->uri,
          591                         f->matchend ? "" : "*");
          592                 uri = requri;
          593 
          594         }
          595         if (r == -1 || (size_t)r >= sizeof(pat)) {
          596                 fprintf(stderr, "warning: pattern too large, ignoring\n");
          597                 return 0;
          598         }
          599 
          600         /* TODO: block type mask */
          601         if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1))
          602                 return 1;
          603 
          604         return 0;
          605 }
          606 
          607 static int
          608 parserule(struct filterrule *f, char *s)
          609 {
          610         struct filtertype key, *ft;
          611         int inverse = 0;
          612         char *p, *values;
          613 
          614         if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
          615                 return 0; /* skip comment or empty line */
          616         for (; *s && isspace(*s); s++)
          617                 ;
          618         if (!*s)
          619                 return 0; /* line had only whitespace: skip */
          620 
          621         memset(f, 0, sizeof(struct filterrule));
          622 
          623         if ((p = strstr(s, "#@#"))) {
          624                 *p = '\0';
          625                 if (parsedomainselement(f, s) < 0)
          626                         return -1;
          627                 *p = '#';
          628                 if (!(f->css = westrdup(p + 3)))
          629                         return -1;
          630                 f->isexception = 1;
          631                 goto end; /* end of CSS rule */
          632         }
          633 
          634         /* element hiding rule, NOTE: no wildcards are supported,
          635            "Simplified element hiding syntax" (legacy) is not supported. */
          636         if ((p = strstr(s, "##"))) {
          637                 *p = '\0';
          638                 if (parsedomainselement(f, s) < 0)
          639                         return -1;
          640                 *p = '#';
          641                 if (!(f->css = westrdup(p + 2)))
          642                         return -1;
          643                 goto end; /* end of rule */
          644         }
          645 
          646         if (!strncmp(s, "@@", 2)) {
          647                 f->isexception = 1;
          648                 s += 2;
          649         }
          650         if (*s == '|') {
          651                 s++;
          652                 if (*s == '|') {
          653                         f->matchbegin = 1;
          654                         s++;
          655                 } else {
          656                         f->matchend = 1;
          657                 }
          658         }
          659 
          660         /* no options, use rest of line as uri. */
          661         if (!(p = strrchr(s, '$'))) {
          662                 if (!(f->uri = westrdup(s)))
          663                         return -1;
          664                 goto end;
          665         }
          666 
          667         /* has options */
          668         if (!(f->uri = westrndup(s, p - s)))
          669                 return -1;
          670 
          671         s = ++p;
          672 
          673         /* blockmask, has options? default: allow all options, case-sensitive
          674          * has no options? default: block all options, case-sensitive  */
          675         f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
          676         do {
          677                 if ((p = strchr(s, ',')))
          678                         *p = '\0';
          679                 /* match option */
          680                 inverse = 0;
          681                 if (*s == '~') {
          682                         inverse = 1;
          683                         s++;
          684                 }
          685                 if ((values = strchr(s, '=')))
          686                         *(values) = '\0';
          687                 key.name = s;
          688 
          689                 ft = bsearch(&key, &filtertypes,
          690                              sizeof(filtertypes) / sizeof(*filtertypes),
          691                              sizeof(*filtertypes), filtertype_cmp);
          692 
          693                 /* restore NUL-terminator for domain= option */
          694                 if (values)
          695                         *(values++) = '=';
          696 
          697                 if (ft) {
          698                         if (inverse)
          699                                 f->block &= ~(ft->type);
          700                         else
          701                                 f->block |= ft->type;
          702                         if (ft->fn && values)
          703                                 ft->fn(f, values);
          704                 } else {
          705                         /* DEBUG */
          706 #if 0
          707                         fprintf(stderr, "ignored: unknown option: '%s' "
          708                                 "in rule: %s\n", key.name, f->uri);
          709 #endif
          710                 }
          711 
          712                 /* restore ',' */
          713                 if (p) {
          714                         *p = ',';
          715                         s = p + 1;
          716                 }
          717         } while (p);
          718 end:
          719 
          720         return 1;
          721 }
          722 
          723 #if 0
          724 static void
          725 debugrule(struct filterrule *r)
          726 {
          727         printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
          728                "%lu\n===\n", r->uri ? r->uri : "", r->css ? r->css : "",
          729                r->isexception, r->block);
          730 }
          731 #endif
          732 
          733 static int
          734 loadrules(FILE *fp)
          735 {
          736         struct filterrule f, *r, *rn = NULL;
          737         char *line = NULL;
          738         size_t linesiz = 0;
          739         ssize_t n;
          740         int ret;
          741 
          742         /* load rules */
          743         while ((n = getline(&line, &linesiz, fp)) > 0) {
          744                 if (line[n - 1] == '\n')
          745                         line[--n] = '\0';
          746                 if (n > 0 && line[n - 1] == '\r')
          747                         line[--n] = '\0';
          748 
          749                 if ((ret = parserule(&f, line) > 0)) {
          750                         if (!(r = wecalloc(1, sizeof(struct filterrule))))
          751                                 return -1;
          752                         if (!rules)
          753                                 rules = rn = r;
          754                         else
          755                                 rn = rn->next = r;
          756                         memcpy(rn, &f, sizeof(struct filterrule));
          757                 } else if (ret < 0) {
          758                         return -1;
          759                 }
          760         }
          761         if (ferror(fp)) {
          762                 weprintf("getline: %s\n", strerror(errno));
          763                 return -1;
          764         }
          765         return (rules != NULL);
          766 }
          767 
          768 char *
          769 getglobalcss(void)
          770 {
          771         return globalcss.data;
          772 }
          773 
          774 char *
          775 getdocumentcss(const char *fromuri)
          776 {
          777         const char *s;
          778         char fromdomain[256];
          779         String sitecss;
          780         struct filterrule *r;
          781         size_t len;
          782 
          783         /* skip protocol */
          784         if ((s = strstr(fromuri, "://")))
          785                 fromuri = s + sizeof("://") - 1;
          786         len = strcspn(fromuri, "/"); /* TODO: ":/" */
          787         memcpy(fromdomain, fromuri, len);
          788         fromdomain[len] = '\0';
          789 
          790         printf("fromuri:    %s\n", fromuri);
          791         printf("fromdomain: %s\n", fromdomain);
          792 
          793         /* DEBUG: timing */
          794         struct timespec tp_start, tp_end, tp_diff;
          795         if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
          796                 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
          797         }
          798 
          799         /* site-specific CSS */
          800         memset(&sitecss, 0, sizeof(sitecss));
          801         for (r = rules; r; r = r->next) {
          802                 if (!r->css || !r->domains ||
          803                     !matchrule(r, "", fromdomain, "", "", "", "", ""))
          804                         continue;
          805 
          806                 len = strlen(r->css);
          807                 if (string_append(&sitecss, r->css, len) < len)
          808                         goto err;
          809 
          810                 s = r->isexception ? "{display:initial;}" : "{display:none;}";
          811                 len = strlen(s);
          812                 if (string_append(&sitecss, s, len) < len)
          813                         goto err;
          814         }
          815 /*        printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
          816 
          817         /* DEBUG: timing */
          818         if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
          819                 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
          820         }
          821 
          822         tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
          823         tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
          824         if (tp_diff.tv_nsec < 0) {
          825                 tp_diff.tv_sec--;
          826                 tp_diff.tv_nsec += 1000000000L;
          827         }
          828 
          829         printf("timing: %lld sec, %.3f ms\n",
          830                 (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
          831 
          832         if (globalcss.data)
          833                 printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
          834         if (sitecss.data)
          835                 printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
          836 
          837         return sitecss.data;
          838 
          839 err:
          840         free(sitecss.data);
          841         /*memset(&sitecss, 0, sizeof(sitecss));*/
          842 
          843         return NULL;
          844 }
          845 
          846 int
          847 allowrequest(const char *fromuri, const char *requri)
          848 {
          849         struct filterrule *r;
          850         char fromdomain[256], reqdomain[256];
          851         const char *s, *reqrel, *fromrel;
          852         size_t len;
          853         int status = 1;
          854 
          855         /* skip protocol part */
          856         if ((s = strstr(fromuri, "://")))
          857                 fromuri = s + sizeof("://") - 1;
          858         if ((s = strstr(requri, "://")))
          859                 requri = s + sizeof("://") - 1;
          860 
          861         len = strcspn(fromuri, ":/"); /* TODO: ":/", but support IPV6... */
          862         memcpy(fromdomain, fromuri, len);
          863         fromdomain[len] = '\0';
          864 
          865         len = strcspn(requri, ":/"); /* TODO: ":/", but support IPV6... */
          866         memcpy(reqdomain, requri, len);
          867         reqdomain[len] = '\0';
          868 
          869         fromrel = &fromuri[strcspn(fromuri, "/")];
          870         reqrel = &requri[strcspn(requri, "/")];
          871 
          872 #if 0
          873         printf("req %s = %s\n", requri, reqrel);
          874         printf("from %s = %s\n", fromuri, fromrel);
          875 #endif
          876 
          877         /* DEBUG: timing */
          878         struct timespec tp_start, tp_end, tp_diff;
          879         if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1)
          880                 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
          881 
          882         /* match rules */
          883         for (r = rules; r; r = r->next) {
          884                 if (!r->css && matchrule(r, fromuri, fromdomain,
          885                                          fromrel, requri, reqdomain, reqrel, "csio^")) {
          886 #if 0
          887                         printf("reqrel:      %s\n", reqrel);
          888                         printf("reqdomain:   %s\n", reqdomain);
          889                         printf("requri:      %s\n", requri);
          890                         printf("from uri:    %s\n", fromuri);
          891                         printf("from domain: %s\n", fromdomain);
          892 #endif
          893 
          894                         fprintf(stderr, "blocked: %s, %s\n", fromdomain, requri);
          895                         fprintf(stderr, "rule:    %s\n", r->uri);
          896                         fprintf(stderr, "===\n");
          897 
          898                         /* DEBUG: for showing the timing */
          899                         status = 0;
          900                         goto end;
          901                         /*return 1;*/
          902                 }
          903         }
          904 
          905 end:
          906         /* DEBUG: timing */
          907         if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
          908                 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
          909         }
          910 
          911         tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
          912         tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
          913         if (tp_diff.tv_nsec < 0) {
          914                 tp_diff.tv_sec--;
          915                 tp_diff.tv_nsec += 1000000000L;
          916         }
          917 
          918         printf("%s [%s] timing: %lld sec, %.3f ms\n",
          919                 requri, fromuri, (long long)tp_diff.tv_sec,
          920                 (float)tp_diff.tv_nsec / 1000000.0f);
          921 
          922         return status;
          923 }
          924 
          925 void
          926 cleanup(void)
          927 {
          928         struct filterrule *r;
          929         struct filterdomain *d;
          930 
          931         free(globalcss.data);
          932         memset(&globalcss, 0, sizeof(globalcss));
          933 
          934         for (r = rules; r; r = rules) {
          935                 for (d = r->domains; d; d = r->domains) {
          936                         free(d->domain);
          937                         r->domains = d->next;
          938                         free(d);
          939                 }
          940                 free(r->css);
          941                 free(r->uri);
          942                 rules = r->next;
          943                 free(r);
          944         }
          945         rules = NULL;
          946 }
          947 
          948 void
          949 init(void)
          950 {
          951         struct filterrule *r;
          952         FILE *fp;
          953         const char *s;
          954         char filepath[PATH_MAX], *e;
          955         size_t len;
          956         int n;
          957 
          958         if ((e = getenv("SURF_ADBLOCK_FILE"))) {
          959                 n = snprintf(filepath, sizeof(filepath), "%s", e);
          960         } else {
          961                 if (!(e = getenv("HOME")))
          962                         e = "";
          963                 n = snprintf(filepath, sizeof(filepath),
          964                              "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
          965         }
          966         if (n < 0 || (size_t)n >= sizeof(filepath)) {
          967                 weprintf("fatal: rules file path too long");
          968                 return;
          969         }
          970 
          971         if (!(fp = fopen(filepath, "r"))) {
          972                 weprintf("fatal: cannot open rules file %s: %s\n",
          973                          filepath, strerror(errno));
          974                 return;
          975         }
          976 
          977         n = loadrules(fp);
          978         fclose(fp);
          979         if (n < 1) {
          980                 if (n < 0) {
          981                         weprintf("fatal: cannot read rules from file %s: %s\n",
          982                                  filepath, strerror(errno));
          983                 } else  {
          984                         weprintf("fatal: cannot read any rule from file %s\n",
          985                                  filepath);
          986                 }
          987                 return;
          988         }
          989 
          990         /* general CSS rules: all sites */
          991         for (r = rules; r; r = r->next) {
          992                 if (!r->css || r->domains)
          993                         continue;
          994 
          995                 len = strlen(r->css);
          996                 if (string_append(&globalcss, r->css, len) < len) {
          997                         weprintf("cannot append CSS rule to global CSS selectors\n");
          998                         cleanup();
          999                         return;
         1000                 }
         1001 
         1002                 s = r->isexception ? "{display:initial;}" : "{display:none;}";
         1003                 len = strlen(s);
         1004                 if (string_append(&globalcss, s, len) < len) {
         1005                         weprintf("cannot append CSS rule to global CSS selectors\n");
         1006                         cleanup();
         1007                         return;
         1008                 }
         1009         }
         1010 }