adblock.c - surf-adblock - Surf adblock web extension
(HTM) git clone git://git.codemadness.org/surf-adblock
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
adblock.c (22106B)
---
1 #include <sys/stat.h>
2 #include <sys/types.h>
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <fcntl.h>
7 #include <limits.h>
8 #include <stdarg.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <time.h> /* DEBUG: used for clock_gettime, remove later */
13 #include <wchar.h>
14 #include <wctype.h>
15
16 #include "adblock.h"
17
/* Growable, NUL-terminated byte buffer (used here to accumulate CSS text). */
typedef struct string {
	char   *data;   /* heap buffer; NULL while nothing has been allocated */
	size_t  datasz; /* number of bytes allocated for `data` */
	size_t  len;    /* bytes in use, not counting the terminating NUL */
} String;
24
/* One entry of a rule's domain list (singly linked). */
struct filterdomain {
	char *domain;              /* heap-allocated domain name */
	int inverse;               /* non-zero when the entry was prefixed with '~' */
	struct filterdomain *next; /* next entry, or NULL at the end of the list */
};
30
/* One parsed filter rule: either a request (URI) rule or a CSS rule. */
struct filterrule {
	/* type: match mask, must be at least 32-bit, see FilterType enum */
	unsigned long block;
	/* set by parserule() when the rule text starts with "||" */
	int matchbegin;
	/* set by parserule() when the rule text starts with a single "|" */
	int matchend;
	/* is exception rule: prefix @@ for ABP or #@# for CSS */
	int isexception;
	/* if non-NULL this is a CSS rule / hide element rule */
	const char *css;
	/* URI pattern of a request rule (heap-allocated by parserule()) */
	const char *uri;
	/* domains the rule is restricted to, NULL when unrestricted */
	struct filterdomain *domains;
	/* next rule in the global list */
	struct filterrule *next;
};
43
/* Bit flags stored in filterrule.block and filtertype.type. */
enum {
	FilterTypeScript       = 0x0001,
	FilterTypeImage        = 0x0002,
	FilterTypeCSS          = 0x0004,
	FilterTypeObject       = 0x0008,
	FilterTypeXHR          = 0x0010,
	FilterTypeObjectSub    = 0x0020,
	FilterTypeSubDoc       = 0x0040,
	FilterTypePing         = 0x0080,
	FilterTypeDocument     = 0x0100,
	FilterTypeElemHide     = 0x0200,
	FilterTypeOther        = 0x0400,
	FilterTypeGenericHide  = 0x0800,
	FilterTypeGenericBlock = 0x1000,
	FilterTypeMatchCase    = 0x2000,
};
60
/* Description of one rule option (the part of a rule after '$'). */
struct filtertype {
	/* `type` must be at least 32-bit, see FilterType enum */
	unsigned long type;
	/* option name as written in the rule, and its length */
	char *name;
	size_t namelen;
	/* flags describing how the option may be used; they are filled in by
	 * the option table but not consulted by the code in this file yet */
	int allowinverse;
	int allownormal;
	int onlyexception;
	/* optional parser for the text after "name=", or NULL */
	int (*fn)(struct filterrule *, char *);
};
71
72 static int parsedomainsoption(struct filterrule *, char *);
73
74 #define STRP(s) s,sizeof(s)-1
75
76 static struct filtertype filtertypes[] = {
77 /* NOTE: options with 'type' = 0 are silently ignored and treated as
78 * requests for now */
79 { 0, STRP("collapse"), 1, 1, 0, NULL },
80 { FilterTypeDocument, STRP("document"), 1, 0, 1, NULL },
81 { 0, STRP("domain"), 0, 1, 0,
82 /* domain=... */ &parsedomainsoption },
83 { 0, STRP("donottrack"), 1, 1, 0, NULL },
84 { FilterTypeElemHide, STRP("elemhide"), 0, 0, 1, NULL },
85 { 0, STRP("font"), 1, 1, 0, NULL },
86 { FilterTypeGenericBlock, STRP("genericblock"), 1, 1, 1, NULL },
87 { FilterTypeGenericHide, STRP("generichide"), 1, 1, 1, NULL },
88 { FilterTypeImage, STRP("image"), 1, 1, 0, NULL },
89 { FilterTypeMatchCase, STRP("match-case"), 1, 1, 0, NULL },
90 { 0, STRP("media"), 1, 1, 0, NULL },
91 { FilterTypeObject, STRP("object"), 1, 1, 0, NULL },
92 { FilterTypeObjectSub, STRP("object-subrequest"), 1, 1, 0, NULL },
93 { FilterTypeOther, STRP("other"), 1, 1, 0, NULL },
94 { FilterTypePing, STRP("ping"), 1, 1, 0, NULL },
95 { 0, STRP("popup"), 1, 1, 0, NULL },
96 { FilterTypeScript, STRP("script"), 1, 1, 0, NULL },
97 { FilterTypeCSS, STRP("stylesheet"), 1, 1, 0, NULL },
98 { FilterTypeSubDoc, STRP("subdocument"), 1, 1, 0, NULL },
99 { 0, STRP("third-party"), 1, 1, 0, NULL },
100 { FilterTypeXHR, STRP("xmlhttprequest"), 1, 1, 0, NULL },
101 /* NOTE: site-key not supported */
102 };
103
104 static String globalcss;
105 static struct filterrule *rules;
106
/* Print a printf-style message to stderr, prefixed with "surf-adblock: ". */
static void
weprintf(const char *fmt, ...)
{
	va_list args;

	fputs("surf-adblock: ", stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}
118
/* calloc() wrapper that reports a failure on stderr; returns NULL on error. */
static void *
wecalloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		weprintf("calloc: %s\n", strerror(errno));

	return mem;
}
129
/* strndup() wrapper that reports a failure on stderr; returns NULL on error. */
static char *
westrndup(const char *s, size_t n)
{
	char *copy = strndup(s, n);

	if (copy == NULL)
		weprintf("strndup: %s\n", strerror(errno));

	return copy;
}
139
/* strdup() wrapper that reports a failure on stderr; returns NULL on error. */
static char *
westrdup(const char *s)
{
	char *copy = strdup(s);

	if (copy == NULL)
		weprintf("strdup: %s\n", strerror(errno));

	return copy;
}
150
151 static size_t
152 string_buffer_realloc(String *s, size_t newsz)
153 {
154 char *tmp;
155 size_t allocsz;
156
157 for (allocsz = 64; allocsz <= newsz; allocsz *= 2)
158 ;
159 if (!(tmp = realloc(s->data, allocsz))) {
160 weprintf("realloc: %s\n", strerror(errno));
161 } else {
162 s->data = tmp;
163 s->datasz = allocsz;
164 }
165
166 return s->datasz;
167 }
168
169 static size_t
170 string_append(String *s, const char *data, size_t len)
171 {
172 size_t newlen;
173
174 if (!len)
175 return len;
176
177 newlen = s->len + len;
178 /* check if allocation is necesary, don't shrink buffer,
179 * should be more than datasz ofcourse. */
180 if (newlen >= s->datasz) {
181 if (string_buffer_realloc(s, newlen + 1) <= newlen)
182 return 0;
183 }
184 memcpy(s->data + s->len, data, len);
185 s->len = newlen;
186 s->data[s->len] = '\0';
187
188 return len;
189 }
190
/* Special (non-character) values returned by pat_next(). */
#define END         0    /* end of the pattern */
#define UNMATCHABLE (-2) /* invalid multibyte sequence in the pattern */
#define CARET       (-3) /* '^': separator placeholder */
#define STAR        (-4) /* '*': wildcard */
195
/*
 * Decode the next character of `str`, looking at no more than `n` bytes.
 * Stores the number of bytes consumed in `*step`.
 * Returns the character, 0 when `n` is 0 (with `*step` = 0), or -1 for an
 * invalid multibyte sequence (with `*step` = 1).
 */
static int
str_next(const char *str, size_t n, size_t *step)
{
	wchar_t wc;
	int k;

	if (n == 0) {
		*step = 0;
		return 0;
	}

	/* plain single-byte character */
	if (str[0] < 128U) {
		*step = 1;
		return str[0];
	}

	k = mbtowc(&wc, str, n);
	if (k < 0) {
		*step = 1;
		return -1;
	}
	*step = k;

	return wc;
}
217
218 static int
219 pat_next(const char *pat, size_t m, size_t *step)
220 {
221 int esc = 0;
222
223 if (!m || !*pat) {
224 *step = 0;
225 return END;
226 }
227 *step = 1;
228 if (pat[0]=='\\' && pat[1]) {
229 *step = 2;
230 pat++;
231 esc = 1;
232 goto escaped;
233 }
234 if (pat[0]=='^')
235 return CARET;
236 if (pat[0] == '*')
237 return STAR;
238 escaped:
239 if (pat[0] >= 128U) {
240 wchar_t wc;
241 int k = mbtowc(&wc, pat, m);
242 if (k<0) {
243 *step = 0;
244 return UNMATCHABLE;
245 }
246 *step = k + esc;
247 return wc;
248 }
249 return pat[0];
250 }
251
/*
 * Return the character `k` with its case flipped: the upper-case form when
 * there is one that differs from `k`, otherwise the lower-case form.
 *
 * NOTE: an ASCII-only fast path using toupper()/tolower() for k < 128 was
 * tried; the original note reads "optimization: -2% last measured" and the
 * fast path is left out.
 */
static int
casefold(int k)
{
	int upper = towupper(k);

	if (upper != k)
		return upper;

	return towlower(k);
}
265
266 /* match() based on musl-libc fnmatch:
267 https://git.musl-libc.org/cgit/musl/tree/src/regex/fnmatch.c */
268 static int
269 match(const char *pat, const char *str, int fcase)
270 {
271 size_t m = -1, n = -1;
272 const char *p, *ptail, *endpat;
273 const char *s, *stail, *endstr;
274 size_t pinc, sinc, tailcnt=0;
275 int c, k, kfold;
276
277 for (;;) {
278 switch ((c = pat_next(pat, m, &pinc))) {
279 case UNMATCHABLE:
280 return 1;
281 case STAR:
282 pat++;
283 m--;
284 break;
285 case CARET:
286 k = str_next(str, n, &sinc);
287 if (k <= 0)
288 return (c==END) ? 0 : 1;
289 str += sinc;
290 n -= sinc;
291 if (k != '?' && k != '/')
292 return 1;
293 pat++;
294 m--;
295 break;
296 default:
297 k = str_next(str, n, &sinc);
298 if (k <= 0)
299 return (c==END) ? 0 : 1;
300 str += sinc;
301 n -= sinc;
302 kfold = fcase ? casefold(k) : k;
303 if (k != c && kfold != c)
304 return 1;
305 pat+=pinc;
306 m-=pinc;
307 continue;
308 }
309 break;
310 }
311
312 /* Compute real pat length if it was initially unknown/-1 */
313 m = strnlen(pat, m);
314 endpat = pat + m;
315
316 /* Find the last * in pat and count chars needed after it */
317 for (p=ptail=pat; p<endpat; p+=pinc) {
318 switch (pat_next(p, endpat-p, &pinc)) {
319 case UNMATCHABLE:
320 return 1;
321 case STAR:
322 tailcnt=0;
323 ptail = p+1;
324 break;
325 default:
326 tailcnt++;
327 break;
328 }
329 }
330
331 /* Past this point we need not check for UNMATCHABLE in pat,
332 * because all of pat has already been parsed once. */
333
334 /* Compute real str length if it was initially unknown/-1 */
335 n = strnlen(str, n);
336 endstr = str + n;
337 if (n < tailcnt) return 1;
338
339 /* Find the final tailcnt chars of str, accounting for UTF-8.
340 * On illegal sequences we may get it wrong, but in that case
341 * we necessarily have a matching failure anyway. */
342 for (s=endstr; s>str && tailcnt; tailcnt--) {
343 if (s[-1] < 128U || MB_CUR_MAX==1) s--;
344 else while ((unsigned char)*--s-0x80U<0x40 && s>str);
345 }
346 if (tailcnt) return 1;
347 stail = s;
348
349 /* Check that the pat and str tails match */
350 p = ptail;
351 for (;;) {
352 c = pat_next(p, endpat-p, &pinc);
353 p += pinc;
354 if ((k = str_next(s, endstr-s, &sinc)) <= 0) {
355 if (c != END) return 1;
356 break;
357 }
358 s += sinc;
359 if (c == CARET) {
360 if (k != '/' && k != '?')
361 return 1;
362 } else {
363 kfold = fcase ? casefold(k) : k;
364 if (k != c && kfold != c)
365 return 1;
366 }
367 }
368
369 /* We're all done with the tails now, so throw them out */
370 endstr = stail;
371 endpat = ptail;
372
373 /* Match pattern components until there are none left */
374 while (pat<endpat) {
375 p = pat;
376 s = str;
377 for (;;) {
378 c = pat_next(p, endpat-p, &pinc);
379 p += pinc;
380 /* Encountering * completes/commits a component */
381 if (c == STAR) {
382 pat = p;
383 str = s;
384 break;
385 }
386 k = str_next(s, endstr-s, &sinc);
387 if (!k)
388 return 1;
389 s += sinc;
390 if (c == CARET) {
391 if (k != '/' && k != '?')
392 break;
393 } else {
394 kfold = fcase ? casefold(k) : k;
395 if (k != c && kfold != c)
396 break;
397 }
398
399 }
400 if (c == STAR) continue;
401 /* If we failed, advance str, by 1 char if it's a valid
402 * char, or past all invalid bytes otherwise. */
403 k = str_next(str, endstr-str, &sinc);
404 if (k > 0) str += sinc;
405 else for (str++; str_next(str, endstr-str, &sinc)<0; str++);
406 }
407
408 return 0;
409 }
410
411 /*
412 domain=... if domain is prefixed with ~, ignore.
413 multiple domains can be separated with |
414 */
415 static int
416 parsedomains(const char *s, int sep, struct filterdomain **head)
417 {
418 struct filterdomain *d, *last = *head = NULL;
419 char *p;
420 int inverse;
421
422 do {
423 inverse = 0;
424 if (*s == '~') {
425 inverse = !inverse;
426 s++;
427 }
428 if (!*s || *s == sep)
429 break;
430
431 if (!(d = wecalloc(1, sizeof(struct filterdomain))))
432 return -1;
433 if ((p = strchr(s, sep))) { /* TODO: should not contain ',' */
434 d->domain = westrndup(s, p - s);
435 s = p + 1;
436 } else {
437 d->domain = westrdup(s);
438 }
439 if (!d->domain)
440 return -1;
441 d->inverse = inverse;
442
443 if (!*head)
444 *head = last = d;
445 else
446 last = last->next = d;
447 } while (p);
448
449 return (*head != NULL);
450 }
451
452 static int
453 parsedomainselement(struct filterrule *f, char *s)
454 {
455 struct filterdomain *d, *last;
456
457 for (last = f->domains; last && last->next; last = last->next)
458 ;
459
460 if (parsedomains(s, ',', &d) < 0)
461 return -1;
462 if (last)
463 last->next = d;
464 else
465 f->domains = d;
466
467 return (d != NULL);
468 }
469
470 static int
471 parsedomainsoption(struct filterrule *f, char *s)
472 {
473 struct filterdomain *d, *last;
474
475 for (last = f->domains; last && last->next; last = last->next)
476 ;
477
478 if (parsedomains(s, '|', &d) < 0)
479 return -1;
480 if (last)
481 last->next = d;
482 else
483 f->domains = d;
484
485 return (d != NULL);
486 }
487
488 static int
489 filtertype_cmp(const void *a, const void *b)
490 {
491 return strcmp(((struct filtertype *)a)->name,
492 ((struct filtertype *)b)->name);
493 }
494
/*
 * Check if `domain` is the same domain as `s` or a subdomain of `s`.
 * Returns 1 if so, 0 otherwise. The comparison is case-sensitive.
 */
static int
matchdomain(const char *s, const char *domain)
{
	size_t slen = strlen(s);
	size_t dlen = strlen(domain);

	/* `s` is more specific (longer) than `domain`: cannot match */
	if (slen > dlen)
		return 0;

	/* same length: must be the very same domain */
	if (slen == dlen)
		return strcmp(s, domain) == 0;

	/* `domain` is longer: `s` must be a suffix that follows a '.' */
	if (domain[dlen - slen - 1] == '.')
		return strcmp(domain + (dlen - slen), s) == 0;

	return 0;
}
513
514 static int
515 matchrule(struct filterrule *f, const char *fromuri, const char *fromdomain,
516 const char *fromrel,
517 const char *requri, const char *reqdomain, const char *reqrel,
518 const char *type)
519 {
520 /* NOTE: order matters, see FilterType enum values */
521 struct filterdomain *d;
522 char pat[1024];
523 const char *uri;
524 int len, r;
525
526 r = f->domains ? 0 : 1;
527 for (d = f->domains; d; d = d->next) {
528 if (matchdomain(d->domain, fromdomain)) {
529 if (r && d->inverse)
530 r = 0;
531 else if (!r && !d->inverse)
532 r = 1;
533 } else if (r && !d->inverse) {
534 r = 0;
535 }
536 }
537 if (f->css) {
538 /* DEBUG */
539 #if 0
540 if (f->isexception)
541 printf("DEBUG, exception rule, CSS: %s, match? %d\n",
542 f->css, r);
543 #endif
544 return r;
545 }
546
547 #if 1
548 /* skip allow rule, TODO: inverse? */
549 if (!r)
550 return 0;
551 #endif
552
553 /* match begin including domain */
554 if (f->matchbegin) {
555 /* TODO: match domain part of pattern */
556 /* TODO: preprocess pattern if it is matchbegin? */
557
558 len = strcspn(f->uri, "^/");
559
560 /* match domain without dot */
561 r = snprintf(pat, sizeof(pat), "%.*s",
562 len, f->uri);
563 if (r == -1 || (size_t)r >= sizeof(pat)) {
564 fprintf(stderr, "warning: pattern too large, ignoring\n");
565 return 0;
566 }
567
568 /* TODO: block type mask */
569 if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1)) {
570 /* match domain with dot */
571 r = snprintf(pat, sizeof(pat), "*.%.*s",
572 len, f->uri);
573 if (r == -1 || (size_t)r >= sizeof(pat)) {
574 fprintf(stderr, "warning: pattern too large, ignoring\n");
575 return 0;
576 }
577
578 /* TODO: block type mask */
579 if (match(pat, reqdomain, (f->block & FilterTypeMatchCase) ? 0 : 1))
580 return 0;
581 }
582
583 /* match on path */
584 r = snprintf(pat, sizeof(pat), "*%s%s",
585 f->uri + len,
586 f->matchend ? "" : "*");
587 uri = reqrel;
588 } else {
589 r = snprintf(pat, sizeof(pat), "*%s%s",
590 f->uri,
591 f->matchend ? "" : "*");
592 uri = requri;
593
594 }
595 if (r == -1 || (size_t)r >= sizeof(pat)) {
596 fprintf(stderr, "warning: pattern too large, ignoring\n");
597 return 0;
598 }
599
600 /* TODO: block type mask */
601 if (!match(pat, uri, (f->block & FilterTypeMatchCase) ? 0 : 1))
602 return 1;
603
604 return 0;
605 }
606
607 static int
608 parserule(struct filterrule *f, char *s)
609 {
610 struct filtertype key, *ft;
611 int inverse = 0;
612 char *p, *values;
613
614 if (*s == '!' || (*s == '[' && s[strlen(s) - 1] == ']'))
615 return 0; /* skip comment or empty line */
616 for (; *s && isspace(*s); s++)
617 ;
618 if (!*s)
619 return 0; /* line had only whitespace: skip */
620
621 memset(f, 0, sizeof(struct filterrule));
622
623 if ((p = strstr(s, "#@#"))) {
624 *p = '\0';
625 if (parsedomainselement(f, s) < 0)
626 return -1;
627 *p = '#';
628 if (!(f->css = westrdup(p + 3)))
629 return -1;
630 f->isexception = 1;
631 goto end; /* end of CSS rule */
632 }
633
634 /* element hiding rule, NOTE: no wildcards are supported,
635 "Simplified element hiding syntax" (legacy) is not supported. */
636 if ((p = strstr(s, "##"))) {
637 *p = '\0';
638 if (parsedomainselement(f, s) < 0)
639 return -1;
640 *p = '#';
641 if (!(f->css = westrdup(p + 2)))
642 return -1;
643 goto end; /* end of rule */
644 }
645
646 if (!strncmp(s, "@@", 2)) {
647 f->isexception = 1;
648 s += 2;
649 }
650 if (*s == '|') {
651 s++;
652 if (*s == '|') {
653 f->matchbegin = 1;
654 s++;
655 } else {
656 f->matchend = 1;
657 }
658 }
659
660 /* no options, use rest of line as uri. */
661 if (!(p = strrchr(s, '$'))) {
662 if (!(f->uri = westrdup(s)))
663 return -1;
664 goto end;
665 }
666
667 /* has options */
668 if (!(f->uri = westrndup(s, p - s)))
669 return -1;
670
671 s = ++p;
672
673 /* blockmask, has options? default: allow all options, case-sensitive
674 * has no options? default: block all options, case-sensitive */
675 f->block = *s ? (unsigned long)FilterTypeMatchCase : ~0UL;
676 do {
677 if ((p = strchr(s, ',')))
678 *p = '\0';
679 /* match option */
680 inverse = 0;
681 if (*s == '~') {
682 inverse = 1;
683 s++;
684 }
685 if ((values = strchr(s, '=')))
686 *(values) = '\0';
687 key.name = s;
688
689 ft = bsearch(&key, &filtertypes,
690 sizeof(filtertypes) / sizeof(*filtertypes),
691 sizeof(*filtertypes), filtertype_cmp);
692
693 /* restore NUL-terminator for domain= option */
694 if (values)
695 *(values++) = '=';
696
697 if (ft) {
698 if (inverse)
699 f->block &= ~(ft->type);
700 else
701 f->block |= ft->type;
702 if (ft->fn && values)
703 ft->fn(f, values);
704 } else {
705 /* DEBUG */
706 #if 0
707 fprintf(stderr, "ignored: unknown option: '%s' "
708 "in rule: %s\n", key.name, f->uri);
709 #endif
710 }
711
712 /* restore ',' */
713 if (p) {
714 *p = ',';
715 s = p + 1;
716 }
717 } while (p);
718 end:
719
720 return 1;
721 }
722
#if 0
/* Debug helper (compiled out): dump the main fields of a rule to stdout. */
static void
debugrule(struct filterrule *r)
{
	const char *uri = r->uri ? r->uri : "";
	const char *css = r->css ? r->css : "";

	printf("\turi: %s\n\tcss: %s\n\tisexception: %d\n\tblockmask: "
	       "%lu\n===\n", uri, css, r->isexception, r->block);
}
#endif
732
733 static int
734 loadrules(FILE *fp)
735 {
736 struct filterrule f, *r, *rn = NULL;
737 char *line = NULL;
738 size_t linesiz = 0;
739 ssize_t n;
740 int ret;
741
742 /* load rules */
743 while ((n = getline(&line, &linesiz, fp)) > 0) {
744 if (line[n - 1] == '\n')
745 line[--n] = '\0';
746 if (n > 0 && line[n - 1] == '\r')
747 line[--n] = '\0';
748
749 if ((ret = parserule(&f, line) > 0)) {
750 if (!(r = wecalloc(1, sizeof(struct filterrule))))
751 return -1;
752 if (!rules)
753 rules = rn = r;
754 else
755 rn = rn->next = r;
756 memcpy(rn, &f, sizeof(struct filterrule));
757 } else if (ret < 0) {
758 return -1;
759 }
760 }
761 if (ferror(fp)) {
762 weprintf("getline: %s\n", strerror(errno));
763 return -1;
764 }
765 return (rules != NULL);
766 }
767
768 char *
769 getglobalcss(void)
770 {
771 return globalcss.data;
772 }
773
774 char *
775 getdocumentcss(const char *fromuri)
776 {
777 const char *s;
778 char fromdomain[256];
779 String sitecss;
780 struct filterrule *r;
781 size_t len;
782
783 /* skip protocol */
784 if ((s = strstr(fromuri, "://")))
785 fromuri = s + sizeof("://") - 1;
786 len = strcspn(fromuri, "/"); /* TODO: ":/" */
787 memcpy(fromdomain, fromuri, len);
788 fromdomain[len] = '\0';
789
790 printf("fromuri: %s\n", fromuri);
791 printf("fromdomain: %s\n", fromdomain);
792
793 /* DEBUG: timing */
794 struct timespec tp_start, tp_end, tp_diff;
795 if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1) {
796 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
797 }
798
799 /* site-specific CSS */
800 memset(&sitecss, 0, sizeof(sitecss));
801 for (r = rules; r; r = r->next) {
802 if (!r->css || !r->domains ||
803 !matchrule(r, "", fromdomain, "", "", "", "", ""))
804 continue;
805
806 len = strlen(r->css);
807 if (string_append(&sitecss, r->css, len) < len)
808 goto err;
809
810 s = r->isexception ? "{display:initial;}" : "{display:none;}";
811 len = strlen(s);
812 if (string_append(&sitecss, s, len) < len)
813 goto err;
814 }
815 /* printf("sitecss: %s\n", sitecss.data ? sitecss.data : "<empty>");*/
816
817 /* DEBUG: timing */
818 if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
819 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
820 }
821
822 tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
823 tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
824 if (tp_diff.tv_nsec < 0) {
825 tp_diff.tv_sec--;
826 tp_diff.tv_nsec += 1000000000L;
827 }
828
829 printf("timing: %lld sec, %.3f ms\n",
830 (long long)tp_diff.tv_sec, (float)tp_diff.tv_nsec / 1000000.0f);
831
832 if (globalcss.data)
833 printf("global CSS length in bytes: %zu\n", strlen(globalcss.data));
834 if (sitecss.data)
835 printf("site CSS length in bytes: %zu\n", strlen(sitecss.data));
836
837 return sitecss.data;
838
839 err:
840 free(sitecss.data);
841 /*memset(&sitecss, 0, sizeof(sitecss));*/
842
843 return NULL;
844 }
845
846 int
847 allowrequest(const char *fromuri, const char *requri)
848 {
849 struct filterrule *r;
850 char fromdomain[256], reqdomain[256];
851 const char *s, *reqrel, *fromrel;
852 size_t len;
853 int status = 1;
854
855 /* skip protocol part */
856 if ((s = strstr(fromuri, "://")))
857 fromuri = s + sizeof("://") - 1;
858 if ((s = strstr(requri, "://")))
859 requri = s + sizeof("://") - 1;
860
861 len = strcspn(fromuri, ":/"); /* TODO: ":/", but support IPV6... */
862 memcpy(fromdomain, fromuri, len);
863 fromdomain[len] = '\0';
864
865 len = strcspn(requri, ":/"); /* TODO: ":/", but support IPV6... */
866 memcpy(reqdomain, requri, len);
867 reqdomain[len] = '\0';
868
869 fromrel = &fromuri[strcspn(fromuri, "/")];
870 reqrel = &requri[strcspn(requri, "/")];
871
872 #if 0
873 printf("req %s = %s\n", requri, reqrel);
874 printf("from %s = %s\n", fromuri, fromrel);
875 #endif
876
877 /* DEBUG: timing */
878 struct timespec tp_start, tp_end, tp_diff;
879 if (clock_gettime(CLOCK_MONOTONIC, &tp_start) == -1)
880 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
881
882 /* match rules */
883 for (r = rules; r; r = r->next) {
884 if (!r->css && matchrule(r, fromuri, fromdomain,
885 fromrel, requri, reqdomain, reqrel, "csio^")) {
886 #if 0
887 printf("reqrel: %s\n", reqrel);
888 printf("reqdomain: %s\n", reqdomain);
889 printf("requri: %s\n", requri);
890 printf("from uri: %s\n", fromuri);
891 printf("from domain: %s\n", fromdomain);
892 #endif
893
894 fprintf(stderr, "blocked: %s, %s\n", fromdomain, requri);
895 fprintf(stderr, "rule: %s\n", r->uri);
896 fprintf(stderr, "===\n");
897
898 /* DEBUG: for showing the timing */
899 status = 0;
900 goto end;
901 /*return 1;*/
902 }
903 }
904
905 end:
906 /* DEBUG: timing */
907 if (clock_gettime(CLOCK_MONOTONIC, &tp_end) == -1) {
908 fprintf(stderr, "clock_gettime: %s\n", strerror(errno));
909 }
910
911 tp_diff.tv_sec = tp_end.tv_sec - tp_start.tv_sec;
912 tp_diff.tv_nsec = tp_end.tv_nsec - tp_start.tv_nsec;
913 if (tp_diff.tv_nsec < 0) {
914 tp_diff.tv_sec--;
915 tp_diff.tv_nsec += 1000000000L;
916 }
917
918 printf("%s [%s] timing: %lld sec, %.3f ms\n",
919 requri, fromuri, (long long)tp_diff.tv_sec,
920 (float)tp_diff.tv_nsec / 1000000.0f);
921
922 return status;
923 }
924
925 void
926 cleanup(void)
927 {
928 struct filterrule *r;
929 struct filterdomain *d;
930
931 free(globalcss.data);
932 memset(&globalcss, 0, sizeof(globalcss));
933
934 for (r = rules; r; r = rules) {
935 for (d = r->domains; d; d = r->domains) {
936 free(d->domain);
937 r->domains = d->next;
938 free(d);
939 }
940 free(r->css);
941 free(r->uri);
942 rules = r->next;
943 free(r);
944 }
945 rules = NULL;
946 }
947
948 void
949 init(void)
950 {
951 struct filterrule *r;
952 FILE *fp;
953 const char *s;
954 char filepath[PATH_MAX], *e;
955 size_t len;
956 int n;
957
958 if ((e = getenv("SURF_ADBLOCK_FILE"))) {
959 n = snprintf(filepath, sizeof(filepath), "%s", e);
960 } else {
961 if (!(e = getenv("HOME")))
962 e = "";
963 n = snprintf(filepath, sizeof(filepath),
964 "%s%s.surf/adblockrules", e, e[0] ? "/" : "");
965 }
966 if (n < 0 || (size_t)n >= sizeof(filepath)) {
967 weprintf("fatal: rules file path too long");
968 return;
969 }
970
971 if (!(fp = fopen(filepath, "r"))) {
972 weprintf("fatal: cannot open rules file %s: %s\n",
973 filepath, strerror(errno));
974 return;
975 }
976
977 n = loadrules(fp);
978 fclose(fp);
979 if (n < 1) {
980 if (n < 0) {
981 weprintf("fatal: cannot read rules from file %s: %s\n",
982 filepath, strerror(errno));
983 } else {
984 weprintf("fatal: cannot read any rule from file %s\n",
985 filepath);
986 }
987 return;
988 }
989
990 /* general CSS rules: all sites */
991 for (r = rules; r; r = r->next) {
992 if (!r->css || r->domains)
993 continue;
994
995 len = strlen(r->css);
996 if (string_append(&globalcss, r->css, len) < len) {
997 weprintf("cannot append CSS rule to global CSS selectors\n");
998 cleanup();
999 return;
1000 }
1001
1002 s = r->isexception ? "{display:initial;}" : "{display:none;}";
1003 len = strlen(s);
1004 if (string_append(&globalcss, s, len) < len) {
1005 weprintf("cannot append CSS rule to global CSS selectors\n");
1006 cleanup();
1007 return;
1008 }
1009 }
1010 }