initial support to ignore tags (script and CSS) - webdump - [FORK] git://git.codemadness.org/webdump
 (HTM) git clone git://git.z3bra.org/webdump.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 421341e1a2b737cb269a144a1634511705161651
 (DIR) parent 6dae546b7c15b859321849c8b7b7294e6d916adc
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 22 Jul 2017 16:02:56 +0200
       
       initial support to ignore tags (script and CSS)
       
       this is not fully working yet because scripts can contain literal characters
       such as < and >.
       
       Diffstat:
         M main.c                              |      21 +++++++++++++++++++++
       
       1 file changed, 21 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/main.c b/main.c
       @@ -17,6 +17,7 @@ static int isdatastart;
        
        struct node {
                char tag[256];
       +        int isignore;
                int ispre;
                int isinline;
                int isblock;
       @@ -28,6 +29,12 @@ static char src[4096]; /* src or href attribute */
        static struct node nodes[MAX_DEPTH];
        static int curnode;
        
       +/* TODO: support literal text in script somehow? < > */
       +static char *ignoretags[] = {
       +        "style",
       +        "script",
       +};
       +
        static char *pretags[] = {
                "pre",
                "code",
       @@ -88,6 +95,8 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
                const char *s = data;
        
                cur = &nodes[curnode];
       +        if (cur->isignore)
       +                goto end;
        
                /* TODO: if not <pre> or w/e, skip? */
                if (isdatastart && isspace(*s)) {
       @@ -110,6 +119,7 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
                        }
                }
        
       +end:
                /* TODO: remove trailing space also ? */
                isdatastart = 0;
        }
       @@ -139,6 +149,12 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
                src[0] = '\0'; /* src, href */
                strlcpy(cur->tag, tag, sizeof(cur->tag));
        
       +        for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
       +                if (!strcmp(ignoretags[i], tag)) {
       +                        cur->isignore = 1;
       +                        break;
       +                }
       +        }
                for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
                        if (!strcmp(pretags[i], tag)) {
                                cur->ispre = 1;
       @@ -168,6 +184,8 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
                if (curnode)
                        curnode--;
                cur = &nodes[curnode];
       +        if (cur->isignore)
       +                return;
        
        #if 0
                if (src[0])
       @@ -196,6 +214,9 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
                int i;
        
                cur = &nodes[curnode];
       +        if (cur->isignore)
       +                return;
       +
                if (cur->isblock)
                        fputs("\n", stdout);