simplify tag type matching, add nested list-item support... - webdump - [FORK] git://git.codemadness.org/webdump
 (HTM) git clone git://git.z3bra.org/webdump.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 6437b1c9d5dd27a1e29e10bda42264127383281e
 (DIR) parent dcc69463abb4a70f95b6126629e5d6ab57e393e3
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 22 Jul 2017 15:14:02 +0200
       
       simplify tag type matching, add nested list-item support...
       
       ... indicate page headers (will be improved).
       
       Diffstat:
         M main.c                              |      90 +++++++++++++++++++++++++++-----
       
       1 file changed, 78 insertions(+), 12 deletions(-)
       ---
 (DIR) diff --git a/main.c b/main.c
       @@ -27,6 +27,35 @@ struct node {
        static struct node nodes[MAX_DEPTH];
        static int curnode;
        
       +static char *inlinetags[] = {
       +        "b",
       +        "i",
       +        "u",
       +        "strong",
       +        "em",
       +        "a",
       +        "span",
       +        "img",
       +};
       +
       +static char *blocktags[] = {
       +        "h1",
       +        "h2",
       +        "h3",
       +        "h4",
       +        "h5",
       +        "h6",
       +        "p",
       +        "ul",
       +        "ol",
       +        "li",
       +        "hr",
       +        "br",
       +        "title",
       +        "tr",
       +        "table",
       +};
       +
        static void
        printindent(int count)
        {
       @@ -133,20 +162,27 @@ static void
        xmltagstart(XMLParser *p, const char *tag, size_t taglen)
        {
                struct node *cur = &nodes[curnode];
       +        int i;
        
                memset(cur, 0, sizeof(*cur));
                strlcpy(cur->tag, tag, sizeof(cur->tag));
        
       -        if (!strcmp(tag, "pre"))
       +        if (!strcmp(tag, "pre")) {
                        cur->ispre = 1;
       -        else if (tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6' && tag[2] == '\0' ||
       -                !strcmp(tag, "p") || !strcmp(tag, "ul") || !strcmp(tag, "ol") ||
       -                !strcmp(tag, "li") || !strcmp(tag, "hr") ||
       -                !strcmp(tag, "br") || !strcmp(tag, "title") || !strcmp(tag, "tr") ||
       -                !strcmp(tag, "table"))
       -                cur->isblock = 1;
       -        else if (!strcmp(tag, "a") || !strcmp(tag, "span") || !strcmp(tag, "img"))
       -                cur->isinline = 1;
       +        } else {
       +                for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
       +                        if (!strcmp(blocktags[i], tag)) {
       +                                cur->isblock = 1;
       +                                break;
       +                        }
       +                }
       +                for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
       +                        if (!strcmp(inlinetags[i], tag)) {
       +                                cur->isinline = 1;
       +                                break;
       +                        }
       +                }
       +        }
        
                if (!cur->isinline)
                        printindent(curnode);
       @@ -158,6 +194,7 @@ static void
        xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
        {
                struct node *cur;
       +        int i;
        
                if (curnode)
                        curnode--;
       @@ -170,22 +207,46 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
                if (!cur->isinline)
                        printindent(curnode);
        /*        printf("</%s>", tag);*/
       +
       +
                if (cur->isblock)
                        fputs("\n", stdout);
       +
       +        if (taglen == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6') {
       +                if (tag[1] >= '3')
       +                        for (i = 0; i < 36; i++)
       +                                putchar('-');
       +                else if (tag[1] >= '1')
       +                        for (i = 0; i < 36; i++)
       +                                putchar('=');
       +                putchar('\n');
       +        }
        }
        
        static void
        xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
        {
                struct node *cur;
       +        int i;
        
                cur = &nodes[curnode];
                if (cur->isblock)
                        putchar('\n');
       -        if (!strcmp(cur->tag, "li"))
       +        if (!strcmp(cur->tag, "li")) {
       +                /* indent nested list items */
       +                for (i = curnode; i; i--) {
       +                        if (!strcmp(nodes[i].tag, "li"))
       +                                continue;
       +                        if (!strcmp(nodes[i].tag, "ul") ||
       +                            !strcmp(nodes[i].tag, "ol")) {
       +                                fputs("    ", stdout);
       +                        }
       +                }
                        fputs("* ", stdout);
       -        else if (!strcmp(cur->tag, "hr"))
       -                fputs("----------", stdout);
       +        } else if (!strcmp(cur->tag, "hr")) {
       +                for (i = 0; i < 36; i++)
       +                        putchar('-');
       +        }
        
                if (isshort)
                        return;
       @@ -202,6 +263,11 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
        /*        if (!strcmp(tag, "a") && !strcmp(name, "href") && valuelen)
                        printf(" [%s]", value);*/
        
       +        /* TODO: check alt and title attr also? */
       +/*        if (!strcmp(tag, "img") && !strcmp(name, "src") && valuelen) {
       +                printf(" [%s]", value);
       +        }*/
       +
        /*        printf(" %s=\"%s\"", name, value);*/
        }