more refactoring, update TODO and bump LICENSE year - webdump - [FORK] git://git.codemadness.org/webdump
 (HTM) git clone git://git.z3bra.org/webdump.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit fd8b8950efb4f0b5d2d2bb679b7ded6131725fb5
 (DIR) parent d87d026a246edadd201b607c15881172ac2564f1
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat, 21 Sep 2019 16:25:35 +0200
       
       more refactoring, update TODO and bump LICENSE year
       
       Diffstat:
         M LICENSE                             |       2 +-
         M TODO                                |       1 +
         M main.c                              |     172 +++++++++++++++++--------------
       
       3 files changed, 97 insertions(+), 78 deletions(-)
       ---
 (DIR) diff --git a/LICENSE b/LICENSE
       @@ -1,6 +1,6 @@
        ISC License
        
       -Copyright (c) 2017-2018 Hiltjo Posthuma <hiltjo@codemadness.org>
       +Copyright (c) 2017-2019 Hiltjo Posthuma <hiltjo@codemadness.org>
        
        Permission to use, copy, modify, and/or distribute this software for any
        purpose with or without fee is hereby granted, provided that the above
 (DIR) diff --git a/TODO b/TODO
       @@ -1,5 +1,6 @@
        - base href.
          specify and parse relative url, allow to specify base and also parse <base href="">
       +- handle <link /> to RSS/Atom feed, show as link.
        - handle whitespace, and tag types properly: atleast: inline-block, inline, block, pre
        - print safe (not certain control chars, except newline, TAB etc).
        - improve/remove duplicate white-space/newlines?
 (DIR) diff --git a/main.c b/main.c
       @@ -19,6 +19,8 @@ struct uri {
                char port[6];     /* numeric port */
        };
        
       +static int termwidth = 72;
       +
        #if 0
        /* linked-list of link references */
        struct linkref {
       @@ -33,12 +35,18 @@ static int linkcount;
        #endif
        
        enum DisplayType {
       -        DisplayInline = 1,
       -        DisplayPre = 2,
       -        DisplayInlineBlock = 4,
       -        DisplayBlock = 8,
       -        DisplayListItem = 16,
       -        DisplayTableCell = 32,
       +        DisplayUnknown     = 0,
       +        DisplayNone        = 1,
       +        DisplayPre         = 2,
       +        DisplayInline      = 4,
       +        DisplayInlineBlock = 8,
       +        DisplayBlock       = 16,
       +        DisplayList        = 32,
       +        DisplayListItem    = 64,
       +        DisplayTable       = 128,
       +        DisplayTableRow    = 256,
       +        DisplayTableCell   = 512,
       +        DisplayHeader      = 1024,
        };
        
        struct node {
       @@ -66,10 +74,6 @@ static char src[4096]; /* src or href attribute */
        static struct node nodes[MAX_DEPTH];
        static int curnode;
        
       -/* TODO: temporary workaround, handle whitespace, and tag types properly:
       -   atleast: inline-block, inline, block, pre */
       -static int ignoredata;
       -
        static struct {
                char *tag;
                enum DisplayType displaytype;
       @@ -87,27 +91,32 @@ static struct {
                { "span", DisplayInline },
                { "img", DisplayInline },
                { "label", DisplayInline },
       +        /* table */
       +        { "table", DisplayTable },
       +        /* table-row */
       +        { "tr", DisplayTableRow },
                /* table-cell */
                { "td", DisplayTableCell },
                { "th", DisplayTableCell },
                /* list-item */
                { "li", DisplayListItem },
       +        /* header */
       +        { "h1", DisplayHeader },
       +        { "h2", DisplayHeader },
       +        { "h3", DisplayHeader },
       +        { "h4", DisplayHeader },
       +        { "h5", DisplayHeader },
       +        { "h6", DisplayHeader },
       +        /* break */
       +        { "br", 0 },
       +        /* list */
       +        { "ul", DisplayList },
       +        { "ol", DisplayList },
                /* block */
       -        { "h1", DisplayBlock },
       -        { "h2", DisplayBlock },
       -        { "h3", DisplayBlock },
       -        { "h4", DisplayBlock },
       -        { "h5", DisplayBlock },
       -        { "h6", DisplayBlock },
                { "p", DisplayBlock },
       -        { "ul", DisplayBlock },
       -        { "lo", DisplayBlock },
       +        { "blockquote", DisplayBlock },
                { "hr", DisplayBlock },
       -        { "br", DisplayBlock },
                { "title", DisplayBlock },
       -        { "tr", DisplayBlock },
       -        { "table", DisplayBlock },
       -        { "blockquote", DisplayBlock },
                { "div", DisplayBlock },
        };
        
       @@ -380,6 +389,12 @@ absuri(char *buf, size_t bufsiz, const char *link, const char *base)
        static void
        xmlcdata(XMLParser *p, const char *data, size_t datalen)
        {
       +        struct node *cur;
       +
       +        cur = &nodes[curnode];
       +        if (cur->displaytype & DisplayNone)
       +                return;
       +
                printsafe(data);
        }
        
       @@ -394,21 +409,10 @@ xmldataend(XMLParser *p)
                        return;
        
                start = htmldata.data;
       -#if 1
       +
       +        /* TODO: white-space handling */
                s = start;
                e = s + strlen(s);
       -#else
       -        /* TODO: white-space handling */
       -        for (s = start; *s; s++) {
       -                if (*s != '\r' && *s != '\n')
       -                        break;
       -        }
       -
       -        for (e = s + strlen(s); e > s; e--) {
       -                if (*e != '\r' && *e != '\n')
       -                        break;
       -        }
       -#endif
        
                if (cur->displaytype & DisplayPre) {
                        fwrite(s, 1, e - s, stdout);
       @@ -433,17 +437,26 @@ xmldataend(XMLParser *p)
        static void
        xmldata(XMLParser *p, const char *data, size_t datalen)
        {
       -        if (ignoredata)
       +        struct node *cur;
       +
       +        cur = &nodes[curnode];
       +        if (cur->displaytype & DisplayNone)
                        return;
       +
                string_append(&htmldata, data, datalen);
        }
        
        static void
        xmldataentity(XMLParser *p, const char *data, size_t datalen)
        {
       +        struct node *cur;
                char buf[16];
                int n;
        
       +        cur = &nodes[curnode];
       +        if (cur->displaytype & DisplayNone)
       +                return;
       +
                /* convert basic XML entities */
                /* &nbsp; &copy;, copy table from Links (check license) */
                /* rsquo, hellip, ndash, lsquo */
       @@ -471,11 +484,7 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
                src[0] = '\0'; /* src, href */
                strlcpy(cur->tag, t, sizeof(cur->tag));
        
       -        if (!strcasecmp(t, "table"))
       -                ignoredata = 1;
       -        else if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
       -                ignoredata = 0;
       -
       +        /* set display type */
                for (i = 0; i < sizeof(tags) / sizeof(*tags); i++) {
                        if (!strcasecmp(tags[i].tag, t)) {
                                cur->displaytype |= tags[i].displaytype;
       @@ -492,27 +501,32 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
        
                cur = &nodes[curnode];
        
       -        if (!strcasecmp(t, "tr")) {
       -                fputs(" | ", stdout); /* HACK: last cell */
       -                return;
       -        } else if (!strcasecmp(t, "td") || !strcasecmp(t, "th")) {
       -                ignoredata = 1;
       -                return;
       -        } else if (!strcasecmp(t, "table")) {
       -                ignoredata = 0;
       -        }
       -
       -        if (cur->displaytype & DisplayBlock)
       +        if (cur->displaytype & DisplayBlock) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayPre) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayTable) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayTableRow) {
       +                fputs(" | ", stdout); /* HACK: assume last cell */
       +        } else if (cur->displaytype & DisplayTableCell) {
       +        } else if (cur->displaytype & DisplayList) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayListItem) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayHeader) {
       +                fputs("\n", stdout);
       +                if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
       +                        if (t[1] >= '3')
       +                                for (i = 0; i < termwidth; i++)
       +                                        putchar('-');
       +                        else if (t[1] >= '1')
       +                                for (i = 0; i < termwidth; i++)
       +                                        putchar('=');
       +                        putchar('\n');
       +                }
       +        } else if (!strcasecmp(t, "br")) {
                        fputs("\n", stdout);
       -
       -        if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
       -                if (t[1] >= '3')
       -                        for (i = 0; i < 72; i++)
       -                                putchar('-');
       -                else if (t[1] >= '1')
       -                        for (i = 0; i < 72; i++)
       -                                putchar('=');
       -                putchar('\n');
                }
        
                curnode--;
       @@ -541,9 +555,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
        
                cur = &nodes[curnode];
        
       -        if (cur->displaytype & DisplayBlock)
       -                fputs("\n", stdout);
       -#if 0
       +#ifdef maybe
                /* show links as reference at the bottom */
                if (src[0]) {
                        printf(" [%d]", ++linkcount);
       @@ -556,6 +568,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
                        else
                                links_cur = links_cur->next = ecalloc(1, sizeof(*links_head));
                        links_cur->type = estrdup(t);
       +                /* TODO: absuri */
                        links_cur->url = estrdup(src);
                }
                src[0] = '\0';
       @@ -575,26 +588,29 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
                        }
                }
        
       -        if (cur->displaytype & DisplayBlock)
       +        if (cur->displaytype & DisplayBlock) {
                        fputs("\n", stdout);
       -
       -        if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
       -                fputs(" | ", stdout); /* HACK */
       -
       -        if (!strcasecmp(t, "li")) {
       +        } else if (cur->displaytype & DisplayHeader) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayTableRow) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayTableCell) {
       +                fputs(" | ", stdout);
       +        } else if (cur->displaytype & DisplayList) {
       +                fputs("\n", stdout);
       +        } else if (cur->displaytype & DisplayListItem) {
                        /* indent nested list items */
                        for (i = curnode; i; i--) {
       -                        if (!strcasecmp(nodes[i].tag, "li"))
       +                        if (nodes[i].displaytype & DisplayListItem)
                                        continue;
       -                        if (!strcasecmp(nodes[i].tag, "ul") ||
       -                            !strcasecmp(nodes[i].tag, "ol"))
       +                        if (nodes[i].displaytype & DisplayList)
                                        fputs("    ", stdout);
                        }
                        /* TODO: for <ol>, keep list counter on ol element (parent),
                           support ordered number type only */
                        fputs("* ", stdout);
       -        } else if (!strcasecmp(t, "hr")) {
       -                for (i = 0; i < 72; i++)
       +        } else if (!strcasecmp(t, "hr")) { /* ruler */
       +                for (i = 0; i < termwidth; i++)
                                putchar('-');
                }
        }
       @@ -612,7 +628,7 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
                        strlcpy(src, value, sizeof(src));
        }
        
       -#if 0
       +#ifdef maybe
        void
        printlinkrefs(void)
        {
       @@ -644,7 +660,9 @@ main(void)
                parser.getnext = getchar;
                xml_parse(&parser);
        
       -/*        printlinkrefs();*/
       +#ifdef maybe
       +        printlinkrefs();
       +#endif
                putchar('\n');
        
                return 0;