more refactoring, update TODO and bump LICENSE year - webdump - [FORK] git://git.codemadness.org/webdump
(HTM) git clone git://git.z3bra.org/webdump.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit fd8b8950efb4f0b5d2d2bb679b7ded6131725fb5
(DIR) parent d87d026a246edadd201b607c15881172ac2564f1
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 21 Sep 2019 16:25:35 +0200
more refactoring, update TODO and bump LICENSE year
Diffstat:
M LICENSE | 2 +-
M TODO | 1 +
M main.c | 172 +++++++++++++++++--------------
3 files changed, 97 insertions(+), 78 deletions(-)
---
(DIR) diff --git a/LICENSE b/LICENSE
t@@ -1,6 +1,6 @@
ISC License
-Copyright (c) 2017-2018 Hiltjo Posthuma <hiltjo@codemadness.org>
+Copyright (c) 2017-2019 Hiltjo Posthuma <hiltjo@codemadness.org>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
(DIR) diff --git a/TODO b/TODO
t@@ -1,5 +1,6 @@
- base href.
specify and parse relative url, allow to specify base and also parse <base href="">
+- handle <link /> to RSS/Atom feed, show as link.
- handle whitespace, and tag types properly: atleast: inline-block, inline, block, pre
- print safe (not certain control chars, except newline, TAB etc).
- improve/remove duplicate white-space/newlines?
(DIR) diff --git a/main.c b/main.c
t@@ -19,6 +19,8 @@ struct uri {
char port[6]; /* numeric port */
};
+static int termwidth = 72;
+
#if 0
/* linked-list of link references */
struct linkref {
t@@ -33,12 +35,18 @@ static int linkcount;
#endif
enum DisplayType {
- DisplayInline = 1,
- DisplayPre = 2,
- DisplayInlineBlock = 4,
- DisplayBlock = 8,
- DisplayListItem = 16,
- DisplayTableCell = 32,
+ DisplayUnknown = 0,
+ DisplayNone = 1,
+ DisplayPre = 2,
+ DisplayInline = 4,
+ DisplayInlineBlock = 8,
+ DisplayBlock = 16,
+ DisplayList = 32,
+ DisplayListItem = 64,
+ DisplayTable = 128,
+ DisplayTableRow = 256,
+ DisplayTableCell = 512,
+ DisplayHeader = 1024,
};
struct node {
t@@ -66,10 +74,6 @@ static char src[4096]; /* src or href attribute */
static struct node nodes[MAX_DEPTH];
static int curnode;
-/* TODO: temporary workaround, handle whitespace, and tag types properly:
- atleast: inline-block, inline, block, pre */
-static int ignoredata;
-
static struct {
char *tag;
enum DisplayType displaytype;
t@@ -87,27 +91,32 @@ static struct {
{ "span", DisplayInline },
{ "img", DisplayInline },
{ "label", DisplayInline },
+ /* table */
+ { "table", DisplayTable },
+ /* table-row */
+ { "tr", DisplayTableRow },
/* table-cell */
{ "td", DisplayTableCell },
{ "th", DisplayTableCell },
/* list-item */
{ "li", DisplayListItem },
+ /* header */
+ { "h1", DisplayHeader },
+ { "h2", DisplayHeader },
+ { "h3", DisplayHeader },
+ { "h4", DisplayHeader },
+ { "h5", DisplayHeader },
+ { "h6", DisplayHeader },
+ /* break */
+ { "br", 0 },
+ /* list */
+ { "ul", DisplayList },
+ { "ol", DisplayList },
/* block */
- { "h1", DisplayBlock },
- { "h2", DisplayBlock },
- { "h3", DisplayBlock },
- { "h4", DisplayBlock },
- { "h5", DisplayBlock },
- { "h6", DisplayBlock },
{ "p", DisplayBlock },
- { "ul", DisplayBlock },
- { "lo", DisplayBlock },
+ { "blockquote", DisplayBlock },
{ "hr", DisplayBlock },
- { "br", DisplayBlock },
{ "title", DisplayBlock },
- { "tr", DisplayBlock },
- { "table", DisplayBlock },
- { "blockquote", DisplayBlock },
{ "div", DisplayBlock },
};
t@@ -380,6 +389,12 @@ absuri(char *buf, size_t bufsiz, const char *link, const char *base)
static void
xmlcdata(XMLParser *p, const char *data, size_t datalen)
{
+ struct node *cur;
+
+ cur = &nodes[curnode];
+ if (cur->displaytype & DisplayNone)
+ return;
+
printsafe(data);
}
t@@ -394,21 +409,10 @@ xmldataend(XMLParser *p)
return;
start = htmldata.data;
-#if 1
+
+ /* TODO: white-space handling */
s = start;
e = s + strlen(s);
-#else
- /* TODO: white-space handling */
- for (s = start; *s; s++) {
- if (*s != '\r' && *s != '\n')
- break;
- }
-
- for (e = s + strlen(s); e > s; e--) {
- if (*e != '\r' && *e != '\n')
- break;
- }
-#endif
if (cur->displaytype & DisplayPre) {
fwrite(s, 1, e - s, stdout);
t@@ -433,17 +437,26 @@ xmldataend(XMLParser *p)
static void
xmldata(XMLParser *p, const char *data, size_t datalen)
{
- if (ignoredata)
+ struct node *cur;
+
+ cur = &nodes[curnode];
+ if (cur->displaytype & DisplayNone)
return;
+
string_append(&htmldata, data, datalen);
}
static void
xmldataentity(XMLParser *p, const char *data, size_t datalen)
{
+ struct node *cur;
char buf[16];
int n;
+ cur = &nodes[curnode];
+ if (cur->displaytype & DisplayNone)
+ return;
+
/* convert basic XML entities */
/* ©, copy table from Links (check license) */
/* rsquo, hellip, ndash, lsquo */
t@@ -471,11 +484,7 @@ xmltagstart(XMLParser *x, const char *t, size_t tl)
src[0] = '\0'; /* src, href */
strlcpy(cur->tag, t, sizeof(cur->tag));
- if (!strcasecmp(t, "table"))
- ignoredata = 1;
- else if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
- ignoredata = 0;
-
+ /* set display type */
for (i = 0; i < sizeof(tags) / sizeof(*tags); i++) {
if (!strcasecmp(tags[i].tag, t)) {
cur->displaytype |= tags[i].displaytype;
t@@ -492,27 +501,32 @@ xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
cur = &nodes[curnode];
- if (!strcasecmp(t, "tr")) {
- fputs(" | ", stdout); /* HACK: last cell */
- return;
- } else if (!strcasecmp(t, "td") || !strcasecmp(t, "th")) {
- ignoredata = 1;
- return;
- } else if (!strcasecmp(t, "table")) {
- ignoredata = 0;
- }
-
- if (cur->displaytype & DisplayBlock)
+ if (cur->displaytype & DisplayBlock) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayPre) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayTable) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayTableRow) {
+ fputs(" | ", stdout); /* HACK: assume last cell */
+ } else if (cur->displaytype & DisplayTableCell) {
+ } else if (cur->displaytype & DisplayList) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayListItem) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayHeader) {
+ fputs("\n", stdout);
+ if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
+ if (t[1] >= '3')
+ for (i = 0; i < termwidth; i++)
+ putchar('-');
+ else if (t[1] >= '1')
+ for (i = 0; i < termwidth; i++)
+ putchar('=');
+ putchar('\n');
+ }
+ } else if (!strcasecmp(t, "br")) {
fputs("\n", stdout);
-
- if (tl == 2 && t[0] == 'h' && t[1] >= '1' && t[1] <= '6') {
- if (t[1] >= '3')
- for (i = 0; i < 72; i++)
- putchar('-');
- else if (t[1] >= '1')
- for (i = 0; i < 72; i++)
- putchar('=');
- putchar('\n');
}
curnode--;
t@@ -541,9 +555,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
cur = &nodes[curnode];
- if (cur->displaytype & DisplayBlock)
- fputs("\n", stdout);
-#if 0
+#ifdef maybe
/* show links as reference at the bottom */
if (src[0]) {
printf(" [%d]", ++linkcount);
t@@ -556,6 +568,7 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
else
links_cur = links_cur->next = ecalloc(1, sizeof(*links_head));
links_cur->type = estrdup(t);
+ /* TODO: absuri */
links_cur->url = estrdup(src);
}
src[0] = '\0';
t@@ -575,26 +588,29 @@ xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
}
}
- if (cur->displaytype & DisplayBlock)
+ if (cur->displaytype & DisplayBlock) {
fputs("\n", stdout);
-
- if (!strcasecmp(t, "td") || !strcasecmp(t, "th"))
- fputs(" | ", stdout); /* HACK */
-
- if (!strcasecmp(t, "li")) {
+ } else if (cur->displaytype & DisplayHeader) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayTableRow) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayTableCell) {
+ fputs(" | ", stdout);
+ } else if (cur->displaytype & DisplayList) {
+ fputs("\n", stdout);
+ } else if (cur->displaytype & DisplayListItem) {
/* indent nested list items */
for (i = curnode; i; i--) {
- if (!strcasecmp(nodes[i].tag, "li"))
+ if (nodes[i].displaytype & DisplayListItem)
continue;
- if (!strcasecmp(nodes[i].tag, "ul") ||
- !strcasecmp(nodes[i].tag, "ol"))
+ if (nodes[i].displaytype & DisplayList)
fputs(" ", stdout);
}
/* TODO: for <ol>, keep list counter on ol element (parent),
support ordered number type only */
fputs("* ", stdout);
- } else if (!strcasecmp(t, "hr")) {
- for (i = 0; i < 72; i++)
+ } else if (!strcasecmp(t, "hr")) { /* ruler */
+ for (i = 0; i < termwidth; i++)
putchar('-');
}
}
t@@ -612,7 +628,7 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
strlcpy(src, value, sizeof(src));
}
-#if 0
+#ifdef maybe
void
printlinkrefs(void)
{
t@@ -644,7 +660,9 @@ main(void)
parser.getnext = getchar;
xml_parse(&parser);
-/* printlinkrefs();*/
+#ifdef maybe
+ printlinkrefs();
+#endif
putchar('\n');
return 0;