simplify tag type matching, add nested list-item support... - webdump - [FORK] git://git.codemadness.org/webdump
(HTM) git clone git://git.z3bra.org/webdump.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 6437b1c9d5dd27a1e29e10bda42264127383281e
(DIR) parent dcc69463abb4a70f95b6126629e5d6ab57e393e3
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 22 Jul 2017 15:14:02 +0200
simplify tag type matching, add nested list-item support...
... indicate page headers (will be improved).
Diffstat:
M main.c | 90 +++++++++++++++++++++++++++-----
1 file changed, 78 insertions(+), 12 deletions(-)
---
(DIR) diff --git a/main.c b/main.c
@@ -27,6 +27,35 @@ struct node {
static struct node nodes[MAX_DEPTH];
static int curnode;
+static char *inlinetags[] = {
+ "b",
+ "i",
+ "u",
+ "strong",
+ "em",
+ "a",
+ "span",
+ "img",
+};
+
+static char *blocktags[] = {
+ "h1",
+ "h2",
+ "h3",
+ "h4",
+ "h5",
+ "h6",
+ "p",
+ "ul",
+ "ol",
+ "li",
+ "hr",
+ "br",
+ "title",
+ "tr",
+ "table",
+};
+
static void
printindent(int count)
{
@@ -133,20 +162,27 @@ static void
xmltagstart(XMLParser *p, const char *tag, size_t taglen)
{
struct node *cur = &nodes[curnode];
+ int i;
memset(cur, 0, sizeof(*cur));
strlcpy(cur->tag, tag, sizeof(cur->tag));
- if (!strcmp(tag, "pre"))
+ if (!strcmp(tag, "pre")) {
cur->ispre = 1;
- else if (tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6' && tag[2] == '\0' ||
- !strcmp(tag, "p") || !strcmp(tag, "ul") || !strcmp(tag, "ol") ||
- !strcmp(tag, "li") || !strcmp(tag, "hr") ||
- !strcmp(tag, "br") || !strcmp(tag, "title") || !strcmp(tag, "tr") ||
- !strcmp(tag, "table"))
- cur->isblock = 1;
- else if (!strcmp(tag, "a") || !strcmp(tag, "span") || !strcmp(tag, "img"))
- cur->isinline = 1;
+ } else {
+ for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
+ if (!strcmp(blocktags[i], tag)) {
+ cur->isblock = 1;
+ break;
+ }
+ }
+ for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
+ if (!strcmp(inlinetags[i], tag)) {
+ cur->isinline = 1;
+ break;
+ }
+ }
+ }
if (!cur->isinline)
printindent(curnode);
@@ -158,6 +194,7 @@ static void
xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
{
struct node *cur;
+ int i;
if (curnode)
curnode--;
@@ -170,22 +207,46 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
if (!cur->isinline)
printindent(curnode);
/* printf("</%s>", tag);*/
+
+
if (cur->isblock)
fputs("\n", stdout);
+
+ if (taglen == 2 && tag[0] == 'h' && tag[1] >= '1' && tag[1] <= '6') {
+ if (tag[1] >= '3')
+ for (i = 0; i < 36; i++)
+ putchar('-');
+ else if (tag[1] >= '1')
+ for (i = 0; i < 36; i++)
+ putchar('=');
+ putchar('\n');
+ }
}
static void
xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
{
struct node *cur;
+ int i;
cur = &nodes[curnode];
if (cur->isblock)
putchar('\n');
- if (!strcmp(cur->tag, "li"))
+ if (!strcmp(cur->tag, "li")) {
+ /* indent nested list items */
+ for (i = curnode; i; i--) {
+ if (!strcmp(nodes[i].tag, "li"))
+ continue;
+ if (!strcmp(nodes[i].tag, "ul") ||
+ !strcmp(nodes[i].tag, "ol")) {
+ fputs(" ", stdout);
+ }
+ }
fputs("* ", stdout);
- else if (!strcmp(cur->tag, "hr"))
- fputs("----------", stdout);
+ } else if (!strcmp(cur->tag, "hr")) {
+ for (i = 0; i < 36; i++)
+ putchar('-');
+ }
if (isshort)
return;
@@ -202,6 +263,11 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
/* if (!strcmp(tag, "a") && !strcmp(name, "href") && valuelen)
printf(" [%s]", value);*/
+ /* TODO: check alt and title attr also? */
+/* if (!strcmp(tag, "img") && !strcmp(name, "src") && valuelen) {
+ printf(" [%s]", value);
+ }*/
+
/* printf(" %s=\"%s\"", name, value);*/
}