simplify pre-like tags parsing, support some basic entities - webdump - [FORK] git://git.codemadness.org/webdump
(HTM) git clone git://git.z3bra.org/webdump.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit db8bb6f1a0e1ca29e8cdfd8a6c098fc1076fea80
(DIR) parent 6437b1c9d5dd27a1e29e10bda42264127383281e
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 22 Jul 2017 15:29:22 +0200
simplify pre-like tags parsing, support some basic entities
remove comment handling
Diffstat:
M main.c | 105 +++++++++++--------------------
1 file changed, 37 insertions(+), 68 deletions(-)
---
(DIR) diff --git a/main.c b/main.c
@@ -27,6 +27,11 @@ struct node {
static struct node nodes[MAX_DEPTH];
static int curnode;
+static char *pretags[] = {
+ "pre",
+ "code",
+};
+
static char *inlinetags[] = {
"b",
"i",
@@ -36,6 +41,7 @@ static char *inlinetags[] = {
"a",
"span",
"img",
+ "td",
};
static char *blocktags[] = {
@@ -57,35 +63,9 @@ static char *blocktags[] = {
};
static void
-printindent(int count)
-{
-/* while (count--)
- putchar('\t');*/
-}
-
-static void
-xmlcommentstart(XMLParser *p)
-{
- /*printf("<!--");*/
-}
-
-static void
-xmlcomment(XMLParser *p, const char *data, size_t datalen)
-{
- /*printf("%s", data);*/
-}
-
-static void
-xmlcommentend(XMLParser *p)
-{
- /*printf("-->");*/
-}
-
-static void
xmlcdatastart(XMLParser *p)
{
iscdatastart = 1;
-/* printf("<![CDATA[");*/
}
static void
@@ -99,7 +79,6 @@ xmlcdata(XMLParser *p, const char *data, size_t datalen)
static void
xmlcdataend(XMLParser *p)
{
-/* printf("]]>");*/
iscdatastart = 0;
}
@@ -152,10 +131,16 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
static void
xmldataentity(XMLParser *p, const char *data, size_t datalen)
{
- /* TODO: convert HTML entity */
- /*printf("%s", data);*/
-
- xmldata(p, data, datalen);
+ char buf[16];
+ int n;
+
+ /* convert basic XML entities */
+ /* TODO: support some more HTML entities */
+ n = xml_entitytostr(data, buf, sizeof(buf));
+ if (n <= 0)
+ xmldata(p, data, datalen);
+ else
+ fputs(buf, stdout);
}
static void
@@ -167,27 +152,24 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
memset(cur, 0, sizeof(*cur));
strlcpy(cur->tag, tag, sizeof(cur->tag));
- if (!strcmp(tag, "pre")) {
- cur->ispre = 1;
- } else {
- for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
- if (!strcmp(blocktags[i], tag)) {
- cur->isblock = 1;
- break;
- }
+ for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
+ if (!strcmp(pretags[i], tag)) {
+ cur->ispre = 1;
+ break;
}
- for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
- if (!strcmp(inlinetags[i], tag)) {
- cur->isinline = 1;
- break;
- }
+ }
+ for (i = 0; i < sizeof(blocktags) / sizeof(*blocktags); i++) {
+ if (!strcmp(blocktags[i], tag)) {
+ cur->isblock = 1;
+ break;
+ }
+ }
+ for (i = 0; i < sizeof(inlinetags) / sizeof(*inlinetags); i++) {
+ if (!strcmp(inlinetags[i], tag)) {
+ cur->isinline = 1;
+ break;
}
}
-
- if (!cur->isinline)
- printindent(curnode);
-
-/* printf("<%s", tag);*/
}
static void
@@ -199,15 +181,7 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
if (curnode)
curnode--;
- if (isshort) {
-/* printf("/>");*/
- return;
- }
cur = &nodes[curnode];
- if (!cur->isinline)
- printindent(curnode);
-/* printf("</%s>", tag);*/
-
if (cur->isblock)
fputs("\n", stdout);
@@ -238,22 +212,20 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
if (!strcmp(nodes[i].tag, "li"))
continue;
if (!strcmp(nodes[i].tag, "ul") ||
- !strcmp(nodes[i].tag, "ol")) {
+ !strcmp(nodes[i].tag, "ol"))
fputs(" ", stdout);
- }
}
+ /* TODO: for <ol>, keep list counter on ol element (parent),
+ support ordered number type only */
fputs("* ", stdout);
} else if (!strcmp(cur->tag, "hr")) {
for (i = 0; i < 36; i++)
putchar('-');
}
- if (isshort)
- return;
-
+ if (curnode >= MAX_DEPTH - 2)
+ errx(1, "max depth reached: %d\n", curnode);
curnode++;
-
-/* printf(">");*/
}
static void
@@ -272,7 +244,7 @@ xmlattr(XMLParser *p, const char *tag, size_t taglen, const char *name,
}
int
-main(int argc, char *argv[])
+main(void)
{
if (pledge("stdio", NULL) < 0)
err(1, "pledge");
@@ -281,9 +253,6 @@ main(int argc, char *argv[])
parser.xmlcdatastart = xmlcdatastart;
parser.xmlcdata = xmlcdata;
parser.xmlcdataend = xmlcdataend;
- parser.xmlcommentstart = xmlcommentstart;
- parser.xmlcomment = xmlcomment;
- parser.xmlcommentend = xmlcommentend;
parser.xmldatastart = xmldatastart;
parser.xmldata = xmldata;
parser.xmldataend = xmldataend;