add support for more tags and change the markup and display block-type of some - webdump - HTML to plain-text converter for webpages
(HTM) git clone git://git.codemadness.org/webdump
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 7e848a418c711f6857328b5489172a34d44587c8
(DIR) parent 91d236dab89449465eb123d756a450a17eb4195a
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Wed, 13 Sep 2023 20:35:17 +0200
add support for more tags and change the markup and display block-type of some
... also add initial types: Button, Select, SelectMulti and Option.
Diffstat:
M webdump.c | 53 ++++++++++++++++++++-----------
1 file changed, 35 insertions(+), 18 deletions(-)
---
(DIR) diff --git a/webdump.c b/webdump.c
@@ -70,18 +70,22 @@ enum DisplayType {
DisplayUnknown = 0,
DisplayInline = 1 << 0,
DisplayInlineBlock = 1 << 1, /* unused for now */
- DisplayInput = 1 << 2,
- DisplayBlock = 1 << 3,
- DisplayNone = 1 << 4,
- DisplayPre = 1 << 5,
- DisplayList = 1 << 6,
- DisplayListOrdered = 1 << 7,
- DisplayListItem = 1 << 8,
- DisplayTable = 1 << 9,
- DisplayTableRow = 1 << 10,
- DisplayTableCell = 1 << 11,
- DisplayHeader = 1 << 12,
- DisplayDl = 1 << 13
+ DisplayBlock = 1 << 2,
+ DisplayNone = 1 << 3,
+ DisplayPre = 1 << 4,
+ DisplayList = 1 << 5,
+ DisplayListOrdered = 1 << 6,
+ DisplayListItem = 1 << 7,
+ DisplayTable = 1 << 8,
+ DisplayTableRow = 1 << 9,
+ DisplayTableCell = 1 << 10,
+ DisplayHeader = 1 << 11,
+ DisplayDl = 1 << 12,
+ DisplayInput = 1 << 13,
+ DisplayButton = 1 << 14,
+ DisplaySelect = 1 << 15,
+ DisplaySelectMulti = 1 << 16,
+ DisplayOption = 1 << 17
};
/* ANSI markup */
@@ -210,6 +214,7 @@ static struct selectors *sel_hide, *sel_show;
/* tag displaytype markup parent v o b a i */
static struct tag tags[] = {
{ "a", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "address", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
{ "area", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "article", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
{ "aside", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
@@ -220,14 +225,18 @@ static struct tag tags[] = {
{ "blockquote", DisplayBlock, 0, 0, 0, 0, 0, 0, 2 },
{ "body", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
{ "br", 0, 0, 0, 1, 0, 0, 0, 0 },
-{ "code", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
+{ "button", DisplayInline | DisplayButton, 0, 0, 0, 0, 0, 0, 0 },
+{ "cite", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
{ "col", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "colgroup", DisplayInline, 0, 0, 0, 1, 0, 0, 0 },
+{ "datalist", DisplayNone, 0, 0, 0, 0, 0, 0, 0 },
{ "dd", DisplayBlock, 0, 0, 0, 1, 0, 0, 4 },
{ "del", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
{ "details", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "dfn", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
+{ "dir", DisplayList, 0, 0, 0, 0, 1, 1, 2 },
{ "div", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
-{ "dl", DisplayBlock|DisplayDl, 0, 0, 0, 0, 0, 0, 0 },
+{ "dl", DisplayBlock | DisplayDl, 0, 0, 0, 0, 0, 0, 0 },
{ "dt", DisplayBlock, MarkupBold, 0, 0, 1, 0, 0, 0 },
{ "em", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
{ "embed", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
@@ -249,20 +258,27 @@ static struct tag tags[] = {
{ "i", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
{ "img", DisplayInline, MarkupUnderline, 0, 1, 0, 0, 0, 0 },
{ "input", DisplayInput, 0, 0, 1, 0, 0, 0, 0 },
-{ "label", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 },
+{ "ins", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
+{ "label", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
{ "legend", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
{ "li", DisplayListItem, 0, DisplayList, 0, 1, 0, 0, 0 },
{ "link", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "main", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "mark", DisplayInline, MarkupReverse, 0, 0, 0, 0, 0, 0 },
+{ "menu", DisplayList, 0, 0, 0, 0, 1, 1, 2 },
{ "meta", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "nav", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "object", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
{ "ol", DisplayList | DisplayListOrdered, 0, 0, 0, 0, 1, 1, 0 },
-{ "option", DisplayNone, 0, 0, 0, 1, 0, 0, 0 }, /* prevent clutter and hide all options for now */
+{ "option", DisplayInline | DisplayOption, 0, 0, 0, 1, 0, 0, 0 },
{ "p", DisplayBlock, 0, 0, 0, 1, 1, 1, 0 },
{ "param", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "pre", DisplayPre, 0, 0, 0, 0, 1, 1, 4 },
{ "s", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
+{ "search", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
{ "script", DisplayNone, 0, 0, 0, 0, 0, 0, 0 },
+{ "section", DisplayBlock, 0, 0, 0, 0, 0, 0, 0 },
+{ "select", DisplayInline | DisplaySelect, 0, 0, 0, 0, 0, 0, 0 },
{ "source", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "strike", DisplayInline, MarkupStrike, 0, 0, 0, 0, 0, 0 },
{ "strong", DisplayInline, MarkupBold, 0, 0, 0, 0, 0, 0 },
@@ -276,14 +292,15 @@ static struct tag tags[] = {
{ "tfoot", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 },
{ "th", DisplayTableCell, MarkupBold, DisplayTableRow, 0, 1, 0, 0, 0 },
{ "thead", DisplayInline, 0, DisplayTable, 0, 1, 0, 0, 0 },
-{ "time", DisplayInline, 0, 0, 0, 0, 0, 0, 0 },
{ "title", DisplayBlock, 0, 0, 0, 0, 0, 1, -DEFAULT_INDENT },
{ "tr", DisplayTableRow, 0, DisplayTable, 0, 1, 0, 0, 0 },
{ "track", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
{ "u", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
{ "ul", DisplayList, 0, 0, 0, 0, 1, 1, 2 },
+{ "var", DisplayInline, MarkupItalic, 0, 0, 0, 0, 0, 0 },
{ "video", DisplayInline, MarkupUnderline, 0, 0, 0, 0, 0, 0 },
-{ "wbr", DisplayInline, 0, 0, 1, 0, 0, 0, 0 }
+{ "wbr", DisplayInline, 0, 0, 1, 0, 0, 0, 0 },
+{ "xmp", DisplayPre, 0, 0, 0, 0, 1, 1, 4 }
};
/* hint for compilers and static analyzers that a function exits */