improve link references, add option to show full URL inline - webdump - HTML to plain-text converter for webpages
(HTM) git clone git://git.codemadness.org/webdump
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 6365a78f6c050106e64b281d29d8ef550f131bf1
(DIR) parent 56ec7ea6c49d79cc3aaf301d2e6040e15d17785a
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 8 Sep 2023 11:25:13 +0200
improve link references, add option to show full URL inline
- fix URL references not being visible when only the -l option is specified
(without -i). Now each option can be specified separately.
- add -I option to show full URL option inline.
Diffstat:
M webdump.1 | 5 ++++-
M webdump.c | 90 +++++++++++++++++--------------
2 files changed, 53 insertions(+), 42 deletions(-)
---
(DIR) diff --git a/webdump.1 b/webdump.1
@@ -6,7 +6,7 @@
.Nd convert HTML to plain-text
.Sh SYNOPSIS
.Nm
-.Op Fl 8ailrx
+.Op Fl 8aiIlrx
.Op Fl b Ar baseurl
.Op Fl s Ar selector
.Op Fl u Ar selector
@@ -30,6 +30,9 @@ This is used to make links absolute.
.It Fl i
Toggle if link reference numbers are displayed inline or not, by default it is
not enabled.
+.It Fl I
+Toggle if URLs for link reference are displayed inline or not, by default it is
+not enabled.
.It Fl l
Toggle if link references are displayed at the bottom or not, by default it is
not enabled.
(DIR) diff --git a/webdump.c b/webdump.c
@@ -47,6 +47,7 @@ struct uri {
static int allowansi = 0; /* allow ANSI escape codes */
static int showrefbottom = 0; /* show link references at the bottom */
static int showrefinline = 0; /* show link reference number inline */
+static int showurlinline = 0; /* show full link reference inline */
static int linewrap = 0; /* line-wrapping */
static int termwidth = 77; /* terminal width */
static int resources = 0; /* write resources line-by-line to fd 3? */
@@ -1319,46 +1320,49 @@ handleinlinelink(void)
char buf[4096], *url;
int b, r;
- /* show links as reference at the bottom */
- if ((showrefbottom || resources) && (attr_src.len || attr_href.len)) {
- /* by default use the original URL */
- if (attr_src.len)
- url = attr_src.data;
- else
- url = attr_href.data;
-
- b = -1;
- if (uri_hasscheme(url))
- ; /* already absolute: nothing to do */
- else if (basehref[0]) /* prefer -b option over <base> */
- b = uri_parse(basehref, &base);
- else if (basehrefdoc[0])
- b = uri_parse(basehrefdoc, &base);
-
- if (b != -1 &&
- uri_parse(url, &olduri) != -1 &&
- uri_makeabs(&newuri, &olduri, &base) != -1 &&
- newuri.proto[0]) {
- r = uri_format(buf, sizeof(buf), &newuri);
- if (r >= 0 && (size_t)r < sizeof(buf))
- url = buf;
- }
+ if (!showrefbottom && !showrefinline && !showurlinline && !resources)
+ return; /* there is no need to collect the reference */
- if (!url[0])
- return;
+ if (!attr_src.len && !attr_href.len)
+ return; /* there is no reference */
- cur = &nodes[curnode];
+ /* by default use the original URL */
+ if (attr_src.len)
+ url = attr_src.data;
+ else
+ url = attr_href.data;
+
+ b = -1;
+ if (uri_hasscheme(url))
+ ; /* already absolute: nothing to do */
+ else if (basehref[0]) /* prefer -b option over <base> */
+ b = uri_parse(basehref, &base);
+ else if (basehrefdoc[0])
+ b = uri_parse(basehrefdoc, &base);
+
+ if (b != -1 &&
+ uri_parse(url, &olduri) != -1 &&
+ uri_makeabs(&newuri, &olduri, &base) != -1 &&
+ newuri.proto[0]) {
+ r = uri_format(buf, sizeof(buf), &newuri);
+ if (r >= 0 && (size_t)r < sizeof(buf))
+ url = buf;
+ }
- if (showrefinline && !(cur->tag.displaytype & DisplayNone)) {
- string_clear(&nodes_links[curnode]);
- string_append(&nodes_links[curnode], url, strlen(url));
- }
+ if (!url[0])
+ return;
+
+ cur = &nodes[curnode];
- /* add hidden links directly to the reference,
- the order doesn't matter */
- if (cur->tag.displaytype & DisplayNone)
- addlinkref(url, cur->tag.name, 1);
+ if (!(cur->tag.displaytype & DisplayNone)) {
+ string_clear(&nodes_links[curnode]);
+ string_append(&nodes_links[curnode], url, strlen(url));
}
+
+ /* add hidden links directly to the reference,
+ the order doesn't matter */
+ if (cur->tag.displaytype & DisplayNone)
+ addlinkref(url, cur->tag.name, 1);
}
void
@@ -1574,11 +1578,12 @@ endnode(struct node *cur)
/* add link and show the link number in the visible order */
if (!ishidden && nodes_links[curnode].len > 0) {
addlinkref(nodes_links[curnode].data, cur->tag.name, ishidden);
-#if 1
- hprintf("[%zu]", ++linkcount);
-#else
- hprintf("[%s: %s]", cur->tag.name, nodes_links[curnode].data);
-#endif
+ if (showrefinline)
+ hprintf("[%zu]", ++linkcount);
+ if (showurlinline)
+ hprintf(" [%s: %s]",
+ !tagcmp(cur->tag.name, "a") ? "link" : cur->tag.name,
+ nodes_links[curnode].data);
}
handleendtag(&(cur->tag));
@@ -2014,7 +2019,7 @@ xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n,
void
usage(void)
{
- fprintf(stderr, "%s [-8ailrx] [-b basehref] [-s selector] [-u selector] [-w termwidth]\n", argv0);
+ fprintf(stderr, "%s [-8aiIlrx] [-b basehref] [-s selector] [-u selector] [-w termwidth]\n", argv0);
exit(1);
}
@@ -2038,6 +2043,9 @@ main(int argc, char **argv)
case 'i':
showrefinline = !showrefinline;
break;
+ case 'I':
+ showurlinline = !showurlinline;
+ break;
case 'l':
showrefbottom = !showrefbottom;
break;