tinitial support to ignore tags (script and CSS) - webdump - [FORK] git://git.codemadness.org/webdump
(HTM) git clone git://git.z3bra.org/webdump.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 421341e1a2b737cb269a144a1634511705161651
(DIR) parent 6dae546b7c15b859321849c8b7b7294e6d916adc
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 22 Jul 2017 16:02:56 +0200
initial support to ignore tags (script and CSS)
tthis is not fully working yet because scripts can contain literal characters
such as < and >.
Diffstat:
M main.c | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/main.c b/main.c
t@@ -17,6 +17,7 @@ static int isdatastart;
struct node {
char tag[256];
+ int isignore;
int ispre;
int isinline;
int isblock;
t@@ -28,6 +29,12 @@ static char src[4096]; /* src or href attribute */
static struct node nodes[MAX_DEPTH];
static int curnode;
+/* TODO: support literal text in script somehow? < > */
+static char *ignoretags[] = {
+ "style",
+ "script",
+};
+
static char *pretags[] = {
"pre",
"code",
t@@ -88,6 +95,8 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
const char *s = data;
cur = &nodes[curnode];
+ if (cur->isignore)
+ goto end;
/* TODO: if not <pre> or w/e, skip? */
if (isdatastart && isspace(*s)) {
t@@ -110,6 +119,7 @@ xmldata(XMLParser *p, const char *data, size_t datalen)
}
}
+end:
/* TODO: remove trailing space also ? */
isdatastart = 0;
}
t@@ -139,6 +149,12 @@ xmltagstart(XMLParser *p, const char *tag, size_t taglen)
src[0] = '\0'; /* src, href */
strlcpy(cur->tag, tag, sizeof(cur->tag));
+ for (i = 0; i < sizeof(ignoretags) / sizeof(*ignoretags); i++) {
+ if (!strcmp(ignoretags[i], tag)) {
+ cur->isignore = 1;
+ break;
+ }
+ }
for (i = 0; i < sizeof(pretags) / sizeof(*pretags); i++) {
if (!strcmp(pretags[i], tag)) {
cur->ispre = 1;
t@@ -168,6 +184,8 @@ xmltagend(XMLParser *p, const char *tag, size_t taglen, int isshort)
if (curnode)
curnode--;
cur = &nodes[curnode];
+ if (cur->isignore)
+ return;
#if 0
if (src[0])
t@@ -196,6 +214,9 @@ xmltagstartparsed(XMLParser *p, const char *tag, size_t taglen, int isshort)
int i;
cur = &nodes[curnode];
+ if (cur->isignore)
+ return;
+
if (cur->isblock)
fputs("\n", stdout);