when ignoring then ignore all attribute parsing as well - tscrape - twitter scraper
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 2dc167003132b6d9db8e779f26681c560c07a119
(DIR) parent 1ff56f1ce94cd62b0c16ee343917435c9048b8b8
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 25 Aug 2017 17:51:12 +0200
when ignoring then ignore all attribute parsing as well
Diffstat:
M tscrape.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
---
(DIR) diff --git a/tscrape.c b/tscrape.c
@@ -197,6 +197,9 @@ static void
xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
const char *v, size_t vl)
{
+ if (isignore)
+ return;
+
/* NOTE: assumes classname attribute is set before data-* in current tag */
if (!state && !strcmp(t, "div") && isclassmatch(classname, STRP("user-actions"))) {
if (!strcmp(a, "data-screen-name")) {
@@ -252,7 +255,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
char buf[16];
ssize_t len;
- if (!state)
+ if (!state || isignore)
return;
if ((len = html_entitytostr(v, buf, sizeof(buf))) > 0)
xmlattr(x, t, tl, a, al, buf, (size_t)len);