From 04519259fd49cf54e392ca1df6db821d33b29327 Mon Sep 17 00:00:00 2001
From: Leonardo Taccari
Date: Sat, 6 Jun 2020 01:11:57 +0200
Subject: [PATCH 1/4] Further expand URLs in retweets

Enable the previously disabled handling of
retweeted_status.entities.urls[]: remember each "url" string and register
it with addurl() when the "expanded_url" string that follows it is seen.

Make addurl() skip URLs that are already in the list. The check uses
strcmp() so that only an identical URL counts as a duplicate; a prefix
comparison bounded by the stored length would also drop any new URL that
merely starts with a stored one, and would match every URL if a stored
URL were empty.
---
 tscrape.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tscrape.c b/tscrape.c
index 31d7863..4cd45aa 100644
--- a/tscrape.c
+++ b/tscrape.c
@@ -250,6 +250,12 @@ addurl(const char *url, const char *expanded_url)
 {
 	struct url *u;
 
+	for (u = urls; u; u = u->next) {
+		if (!strcmp(url, u->url)) {
+			return;
+		}
+	}
+
 	if (!(u = calloc(1, sizeof(*u))))
 		err(1, "calloc");
 	strlcpy(u->url, url, sizeof(u->url));
@@ -389,25 +395,26 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
 		}
 	}
 
-// TODO: retweeted.status.entities.urls[]
-#if 0
-	if (depth == 6 &&
+	if (depth == 7 &&
 	    nodes[0].type == JSON_TYPE_ARRAY &&
 	    nodes[1].type == JSON_TYPE_OBJECT &&
 	    nodes[2].type == JSON_TYPE_OBJECT &&
 	    nodes[3].type == JSON_TYPE_OBJECT &&
 	    nodes[4].type == JSON_TYPE_ARRAY &&
-	    nodes[5].type == JSON_TYPE_STRING &&
+	    nodes[5].type == JSON_TYPE_OBJECT &&
+	    nodes[6].type == JSON_TYPE_STRING &&
 	    !strcmp(nodes[2].name, "retweeted_status") &&
 	    !strcmp(nodes[3].name, "entities") &&
 	    !strcmp(nodes[4].name, "urls")) {
-		if (!strcmp(nodes[5].name, "url")) {
-			printf("DEBUG: url: %s\n", str);
-		} else if (!strcmp(nodes[5].name, "expanded_url")) {
-			printf("DEBUG: expanded_url: %s\n", str);
+		if (!strcmp(nodes[6].name, "url")) {
+//			printf("DEBUG: url: %s\n", str);
+			strlcpy(url, str, sizeof(url));
+		} else if (!strcmp(nodes[6].name, "expanded_url")) {
+//			printf("DEBUG: expanded_url: %s\n", str);
+			addurl(url, str);
+			url[0] = '\0';
 		}
 	}
-#endif
 }
 
 int
-- 
2.24.1