Expand extended_entities URLs in retweets - tscrape - twitter scraper
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 9cc1b7e985affd8764385d3d6cf6476804230cdd
(DIR) parent d270b9dc10bc3d05b096f2dd34256dc9b962b951
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sat, 6 Jun 2020 01:50:22 +0200
Expand extended_entities URLs in retweets
Diffstat:
M tscrape.c | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/tscrape.c b/tscrape.c
@@ -415,6 +415,29 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
url[0] = '\0';
}
}
+
+ /* [].retweeted_status.extended_entities.media[].url */
+ if (depth == 7 &&
+ nodes[0].type == JSON_TYPE_ARRAY &&
+ nodes[1].type == JSON_TYPE_OBJECT &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_OBJECT &&
+ nodes[4].type == JSON_TYPE_ARRAY &&
+ nodes[5].type == JSON_TYPE_OBJECT &&
+ nodes[6].type == JSON_TYPE_STRING &&
+ !strcmp(nodes[2].name, "retweeted_status") &&
+ !strcmp(nodes[3].name, "extended_entities") &&
+ !strcmp(nodes[4].name, "media")) {
+ if (!strcmp(nodes[6].name, "url")) {
+// printf("DEBUG: url: %s\n", str);
+ strlcpy(url, str, sizeof(url));
+ } else if (!strcmp(nodes[6].name, "expanded_url")) {
+// printf("DEBUG: expanded_url: %s\n", str);
+ /* assumes "expanded_url" is specified after "url" */
+ addurl(url, str);
+ url[0] = '\0';
+ }
+ }
}
int