Further expand URLs in retweets - tscrape - twitter scraper
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit e2c4c24378d937edd6f9d717267d9f08b268df78
(DIR) parent d204e2373cc9f7e3f3afa3d4f2afb7976f67b4ae
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sat, 6 Jun 2020 01:11:57 +0200
Further expand URLs in retweets
Diffstat:
M tscrape.c | 25 ++++++++++++++++---------
1 file changed, 16 insertions(+), 9 deletions(-)
---
(DIR) diff --git a/tscrape.c b/tscrape.c
@@ -250,6 +250,12 @@ addurl(const char *url, const char *expanded_url)
{
struct url *u;
+ for (u = urls; u; u = u->next) {
+ if (!strncmp(url, u->url, u->url_len)) {
+ return;
+ }
+ }
+
if (!(u = calloc(1, sizeof(*u))))
err(1, "calloc");
strlcpy(u->url, url, sizeof(u->url));
@@ -389,25 +395,26 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
}
}
-// TODO: retweeted.status.entities.urls[]
-#if 0
- if (depth == 6 &&
+ if (depth == 7 &&
nodes[0].type == JSON_TYPE_ARRAY &&
nodes[1].type == JSON_TYPE_OBJECT &&
nodes[2].type == JSON_TYPE_OBJECT &&
nodes[3].type == JSON_TYPE_OBJECT &&
nodes[4].type == JSON_TYPE_ARRAY &&
- nodes[5].type == JSON_TYPE_STRING &&
+ nodes[5].type == JSON_TYPE_OBJECT &&
+ nodes[6].type == JSON_TYPE_STRING &&
!strcmp(nodes[2].name, "retweeted_status") &&
!strcmp(nodes[3].name, "entities") &&
!strcmp(nodes[4].name, "urls")) {
- if (!strcmp(nodes[5].name, "url")) {
- printf("DEBUG: url: %s\n", str);
- } else if (!strcmp(nodes[5].name, "expanded_url")) {
- printf("DEBUG: expanded_url: %s\n", str);
+ if (!strcmp(nodes[6].name, "url")) {
+// printf("DEBUG: url: %s\n", str);
+ strlcpy(url, str, sizeof(url));
+ } else if (!strcmp(nodes[6].name, "expanded_url")) {
+// printf("DEBUG: expanded_url: %s\n", str);
+ addurl(url, str);
+ url[0] = '\0';
}
}
-#endif
}
int