Expand all photos URLs - tscrape - twitter scraper
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 86910f72369e655ce2db017e64646def543627b9
(DIR) parent 9cc1b7e985affd8764385d3d6cf6476804230cdd
(HTM) Author: Leonardo Taccari <iamleot@gmail.com>
Date: Sat, 6 Jun 2020 02:23:37 +0200
Expand all photos URLs
Diffstat:
M tscrape.c | 36 +++++++++++++++++++++++--------
1 file changed, 27 insertions(+), 9 deletions(-)
---
(DIR) diff --git a/tscrape.c b/tscrape.c
@@ -41,7 +41,7 @@ struct url {
static struct tweet *tweets, *tc;
static struct url *urls, *uc;
-static char url[256];
+static char expanded_url[1024], media_url[1024], url[256];
#define MAX_PINNED 5
static char pinnedids[MAX_PINNED][64];
@@ -384,14 +384,23 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
nodes[5].type == JSON_TYPE_STRING &&
!strcmp(nodes[2].name, "extended_entities") &&
!strcmp(nodes[3].name, "media")) {
- if (!strcmp(nodes[5].name, "url")) {
+ if (!strcmp(nodes[5].name, "media_url_https")) {
+// printf("DEBUG: media_url_https: %s\n", str);
+ strlcpy(media_url, str, sizeof(media_url));
+ } else if (!strcmp(nodes[5].name, "url")) {
// printf("DEBUG: url: %s\n", str);
strlcpy(url, str, sizeof(url));
} else if (!strcmp(nodes[5].name, "expanded_url")) {
// printf("DEBUG: expanded_url: %s\n", str);
- /* assumes "expanded_url" is specified after "url" */
- addurl(url, str);
- url[0] = '\0';
+ strlcpy(expanded_url, str, sizeof(expanded_url));
+ } else if (!strcmp(nodes[5].name, "type")) {
+// printf("DEBUG: type: %s\n", str);
+ if (!strcmp(str, "photo")) {
+ addurl(url, media_url);
+ } else {
+ addurl(url, expanded_url);
+ }
+ media_url[0] = url[0] = expanded_url[0] = '\0';
}
}
@@ -428,14 +437,23 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
!strcmp(nodes[2].name, "retweeted_status") &&
!strcmp(nodes[3].name, "extended_entities") &&
!strcmp(nodes[4].name, "media")) {
- if (!strcmp(nodes[6].name, "url")) {
+ if (!strcmp(nodes[6].name, "media_url_https")) {
+// printf("DEBUG: media_url_https: %s\n", str);
+ strlcpy(media_url, str, sizeof(media_url));
+ } else if (!strcmp(nodes[6].name, "url")) {
// printf("DEBUG: url: %s\n", str);
strlcpy(url, str, sizeof(url));
} else if (!strcmp(nodes[6].name, "expanded_url")) {
// printf("DEBUG: expanded_url: %s\n", str);
- /* assumes "expanded_url" is specified after "url" */
- addurl(url, str);
- url[0] = '\0';
+ strlcpy(expanded_url, str, sizeof(expanded_url));
+ } else if (!strcmp(nodes[6].name, "type")) {
+// printf("DEBUG: type: %s\n", str);
+ if (!strcmp(str, "photo")) {
+ addurl(url, media_url);
+ } else {
+ addurl(url, expanded_url);
+ }
+ media_url[0] = url[0] = expanded_url[0] = '\0';
}
}
}