Expand all photo URLs - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 86910f72369e655ce2db017e64646def543627b9
 (DIR) parent 9cc1b7e985affd8764385d3d6cf6476804230cdd
 (HTM) Author: Leonardo Taccari <iamleot@gmail.com>
       Date:   Sat,  6 Jun 2020 02:23:37 +0200
       
       Expand all photo URLs
       
       Diffstat:
         M tscrape.c                           |      36 +++++++++++++++++++++++--------
       
       1 file changed, 27 insertions(+), 9 deletions(-)
       ---
 (DIR) diff --git a/tscrape.c b/tscrape.c
       @@ -41,7 +41,7 @@ struct url {
        
        static struct tweet *tweets, *tc;
        static struct url *urls, *uc;
       -static char url[256];
       +static char expanded_url[1024], media_url[1024], url[256];
        
        #define MAX_PINNED 5
        static char pinnedids[MAX_PINNED][64];
       @@ -384,14 +384,23 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
                    nodes[5].type == JSON_TYPE_STRING &&
                    !strcmp(nodes[2].name, "extended_entities") &&
                    !strcmp(nodes[3].name, "media")) {
       -                if (!strcmp(nodes[5].name, "url")) {
       +                if (!strcmp(nodes[5].name, "media_url_https")) {
       +//                        printf("DEBUG: media_url_https: %s\n", str);
       +                        strlcpy(media_url, str, sizeof(media_url));
       +                } else if (!strcmp(nodes[5].name, "url")) {
        //                        printf("DEBUG: url: %s\n", str);
                                strlcpy(url, str, sizeof(url));
                        } else if (!strcmp(nodes[5].name, "expanded_url")) {
        //                        printf("DEBUG: expanded_url: %s\n", str);
       -                        /* assumes "expanded_url" is specified after "url" */
       -                        addurl(url, str);
       -                        url[0] = '\0';
       +                        strlcpy(expanded_url, str, sizeof(expanded_url));
       +                } else if (!strcmp(nodes[5].name, "type")) {
       +//                        printf("DEBUG: type: %s\n", str);
       +                        if (!strcmp(str, "photo")) {
       +                                addurl(url, media_url);
       +                        } else {
       +                                addurl(url, expanded_url);
       +                        }
       +                        media_url[0] = url[0] = expanded_url[0] = '\0';
                        }
                }
        
       @@ -428,14 +437,23 @@ processnodes(struct json_node *nodes, size_t depth, const char *str)
                    !strcmp(nodes[2].name, "retweeted_status") &&
                    !strcmp(nodes[3].name, "extended_entities") &&
                    !strcmp(nodes[4].name, "media")) {
       -                if (!strcmp(nodes[6].name, "url")) {
       +                if (!strcmp(nodes[6].name, "media_url_https")) {
       +//                        printf("DEBUG: media_url_https: %s\n", str);
       +                        strlcpy(media_url, str, sizeof(media_url));
       +                } else if (!strcmp(nodes[6].name, "url")) {
        //                        printf("DEBUG: url: %s\n", str);
                                strlcpy(url, str, sizeof(url));
                        } else if (!strcmp(nodes[6].name, "expanded_url")) {
        //                        printf("DEBUG: expanded_url: %s\n", str);
       -                        /* assumes "expanded_url" is specified after "url" */
       -                        addurl(url, str);
       -                        url[0] = '\0';
       +                        strlcpy(expanded_url, str, sizeof(expanded_url));
       +                } else if (!strcmp(nodes[6].name, "type")) {
       +//                        printf("DEBUG: type: %s\n", str);
       +                        if (!strcmp(str, "photo")) {
       +                                addurl(url, media_url);
       +                        } else {
       +                                addurl(url, expanded_url);
       +                        }
       +                        media_url[0] = url[0] = expanded_url[0] = '\0';
                        }
                }
        }