youtube.c - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       youtube.c (16065B)
       ---
            1 #include <sys/socket.h>
            2 #include <sys/types.h>
            3 
            4 #include <ctype.h>
            5 #include <errno.h>
            6 #include <netdb.h>
            7 #include <stdarg.h>
            8 #include <stdio.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 #include <unistd.h>
           12 
           13 #include "https.h"
           14 #include "json.h"
           15 #include "util.h"
           16 #include "youtube.h"
           17 
           18 static long long
           19 getnum(const char *s)
           20 {
           21         long long l;
           22 
           23         l = strtoll(s, 0, 10);
           24         if (l < 0)
           25                 l = 0;
           26         return l;
           27 }
           28 
           29 static char *
           30 youtube_request(const char *path)
           31 {
           32         return request("www.youtube.com", path, "");
           33 }
           34 
           35 static char *
           36 request_video(const char *videoid)
           37 {
           38         char path[2048];
           39         int r;
           40 
           41         r = snprintf(path, sizeof(path), "/watch?v=%s", videoid);
           42         /* check if request is too long (truncation) */
           43         if (r < 0 || (size_t)r >= sizeof(path))
           44                 return NULL;
           45 
           46         return youtube_request(path);
           47 }
           48 
           49 static char *
           50 request_channel_videos(const char *channelid)
           51 {
           52         char path[2048];
           53         int r;
           54 
           55         r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);
           56         /* check if request is too long (truncation) */
           57         if (r < 0 || (size_t)r >= sizeof(path))
           58                 return NULL;
           59 
           60         return youtube_request(path);
           61 }
           62 
           63 static char *
           64 request_user_videos(const char *user)
           65 {
           66         char path[2048];
           67         int r;
           68 
           69         r = snprintf(path, sizeof(path), "/user/%s/videos", user);
           70         /* check if request is too long (truncation) */
           71         if (r < 0 || (size_t)r >= sizeof(path))
           72                 return NULL;
           73 
           74         return youtube_request(path);
           75 }
           76 
           77 static char *
           78 request_search(const char *s, const char *page, const char *order)
           79 {
           80         char path[4096];
           81 
           82         snprintf(path, sizeof(path), "/results?search_query=%s", s);
           83 
           84         /* NOTE: pagination doesn't work at the moment:
           85            this parameter is not supported anymore by Youtube */
           86         if (page[0]) {
           87                 strlcat(path, "&page=", sizeof(path));
           88                 strlcat(path, page, sizeof(path));
           89         }
           90 
           91         if (order[0] && strcmp(order, "relevance")) {
           92                 strlcat(path, "&sp=", sizeof(path));
           93                 if (!strcmp(order, "date"))
           94                         strlcat(path, "CAI%3D", sizeof(path));
           95                 else if (!strcmp(order, "views"))
           96                         strlcat(path, "CAM%3D", sizeof(path));
           97                 else if (!strcmp(order, "rating"))
           98                         strlcat(path, "CAE%3D", sizeof(path));
           99         }
          100 
          101         /* check if request is too long (truncation) */
          102         if (strlen(path) >= sizeof(path) - 1)
          103                 return NULL;
          104 
          105         return youtube_request(path);
          106 }
          107 
          108 static int
          109 extractjson_search(const char *s, const char **start, const char **end)
          110 {
          111         *start = strstr(s, "window[\"ytInitialData\"] = ");
          112         if (*start) {
          113                 (*start) += sizeof("window[\"ytInitialData\"] = ") - 1;
          114         } else {
          115                 *start = strstr(s, "var ytInitialData = ");
          116                 if (*start)
          117                         (*start) += sizeof("var ytInitialData = ") - 1;
          118         }
          119         if (!*start)
          120                 return -1;
          121         *end = strstr(*start, "};\n");
          122         if (!*end)
          123                 *end = strstr(*start, "}; \n");
          124         if (!*end)
          125                 *end = strstr(*start, "};<");
          126         if (!*end)
          127                 return -1;
          128         (*end)++;
          129 
          130         return 0;
          131 }
          132 
          133 static int
          134 extractjson_video(const char *s, const char **start, const char **end)
          135 {
          136         *start = strstr(s, "var ytInitialPlayerResponse = ");
          137         if (!*start)
          138                 return -1;
          139         (*start) += sizeof("var ytInitialPlayerResponse = ") - 1;
          140         *end = strstr(*start, "};<");
          141         if (!*end)
          142                 return -1;
          143         (*end)++;
          144 
          145         return 0;
          146 }
          147 
          148 static int
          149 isrenderername(const char *name)
          150 {
          151         return !strcmp(name, "videoRenderer");
          152 }
          153 
          154 static void
          155 processnode_search(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
          156         void *pp)
          157 {
          158         struct search_response *r = (struct search_response *)pp;
          159         static struct item *item;
          160 
          161         if (r->nitems > MAX_VIDEOS)
          162                 return;
          163 
          164         /* new item, structures can be very deep, just check the end for:
          165            (items|contents)[].videoRenderer objects */
          166         if (depth >= 3 &&
          167             nodes[depth - 1].type == JSON_TYPE_OBJECT &&
          168             isrenderername(nodes[depth - 1].name)) {
          169                 r->nitems++;
          170                 return;
          171         }
          172 
          173         if (r->nitems == 0)
          174                 return;
          175         item = &(r->items[r->nitems - 1]);
          176 
          177         if (depth >= 4 &&
          178             nodes[depth - 1].type == JSON_TYPE_STRING &&
          179             isrenderername(nodes[depth - 2].name) &&
          180             !strcmp(nodes[depth - 1].name, "videoId")) {
          181                 strlcpy(item->id, value, sizeof(item->id));
          182         }
          183 
          184         if (depth >= 7 &&
          185             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          186             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          187             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          188             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          189             nodes[depth - 1].type == JSON_TYPE_STRING &&
          190             isrenderername(nodes[depth - 5].name) &&
          191             !strcmp(nodes[depth - 4].name, "title") &&
          192             !strcmp(nodes[depth - 3].name, "runs") &&
          193             !strcmp(nodes[depth - 1].name, "text") &&
          194                 !item->title[0]) {
          195                 strlcpy(item->title, value, sizeof(item->title));
          196         }
          197 
          198         /* in search listing there is a short description, string items are appended */
          199         if (depth >= 8 &&
          200             nodes[depth - 7].type == JSON_TYPE_OBJECT &&
          201             nodes[depth - 6].type == JSON_TYPE_ARRAY &&
          202             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          203             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          204             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          205             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          206             nodes[depth - 1].type == JSON_TYPE_STRING &&
          207             isrenderername(nodes[depth - 7].name) &&
          208             !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") &&
          209             !strcmp(nodes[depth - 4].name, "snippetText") &&
          210             !strcmp(nodes[depth - 3].name, "runs") &&
          211             !strcmp(nodes[depth - 1].name, "text")) {
          212                 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
          213         }
          214 
          215         /* in channel/user videos listing there is a short description, string items are appended */
          216         if (depth >= 7 &&
          217             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          218             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          219             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          220             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          221             nodes[depth - 1].type == JSON_TYPE_STRING &&
          222             isrenderername(nodes[depth - 5].name) &&
          223             !strcmp(nodes[depth - 4].name, "descriptionSnippet") &&
          224             !strcmp(nodes[depth - 3].name, "runs") &&
          225             !strcmp(nodes[depth - 1].name, "text")) {
          226                 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
          227         }
          228 
          229         /* try to detect members/sponsor/subscription-only videos */
          230         if (depth >= 7 &&
          231             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          232             nodes[depth - 4].type == JSON_TYPE_ARRAY &&
          233             nodes[depth - 3].type == JSON_TYPE_OBJECT &&
          234             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          235             nodes[depth - 1].type == JSON_TYPE_STRING &&
          236             isrenderername(nodes[depth - 5].name) &&
          237             !strcmp(nodes[depth - 4].name, "badges") &&
          238             !strcmp(nodes[depth - 2].name, "metadataBadgeRenderer") &&
          239             !strcmp(nodes[depth - 1].name, "label")) {
          240                 if (strstr(value, "Members only"))
          241                         item->membersonly = 1;
          242         }
          243 
          244         if (depth >= 5 &&
          245             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          246             nodes[depth - 3].type == JSON_TYPE_OBJECT &&
          247             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          248             nodes[depth - 1].type == JSON_TYPE_STRING &&
          249             isrenderername(nodes[depth - 3].name) &&
          250             !strcmp(nodes[depth - 1].name, "simpleText")) {
          251                 if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
          252                     !item->viewcount[0]) {
          253                         strlcpy(item->viewcount, value, sizeof(item->viewcount));
          254                 } else if (!strcmp(nodes[depth - 2].name, "lengthText") &&
          255                     !item->duration[0]) {
          256                         strlcpy(item->duration, value, sizeof(item->duration));
          257                 } else if (!strcmp(nodes[depth - 2].name, "publishedTimeText") &&
          258                     !item->publishedat[0]) {
          259                         strlcpy(item->publishedat, value, sizeof(item->publishedat));
          260                 }
          261         }
          262 
          263         if (depth >= 9 &&
          264             nodes[depth - 8].type == JSON_TYPE_OBJECT &&
          265             nodes[depth - 7].type == JSON_TYPE_OBJECT &&
          266             nodes[depth - 6].type == JSON_TYPE_OBJECT &&
          267             nodes[depth - 5].type == JSON_TYPE_ARRAY &&
          268             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          269             nodes[depth - 3].type == JSON_TYPE_OBJECT &&
          270             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          271             nodes[depth - 1].type == JSON_TYPE_STRING &&
          272             isrenderername(nodes[depth - 7].name) &&
          273             !strcmp(nodes[depth - 6].name, "longBylineText") &&
          274             !strcmp(nodes[depth - 5].name, "runs") &&
          275             !strcmp(nodes[depth - 3].name, "navigationEndpoint") &&
          276             !strcmp(nodes[depth - 2].name, "browseEndpoint")) {
          277                 if (!strcmp(nodes[depth - 1].name, "browseId")) {
          278                         strlcpy(item->channelid, value, sizeof(item->channelid));
          279                 }
          280         }
          281 
          282         if (depth >= 7 &&
          283             nodes[depth - 6].type == JSON_TYPE_OBJECT &&
          284             nodes[depth - 5].type == JSON_TYPE_OBJECT &&
          285             nodes[depth - 4].type == JSON_TYPE_OBJECT &&
          286             nodes[depth - 3].type == JSON_TYPE_ARRAY &&
          287             nodes[depth - 2].type == JSON_TYPE_OBJECT &&
          288             nodes[depth - 1].type == JSON_TYPE_STRING &&
          289             isrenderername(nodes[depth - 5].name) &&
          290             !strcmp(nodes[depth - 4].name, "longBylineText") &&
          291             !strcmp(nodes[depth - 3].name, "runs")) {
          292                 if (!strcmp(nodes[depth - 1].name, "text") &&
          293                     !item->channeltitle[0]) {
          294                         strlcpy(item->channeltitle, value, sizeof(item->channeltitle));
          295                 }
          296         }
          297 }
          298 
          299 static struct search_response *
          300 parse_search_response(const char *data)
          301 {
          302         struct search_response *r;
          303         struct item *item;
          304         const char *s, *start, *end;
          305         size_t i, len;
          306         int ret;
          307 
          308         if (!(s = strstr(data, "\r\n\r\n")))
          309                 return NULL; /* invalid response */
          310         /* skip header */
          311         s += strlen("\r\n\r\n");
          312 
          313         if (!(r = calloc(1, sizeof(*r))))
          314                 return NULL;
          315 
          316         if (extractjson_search(s, &start, &end) == -1) {
          317                 free(r);
          318                 return NULL;
          319         }
          320 
          321         ret = parsejson(start, end - start, processnode_search, r);
          322         if (ret < 0) {
          323                 free(r);
          324                 return NULL;
          325         }
          326 
          327         /* workaround: sometimes playlists or topics are listed as channels, filter
          328            these topic/playlist links away because they won't work for channel videos. The
          329            JSON response would have to be parsed in a different way than channels. */
          330         for (i = 0; i < r->nitems; i++) {
          331                 item = &(r->items[i]);
          332                 len = strlen(item->channeltitle);
          333 
          334                 if (len > sizeof(" - Topic") &&
          335                     !strcmp(item->channeltitle + len - sizeof(" - Topic") + 1, " - Topic")) {
          336                         /* reset information that doesn't work for topics */
          337                         item->channelid[0] = '\0';
          338                         item->viewcount[0] = '\0';
          339                 }
          340         }
          341 
          342         return r;
          343 }
          344 
          345 static void
          346 processnode_video(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
          347         void *pp)
          348 {
          349         struct video_response *r = (struct video_response *)pp;
          350         struct video_format *f;
          351 
          352         if (depth > 1) {
          353                 /* playability status: could be unplayable / members-only video */
          354                 if (nodes[0].type == JSON_TYPE_OBJECT &&
          355                     !strcmp(nodes[1].name, "playabilityStatus")) { /* example: "UNPLAYABLE" */
          356                         if (depth == 3 &&
          357                             nodes[2].type == JSON_TYPE_STRING &&
          358                             !strcmp(nodes[2].name, "status")) {
          359                                 strlcpy(r->playabilitystatus, value, sizeof(r->playabilitystatus));
          360                         }
          361                         if (depth == 3 &&
          362                             nodes[2].type == JSON_TYPE_STRING &&
          363                             !strcmp(nodes[2].name, "reason")) {
          364                                 strlcpy(r->playabilityreason, value, sizeof(r->playabilityreason));
          365                         }
          366                 }
          367 
          368                 if (nodes[0].type == JSON_TYPE_OBJECT &&
          369                     !strcmp(nodes[1].name, "streamingData")) {
          370                         if (depth == 2 &&
          371                             nodes[2].type == JSON_TYPE_STRING &&
          372                             !strcmp(nodes[2].name, "expiresInSeconds")) {
          373                                 r->expiresinseconds = getnum(value);
          374                         }
          375 
          376                         if (depth >= 3 &&
          377                             nodes[2].type == JSON_TYPE_ARRAY &&
          378                             (!strcmp(nodes[2].name, "formats") ||
          379                             !strcmp(nodes[2].name, "adaptiveFormats"))) {
          380                                 if (r->nformats > MAX_FORMATS)
          381                                         return; /* ignore: don't add too many formats */
          382 
          383                                 if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT)
          384                                         r->nformats++;
          385 
          386                                 if (r->nformats == 0)
          387                                         return;
          388                                 f = &(r->formats[r->nformats - 1]); /* current video format item */
          389 
          390                                 if (depth == 5 &&
          391                                     nodes[2].type == JSON_TYPE_ARRAY &&
          392                                     nodes[3].type == JSON_TYPE_OBJECT &&
          393                                     (nodes[4].type == JSON_TYPE_STRING ||
          394                                     nodes[4].type == JSON_TYPE_NUMBER ||
          395                                     nodes[4].type == JSON_TYPE_BOOL)) {
          396                                         if (!strcmp(nodes[4].name, "width")) {
          397                                                 f->width = getnum(value);
          398                                         } else if (!strcmp(nodes[4].name, "height")) {
          399                                                 f->height = getnum(value);
          400                                         } else if (!strcmp(nodes[4].name, "url")) {
          401                                                 strlcpy(f->url, value, sizeof(f->url));
          402                                         } else if (!strcmp(nodes[4].name, "signatureCipher")) {
          403                                                 strlcpy(f->signaturecipher, value, sizeof(f->signaturecipher));
          404                                         } else if (!strcmp(nodes[4].name, "qualityLabel")) {
          405                                                 strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel));
          406                                         } else if (!strcmp(nodes[4].name, "quality")) {
          407                                                 strlcpy(f->quality, value, sizeof(f->quality));
          408                                         } else if (!strcmp(nodes[4].name, "fps")) {
          409                                                 f->fps = getnum(value);
          410                                         } else if (!strcmp(nodes[4].name, "bitrate")) {
          411                                                 f->bitrate = getnum(value);
          412                                         } else if (!strcmp(nodes[4].name, "averageBitrate")) {
          413                                                 f->averagebitrate = getnum(value);
          414                                         } else if (!strcmp(nodes[4].name, "mimeType")) {
          415                                                 strlcpy(f->mimetype, value, sizeof(f->mimetype));
          416                                         } else if (!strcmp(nodes[4].name, "itag")) {
          417                                                 f->itag = getnum(value);
          418                                         } else if (!strcmp(nodes[4].name, "contentLength")) {
          419                                                 f->contentlength = getnum(value);
          420                                         } else if (!strcmp(nodes[4].name, "lastModified")) {
          421                                                 f->lastmodified = getnum(value);
          422                                         } else if (!strcmp(nodes[4].name, "audioChannels")) {
          423                                                 f->audiochannels = getnum(value);
          424                                         } else if (!strcmp(nodes[4].name, "audioSampleRate")) {
          425                                                 f->audiosamplerate = getnum(value);
          426                                         }
          427                                 }
          428                         }
          429                 }
          430         }
          431 
          432         if (depth == 4 &&
          433             nodes[0].type == JSON_TYPE_OBJECT &&
          434             nodes[1].type == JSON_TYPE_OBJECT &&
          435             nodes[2].type == JSON_TYPE_OBJECT &&
          436             nodes[3].type == JSON_TYPE_STRING &&
          437             !strcmp(nodes[1].name, "microformat") &&
          438             !strcmp(nodes[2].name, "playerMicroformatRenderer")) {
          439                 r->isfound = 1;
          440 
          441                 if (!strcmp(nodes[3].name, "publishDate")) {
          442                         strlcpy(r->publishdate, value, sizeof(r->publishdate));
          443                 } else if (!strcmp(nodes[3].name, "uploadDate")) {
          444                         strlcpy(r->uploaddate, value, sizeof(r->uploaddate));
          445                 } else if (!strcmp(nodes[3].name, "category")) {
          446                         strlcpy(r->category, value, sizeof(r->category));
          447                 } else if (!strcmp(nodes[3].name, "isFamilySafe")) {
          448                         r->isfamilysafe = !strcmp(value, "true");
          449                 } else if (!strcmp(nodes[3].name, "isUnlisted")) {
          450                         r->isunlisted = !strcmp(value, "true");
          451                 }
          452         }
          453 
          454         if (depth == 3) {
          455                 if (nodes[0].type == JSON_TYPE_OBJECT &&
          456                     nodes[2].type == JSON_TYPE_STRING &&
          457                     !strcmp(nodes[1].name, "videoDetails")) {
          458                         r->isfound = 1;
          459 
          460                         if (!strcmp(nodes[2].name, "title")) {
          461                                 strlcpy(r->title, value, sizeof(r->title));
          462                         } else if (!strcmp(nodes[2].name, "videoId")) {
          463                                 strlcpy(r->id, value, sizeof(r->id));
          464                         } else if (!strcmp(nodes[2].name, "lengthSeconds")) {
          465                                 r->lengthseconds = getnum(value);
          466                         } else if (!strcmp(nodes[2].name, "author")) {
          467                                 strlcpy(r->author, value, sizeof(r->author));
          468                         } else if (!strcmp(nodes[2].name, "viewCount")) {
          469                                 r->viewcount = getnum(value);
          470                         } else if (!strcmp(nodes[2].name, "channelId")) {
          471                                 strlcpy(r->channelid, value, sizeof(r->channelid));
          472                         } else if (!strcmp(nodes[2].name, "shortDescription")) {
          473                                 strlcpy(r->shortdescription, value, sizeof(r->shortdescription));
          474                         }
          475                 }
          476         }
          477 }
          478 
          479 static struct video_response *
          480 parse_video_response(const char *data)
          481 {
          482         struct video_response *r;
          483         const char *s, *start, *end;
          484         int ret;
          485 
          486         if (!(s = strstr(data, "\r\n\r\n")))
          487                 return NULL; /* invalid response */
          488         /* skip header */
          489         s += strlen("\r\n\r\n");
          490 
          491         if (!(r = calloc(1, sizeof(*r))))
          492                 return NULL;
          493 
          494         if (extractjson_video(s, &start, &end) == -1) {
          495                 free(r);
          496                 return NULL;
          497         }
          498 
          499         ret = parsejson(start, end - start, processnode_video, r);
          500         if (ret < 0) {
          501                 free(r);
          502                 return NULL;
          503         }
          504         return r;
          505 }
          506 
          507 struct search_response *
          508 youtube_search(const char *rawsearch, const char *page, const char *order)
          509 {
          510         const char *data;
          511 
          512         if (!(data = request_search(rawsearch, page, order)))
          513                 return NULL;
          514 
          515         return parse_search_response(data);
          516 }
          517 
          518 struct search_response *
          519 youtube_channel_videos(const char *channelid)
          520 {
          521         const char *data;
          522 
          523         if (!(data = request_channel_videos(channelid)))
          524                 return NULL;
          525 
          526         return parse_search_response(data);
          527 }
          528 
          529 struct search_response *
          530 youtube_user_videos(const char *user)
          531 {
          532         const char *data;
          533 
          534         if (!(data = request_user_videos(user)))
          535                 return NULL;
          536 
          537         return parse_search_response(data);
          538 }
          539 
          540 struct video_response *
          541 youtube_video(const char *videoid)
          542 {
          543         const char *data;
          544 
          545         if (!(data = request_video(videoid)))
          546                 return NULL;
          547 
          548         return parse_video_response(data);
          549 }