youtube.c - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
youtube.c (16065B)
---
1 #include <sys/socket.h>
2 #include <sys/types.h>
3
4 #include <ctype.h>
5 #include <errno.h>
6 #include <netdb.h>
7 #include <stdarg.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <unistd.h>
12
13 #include "https.h"
14 #include "json.h"
15 #include "util.h"
16 #include "youtube.h"
17
/* Convert the leading base-10 number in s with strtoll().
   Negative results are clamped to 0; text without a leading number yields 0. */
static long long
getnum(const char *s)
{
	long long v = strtoll(s, 0, 10);

	return v < 0 ? 0 : v;
}
28
/* Issue a request for path against the fixed host "www.youtube.com" through
   request(), passing an empty string as its third argument.
   The value returned by request() is handed back unchanged. */
static char *
youtube_request(const char *path)
{
	char *response;

	response = request("www.youtube.com", path, "");

	return response;
}
34
/* Format the path "/watch?v=<videoid>" and request it.
   Returns NULL when formatting fails or the path does not fit the buffer,
   otherwise the result of youtube_request().
   NOTE(review): videoid is inserted as-is; presumably the caller validates or
   encodes it - confirm. */
static char *
request_video(const char *videoid)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/watch?v=%s", videoid);

	/* n < 0: formatting error, n >= buffer size: output was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
48
/* Format the path "/channel/<channelid>/videos" and request it.
   Returns NULL when formatting fails or the path does not fit the buffer,
   otherwise the result of youtube_request().
   NOTE(review): channelid is inserted as-is; presumably the caller validates
   or encodes it - confirm. */
static char *
request_channel_videos(const char *channelid)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);

	/* n < 0: formatting error, n >= buffer size: output was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
62
/* Format the path "/user/<user>/videos" and request it.
   Returns NULL when formatting fails or the path does not fit the buffer,
   otherwise the result of youtube_request().
   NOTE(review): user is inserted as-is; presumably the caller validates or
   encodes it - confirm. */
static char *
request_user_videos(const char *user)
{
	char path[2048];
	int n = snprintf(path, sizeof(path), "/user/%s/videos", user);

	/* n < 0: formatting error, n >= buffer size: output was truncated */
	if (n >= 0 && (size_t)n < sizeof(path))
		return youtube_request(path);

	return NULL;
}
76
/* Build the search path "/results?search_query=<s>" with optional "&page="
   and "&sp=" (sort order) parameters and request it.

   s:     search query, inserted as-is (presumably already URI-encoded by the
          caller - confirm).
   page:  page parameter, skipped when empty.
   order: sort order; empty or "relevance" adds no parameter. "date", "views"
          and "rating" map to a fixed "sp" value, any other value results in
          an empty "&sp=".

   Returns NULL when formatting fails or any part does not fit the buffer,
   otherwise the result of youtube_request(). */
static char *
request_search(const char *s, const char *page, const char *order)
{
	char path[4096];
	const char *sp;
	int r;

	r = snprintf(path, sizeof(path), "/results?search_query=%s", s);
	/* check for a formatting error or a too long request (truncation) */
	if (r < 0 || (size_t)r >= sizeof(path))
		return NULL;

	/* NOTE: pagination doesn't work at the moment:
	   this parameter is not supported anymore by Youtube */
	if (page[0]) {
		/* strlcat() returns the length it tried to create:
		   a value >= the buffer size means truncation */
		if (strlcat(path, "&page=", sizeof(path)) >= sizeof(path) ||
		    strlcat(path, page, sizeof(path)) >= sizeof(path))
			return NULL;
	}

	if (order[0] && strcmp(order, "relevance")) {
		if (!strcmp(order, "date"))
			sp = "CAI%3D";
		else if (!strcmp(order, "views"))
			sp = "CAM%3D";
		else if (!strcmp(order, "rating"))
			sp = "CAE%3D";
		else
			sp = ""; /* unknown order: the parameter stays empty */

		if (strlcat(path, "&sp=", sizeof(path)) >= sizeof(path) ||
		    strlcat(path, sp, sizeof(path)) >= sizeof(path))
			return NULL;
	}

	return youtube_request(path);
}
107
/* Locate the JSON object assigned to ytInitialData inside s.
   The data starts directly after `window["ytInitialData"] = ` or, when that is
   not present, after `var ytInitialData = `. It ends at the first "}" that is
   followed by ";\n", "; \n" or ";<" (tried in that order).

   On success *start points to the first byte of the data, *end points just
   past the closing "}" and 0 is returned.
   On failure -1 is returned: *start is NULL when no prefix was found, *end is
   NULL when a prefix was found but no terminator. */
static int
extractjson_search(const char *s, const char **start, const char **end)
{
	static const char prefixwindow[] = "window[\"ytInitialData\"] = ";
	static const char prefixvar[] = "var ytInitialData = ";
	static const char *const terminators[] = { "};\n", "}; \n", "};<" };
	const char *begin, *stop;
	size_t i;

	if ((begin = strstr(s, prefixwindow)) != NULL)
		begin += sizeof(prefixwindow) - 1;
	else if ((begin = strstr(s, prefixvar)) != NULL)
		begin += sizeof(prefixvar) - 1;

	*start = begin;
	if (begin == NULL)
		return -1;

	stop = NULL;
	for (i = 0; stop == NULL && i < sizeof(terminators) / sizeof(terminators[0]); i++)
		stop = strstr(begin, terminators[i]);

	if (stop == NULL) {
		*end = NULL;
		return -1;
	}
	/* keep the closing brace as part of the data */
	*end = stop + 1;

	return 0;
}
132
/* Locate the JSON object assigned to ytInitialPlayerResponse inside s.
   The data starts directly after `var ytInitialPlayerResponse = ` and ends at
   the first "}" that is followed by ";<".

   On success *start points to the first byte of the data, *end points just
   past the closing "}" and 0 is returned.
   On failure -1 is returned: *start is NULL when the prefix was not found,
   *end is NULL when the prefix was found but the terminator was not. */
static int
extractjson_video(const char *s, const char **start, const char **end)
{
	static const char prefix[] = "var ytInitialPlayerResponse = ";
	const char *begin, *stop;

	begin = strstr(s, prefix);
	*start = begin;
	if (begin == NULL)
		return -1;
	begin += sizeof(prefix) - 1;
	*start = begin;

	stop = strstr(begin, "};<");
	if (stop == NULL) {
		*end = NULL;
		return -1;
	}
	/* keep the closing brace as part of the data */
	*end = stop + 1;

	return 0;
}
147
/* Returns 1 when name is exactly "videoRenderer", otherwise 0. */
static int
isrenderername(const char *name)
{
	return strcmp(name, "videoRenderer") == 0;
}
153
154 static void
155 processnode_search(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
156 void *pp)
157 {
158 struct search_response *r = (struct search_response *)pp;
159 static struct item *item;
160
161 if (r->nitems > MAX_VIDEOS)
162 return;
163
164 /* new item, structures can be very deep, just check the end for:
165 (items|contents)[].videoRenderer objects */
166 if (depth >= 3 &&
167 nodes[depth - 1].type == JSON_TYPE_OBJECT &&
168 isrenderername(nodes[depth - 1].name)) {
169 r->nitems++;
170 return;
171 }
172
173 if (r->nitems == 0)
174 return;
175 item = &(r->items[r->nitems - 1]);
176
177 if (depth >= 4 &&
178 nodes[depth - 1].type == JSON_TYPE_STRING &&
179 isrenderername(nodes[depth - 2].name) &&
180 !strcmp(nodes[depth - 1].name, "videoId")) {
181 strlcpy(item->id, value, sizeof(item->id));
182 }
183
184 if (depth >= 7 &&
185 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
186 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
187 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
188 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
189 nodes[depth - 1].type == JSON_TYPE_STRING &&
190 isrenderername(nodes[depth - 5].name) &&
191 !strcmp(nodes[depth - 4].name, "title") &&
192 !strcmp(nodes[depth - 3].name, "runs") &&
193 !strcmp(nodes[depth - 1].name, "text") &&
194 !item->title[0]) {
195 strlcpy(item->title, value, sizeof(item->title));
196 }
197
198 /* in search listing there is a short description, string items are appended */
199 if (depth >= 8 &&
200 nodes[depth - 7].type == JSON_TYPE_OBJECT &&
201 nodes[depth - 6].type == JSON_TYPE_ARRAY &&
202 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
203 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
204 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
205 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
206 nodes[depth - 1].type == JSON_TYPE_STRING &&
207 isrenderername(nodes[depth - 7].name) &&
208 !strcmp(nodes[depth - 6].name, "detailedMetadataSnippets") &&
209 !strcmp(nodes[depth - 4].name, "snippetText") &&
210 !strcmp(nodes[depth - 3].name, "runs") &&
211 !strcmp(nodes[depth - 1].name, "text")) {
212 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
213 }
214
215 /* in channel/user videos listing there is a short description, string items are appended */
216 if (depth >= 7 &&
217 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
218 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
219 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
220 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
221 nodes[depth - 1].type == JSON_TYPE_STRING &&
222 isrenderername(nodes[depth - 5].name) &&
223 !strcmp(nodes[depth - 4].name, "descriptionSnippet") &&
224 !strcmp(nodes[depth - 3].name, "runs") &&
225 !strcmp(nodes[depth - 1].name, "text")) {
226 strlcat(item->shortdescription, value, sizeof(item->shortdescription));
227 }
228
229 /* try to detect members/sponsor/subscription-only videos */
230 if (depth >= 7 &&
231 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
232 nodes[depth - 4].type == JSON_TYPE_ARRAY &&
233 nodes[depth - 3].type == JSON_TYPE_OBJECT &&
234 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
235 nodes[depth - 1].type == JSON_TYPE_STRING &&
236 isrenderername(nodes[depth - 5].name) &&
237 !strcmp(nodes[depth - 4].name, "badges") &&
238 !strcmp(nodes[depth - 2].name, "metadataBadgeRenderer") &&
239 !strcmp(nodes[depth - 1].name, "label")) {
240 if (strstr(value, "Members only"))
241 item->membersonly = 1;
242 }
243
244 if (depth >= 5 &&
245 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
246 nodes[depth - 3].type == JSON_TYPE_OBJECT &&
247 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
248 nodes[depth - 1].type == JSON_TYPE_STRING &&
249 isrenderername(nodes[depth - 3].name) &&
250 !strcmp(nodes[depth - 1].name, "simpleText")) {
251 if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
252 !item->viewcount[0]) {
253 strlcpy(item->viewcount, value, sizeof(item->viewcount));
254 } else if (!strcmp(nodes[depth - 2].name, "lengthText") &&
255 !item->duration[0]) {
256 strlcpy(item->duration, value, sizeof(item->duration));
257 } else if (!strcmp(nodes[depth - 2].name, "publishedTimeText") &&
258 !item->publishedat[0]) {
259 strlcpy(item->publishedat, value, sizeof(item->publishedat));
260 }
261 }
262
263 if (depth >= 9 &&
264 nodes[depth - 8].type == JSON_TYPE_OBJECT &&
265 nodes[depth - 7].type == JSON_TYPE_OBJECT &&
266 nodes[depth - 6].type == JSON_TYPE_OBJECT &&
267 nodes[depth - 5].type == JSON_TYPE_ARRAY &&
268 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
269 nodes[depth - 3].type == JSON_TYPE_OBJECT &&
270 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
271 nodes[depth - 1].type == JSON_TYPE_STRING &&
272 isrenderername(nodes[depth - 7].name) &&
273 !strcmp(nodes[depth - 6].name, "longBylineText") &&
274 !strcmp(nodes[depth - 5].name, "runs") &&
275 !strcmp(nodes[depth - 3].name, "navigationEndpoint") &&
276 !strcmp(nodes[depth - 2].name, "browseEndpoint")) {
277 if (!strcmp(nodes[depth - 1].name, "browseId")) {
278 strlcpy(item->channelid, value, sizeof(item->channelid));
279 }
280 }
281
282 if (depth >= 7 &&
283 nodes[depth - 6].type == JSON_TYPE_OBJECT &&
284 nodes[depth - 5].type == JSON_TYPE_OBJECT &&
285 nodes[depth - 4].type == JSON_TYPE_OBJECT &&
286 nodes[depth - 3].type == JSON_TYPE_ARRAY &&
287 nodes[depth - 2].type == JSON_TYPE_OBJECT &&
288 nodes[depth - 1].type == JSON_TYPE_STRING &&
289 isrenderername(nodes[depth - 5].name) &&
290 !strcmp(nodes[depth - 4].name, "longBylineText") &&
291 !strcmp(nodes[depth - 3].name, "runs")) {
292 if (!strcmp(nodes[depth - 1].name, "text") &&
293 !item->channeltitle[0]) {
294 strlcpy(item->channeltitle, value, sizeof(item->channeltitle));
295 }
296 }
297 }
298
299 static struct search_response *
300 parse_search_response(const char *data)
301 {
302 struct search_response *r;
303 struct item *item;
304 const char *s, *start, *end;
305 size_t i, len;
306 int ret;
307
308 if (!(s = strstr(data, "\r\n\r\n")))
309 return NULL; /* invalid response */
310 /* skip header */
311 s += strlen("\r\n\r\n");
312
313 if (!(r = calloc(1, sizeof(*r))))
314 return NULL;
315
316 if (extractjson_search(s, &start, &end) == -1) {
317 free(r);
318 return NULL;
319 }
320
321 ret = parsejson(start, end - start, processnode_search, r);
322 if (ret < 0) {
323 free(r);
324 return NULL;
325 }
326
327 /* workaround: sometimes playlists or topics are listed as channels, filter
328 these topic/playlist links away because they won't work for channel videos. The
329 JSON response would have to be parsed in a different way than channels. */
330 for (i = 0; i < r->nitems; i++) {
331 item = &(r->items[i]);
332 len = strlen(item->channeltitle);
333
334 if (len > sizeof(" - Topic") &&
335 !strcmp(item->channeltitle + len - sizeof(" - Topic") + 1, " - Topic")) {
336 /* reset information that doesn't work for topics */
337 item->channelid[0] = '\0';
338 item->viewcount[0] = '\0';
339 }
340 }
341
342 return r;
343 }
344
345 static void
346 processnode_video(struct json_node *nodes, size_t depth, const char *value, size_t valuelen,
347 void *pp)
348 {
349 struct video_response *r = (struct video_response *)pp;
350 struct video_format *f;
351
352 if (depth > 1) {
353 /* playability status: could be unplayable / members-only video */
354 if (nodes[0].type == JSON_TYPE_OBJECT &&
355 !strcmp(nodes[1].name, "playabilityStatus")) { /* example: "UNPLAYABLE" */
356 if (depth == 3 &&
357 nodes[2].type == JSON_TYPE_STRING &&
358 !strcmp(nodes[2].name, "status")) {
359 strlcpy(r->playabilitystatus, value, sizeof(r->playabilitystatus));
360 }
361 if (depth == 3 &&
362 nodes[2].type == JSON_TYPE_STRING &&
363 !strcmp(nodes[2].name, "reason")) {
364 strlcpy(r->playabilityreason, value, sizeof(r->playabilityreason));
365 }
366 }
367
368 if (nodes[0].type == JSON_TYPE_OBJECT &&
369 !strcmp(nodes[1].name, "streamingData")) {
370 if (depth == 2 &&
371 nodes[2].type == JSON_TYPE_STRING &&
372 !strcmp(nodes[2].name, "expiresInSeconds")) {
373 r->expiresinseconds = getnum(value);
374 }
375
376 if (depth >= 3 &&
377 nodes[2].type == JSON_TYPE_ARRAY &&
378 (!strcmp(nodes[2].name, "formats") ||
379 !strcmp(nodes[2].name, "adaptiveFormats"))) {
380 if (r->nformats > MAX_FORMATS)
381 return; /* ignore: don't add too many formats */
382
383 if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT)
384 r->nformats++;
385
386 if (r->nformats == 0)
387 return;
388 f = &(r->formats[r->nformats - 1]); /* current video format item */
389
390 if (depth == 5 &&
391 nodes[2].type == JSON_TYPE_ARRAY &&
392 nodes[3].type == JSON_TYPE_OBJECT &&
393 (nodes[4].type == JSON_TYPE_STRING ||
394 nodes[4].type == JSON_TYPE_NUMBER ||
395 nodes[4].type == JSON_TYPE_BOOL)) {
396 if (!strcmp(nodes[4].name, "width")) {
397 f->width = getnum(value);
398 } else if (!strcmp(nodes[4].name, "height")) {
399 f->height = getnum(value);
400 } else if (!strcmp(nodes[4].name, "url")) {
401 strlcpy(f->url, value, sizeof(f->url));
402 } else if (!strcmp(nodes[4].name, "signatureCipher")) {
403 strlcpy(f->signaturecipher, value, sizeof(f->signaturecipher));
404 } else if (!strcmp(nodes[4].name, "qualityLabel")) {
405 strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel));
406 } else if (!strcmp(nodes[4].name, "quality")) {
407 strlcpy(f->quality, value, sizeof(f->quality));
408 } else if (!strcmp(nodes[4].name, "fps")) {
409 f->fps = getnum(value);
410 } else if (!strcmp(nodes[4].name, "bitrate")) {
411 f->bitrate = getnum(value);
412 } else if (!strcmp(nodes[4].name, "averageBitrate")) {
413 f->averagebitrate = getnum(value);
414 } else if (!strcmp(nodes[4].name, "mimeType")) {
415 strlcpy(f->mimetype, value, sizeof(f->mimetype));
416 } else if (!strcmp(nodes[4].name, "itag")) {
417 f->itag = getnum(value);
418 } else if (!strcmp(nodes[4].name, "contentLength")) {
419 f->contentlength = getnum(value);
420 } else if (!strcmp(nodes[4].name, "lastModified")) {
421 f->lastmodified = getnum(value);
422 } else if (!strcmp(nodes[4].name, "audioChannels")) {
423 f->audiochannels = getnum(value);
424 } else if (!strcmp(nodes[4].name, "audioSampleRate")) {
425 f->audiosamplerate = getnum(value);
426 }
427 }
428 }
429 }
430 }
431
432 if (depth == 4 &&
433 nodes[0].type == JSON_TYPE_OBJECT &&
434 nodes[1].type == JSON_TYPE_OBJECT &&
435 nodes[2].type == JSON_TYPE_OBJECT &&
436 nodes[3].type == JSON_TYPE_STRING &&
437 !strcmp(nodes[1].name, "microformat") &&
438 !strcmp(nodes[2].name, "playerMicroformatRenderer")) {
439 r->isfound = 1;
440
441 if (!strcmp(nodes[3].name, "publishDate")) {
442 strlcpy(r->publishdate, value, sizeof(r->publishdate));
443 } else if (!strcmp(nodes[3].name, "uploadDate")) {
444 strlcpy(r->uploaddate, value, sizeof(r->uploaddate));
445 } else if (!strcmp(nodes[3].name, "category")) {
446 strlcpy(r->category, value, sizeof(r->category));
447 } else if (!strcmp(nodes[3].name, "isFamilySafe")) {
448 r->isfamilysafe = !strcmp(value, "true");
449 } else if (!strcmp(nodes[3].name, "isUnlisted")) {
450 r->isunlisted = !strcmp(value, "true");
451 }
452 }
453
454 if (depth == 3) {
455 if (nodes[0].type == JSON_TYPE_OBJECT &&
456 nodes[2].type == JSON_TYPE_STRING &&
457 !strcmp(nodes[1].name, "videoDetails")) {
458 r->isfound = 1;
459
460 if (!strcmp(nodes[2].name, "title")) {
461 strlcpy(r->title, value, sizeof(r->title));
462 } else if (!strcmp(nodes[2].name, "videoId")) {
463 strlcpy(r->id, value, sizeof(r->id));
464 } else if (!strcmp(nodes[2].name, "lengthSeconds")) {
465 r->lengthseconds = getnum(value);
466 } else if (!strcmp(nodes[2].name, "author")) {
467 strlcpy(r->author, value, sizeof(r->author));
468 } else if (!strcmp(nodes[2].name, "viewCount")) {
469 r->viewcount = getnum(value);
470 } else if (!strcmp(nodes[2].name, "channelId")) {
471 strlcpy(r->channelid, value, sizeof(r->channelid));
472 } else if (!strcmp(nodes[2].name, "shortDescription")) {
473 strlcpy(r->shortdescription, value, sizeof(r->shortdescription));
474 }
475 }
476 }
477 }
478
479 static struct video_response *
480 parse_video_response(const char *data)
481 {
482 struct video_response *r;
483 const char *s, *start, *end;
484 int ret;
485
486 if (!(s = strstr(data, "\r\n\r\n")))
487 return NULL; /* invalid response */
488 /* skip header */
489 s += strlen("\r\n\r\n");
490
491 if (!(r = calloc(1, sizeof(*r))))
492 return NULL;
493
494 if (extractjson_video(s, &start, &end) == -1) {
495 free(r);
496 return NULL;
497 }
498
499 ret = parsejson(start, end - start, processnode_video, r);
500 if (ret < 0) {
501 free(r);
502 return NULL;
503 }
504 return r;
505 }
506
/* Request the search results for rawsearch (with the given page and order
   parameters) and parse them.
   Returns NULL when the request or the parsing fails.
   NOTE(review): the response buffer is not released here; whether it should
   be depends on how request() allocates it - confirm. */
struct search_response *
youtube_search(const char *rawsearch, const char *page, const char *order)
{
	const char *response = request_search(rawsearch, page, order);

	return response ? parse_search_response(response) : NULL;
}
517
/* Request the videos page of the channel channelid and parse it.
   Returns NULL when the request or the parsing fails.
   NOTE(review): the response buffer is not released here; whether it should
   be depends on how request() allocates it - confirm. */
struct search_response *
youtube_channel_videos(const char *channelid)
{
	const char *response = request_channel_videos(channelid);

	return response ? parse_search_response(response) : NULL;
}
528
/* Request the videos page of the user `user` and parse it.
   Returns NULL when the request or the parsing fails.
   NOTE(review): the response buffer is not released here; whether it should
   be depends on how request() allocates it - confirm. */
struct search_response *
youtube_user_videos(const char *user)
{
	const char *response = request_user_videos(user);

	return response ? parse_search_response(response) : NULL;
}
539
/* Request the watch page of the video videoid and parse it.
   Returns NULL when the request or the parsing fails.
   NOTE(review): the response buffer is not released here; whether it should
   be depends on how request() allocates it - confirm. */
struct video_response *
youtube_video(const char *videoid)
{
	const char *response = request_video(videoid);

	return response ? parse_video_response(response) : NULL;
}
549 }