youtube: various improvements - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 11f745425e13385e5a69cf3f8cdceaa3027dad64
(DIR) parent 587b2d3d299bff29e6b941c22fe7aa526cbc9135
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 24 Feb 2023 21:51:44 +0100
youtube: various improvements
- initial support for detailed information of a video, only for youtube/cli for
now (-i option).
- list video formats per video, similar to youtube-dl/yt-dlp.
- various small fixes and improvements.
Diffstat:
M youtube/cli.c | 227 ++++++++++++++++++++++++++-----
M youtube/youtube.c | 211 ++++++++++++++++++++++++++++++-
M youtube/youtube.h | 50 ++++++++++++++++++++++++++-----
3 files changed, 437 insertions(+), 51 deletions(-)
---
(DIR) diff --git a/youtube/cli.c b/youtube/cli.c
@@ -26,17 +26,31 @@ printescape(const char *s)
fputc(*s, stdout);
}
+/*
+ * Write s to stdout, emitting indent in front of every output line.
+ * Newlines are passed through; every other control character is dropped.
+ * An empty line still receives the indent before its newline.
+ */
+void
+printescape_multiline(const char *s, const char *indent)
+{
+	int midline = 0; /* non-zero once the current line has produced output */
+
+	for (; *s; ++s) {
+		/*
+		 * Skip dropped characters before deciding about the indent:
+		 * otherwise each leading control character on a line would
+		 * emit the indent again without marking the line as started.
+		 */
+		if (*s != '\n' && iscntrl((unsigned char)*s))
+			continue;
+
+		if (!midline)
+			fputs(indent, stdout);
+
+		fputc(*s, stdout);
+		midline = (*s != '\n');
+	}
+}
+
int
-render_tsv(struct search_response *r)
+render_search_tsv(struct search_response *r)
{
struct item *videos = r->items;
size_t i;
- if (pledge("stdio", NULL) == -1) {
- fprintf(stderr, "pledge: %s\n", strerror(errno));
- exit(1);
- }
-
for (i = 0; i < r->nitems; i++) {
OUTESCAPE(videos[i].id);
OUT("\t");
@@ -73,7 +87,7 @@ render_tsv(struct search_response *r)
}
int
-render(struct search_response *r)
+render_search(struct search_response *r)
{
struct item *videos = r->items;
size_t i;
@@ -81,31 +95,39 @@ render(struct search_response *r)
for (i = 0; i < r->nitems; i++) {
switch (videos[i].linktype) {
case Channel:
- OUT("[Channel] ");
+ OUT("Channel: ");
OUTESCAPE(videos[i].channeltitle);
break;
case Movie:
- OUT("[Movie] ");
+ OUT("Movie: ");
OUTESCAPE(videos[i].title);
break;
case Playlist:
- OUT("[Playlist] ");
+ OUT("Playlist: ");
OUTESCAPE(videos[i].title);
break;
default:
+ OUT(" ");
OUTESCAPE(videos[i].title);
break;
}
+ if (videos[i].duration[0]) {
+ OUT(" [");
+ OUTESCAPE(videos[i].duration);
+ OUT("]");
+ }
OUT("\n");
if (videos[i].id[0]) {
- OUT("URL: https://www.youtube.com/embed/");
+ OUT("URL: https://www.youtube.com/embed/");
OUTESCAPE(videos[i].id);
OUT("\n");
}
if (videos[i].channelid[0] || videos[i].userid[0]) {
- OUT("Atom feed: https://www.youtube.com/feeds/videos.xml?");
+ OUT("Channel: ");
+ OUTESCAPE(videos[i].channeltitle);
+ OUT(": https://www.youtube.com/feeds/videos.xml?");
if (videos[i].channelid[0]) {
OUT("channel_id=");
OUTESCAPE(videos[i].channelid);
@@ -115,37 +137,153 @@ render(struct search_response *r)
}
OUT("\n");
}
-
- if (videos[i].channelid[0] || videos[i].userid[0]) {
- OUT("Channel title: ");
- OUTESCAPE(videos[i].channeltitle);
- OUT("\n");
- if (videos[i].channelid[0]) {
- OUT("Channelid: ");
- OUTESCAPE(videos[i].channelid);
- OUT("\n");
- } else if (videos[i].userid[0]) {
- OUT("Userid: ");
- OUTESCAPE(videos[i].userid);
- OUT("\n");
- }
- }
if (videos[i].publishedat[0]) {
- OUT("Published: ");
+ OUT("Published: ");
OUTESCAPE(videos[i].publishedat);
OUT("\n");
}
if (videos[i].viewcount[0]) {
- OUT("Viewcount: ");
+ OUT("Views: ");
OUTESCAPE(videos[i].viewcount);
OUT("\n");
}
- if (videos[i].duration[0]) {
- OUT("Duration: " );
- OUTESCAPE(videos[i].duration);
+ OUT("\n");
+ }
+
+ return 0;
+}
+
+int
+render_video(struct video_response *r)
+{
+ struct video_format *f;
+ long l;
+ int i;
+
+ OUT("URL: ");
+ OUTESCAPE(r->id);
+ OUT(", https://www.youtube.com/embed/");
+ OUTESCAPE(r->id);
+ OUT("\n");
+
+ OUT("Title: ");
+ OUTESCAPE(r->title);
+ OUT("\n");
+
+ OUT("Views: ");
+ OUTESCAPE(r->viewcount);
+ OUT("\n");
+
+ OUT("Length: ");
+ OUTESCAPE(r->lengthseconds);
+ OUT("\n");
+
+ OUT("Published: ");
+ OUTESCAPE(r->publishdate);
+ OUT("\n");
+
+ OUT("Uploaded: ");
+ OUTESCAPE(r->uploaddate);
+ OUT("\n");
+
+ if (r->author[0]) {
+ OUT("Channel: ");
+ OUTESCAPE(r->author);
+ if (r->channelid[0]) {
+ OUT(": https://www.youtube.com/feeds/videos.xml?channel_id=");
+ OUTESCAPE(r->channelid);
+ }
+ OUT("\n");
+ }
+
+ if (r->shortdescription[0]) {
+ OUT("Description:\n\n");
+ printescape_multiline(r->shortdescription, "");
+ OUT("\n");
+ }
+
+ if (r->nformats == 0)
+ return 0;
+
+ OUT("\n\nFormats:\n\n");
+
+ /* links expiration */
+ if (r->expiresinseconds[0]) {
+ OUT("Expires in ");
+ OUTESCAPE(r->expiresinseconds);
+ OUT(" seconds\n");
+ }
+
+ for (i = 0; i < r->nformats; i++) {
+ f = &(r->formats[i]);
+
+#if 0
+ l = strtol(f->width, NULL, 10);
+ if (l < 1280)
+ continue;
+ l = strtol(f->height, NULL, 10);
+ if (l < 720)
+ continue;
+#endif
+
+#if 0
+ OUT("\titag: ");
+ OUTESCAPE(f->itag);
+ OUT("\n");
+
+ OUT("\tLast modified: ");
+ OUTESCAPE(f->lastmodified);
+ OUT("\n");
+
+ OUT("\tContent-Length: ");
+ OUTESCAPE(f->contentlength);
+ OUT("\n");
+#endif
+
+ OUT("\tURL: ");
+ OUTESCAPE(f->url);
+ OUT("\n");
+
+ OUT("\tMime-type: ");
+ OUTESCAPE(f->mimetype);
+ OUT("\n");
+
+ OUT("\tBitrate: ");
+ OUTESCAPE(f->bitrate);
+ OUT("\n");
+
+ OUT("\tQuality: ");
+ if (f->qualitylabel[0])
+ OUTESCAPE(f->qualitylabel);
+ else if (f->quality[0])
+ OUTESCAPE(f->quality);
+
+ if (f->width[0]) {
+ OUT(", ");
+ OUTESCAPE(f->width);
+ OUT("x");
+ OUTESCAPE(f->height);
+ OUT("");
+ }
+ if (f->fps[0]) {
+ OUT(", ");
+ OUTESCAPE(f->fps);
+ OUT(" FPS");
+ }
+ OUT("\n");
+
+ if (f->audiochannels[0]) {
+ OUT("\tAudio channels: ");
+ OUTESCAPE(f->audiochannels);
+ OUT("\n");
+ }
+ if (f->audiosamplerate[0]) {
+ OUT("\tAudio sample rate: ");
+ OUTESCAPE(f->audiosamplerate);
OUT("\n");
}
- OUT("===\n");
+
+ OUT("\n");
}
return 0;
@@ -154,7 +292,7 @@ render(struct search_response *r)
static void
usage(const char *argv0)
{
- fprintf(stderr, "usage: %s [-t] <keyword> | <-c channelid> | <-u user>\n", argv0);
+ fprintf(stderr, "usage: %s [-t] <keyword> | <-c channelid> | <-u user> | <-i videoid>\n", argv0);
exit(1);
}
@@ -162,8 +300,9 @@ int
main(int argc, char *argv[])
{
struct search_response *r = NULL;
+ struct video_response *vr = NULL;
char search[1024];
- const char *keywords = NULL, *channelid = NULL, *user = NULL;
+ const char *keywords = NULL, *channelid = NULL, *user = NULL, *videoid = NULL;
int i, usetsv = 0;
if (pledge("stdio dns inet rpath unveil", NULL) == -1) {
@@ -180,6 +319,12 @@ main(int argc, char *argv[])
channelid = argv[i + 1];
i++;
break;
+ case 'i':
+ if (i + 1 >= argc)
+ usage(argv[0]);
+ videoid = argv[i + 1];
+ i++;
+ break;
case 'u':
if (i + 1 >= argc)
usage(argv[0]);
@@ -212,6 +357,14 @@ main(int argc, char *argv[])
r = youtube_channel_videos(channelid);
} else if (user) {
r = youtube_user_videos(user);
+ } else if (videoid) {
+ vr = youtube_video(videoid);
+ if (!vr || vr->isfound == 0) {
+ OUT("No video found\n");
+ exit(1);
+ }
+ render_video(vr);
+ return 0;
} else if (keywords) {
if (!uriencode(keywords, search, sizeof(search)))
usage(argv[0]);
@@ -228,9 +381,9 @@ main(int argc, char *argv[])
}
if (usetsv)
- render_tsv(r);
+ render_search_tsv(r);
else
- render(r);
+ render_search(r);
return 0;
}
(DIR) diff --git a/youtube/youtube.c b/youtube/youtube.c
@@ -22,9 +22,25 @@ youtube_request(const char *path)
}
static char *
+request_video(const char *videoid)
+{
+	char path[2048];
+	int len = snprintf(path, sizeof(path), "/watch?v=%s", videoid);
+
+	/* only send the request when the path was formatted without truncation */
+	if (len >= 0 && (size_t)len < sizeof(path))
+		return youtube_request(path);
+
+	return NULL;
+}
+
+static char *
request_channel_videos(const char *channelid)
{
- char path[4096];
+ char path[2048];
int r;
r = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);
@@ -38,7 +54,7 @@ request_channel_videos(const char *channelid)
static char *
request_user_videos(const char *user)
{
- char path[4096];
+ char path[2048];
int r;
r = snprintf(path, sizeof(path), "/user/%s/videos", user);
@@ -81,7 +97,7 @@ request_search(const char *s, const char *page, const char *order)
}
static int
-extractjson(const char *s, const char **start, const char **end)
+extractjson_search(const char *s, const char **start, const char **end)
{
*start = strstr(s, "window[\"ytInitialData\"] = ");
if (*start) {
@@ -105,8 +121,23 @@ extractjson(const char *s, const char **start, const char **end)
return 0;
}
+/*
+ * Locate the JSON assigned to "var ytInitialPlayerResponse" in s.
+ * On success returns 0, *start points at the first byte after the
+ * assignment prefix and *end one byte past the '}' of the first "};<"
+ * sequence that follows. Returns -1 when either marker is missing.
+ */
+static int
+extractjson_video(const char *s, const char **start, const char **end)
+{
+	static const char prefix[] = "var ytInitialPlayerResponse = ";
+	const char *json, *tail;
+
+	json = strstr(s, prefix);
+	*start = json;
+	if (json == NULL)
+		return -1;
+
+	json += sizeof(prefix) - 1;
+	*start = json;
+
+	tail = strstr(json, "};<");
+	*end = tail;
+	if (tail == NULL)
+		return -1;
+
+	/* keep the closing brace inside the [start, end) range */
+	*end = tail + 1;
+
+	return 0;
+}
+
static void
-processnode(struct json_node *nodes, size_t depth, const char *value,
+processnode_search(struct json_node *nodes, size_t depth, const char *value,
void *pp)
{
struct search_response *r = (struct search_response *)pp;
@@ -141,7 +172,6 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
nodes[depth - 3].type == JSON_TYPE_ARRAY &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
-
!strcmp(nodes[depth - 5].name, "videoRenderer") &&
!strcmp(nodes[depth - 4].name, "title") &&
!strcmp(nodes[depth - 3].name, "runs") &&
@@ -150,6 +180,23 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
strlcpy(item->title, value, sizeof(item->title));
}
+ /* in channel/user videos listing there is a short description */
+#ifdef neinneinnein
+ if (depth >= 7 &&
+ nodes[depth - 5].type == JSON_TYPE_OBJECT &&
+ nodes[depth - 4].type == JSON_TYPE_OBJECT &&
+ nodes[depth - 3].type == JSON_TYPE_ARRAY &&
+ nodes[depth - 2].type == JSON_TYPE_OBJECT &&
+ nodes[depth - 1].type == JSON_TYPE_STRING &&
+ !strcmp(nodes[depth - 5].name, "videoRenderer") &&
+ !strcmp(nodes[depth - 4].name, "descriptionSnippet") &&
+ !strcmp(nodes[depth - 3].name, "runs") &&
+ !strcmp(nodes[depth - 1].name, "text") &&
+ !item->shortdescription[0]) {
+ strlcpy(item->shortdescription, value, sizeof(item->shortdescription));
+ }
+#endif
+
if (depth >= 5 &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_OBJECT &&
@@ -220,12 +267,151 @@ parse_search_response(const char *data)
if (!(r = calloc(1, sizeof(*r))))
return NULL;
- if (extractjson(s, &start, &end) == -1) {
+ if (extractjson_search(s, &start, &end) == -1) {
free(r);
return NULL;
}
- ret = parsejson(start, end - start, processnode, r);
+ ret = parsejson(start, end - start, processnode_search, r);
+ if (ret < 0) {
+ free(r);
+ return NULL;
+ }
+ return r;
+}
+
+/*
+ * parsejson() callback used for the video page: copies selected values of
+ * the player response into the struct video_response passed as pp.
+ *
+ * nodes holds the path from the root (nodes[0]) to the current node
+ * (nodes[depth - 1]); value is the current node's value as a string.
+ * Handled paths:
+ *   streamingData.expiresInSeconds
+ *   streamingData.formats[] / streamingData.adaptiveFormats[] (per-format fields)
+ *   microformat.playerMicroformatRenderer.{publishDate,uploadDate}
+ *   videoDetails.{title,videoId,lengthSeconds,author,viewCount,channelId,
+ *                 shortDescription}
+ */
+static void
+processnode_video(struct json_node *nodes, size_t depth, const char *value,
+	void *pp)
+{
+	struct video_response *r = (struct video_response *)pp;
+	struct video_format *f;
+
+	if (depth > 1) {
+		if (nodes[0].type == JSON_TYPE_OBJECT &&
+		    !strcmp(nodes[1].name, "streamingData")) {
+			r->isfound = 1; /* a video is found */
+
+			/* the leaf is nodes[2], so the path is 3 nodes deep */
+			if (depth == 3 &&
+			    nodes[2].type == JSON_TYPE_STRING &&
+			    !strcmp(nodes[2].name, "expiresInSeconds")) {
+				strlcpy(r->expiresinseconds, value, sizeof(r->expiresinseconds));
+			}
+
+			if (depth >= 3 &&
+			    nodes[2].type == JSON_TYPE_ARRAY &&
+			    (!strcmp(nodes[2].name, "formats") ||
+			    !strcmp(nodes[2].name, "adaptiveFormats"))) {
+				/*
+				 * NOTE(review): once nformats exceeds MAX_FORMATS
+				 * everything below is skipped, so the last counted
+				 * entry is never filled in.
+				 */
+				if (r->nformats > MAX_FORMATS)
+					return; /* ignore: don't add too many formats */
+
+				/* an object directly inside the array starts a new format */
+				if (depth == 4 && nodes[3].type == JSON_TYPE_OBJECT) {
+					r->nformats++;
+				}
+
+				if (r->nformats == 0)
+					return;
+				f = &(r->formats[r->nformats - 1]); /* current video format item */
+
+				if (depth == 5 &&
+				    nodes[2].type == JSON_TYPE_ARRAY &&
+				    nodes[3].type == JSON_TYPE_OBJECT &&
+				    (nodes[4].type == JSON_TYPE_STRING ||
+				    nodes[4].type == JSON_TYPE_NUMBER ||
+				    nodes[4].type == JSON_TYPE_BOOL)) {
+					if (!strcmp(nodes[4].name, "width")) {
+						strlcpy(f->width, value, sizeof(f->width));
+					} else if (!strcmp(nodes[4].name, "height")) {
+						strlcpy(f->height, value, sizeof(f->height));
+					} else if (!strcmp(nodes[4].name, "url")) {
+						strlcpy(f->url, value, sizeof(f->url));
+					} else if (!strcmp(nodes[4].name, "qualityLabel")) {
+						strlcpy(f->qualitylabel, value, sizeof(f->qualitylabel));
+					} else if (!strcmp(nodes[4].name, "quality")) {
+						strlcpy(f->quality, value, sizeof(f->quality));
+					} else if (!strcmp(nodes[4].name, "fps")) {
+						strlcpy(f->fps, value, sizeof(f->fps));
+					} else if (!strcmp(nodes[4].name, "bitrate")) {
+						strlcpy(f->bitrate, value, sizeof(f->bitrate));
+					} else if (!strcmp(nodes[4].name, "mimeType")) {
+						strlcpy(f->mimetype, value, sizeof(f->mimetype));
+					} else if (!strcmp(nodes[4].name, "itag")) {
+						strlcpy(f->itag, value, sizeof(f->itag));
+					} else if (!strcmp(nodes[4].name, "contentLength")) {
+						strlcpy(f->contentlength, value, sizeof(f->contentlength));
+					} else if (!strcmp(nodes[4].name, "lastModified")) {
+						strlcpy(f->lastmodified, value, sizeof(f->lastmodified));
+					} else if (!strcmp(nodes[4].name, "audioChannels")) {
+						strlcpy(f->audiochannels, value, sizeof(f->audiochannels));
+					} else if (!strcmp(nodes[4].name, "audioSampleRate")) {
+						strlcpy(f->audiosamplerate, value, sizeof(f->audiosamplerate));
+					}
+				}
+			}
+		}
+	}
+
+	if (depth == 4 &&
+	    nodes[0].type == JSON_TYPE_OBJECT &&
+	    nodes[1].type == JSON_TYPE_OBJECT &&
+	    nodes[2].type == JSON_TYPE_OBJECT &&
+	    nodes[3].type == JSON_TYPE_STRING &&
+	    !strcmp(nodes[1].name, "microformat") &&
+	    !strcmp(nodes[2].name, "playerMicroformatRenderer")) {
+		if (!strcmp(nodes[3].name, "publishDate")) {
+			strlcpy(r->publishdate, value, sizeof(r->publishdate));
+		} else if (!strcmp(nodes[3].name, "uploadDate")) {
+			strlcpy(r->uploaddate, value, sizeof(r->uploaddate));
+		}
+	}
+
+	if (depth == 3) {
+		if (nodes[0].type == JSON_TYPE_OBJECT &&
+		    nodes[2].type == JSON_TYPE_STRING &&
+		    !strcmp(nodes[1].name, "videoDetails")) {
+			if (!strcmp(nodes[2].name, "title")) {
+				strlcpy(r->title, value, sizeof(r->title));
+			} else if (!strcmp(nodes[2].name, "videoId")) {
+				strlcpy(r->id, value, sizeof(r->id));
+			} else if (!strcmp(nodes[2].name, "lengthSeconds")) {
+				strlcpy(r->lengthseconds, value, sizeof(r->lengthseconds));
+			} else if (!strcmp(nodes[2].name, "author")) {
+				strlcpy(r->author, value, sizeof(r->author));
+			} else if (!strcmp(nodes[2].name, "viewCount")) {
+				strlcpy(r->viewcount, value, sizeof(r->viewcount));
+			} else if (!strcmp(nodes[2].name, "channelId")) {
+				strlcpy(r->channelid, value, sizeof(r->channelid));
+			} else if (!strcmp(nodes[2].name, "shortDescription")) {
+				strlcpy(r->shortdescription, value, sizeof(r->shortdescription));
+			}
+		}
+	}
+}
+
+static struct video_response *
+parse_video_response(const char *data)
+{
+ struct video_response *r;
+ const char *s, *start, *end;
+ int ret;
+
+ if (!(s = strstr(data, "\r\n\r\n")))
+ return NULL; /* invalid response */
+ /* skip header */
+ s += strlen("\r\n\r\n");
+
+// s = data; // DEBUG
+
+ if (!(r = calloc(1, sizeof(*r))))
+ return NULL;
+
+ if (extractjson_video(s, &start, &end) == -1) {
+ free(r);
+ return NULL;
+ }
+
+ ret = parsejson(start, end - start, processnode_video, r);
if (ret < 0) {
free(r);
return NULL;
@@ -265,3 +451,14 @@ youtube_user_videos(const char *user)
return parse_search_response(data);
}
+
+/*
+ * Request the page of the video with the given id and parse it.
+ * Returns NULL when the request or the parsing fails.
+ */
+struct video_response *
+youtube_video(const char *videoid)
+{
+	const char *body = request_video(videoid);
+
+	return body ? parse_video_response(body) : NULL;
+}
(DIR) diff --git a/youtube/youtube.h b/youtube/youtube.h
@@ -8,19 +8,55 @@ struct item {
char publishedat[32];
char viewcount[32];
char duration[32];
+
+#ifdef neinneinnein
+ char shortdescription[4096];
+#endif
};
-#define MAX_VIDEOS 100
+#define MAX_VIDEOS 50
struct search_response {
struct item items[MAX_VIDEOS + 1];
size_t nitems;
};
-struct search_response *
-youtube_search(const char *rawsearch, const char *page, const char *order);
+/* one entry of the "formats" / "adaptiveFormats" arrays of a video; every
+ * value is stored as the string copied from the JSON response */
+struct video_format {
+ char itag[32]; /* format identifier ("itag" key) */
+ char url[2048];
+ char mimetype[256]; /* mime-type and video codecs, etc */
+ char bitrate[256];
+ char width[32]; /* pixel width */
+ char height[32]; /* pixel height */
+ char fps[16]; /* frames-per-second */
+ char qualitylabel[64];
+ char quality[64];
+ char contentlength[64]; /* content length in bytes */
+ char lastmodified[64];
+ char audiosamplerate[32];
+ char audiochannels[16];
+};
+
+#define MAX_FORMATS 50
+/* details of a single video plus the list of its available formats */
+struct video_response {
+ char id[32]; /* video id */
+ char title[1024];
+ char author[1024]; /* channel name / title */
+ char channelid[256];
+ char publishdate[32]; /* YYYY-mm-dd */
+ char uploaddate[32]; /* YYYY-mm-dd */
+ char viewcount[32];
+ char lengthseconds[32];
+ char shortdescription[4096 * 4];
+
+ int isfound; /* set to 1 once a "streamingData" node was seen */
-struct search_response *
-youtube_channel_videos(const char *channelid);
+ /* expiration for URLs in video formats */
+ char expiresinseconds[32];
+ struct video_format formats[MAX_FORMATS + 1];
+ int nformats; /* number of format entries counted so far */
+};
-struct search_response *
-youtube_user_videos(const char *user);
+struct search_response *youtube_search(const char *rawsearch, const char *page, const char *order);
+struct search_response *youtube_channel_videos(const char *channelid);
+struct search_response *youtube_user_videos(const char *user);
+struct video_response *youtube_video(const char *videoid);