youtube: add channel2tsv output - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 2be30b4f834c64d4478e8cff231ee9b29601edc0
(DIR) parent 0ddeddd9e7acba6abe47ccaf8563b712cf96a037
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 11 Feb 2023 19:01:42 +0100
youtube: add channel2tsv output
* Make the parser a bit less strict so it can also parse the channel page
with videos.
* Add a function that can fetch the channel videos by channel ID.
* Add a tool that outputs channel videos to a TAB-separated format.
Diffstat:
M Makefile | 6 +++++-
A youtube/channel2tsv.c | 108 +++++++++++++++++++++++++++++++
M youtube/youtube.c | 93 +++++++++++++++++++++++--------
M youtube/youtube.h | 3 +++
4 files changed, 187 insertions(+), 23 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -25,6 +25,7 @@ BIN = \
reddit/cli \
reddit/gopher \
youtube/cgi \
+ youtube/channel2tsv \
youtube/cli \
youtube/gopher
@@ -97,11 +98,14 @@ twitch/cgi: ${LIB} twitch/twitch.o twitch/cgi.o
twitch/gopher: ${LIB} twitch/twitch.o twitch/gopher.o
${CC} -o $@ twitch/gopher.o twitch/twitch.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
-youtube: youtube/cgi youtube/cli youtube/gopher
+youtube: youtube/cgi youtube/channel2tsv youtube/cli youtube/gopher
youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
${CC} -o $@ youtube/cgi.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
+youtube/channel2tsv: ${LIB} youtube/youtube.o youtube/channel2tsv.o
+ ${CC} -o $@ youtube/channel2tsv.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
(DIR) diff --git a/youtube/channel2tsv.c b/youtube/channel2tsv.c
@@ -0,0 +1,108 @@
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <netdb.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "https.h"
+#include "util.h"
+#include "youtube.h"
+
+#define OUT(s) fputs((s), stdout)
+#define OUTESCAPE(s) printescape((s))
+
+/* Write s to stdout, leaving out every byte for which iscntrl() is true
+ * (this includes TAB and newline, so a field cannot break the TSV layout). */
+void
+printescape(const char *s)
+{
+	const char *p = s;
+
+	while (*p != '\0') {
+		/* cast: <ctype.h> functions need an unsigned char value */
+		if (!iscntrl((unsigned char)*p))
+			fputc(*p, stdout);
+		p++;
+	}
+}
+
+/*
+ * Print one line per video item of r to stdout with TAB-separated fields:
+ * id, embed URL (left empty when the id is empty), title, publishedat,
+ * viewcount and duration. Items whose linktype is Channel, Movie or
+ * Playlist are skipped. Control characters are stripped from every field.
+ * Reduces the pledge to "stdio" first and exits with status 1 if that fails.
+ * Always returns 0.
+ */
+int
+render(struct search_response *r)
+{
+	struct item *v;
+	size_t n;
+
+	if (pledge("stdio", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	for (n = 0; n < r->nitems; n++) {
+		v = &r->items[n];
+
+		/* only items that are not a channel, movie or playlist are listed */
+		if (v->linktype == Channel ||
+		    v->linktype == Movie ||
+		    v->linktype == Playlist)
+			continue;
+
+		OUTESCAPE(v->id);
+		OUT("\t");
+		if (v->id[0] != '\0') {
+			OUT("https://www.youtube.com/embed/");
+			OUTESCAPE(v->id);
+		}
+		OUT("\t");
+		OUTESCAPE(v->title);
+		OUT("\t");
+		OUTESCAPE(v->publishedat);
+		OUT("\t");
+		OUTESCAPE(v->viewcount);
+		OUT("\t");
+		OUTESCAPE(v->duration);
+		OUT("\n");
+	}
+
+	return 0;
+}
+
+/* Print a usage line naming the required <channelid> argument to stderr,
+ * prefixed with argv0, and exit with status 1. Does not return. */
+static void
+usage(const char *argv0)
+{
+ fprintf(stderr, "usage: %s <channelid>\n", argv0);
+ exit(1);
+}
+
+/*
+ * Entry point: restrict the process with pledge/unveil, fetch the videos of
+ * the channel named by the first argument and print them via render().
+ * Exits with status 1 when pledge or unveil fails, when the argument is
+ * missing or empty, when uriencode() returns 0 for it (usage is printed in
+ * the argument cases), or when no response or no items were obtained.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct search_response *res;
+	char encoded[1024];
+
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1) {
+		fprintf(stderr, "pledge: %s\n", strerror(errno));
+		exit(1);
+	}
+	/* unveil the CA certificate file read-only, then finish with the
+	 * (NULL, NULL) call; || keeps the original call order */
+	if (unveil(TLS_CA_CERT_FILE, "r") == -1 ||
+	    unveil(NULL, NULL) == -1) {
+		fprintf(stderr, "unveil: %s\n", strerror(errno));
+		exit(1);
+	}
+
+	/* the channel ID must be present, non-empty and fit when URI-encoded */
+	if (argc < 2 || argv[1][0] == '\0' ||
+	    !uriencode(argv[1], encoded, sizeof(encoded)))
+		usage(argv[0]);
+
+	res = youtube_channel_videos(encoded);
+	if (res == NULL || res->nitems == 0)
+		exit(1);
+
+	render(res);
+
+	return 0;
+}
(DIR) diff --git a/youtube/youtube.c b/youtube/youtube.c
@@ -22,6 +22,20 @@ youtube_request(const char *path)
}
+/*
+ * Build the request path "/channel/<channelid>/videos" and pass it to
+ * youtube_request(). channelid is inserted as-is.
+ * Returns the result of youtube_request(), or NULL when formatting fails or
+ * the path does not fit the local buffer.
+ */
static char *
+request_channel_videos(const char *channelid)
+{
+	char path[4096];
+	int len;
+
+	len = snprintf(path, sizeof(path), "/channel/%s/videos", channelid);
+	/* negative: output error; >= buffer size: the path was truncated */
+	if (len >= 0 && (size_t)len < sizeof(path))
+		return youtube_request(path);
+
+	return NULL;
+}
+
+static char *
request_search(const char *s, const char *page, const char *order)
{
char path[4096];
@@ -90,11 +104,11 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
/* new item, structures can be very deep, just check the end for:
(items|contents)[].videoRenderer objects */
if (depth >= 3 &&
- nodes[depth - 3].type == JSON_TYPE_ARRAY &&
- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 3].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_OBJECT &&
- (!strcmp(nodes[depth - 3].name, "items") ||
- !strcmp(nodes[depth - 3].name, "contents")) &&
+// (!strcmp(nodes[depth - 3].name, "items") ||
+// !strcmp(nodes[depth - 3].name, "content")) &&
!strcmp(nodes[depth - 1].name, "videoRenderer")) {
r->nitems++;
return;
@@ -105,27 +119,28 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
item = &(r->items[r->nitems - 1]);
if (depth >= 4 &&
- nodes[depth - 4].type == JSON_TYPE_ARRAY &&
- nodes[depth - 3].type == JSON_TYPE_OBJECT &&
- nodes[depth - 2].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 4].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 3].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 4].name, "items") ||
- !strcmp(nodes[depth - 4].name, "contents")) &&
+// (!strcmp(nodes[depth - 4].name, "items") ||
+// !strcmp(nodes[depth - 4].name, "contents")) &&
!strcmp(nodes[depth - 2].name, "videoRenderer") &&
!strcmp(nodes[depth - 1].name, "videoId")) {
strlcpy(item->id, value, sizeof(item->id));
}
if (depth >= 7 &&
- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
- nodes[depth - 6].type == JSON_TYPE_OBJECT &&
+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 6].type == JSON_TYPE_OBJECT &&
nodes[depth - 5].type == JSON_TYPE_OBJECT &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_ARRAY &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 7].name, "items") ||
- !strcmp(nodes[depth - 7].name, "contents")) &&
+// (!strcmp(nodes[depth - 7].name, "items") ||
+// !strcmp(nodes[depth - 7].name, "contents")) &&
+
!strcmp(nodes[depth - 5].name, "videoRenderer") &&
!strcmp(nodes[depth - 4].name, "title") &&
!strcmp(nodes[depth - 3].name, "runs") &&
@@ -135,13 +150,13 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
}
if (depth >= 5 &&
- nodes[depth - 5].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 5].type == JSON_TYPE_ARRAY &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_OBJECT &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 5].name, "items") ||
- !strcmp(nodes[depth - 5].name, "contents")) &&
+// (!strcmp(nodes[depth - 5].name, "items") ||
+// !strcmp(nodes[depth - 5].name, "contents")) &&
!strcmp(nodes[depth - 3].name, "videoRenderer") &&
!strcmp(nodes[depth - 1].name, "simpleText")) {
if (!strcmp(nodes[depth - 2].name, "viewCountText") &&
@@ -157,7 +172,7 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
}
if (depth >= 9 &&
- nodes[depth - 9].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 9].type == JSON_TYPE_ARRAY &&
nodes[depth - 8].type == JSON_TYPE_OBJECT &&
nodes[depth - 7].type == JSON_TYPE_OBJECT &&
nodes[depth - 6].type == JSON_TYPE_OBJECT &&
@@ -166,8 +181,8 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
nodes[depth - 3].type == JSON_TYPE_OBJECT &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 9].name, "items") ||
- !strcmp(nodes[depth - 9].name, "contents")) &&
+// (!strcmp(nodes[depth - 9].name, "items") ||
+// !strcmp(nodes[depth - 9].name, "contents")) &&
!strcmp(nodes[depth - 7].name, "videoRenderer") &&
!strcmp(nodes[depth - 6].name, "longBylineText") &&
!strcmp(nodes[depth - 5].name, "runs") &&
@@ -179,15 +194,15 @@ processnode(struct json_node *nodes, size_t depth, const char *value,
}
if (depth >= 7 &&
- nodes[depth - 7].type == JSON_TYPE_ARRAY &&
+// nodes[depth - 7].type == JSON_TYPE_ARRAY &&
nodes[depth - 6].type == JSON_TYPE_OBJECT &&
nodes[depth - 5].type == JSON_TYPE_OBJECT &&
nodes[depth - 4].type == JSON_TYPE_OBJECT &&
nodes[depth - 3].type == JSON_TYPE_ARRAY &&
nodes[depth - 2].type == JSON_TYPE_OBJECT &&
nodes[depth - 1].type == JSON_TYPE_STRING &&
- (!strcmp(nodes[depth - 7].name, "items") ||
- !strcmp(nodes[depth - 7].name, "contents")) &&
+// (!strcmp(nodes[depth - 7].name, "items") ||
+// !strcmp(nodes[depth - 7].name, "contents")) &&
!strcmp(nodes[depth - 5].name, "videoRenderer") &&
!strcmp(nodes[depth - 4].name, "longBylineText") &&
!strcmp(nodes[depth - 3].name, "runs")) {
@@ -231,3 +246,37 @@ youtube_search(const char *rawsearch, const char *page, const char *order)
return r;
}
+
+/*
+ * Request the videos page of the channel with ID channelid and collect the
+ * items that processnode() finds in the JSON extracted from the response
+ * body.
+ *
+ * Returns a calloc()-allocated search_response on success. Returns NULL when
+ * request_channel_videos() returns NULL, the response has no "\r\n\r\n"
+ * header terminator, the allocation fails, or the JSON cannot be extracted
+ * or parsed.
+ *
+ * NOTE(review): the buffer returned by request_channel_videos() is not
+ * released on any path; its ownership is not visible from here - confirm
+ * whether it should be freed once parsing is done.
+ */
+struct search_response *
+youtube_channel_videos(const char *channelid)
+{
+	struct search_response *r;
+	char *data, *s, *start, *end;
+	int ret;
+
+	if (!(data = request_channel_videos(channelid)))
+		return NULL;
+
+	if (!(s = strstr(data, "\r\n\r\n")))
+		return NULL; /* invalid response */
+	/* skip header */
+	s += strlen("\r\n\r\n");
+
+	if (!(r = calloc(1, sizeof(*r))))
+		return NULL;
+
+	if (extractjson(s, &start, &end) == -1) {
+		/* terminate the diagnostic so it does not run into later output */
+		fprintf(stderr, "error extracting JSON\n");
+		free(r);
+		return NULL;
+	}
+
+	ret = parsejson(start, end - start, processnode, r);
+	if (ret < 0) {
+		/* a parse failure is reported through the NULL return only */
+		free(r);
+		return NULL;
+	}
+
+	return r;
+}
(DIR) diff --git a/youtube/youtube.h b/youtube/youtube.h
@@ -19,3 +19,6 @@ struct search_response {
struct search_response *
youtube_search(const char *rawsearch, const char *page, const char *order);
+
+/* Fetch the videos page of the channel with the given ID and parse its
+ * video items. Returns NULL on failure (see the definition in youtube.c). */
+struct search_response *
+youtube_channel_videos(const char *channelid);