add initial version of youtube/feed - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit f5a6863b5397d1cc3ad31de291be11fae6256b5f
(DIR) parent 7b18c287f2fcf98227ff2ec1fdd4eeb8050e8166
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Wed, 10 May 2023 01:10:51 +0200
add initial version of youtube/feed
This fetches the Youtube Atom feed and the channel videos and combines the data.
It can output:
- Atom
- sfeed(5)
- JSON / JSON Feed
It can run in command-line and CGI mode.
For now it only adds the video duration to the title and filters out YouTube
Shorts.
The Atom parser is based on sfeed.
Diffstat:
M Makefile | 4 ++++
M util.h | 7 +++++++
A youtube/feed.c | 1001 +++++++++++++++++++++++++++++++
3 files changed, 1012 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -22,6 +22,7 @@ LIBTLS_LDFLAGS_STATIC = -ltls -lssl -lcrypto -static
BIN = \
youtube/cgi \
youtube/cli \
+ youtube/feed \
youtube/gopher
SRC = ${BIN:=.c} \
@@ -68,6 +69,9 @@ youtube/cgi: ${LIB} youtube/youtube.o youtube/cgi.o
youtube/cli: ${LIB} youtube/youtube.o youtube/cli.o
${CC} -o $@ youtube/cli.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS}
+youtube/feed: ${LIB} youtube/youtube.o youtube/feed.o
+ ${CC} -o $@ youtube/feed.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
+
youtube/gopher: ${LIB} youtube/youtube.o youtube/gopher.o
${CC} -o $@ youtube/gopher.o youtube/youtube.o ${LIB} ${LDFLAGS} ${LIBTLS_LDFLAGS_STATIC}
(DIR) diff --git a/util.h b/util.h
@@ -3,6 +3,13 @@
#define unveil(p1,p2) 0
#endif
+/* ctype-like macros, but always compatible with ASCII / UTF-8.
+ * The argument is fully parenthesized so that any expression can be passed.
+ * Some macros evaluate the argument more than once: do not pass expressions
+ * with side effects. */
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)(c)) - '\t') < 5))
+#define TOLOWER(c) ((((unsigned)(c)) - 'A' < 26) ? ((c) | 32) : (c))
+
#undef strlcat
size_t strlcat(char *, const char *, size_t);
#undef strlcpy
(DIR) diff --git a/youtube/feed.c b/youtube/feed.c
@@ -0,0 +1,1001 @@
+#include <err.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <time.h>
+
+#include "https.h"
+#include "util.h"
+#include "youtube.h"
+#include "xml.h"
+
+#define ISINCONTENT(ctx) ((ctx).iscontent && !((ctx).iscontenttag)) /* true while inside the data of a content field */
+#define ISCONTENTTAG(ctx) (!((ctx).iscontent) && (ctx).iscontenttag) /* true on a content tag, before its data starts */
+
+/* string and byte-length: expands to two arguments, the string literal and
+ * its size without the terminating NUL byte */
+#define STRP(s) s,sizeof(s)-1
+
+enum FeedType {
+ FeedTypeNone = 0, /* not inside an entry */
+ FeedTypeAtom = 2 /* inside an Atom <entry>, set by xmltagstart() */
+};
+
+/* String data / memory pool.
+ * The buffer is grown by string_append() and reused after string_clear(). */
+typedef struct string {
+ char *data; /* data, NUL-terminated by string_append() and string_clear() */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+/* NOTE: the order of these fields (content, date, author) indicates the
+ * priority to use them, from least important to highest: a field that was
+ * already set is only overwritten by a tag with a higher TagId, see
+ * xmltagstartparsed(). */
+enum TagId {
+ TagUnknown = 0,
+ /* Atom */
+ /* creation date has higher priority */
+ AtomTagPublished,
+ AtomTagTitle,
+ AtomTagMediaDescription,
+ AtomTagId,
+ AtomTagLink,
+ AtomTagLinkAlternate,
+ AtomTagAuthor, AtomTagAuthorName,
+ TagYoutubeVideoId,
+ TagLast /* number of tag ids, size of fieldmap[] */
+};
+
+typedef struct feedtag { /* a tag name to match and the id it maps to */
+ char *name; /* name of tag to match */
+ size_t len; /* len of `name` */
+ enum TagId id; /* unique ID */
+} FeedTag;
+
+typedef struct field { /* one output field of the current entry */
+ String str; /* collected value of the field */
+ enum TagId tagid; /* tagid set previously, used for tag priority */
+} FeedField;
+
+enum { /* indexes into FeedContext.fields[] */
+ /* sfeed fields */
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+ FeedFieldYoutubeId, /* yt:videoId */
+ FeedFieldLast /* number of fields */
+};
+
+typedef struct feedcontext { /* parser state shared by the XML callbacks */
+ String *field; /* current FeedItem field String, NULL when no field is open */
+ FeedField fields[FeedFieldLast]; /* data for current item */
+ FeedTag tag; /* unique current parsed tag */
+ int iscontent; /* in content data */
+ int iscontenttag; /* in content tag */
+ enum FeedType feedtype; /* FeedTypeNone while outside an entry */
+} FeedContext;
+
+static long long datetounix(long long, int, int, int, int, int);
+static FeedTag * gettag(enum FeedType, const char *, size_t);
+static long gettzoffset(const char *);
+static int isattr(const char *, size_t, const char *, size_t);
+static int istag(const char *, size_t, const char *, size_t);
+static int parsetime(const char *, long long *);
+
+static void atom_header(void);
+static void atom_item(void);
+static void atom_footer(void);
+static void json_header(void);
+static void json_item(void);
+static void json_footer(void);
+static void sfeed_item(void); /* TSV / sfeed */
+
+static void string_append(String *, const char *, size_t);
+static void string_buffer_realloc(String *, size_t);
+static void string_clear(String *);
+static void string_print_encoded(String *);
+static void string_print_timestamp(String *);
+static void string_print(String *);
+static void xmlattr(XMLParser *, const char *, size_t, const char *, size_t,
+ const char *, size_t);
+static void xmlattrentity(XMLParser *, const char *, size_t, const char *,
+ size_t, const char *, size_t);
+static void xmlattrstart(XMLParser *, const char *, size_t, const char *,
+ size_t);
+static void xmldata(XMLParser *, const char *, size_t);
+static void xmldataentity(XMLParser *, const char *, size_t);
+static void xmltagend(XMLParser *, const char *, size_t, int);
+static void xmltagstart(XMLParser *, const char *, size_t);
+static void xmltagstartparsed(XMLParser *, const char *, size_t, int);
+
+/* Atom, must be alphabetical order: gettag() looks up names with bsearch()
+ * and the case-insensitive tagcmp() */
+static const FeedTag atomtags[] = {
+ { STRP("author"), AtomTagAuthor },
+ { STRP("id"), AtomTagId },
+ /* Atom: <link href="" />, RSS has <link></link> */
+ { STRP("link"), AtomTagLink },
+ { STRP("media:description"), AtomTagMediaDescription },
+ { STRP("published"), AtomTagPublished },
+ { STRP("title"), AtomTagTitle },
+ { STRP("yt:videoId"), TagYoutubeVideoId }
+};
+
+/* special case: nested <author><name>: xmltagstart() switches to the name
+ * tag and xmltagend() switches back to the outer author tag */
+static const FeedTag atomtagauthor = { STRP("author"), AtomTagAuthor };
+static const FeedTag atomtagauthorname = { STRP("name"), AtomTagAuthorName };
+
+/* reference to no / unknown tag, copied into ctx.tag when no tag is open */
+static const FeedTag notag = { STRP(""), TagUnknown };
+
+/* map TagId type to RSS/Atom field, all tags must be defined.
+ * -1 means the tag is not stored in any field. */
+static const int fieldmap[TagLast] = {
+ [TagUnknown] = -1,
+ /* Atom */
+ [AtomTagPublished] = FeedFieldTime,
+ [AtomTagTitle] = FeedFieldTitle,
+ [AtomTagMediaDescription] = FeedFieldContent,
+ [AtomTagId] = FeedFieldId,
+ [AtomTagLink] = -1,
+ [AtomTagLinkAlternate] = FeedFieldLink,
+ [AtomTagAuthor] = -1,
+ [AtomTagAuthorName] = FeedFieldAuthor,
+ [TagYoutubeVideoId] = FeedFieldYoutubeId
+};
+
+static const int FieldSeparator = '\t'; /* written between the fields by sfeed_item() */
+
+static FeedContext ctx; /* feed parser state */
+static XMLParser parser; /* XML parser state */
+static String attrrel, tmpstr; /* value of the link rel="" and href="" attribute */
+
+static struct search_response *search_res = NULL; /* channel videos, set in main() */
+static void (*printfields)(void) = sfeed_item; /* prints the current entry, chosen in main() */
+static int cgimode = 0; /* set when REQUEST_URI is in the environment */
+
+/* bsearch() comparison callback: orders two FeedTag entries by name,
+ * ignoring case. */
+static int
+tagcmp(const void *v1, const void *v2)
+{
+	const FeedTag *lhs = v1, *rhs = v2;
+
+	return strcasecmp(lhs->name, rhs->name);
+}
+
+/* Look up the table entry (with its unique tag id) for a parsed tag name.
+ * Returns NULL when the name is unknown or the feed type has no tag table.
+ * `name` must be NUL-terminated, `namelen` is not used for the lookup. */
+static FeedTag *
+gettag(enum FeedType feedtype, const char *name, size_t namelen)
+{
+	FeedTag key;
+
+	if (feedtype != FeedTypeAtom)
+		return NULL;
+
+	/* only the name is read by tagcmp() */
+	key.name = (char *)name;
+
+	return bsearch(&key, atomtags, sizeof(atomtags) / sizeof(atomtags[0]),
+	               sizeof(atomtags[0]), tagcmp);
+}
+
+/* Reset the string to empty but keep the buffer, so it can be reused without
+ * another allocation. */
+static void
+string_clear(String *s)
+{
+	s->len = 0;
+	if (!s->data)
+		return;
+	s->data[0] = '\0';
+}
+
+/* Grow the buffer of `s` to the first power of two (starting at 64) that is
+ * larger than `newlen`, or to SIZE_MAX when doubling could overflow.
+ * Exits the program when the allocation fails. */
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+	size_t alloclen = SIZE_MAX;
+
+	if (newlen <= SIZE_MAX / 2) {
+		alloclen = 64;
+		while (alloclen <= newlen)
+			alloclen *= 2;
+	}
+
+	s->data = realloc(s->data, alloclen);
+	if (!s->data)
+		err(1, "realloc");
+	s->bufsiz = alloclen;
+}
+
+/* Append `len` bytes of `data` to the String and keep it NUL-terminated.
+ * s->data and data may not overlap. Exits the program when the new length
+ * would overflow or the allocation fails. */
+static void
+string_append(String *s, const char *data, size_t len)
+{
+	size_t newlen;
+
+	if (!len)
+		return;
+
+	if (s->len >= SIZE_MAX - len) {
+		errno = ENOMEM;
+		err(1, "realloc");
+	}
+	newlen = s->len + len;
+
+	/* grow when needed (room for the NUL byte), the buffer never shrinks */
+	if (newlen >= s->bufsiz)
+		string_buffer_realloc(s, newlen + 1);
+
+	memcpy(s->data + s->len, data, len);
+	s->data[newlen] = '\0';
+	s->len = newlen;
+}
+
+/* Print text to stdout: newlines, TABs and '\' are written as the escape
+ * sequences "\n", "\t" and "\\", other control characters are dropped.
+ * Prints nothing for an empty String. */
+static void
+string_print_encoded(String *s)
+{
+	const char *cur, *end;
+	unsigned char c;
+
+	if (!s->data || !s->len)
+		return;
+
+	end = s->data + strlen(s->data);
+	for (cur = s->data; cur != end && *cur; cur++) {
+		c = (unsigned char)*cur;
+		if (c == '\n')
+			fputs("\\n", stdout);
+		else if (c == '\\')
+			fputs("\\\\", stdout);
+		else if (c == '\t')
+			fputs("\\t", stdout);
+		else if (!ISCNTRL(c))
+			putchar(*cur); /* control chars are ignored */
+	}
+}
+
+/* Print text to stdout: every whitespace character (TAB, newline, carriage
+ * return, ...) is written as a single ' ', other control characters are
+ * dropped. Prints nothing for an empty String. */
+static void
+string_print(String *s)
+{
+	const char *cur, *end;
+	unsigned char c;
+
+	if (!s->data || !s->len)
+		return;
+
+	end = s->data + s->len;
+	for (cur = s->data; cur != end && *cur; cur++) {
+		c = (unsigned char)*cur;
+		if (ISSPACE(c))
+			putchar(' '); /* any whitespace to space */
+		else if (!ISCNTRL(c))
+			putchar(*cur); /* other control chars are ignored */
+	}
+}
+
+/* Print the String as a UNIX timestamp. Nothing is printed when the String
+ * is empty or parsetime() rejects it. */
+static void
+string_print_timestamp(String *s)
+{
+	long long t;
+
+	if (s->data && s->len && parsetime(s->data, &t) != -1)
+		printf("%lld", t);
+}
+
+/* Convert time fields. Returns a signed (at least) 64-bit UNIX timestamp.
+ Parameters should be passed as they are in a struct tm:
+ that is: year = year - 1900, month = month - 1.
+ No timezone is applied: the caller adjusts the result itself.
+ NOTE: mon indexes secs_through_month[] without a range check, so it must be
+ in the range 0 up to and including 11. */
+static long long
+datetounix(long long year, int mon, int day, int hour, int min, int sec)
+{
+ /* seconds in a month in a regular (non-leap) year */
+ static const long secs_through_month[] = {
+ 0, 31 * 86400, 59 * 86400, 90 * 86400,
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
+ long long t;
+
+ /* optimization: handle common range year 1902 up to and including 2038.
+ the unsigned subtraction makes years below 2 wrap around and fail
+ the comparison */
+ if (year - 2ULL <= 136) {
+ /* amount of leap days relative to 1970: every 4 years */
+ leaps = (year - 68) >> 2;
+ if (!((year - 68) & 3)) {
+ leaps--;
+ is_leap = 1;
+ } else {
+ is_leap = 0;
+ }
+ t = 31536000 * (year - 70) + (86400 * leaps); /* 365 * 86400 = 31536000 */
+ } else {
+ /* general leap year calculation:
+ leap years occur mostly every 4 years but every 100 years
+ a leap year is skipped unless the year is divisible by 400 */
+ cycles = (year - 100) / 400;
+ rem = (year - 100) % 400;
+ if (rem < 0) {
+ cycles--;
+ rem += 400;
+ }
+ if (!rem) {
+ is_leap = 1;
+ } else {
+ if (rem >= 300)
+ centuries = 3, rem -= 300;
+ else if (rem >= 200)
+ centuries = 2, rem -= 200;
+ else if (rem >= 100)
+ centuries = 1, rem -= 100;
+ if (rem) {
+ leaps = rem / 4U;
+ rem %= 4U;
+ is_leap = !rem;
+ }
+ }
+ leaps += (97 * cycles) + (24 * centuries) - is_leap;
+
+ /* adjust 8 leap days from 1970 up to and including 2000:
+ ((30 * 365) + 8) * 86400 = 946771200 */
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
+ }
+ t += secs_through_month[mon];
+ /* the leap day only counts for dates from March onwards */
+ if (is_leap && mon >= 2)
+ t += 86400;
+ t += 86400LL * (day - 1);
+ t += 3600LL * hour;
+ t += 60LL * min;
+ t += sec;
+
+ return t;
+}
+
+/* Get the timezone offset in seconds from UTC from a string.
+ * Only numeric offsets starting with '+' or '-' are parsed: up to 2 hour
+ * digits, an optional ':' and up to 2 minute digits.
+ * Anything else, including timezone names, gives an offset of 0. */
+static long
+gettzoffset(const char *s)
+{
+	const char *p;
+	long hours = 0, mins = 0;
+	size_t n;
+
+	if (*s != '-' && *s != '+')
+		return 0; /* timezone name or no timezone */
+
+	p = s + 1;
+	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
+		hours = (hours * 10) + (*p - '0');
+	if (*p == ':')
+		p++;
+	for (n = 0; n < 2 && ISDIGIT((unsigned char)*p); n++, p++)
+		mins = (mins * 10) + (*p - '0');
+
+	return ((hours * 3600) + (mins * 60)) * (s[0] == '-' ? -1 : 1);
+}
+
+/* Parse time string `s` into the UNIX timestamp `tp`.
+   Supported formats are "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" and
+   "%Y%m%d%H%M%S", optionally followed by fractional seconds (".123") and a
+   numeric timezone offset, see gettzoffset().
+   Returns 0 on success or -1 on failure, `tp` is only written on success. */
+static int
+parsetime(const char *s, long long *tp)
+{
+	int va[6] = { 0 }, i, v, vi;
+
+	/* the string must start with a 4-digit year */
+	if (!ISDIGIT((unsigned char)s[0]) ||
+	    !ISDIGIT((unsigned char)s[1]) ||
+	    !ISDIGIT((unsigned char)s[2]) ||
+	    !ISDIGIT((unsigned char)s[3]))
+		return -1;
+
+	/* parse time parts (and possibly remaining date parts) */
+	for (vi = 0; *s && vi < 6; vi++) {
+		for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
+		     ISDIGIT((unsigned char)*s); s++, i++) {
+			v = (v * 10) + (*s - '0');
+		}
+		va[vi] = v;
+
+		/* skip the separator that belongs to this part */
+		if ((vi < 2 && *s == '-') ||
+		    (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
+		    (vi > 2 && *s == ':'))
+			s++;
+	}
+
+	/* skip fractional seconds, for example: "%Y-%m-%dT%H:%M:%S.000+02:00",
+	   otherwise the timezone offset after it would be ignored */
+	if (*s == '.') {
+		for (s++; ISDIGIT((unsigned char)*s); s++)
+			;
+	}
+
+	/* invalid range */
+	if (va[0] < 0 || va[0] > 9999 ||
+	    va[1] < 1 || va[1] > 12 ||
+	    va[2] < 1 || va[2] > 31 ||
+	    va[3] < 0 || va[3] > 23 ||
+	    va[4] < 0 || va[4] > 59 ||
+	    va[5] < 0 || va[5] > 60) /* allow leap second */
+		return -1;
+
+	*tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
+	      gettzoffset(s);
+
+	return 0;
+}
+
+/* Print the XML declaration and the start of the Atom feed. */
+static void
+atom_header(void)
+{
+	fputs("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n", stdout);
+	fputs("<feed xmlns=\"http://www.w3.org/2005/Atom\">\n", stdout);
+	fputs("\t<title>Newsfeed</title>\n", stdout);
+}
+
+/* Print the end of the Atom feed. */
+static void
+atom_footer(void)
+{
+	puts("</feed>");
+}
+
+/* Print the current entry (ctx.fields) as an Atom <entry>.
+ * The entry is skipped when it has no video id or when the video is not in
+ * the channel videos (search_res). The video duration, when known, is
+ * appended to the title as " [duration]". All field data is XML-encoded. */
+static void
+atom_item(void)
+{
+	struct item *v, *found = NULL;
+	size_t i;
+
+	/* must have a video id */
+	if (!ctx.fields[FeedFieldYoutubeId].str.len)
+		return;
+
+	for (i = 0; i < search_res->nitems; i++) {
+		v = &(search_res->items[i]);
+		if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
+			found = v;
+	}
+	/* Only print the video if it was found in the channel videos as well.
+	   This way it filters away shorts too. */
+	if (!found)
+		return;
+
+	fputs("<entry>\n\t<title>", stdout);
+	xmlencode(ctx.fields[FeedFieldTitle].str.data);
+	if (found->duration[0]) {
+		fputs(" [", stdout);
+		xmlencode(found->duration);
+		fputs("]", stdout);
+	}
+	fputs("</title>\n", stdout);
+	if (ctx.fields[FeedFieldLink].str.len) {
+		fputs("\t<link rel=\"alternate\" href=\"", stdout);
+		xmlencode(ctx.fields[FeedFieldLink].str.data);
+		fputs("\" />\n", stdout);
+	}
+	/* prefer link over id for Atom <id>. */
+	fputs("\t<id>", stdout);
+	if (ctx.fields[FeedFieldLink].str.len)
+		xmlencode(ctx.fields[FeedFieldLink].str.data);
+	else if (ctx.fields[FeedFieldId].str.len)
+		xmlencode(ctx.fields[FeedFieldId].str.data);
+	fputs("</id>\n", stdout);
+
+	/* print the original timestamp, it should conform. It is still data
+	   from the feed: encode it like every other field so it cannot
+	   break the XML output */
+	fputs("\t<updated>", stdout);
+	xmlencode(ctx.fields[FeedFieldTime].str.data);
+	fputs("</updated>\n", stdout);
+
+	if (ctx.fields[FeedFieldAuthor].str.len) {
+		fputs("\t<author><name>", stdout);
+		xmlencode(ctx.fields[FeedFieldAuthor].str.data);
+		fputs("</name></author>\n", stdout);
+	}
+	if (ctx.fields[FeedFieldContent].str.len) {
+		fputs("\t<content>", stdout);
+		xmlencode(ctx.fields[FeedFieldContent].str.data);
+		fputs("</content>\n", stdout);
+	}
+	fputs("</entry>\n", stdout);
+}
+
+/* Print the start of the JSON Feed document, up to the opening of the items
+ * array. */
+static void
+json_header(void)
+{
+	fputs("{\n", stdout);
+	fputs("\"version\": \"https://jsonfeed.org/version/1.1\",\n", stdout);
+	fputs("\"title\": \"Newsfeed\",\n", stdout);
+	fputs("\"items\": [\n", stdout);
+}
+
+/* Close the items array and the JSON Feed document. */
+static void
+json_footer(void)
+{
+	fputs("]\n", stdout);
+	fputs("}\n", stdout);
+}
+
+/* Print the NUL-terminated string `s` as the inside of a JSON string:
+ * '\' and '"' are escaped with a backslash and control characters are
+ * written as \u00XX. The surrounding quotes are not printed. */
+static void
+json_printfield(const char *s)
+{
+	for (; *s; s++) {
+		switch (*s) {
+		case '\\':
+			fputs("\\\\", stdout);
+			break;
+		case '"':
+			fputs("\\\"", stdout);
+			break;
+		default:
+			if (ISCNTRL((unsigned char)*s))
+				printf("\\u00%02x", (unsigned char)*s);
+			else
+				putchar(*s);
+			break;
+		}
+	}
+}
+
+/* Print the current entry (ctx.fields) as a JSON Feed item.
+ * The entry is skipped when it has no video id or when the video is not in
+ * the channel videos (search_res). Items after the first are preceded by a
+ * comma. The video duration, when known, is appended to the title as
+ * " [duration]". All field data is JSON-escaped. */
+static void
+json_item(void)
+{
+	static int json_firstitem = 1;
+	struct item *v, *found = NULL;
+	size_t i;
+
+	/* must have a video id */
+	if (!ctx.fields[FeedFieldYoutubeId].str.len)
+		return;
+
+	for (i = 0; i < search_res->nitems; i++) {
+		v = &(search_res->items[i]);
+		if (!strcmp(ctx.fields[FeedFieldYoutubeId].str.data, v->id))
+			found = v;
+	}
+	/* Only print the video if it was found in the channel videos as well.
+	   This way it filters away shorts too. */
+	if (!found)
+		return;
+
+	if (!json_firstitem)
+		fputs(",\n", stdout);
+	json_firstitem = 0;
+
+	fputs("{\n\t\"id\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldId].str.data);
+	fputs("\"", stdout);
+
+	/* print the original timestamp, it should conform. It is still data
+	   from the feed: escape it like every other field so a '"' or '\'
+	   cannot break the JSON output */
+	fputs(",\n\t\"date_published\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldTime].str.data);
+	fputs("\"", stdout);
+
+	fputs(",\n\t\"title\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldTitle].str.data);
+	if (found->duration[0]) {
+		fputs(" [", stdout);
+		json_printfield(found->duration);
+		fputs("]", stdout);
+	}
+	fputs("\"", stdout);
+
+	if (ctx.fields[FeedFieldLink].str.len) {
+		fputs(",\n\t\"url\": \"", stdout);
+		json_printfield(ctx.fields[FeedFieldLink].str.data);
+		fputs("\"", stdout);
+	}
+
+	if (ctx.fields[FeedFieldAuthor].str.len) {
+		fputs(",\n\t\"authors\": [{\"name\": \"", stdout);
+		json_printfield(ctx.fields[FeedFieldAuthor].str.data);
+		fputs("\"}]", stdout);
+	}
+
+	fputs(",\n\t\"content_text\": \"", stdout);
+	json_printfield(ctx.fields[FeedFieldContent].str.data);
+	fputs("\"\n}", stdout);
+}
+
+/* Print the current entry (ctx.fields) as one TSV line in this field order:
+ * timestamp, title, link, content, content-type ("plain"), id, author,
+ * enclosure (empty), category (empty).
+ * The entry is skipped when it has no video id or when the video is not in
+ * the channel videos (search_res). The video duration, when known, is
+ * appended to the title as " [duration]". */
+static void
+sfeed_item(void)
+{
+	const char *videoid;
+	struct item *match = NULL;
+	size_t n;
+
+	/* must have a video id */
+	if (!ctx.fields[FeedFieldYoutubeId].str.len)
+		return;
+	videoid = ctx.fields[FeedFieldYoutubeId].str.data;
+
+	/* the last video with the same id is used */
+	for (n = 0; n < search_res->nitems; n++) {
+		if (strcmp(videoid, search_res->items[n].id) == 0)
+			match = &(search_res->items[n]);
+	}
+	/* Videos that are not in the channel videos are not printed.
+	   This way it filters away shorts too. */
+	if (match == NULL)
+		return;
+
+	/* timestamp */
+	string_print_timestamp(&ctx.fields[FeedFieldTime].str);
+	putchar(FieldSeparator);
+
+	/* title, with the duration when it is known */
+	string_print(&ctx.fields[FeedFieldTitle].str);
+	if (match->duration[0]) {
+		fputs(" [", stdout);
+		fputs(match->duration, stdout);
+		fputs("]", stdout);
+	}
+	putchar(FieldSeparator);
+
+	/* link */
+	string_print(&ctx.fields[FeedFieldLink].str);
+	putchar(FieldSeparator);
+
+	/* content and its type */
+	string_print_encoded(&ctx.fields[FeedFieldContent].str);
+	putchar(FieldSeparator);
+	fputs("plain", stdout);
+	putchar(FieldSeparator);
+
+	/* id */
+	string_print(&ctx.fields[FeedFieldId].str);
+	putchar(FieldSeparator);
+
+	/* author */
+	string_print(&ctx.fields[FeedFieldAuthor].str);
+	putchar(FieldSeparator);
+
+	/* no/empty enclosure */
+	putchar(FieldSeparator);
+
+	/* empty category */
+	putchar('\n');
+}
+
+/* Returns non-zero when both tag names have the same length and are equal,
+ * ignoring case. Both names must be NUL-terminated. */
+static int
+istag(const char *name, size_t len, const char *name2, size_t len2)
+{
+	if (len != len2)
+		return 0;
+	return strcasecmp(name, name2) == 0;
+}
+
+/* Returns non-zero when both attribute names have the same length and are
+ * equal, ignoring case. Both names must be NUL-terminated. */
+static int
+isattr(const char *name, size_t len, const char *name2, size_t len2)
+{
+	if (len != len2)
+		return 0;
+	return strcasecmp(name, name2) == 0;
+}
+
+/* XML parser callback: attribute value data.
+ * Only attributes of the Atom <link> tag are collected: the rel="" value in
+ * `attrrel` and the href="" value in `tmpstr`. */
+static void
+xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *v, size_t vl)
+{
+	/* only the link tag is of interest, this also rejects "no tag" */
+	if (ctx.tag.id != AtomTagLink)
+		return;
+
+	if (ISINCONTENT(ctx) || ISCONTENTTAG(ctx))
+		return;
+
+	if (isattr(n, nl, STRP("rel")))
+		string_append(&attrrel, v, vl);
+	else if (isattr(n, nl, STRP("href")))
+		string_append(&tmpstr, v, vl);
+}
+
+/* XML parser callback: entity inside an attribute value.
+ * The entity is translated when possible, else its text is used as it is.
+ * Either way the data is handed to the xmlattr handler. */
+static void
+xmlattrentity(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+	const char *data, size_t datalen)
+{
+	char decoded[8];
+	int declen;
+
+	if (ISINCONTENT(ctx) || !ctx.tag.id)
+		return;
+
+	declen = xml_entitytostr(data, decoded, sizeof(decoded));
+	if (declen <= 0) {
+		/* not translated: pass the entity as data */
+		xmlattr(p, t, tl, n, nl, data, datalen);
+		return;
+	}
+	xmlattr(p, t, tl, n, nl, decoded, (size_t)declen);
+}
+
+/* XML parser callback: start of an attribute.
+ * Clears the value collected for an earlier attribute of the same kind, so
+ * the last value is used when an attribute is repeated. */
+static void
+xmlattrstart(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl)
+{
+	if (ISINCONTENT(ctx))
+		return;
+
+	if (attrrel.len && isattr(n, nl, STRP("rel"))) {
+		string_clear(&attrrel);
+		return;
+	}
+
+	if (!tmpstr.len)
+		return;
+
+	/* use the last value for multiple attribute values */
+	if (isattr(n, nl, STRP("href")) || isattr(n, nl, STRP("url")))
+		string_clear(&tmpstr);
+}
+
+/* XML parser callback: character data. It is appended to the field that is
+ * currently open, data outside of a field is ignored. */
+static void
+xmldata(XMLParser *p, const char *s, size_t len)
+{
+	if (ctx.field)
+		string_append(ctx.field, s, len);
+}
+
+/* XML parser callback: entity inside character data.
+ * The entity is translated when possible, else its text is used as it is.
+ * Either way the data is handed to the xmldata handler. */
+static void
+xmldataentity(XMLParser *p, const char *data, size_t datalen)
+{
+	char decoded[8];
+	int declen;
+
+	if (!ctx.field)
+		return;
+
+	declen = xml_entitytostr(data, decoded, sizeof(decoded));
+	if (declen <= 0) {
+		/* not translated: pass the entity as data */
+		xmldata(p, data, datalen);
+		return;
+	}
+	xmldata(p, decoded, (size_t)declen);
+}
+
+/* XML parser callback: start of a tag, before its attributes are parsed.
+ * Detects the start of an Atom <entry> and, inside an entry, sets ctx.tag to
+ * the matching known tag or to `notag` for an unknown tag. */
+static void
+xmltagstart(XMLParser *p, const char *t, size_t tl)
+{
+	const FeedTag *f;
+
+	if (ISINCONTENT(ctx))
+		return;
+
+	/* start of RSS or Atom item / entry */
+	if (ctx.feedtype == FeedTypeNone) {
+		if (istag(t, tl, STRP("entry")))
+			ctx.feedtype = FeedTypeAtom;
+		return;
+	}
+
+	/* field tagid already set or nested tags. */
+	if (ctx.tag.id) {
+		/* nested <author><name> for Atom */
+		if (ctx.tag.id == AtomTagAuthor &&
+		    istag(t, tl, STRP("name"))) {
+			memcpy(&(ctx.tag), &atomtagauthorname, sizeof(ctx.tag));
+		} else {
+			return; /* other nested tags are not allowed: return */
+		}
+	}
+
+	/* in item */
+	if (ctx.tag.id == TagUnknown) {
+		if (!(f = gettag(ctx.feedtype, t, tl)))
+			f = &notag;
+		memcpy(&(ctx.tag), f, sizeof(ctx.tag));
+	}
+
+	ctx.iscontenttag = (fieldmap[ctx.tag.id] == FeedFieldContent);
+	/* the rel attribute belongs to one tag only */
+	string_clear(&attrrel);
+}
+
+/* XML parser callback: start of a tag, after its attributes are parsed.
+ * Selects the field that receives the data of the current tag: ctx.field is
+ * set and the field is cleared, unless the tag maps to no field or the field
+ * was already set by a tag of the same or a higher priority. */
+static void
+xmltagstartparsed(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+	enum TagId id;
+	int fi;
+
+	if (ISINCONTENT(ctx))
+		return;
+
+	/* A link tag is typed by its rel attribute: empty or "alternate" is
+	   the link of the entry. Other values could be "enclosure",
+	   "related", "self" or "via": these stay a plain (unused) link. */
+	if (ctx.tag.id == AtomTagLink &&
+	    (!attrrel.len || isattr(attrrel.data, attrrel.len, STRP("alternate"))))
+		ctx.tag.id = AtomTagLinkAlternate;
+
+	id = ctx.tag.id;
+	fi = fieldmap[id];
+
+	/* unknown tag: it has no field */
+	if (fi == -1)
+		return;
+	/* lesser priority is ignored, when tags of the same type are repeated
+	   only the first is used */
+	if (id <= ctx.fields[fi].tagid)
+		return;
+
+	if (ctx.iscontenttag) {
+		ctx.iscontent = 1;
+		ctx.iscontenttag = 0;
+	}
+
+	ctx.fields[fi].tagid = id;
+	ctx.field = &(ctx.fields[fi].str);
+
+	/* the field is overwritten (with a priority order) by the new value */
+	string_clear(ctx.field);
+}
+
+/* XML parser callback: end of a tag.
+ * Closes the field that is currently open when the tag matches it. At the
+ * end of an Atom <entry> the collected fields are printed with printfields()
+ * and then cleared for the next entry. */
+static void
+xmltagend(XMLParser *p, const char *t, size_t tl, int isshort)
+{
+	size_t i;
+
+	if (ctx.feedtype == FeedTypeNone)
+		return;
+
+	if (ISINCONTENT(ctx)) {
+		/* not a closed content field */
+		if (!istag(ctx.tag.name, ctx.tag.len, t, tl))
+			return;
+	} else if (ctx.tag.id && istag(ctx.tag.name, ctx.tag.len, t, tl)) {
+		/* matched tag end: close it */
+	} else if (!ctx.tag.id && ctx.feedtype == FeedTypeAtom &&
+	           istag(t, tl, STRP("entry"))) {
+		/* end of Atom entry */
+		printfields();
+
+		/* clear strings */
+		for (i = 0; i < FeedFieldLast; i++) {
+			string_clear(&ctx.fields[i].str);
+			ctx.fields[i].tagid = TagUnknown;
+		}
+		/* allow parsing of Atom and RSS concatenated in one XML stream. */
+		ctx.feedtype = FeedTypeNone;
+	} else {
+		return; /* not end of field */
+	}
+
+	/* temporary string: for fields that cannot be processed
+	   directly and need more context, for example by its tag
+	   attributes, like the Atom link rel="alternate|enclosure". */
+	if (tmpstr.len && ctx.field) {
+		string_clear(ctx.field);
+		string_append(ctx.field, tmpstr.data, tmpstr.len);
+	}
+
+	/* close field */
+	string_clear(&tmpstr); /* reuse and clear temporary string */
+
+	if (ctx.tag.id == AtomTagAuthorName)
+		memcpy(&(ctx.tag), &atomtagauthor, sizeof(ctx.tag)); /* outer tag */
+	else
+		memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
+
+	ctx.iscontent = 0;
+	ctx.field = NULL;
+}
+
+/* Request the Atom feed with the videos of a channel from www.youtube.com.
+ * Returns what request() returns, or NULL when the request path does not
+ * fit in the buffer. */
+static char *
+request_channel_feed(const char *channelid)
+{
+	char path[2048];
+	int n;
+
+	n = snprintf(path, sizeof(path), "/feeds/videos.xml?channel_id=%s", channelid);
+
+	/* only do the request when the path is complete (not truncated) */
+	if (n >= 0 && (size_t)n < sizeof(path))
+		return request("www.youtube.com", path, "");
+
+	return NULL;
+}
+
+/* Returns non-zero when `s` looks like a channel id: exactly 24 characters,
+ * each of them an ASCII letter, a digit, '-' or '_'. */
+int
+isvalidchannel(const char *s)
+{
+	size_t len = 0;
+	unsigned char c;
+
+	while ((c = (unsigned char)s[len]) != '\0') {
+		if (!ISALPHA(c) && !ISDIGIT(c) && c != '-' && c != '_')
+			return 0;
+		len++;
+	}
+
+	return len == 24;
+}
+
+/* Report wrong usage and exit. On the command-line the usage text is written
+ * to stderr and the exit status is 1. In CGI mode a "400 Bad Request"
+ * response is written to stdout and the exit status is 0. */
+void
+usage(void)
+{
+	if (!cgimode) {
+		fputs("usage: feed <channelid> [atom|json|tsv]\n", stderr);
+		exit(1);
+	}
+
+	fputs("Status: 400 Bad Request\r\n", stdout);
+	fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
+	fputs("400 Bad Request\n", stdout);
+	exit(0);
+}
+
+/* Fetch the videos and the Atom feed of a channel, combine them and print
+ * the result as Atom, JSON Feed or TSV (sfeed).
+ *
+ * CGI mode is used when REQUEST_URI is set: the part after the last '/' is
+ * "<channelid>" or "<channelid>.<format>". Else the channel id and the
+ * optional format are the command-line arguments. The format is "atom" or
+ * "xml", "json", "tsv" or "sfeed" and defaults to "tsv".
+ *
+ * Returns 0 on success and also when no videos were found (nothing is
+ * printed then), 1 when the feed could not be fetched. */
+int
+main(int argc, char *argv[])
+{
+	char buf[256];
+	const char *channelid = NULL;
+	char *data, *format = "tsv", *p, *requesturi, *tmp;
+	size_t i;
+	int isatom, isjson;
+
+	if (pledge("stdio dns inet rpath unveil", NULL) == -1)
+		err(1, "pledge");
+
+	if ((tmp = getenv("REQUEST_URI"))) {
+		cgimode = 1;
+
+		/* work on a copy: it is modified below */
+		strlcpy(buf, tmp, sizeof(buf));
+		requesturi = buf;
+
+		if (!(p = strrchr(requesturi, '/')))
+			usage();
+
+		channelid = p + 1;
+		if ((p = strrchr(channelid, '.'))) {
+			*p = '\0'; /* NUL terminate */
+			format = p + 1;
+		}
+	} else {
+		if (argc <= 1)
+			usage();
+
+		channelid = argv[1];
+		if (argc > 2)
+			format = argv[2];
+	}
+	if (!channelid || !isvalidchannel(channelid))
+		usage();
+
+	/* decide the output format once: "xml" is an alias for "atom" and
+	   must get the same header, items and footer */
+	isatom = !strcmp(format, "atom") || !strcmp(format, "xml");
+	isjson = !strcmp(format, "json");
+
+	if (isatom)
+		printfields = atom_item;
+	else if (isjson)
+		printfields = json_item;
+	else if (!strcmp(format, "tsv") || !strcmp(format, "sfeed"))
+		printfields = sfeed_item;
+	else
+		usage();
+
+	search_res = youtube_channel_videos(channelid);
+	if (!search_res || search_res->nitems == 0) {
+		/* error or no videos found */
+		return 0;
+	}
+
+	if (!(data = request_channel_feed(channelid)))
+		return 1; /* error, no data at all */
+
+	/* all network requests are done: only stdio is needed from here */
+	if (pledge("stdio", NULL) == -1)
+		err(1, "pledge");
+
+	setxmldata(data, strlen(data));
+
+	memcpy(&(ctx.tag), &notag, sizeof(ctx.tag));
+
+	parser.xmlattr = xmlattr;
+	parser.xmlattrentity = xmlattrentity;
+	parser.xmlattrstart = xmlattrstart;
+	parser.xmlcdata = xmldata;
+	parser.xmldata = xmldata;
+	parser.xmldataentity = xmldataentity;
+	parser.xmltagend = xmltagend;
+	parser.xmltagstart = xmltagstart;
+	parser.xmltagstartparsed = xmltagstartparsed;
+
+	/* init all fields, make sure it has a value: the item printers pass
+	   the data pointer of a field to functions that expect a string */
+	for (i = 0; i < FeedFieldLast; i++) {
+		string_append(&(ctx.fields[i].str), " ", 1);
+		string_clear(&(ctx.fields[i].str));
+	}
+
+	if (cgimode) {
+		fputs("Status: 200 OK\r\n", stdout);
+		if (isatom)
+			fputs("Content-Type: text/xml; charset=utf-8\r\n\r\n", stdout);
+		else if (isjson)
+			fputs("Content-Type: application/json; charset=utf-8\r\n\r\n", stdout);
+		else
+			fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
+	}
+
+	if (isatom)
+		atom_header();
+	else if (isjson)
+		json_header();
+
+	/* NOTE: getnext is defined in xml.h for inline optimization */
+	xml_parse(&parser);
+
+	if (isatom)
+		atom_footer();
+	else if (isjson)
+		json_footer();
+
+	return 0;
+}