add jf2sfeed: convert JSON Feed to sfeed(5) - jfconvert - JSON Feed (subset) to sfeed or Atom converter
git clone git://git.codemadness.org/jfconvert
---
commit 6dd24b7a0e38fe5d01726178ac4b5cc5f325cd59
parent 469bc51805a16876507da21e3145e05bb3c57e72
Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Tue, 4 Apr 2023 18:39:13 +0200
add jf2sfeed: convert JSON Feed to sfeed(5)
Diffstat:
M Makefile | 16 ++++++++--------
M README | 7 +++++--
A jf2sfeed.1 | 39 +++++++++++++++++++++++++++++++
A jf2sfeed.c | 538 +++++++++++++++++++++++++++++++
4 files changed, 590 insertions(+), 10 deletions(-)
---
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
.POSIX:
-NAME = jf2atom
+NAME = jfconvert
VERSION = 0.1
# paths
@@ -11,14 +11,14 @@ DOCPREFIX = ${PREFIX}/share/doc/${NAME}
RANLIB = ranlib
# use system flags.
-JFA_CFLAGS = ${CFLAGS}
-JFA_LDFLAGS = ${LDFLAGS}
-JFA_CPPFLAGS = -D_DEFAULT_SOURCE
+JFCONVERT_CFLAGS = ${CFLAGS}
+JFCONVERT_LDFLAGS = ${LDFLAGS}
+JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE
# uncomment for conservative locked I/O.
-#JFA_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
+#JFCONVERT_CPPFLAGS = -D_DEFAULT_SOURCE -DGETNEXT=getchar
-BIN = ${NAME}
+BIN = jf2atom jf2sfeed
SRC = ${BIN:=.c}
HDR = json.h
MAN1 = ${BIN:=.1}
@@ -41,10 +41,10 @@ OBJ = ${SRC:.c=.o} ${LIBJSONOBJ}
${OBJ}: ${HDR}
.o:
- ${CC} ${JFA_LDFLAGS} -o $@ $< ${LIB}
+ ${CC} ${JFCONVERT_LDFLAGS} -o $@ $< ${LIB}
.c.o:
- ${CC} ${JFA_CFLAGS} ${JFA_CPPFLAGS} -o $@ -c $<
+ ${CC} ${JFCONVERT_CFLAGS} ${JFCONVERT_CPPFLAGS} -o $@ -c $<
${LIBJSON}: ${LIBJSONOBJ}
${AR} -rc $@ $?
diff --git a/README b/README
@@ -1,9 +1,12 @@
-jf2atom
+jfconvert
-------
+JSON Feed (subset) to sfeed(5) converter.
JSON Feed (subset) to Atom converter.
JSON Feed specification: https://www.jsonfeed.org/version/1/
+
+sfeed: https://codemadness.org/sfeed.html
Atom specification: https://datatracker.ietf.org/doc/html/rfc4287
@@ -31,7 +34,7 @@ Optional dependencies
Examples and documentation
--------------------------
-See the man page.
+See the man pages.
License
diff --git a/jf2sfeed.1 b/jf2sfeed.1
@@ -0,0 +1,39 @@
+.Dd April 4, 2023
+.Dt JF2SFEED 1
+.Os
+.Sh NAME
+.Nm jf2sfeed
+.Nd convert JSON Feed to sfeed
+.Sh SYNOPSIS
+.Nm
+.Sh DESCRIPTION
+.Nm
+reads JSON data from stdin.
+It writes sfeed TSV data to stdout.
+.Sh EXIT STATUS
+.Ex -std
+.Sh EXAMPLES
+.Bd -literal
+jf2sfeed < input.json
+.Ed
+.Pp
+An example of how to use a JSON Feed with sfeed:
+.Bd -literal
+curl -s 'https://codemadness.org/jsonfeed_content.json' | jf2sfeed | sfeed_curses
+.Ed
+.Sh SEE ALSO
+.Xr awk 1 ,
+.Xr curl 1 ,
+.Xr sfeed 1
+.Sh STANDARDS
+.Rs
+.%T The Atom Syndication Format
+.%R RFC 4287
+.Re
+.Rs
+.%T JSON Feed Version 1.1
+.%U https://www.jsonfeed.org/version/1.1/
+.%D Nov, 2022
+.Re
+.Sh AUTHORS
+.An Hiltjo Posthuma Aq Mt hiltjo@codemadness.org
diff --git a/jf2sfeed.c b/jf2sfeed.c
@@ -0,0 +1,538 @@
+#include <errno.h>
+#include <limits.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef __OpenBSD__
+#include <unistd.h>
+#else
+#define pledge(a,b) 0
+#endif
+
+#include "json.h"
+
+/* hint for compilers and static analyzers that a function exits */
+#ifndef __dead
+#define __dead
+#endif
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8 */
+#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISCNTRL(c) ((c) < ' ' || (c) == 0x7f)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
+#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+
+/* compare attributes case-sensitively */
+#define attrcmp strcmp
+
+enum {
+ FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+ FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+ FeedFieldLast
+};
+
+enum ContentType {
+ ContentTypeNone = 0,
+ ContentTypePlain = 1,
+ ContentTypeHTML = 2
+};
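+/* content-type names, indexed by enum ContentType */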
+static const char *contenttypes[] = { "", "plain", "html" };
+
+/* String data / memory pool */
+typedef struct string {
+ char *data; /* data */
+ size_t len; /* string length */
+ size_t bufsiz; /* allocated size */
+} String;
+
+static String fields[FeedFieldLast]; /* data for current item */
+static enum ContentType contenttype; /* content-type for item */
+static int itemisopen = 0;
+
+static const int FieldSeparator = '\t';
+/* separator for multiple values in a field, separator should be 1 byte */
+static const char FieldMultiSeparator[] = "|";
+
+/* print to stderr, print error message of errno and exit().
+ Unlike BSD err() it does not prefix __progname */
+__dead void
+err(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno;
+
+ saved_errno = errno;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputs(": ", stderr);
+ }
+ fprintf(stderr, "%s\n", strerror(saved_errno));
+
+ exit(exitstatus);
+}
+
+/* print to stderr and exit().
+ Unlike BSD errx() it does not prefix __progname */
+__dead void
+errx(int exitstatus, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (fmt) {
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+ fputs("\n", stderr);
+
+ exit(exitstatus);
+}
+
+
+/* Convert time fields. Returns a UNIX timestamp. */
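+/* The year is given as in struct tm (years since 1900) and mon is zero-based. */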
+static long long
+datetounix(long long year, int mon, int day, int hour, int min, int sec)
+{
+ static const long secs_through_month[] = {
+ 0, 31 * 86400, 59 * 86400, 90 * 86400,
+ 120 * 86400, 151 * 86400, 181 * 86400, 212 * 86400,
+ 243 * 86400, 273 * 86400, 304 * 86400, 334 * 86400 };
+ int is_leap = 0, cycles, centuries = 0, leaps = 0, rem;
+ long long t;
+
+ if (year - 2ULL <= 136) {
+ leaps = (year - 68) >> 2;
+ if (!((year - 68) & 3)) {
+ leaps--;
+ is_leap = 1;
+ } else {
+ is_leap = 0;
+ }
+ t = 31536000 * (year - 70) + 86400 * leaps;
+ } else {
+ cycles = (year - 100) / 400;
+ rem = (year - 100) % 400;
+ if (rem < 0) {
+ cycles--;
+ rem += 400;
+ }
+ if (!rem) {
+ is_leap = 1;
+ } else {
+ if (rem >= 300)
+ centuries = 3, rem -= 300;
+ else if (rem >= 200)
+ centuries = 2, rem -= 200;
+ else if (rem >= 100)
+ centuries = 1, rem -= 100;
+ if (rem) {
+ leaps = rem / 4U;
+ rem %= 4U;
+ is_leap = !rem;
+ }
+ }
+ leaps += 97 * cycles + 24 * centuries - is_leap;
+ t = ((year - 100) * 31536000LL) + (leaps * 86400LL) + 946771200LL;
+ }
+ t += secs_through_month[mon];
+ if (is_leap && mon >= 2)
+ t += 86400;
+ t += 86400LL * (day - 1);
+ t += 3600LL * hour;
+ t += 60LL * min;
+ t += sec;
+
+ return t;
+}
+
+/* Get timezone from string, return time offset in seconds from UTC. */
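+/* For example "+02:00" and "+0200" return 7200, "-05:00" returns -18000 and "Z" returns 0. */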
+static long
+gettzoffset(const char *s)
+{
+ const char *p;
+ long tzhour = 0, tzmin = 0;
+ size_t i;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+ switch (*s) {
+ case '-': /* offset */
+ case '+':
+ for (i = 0, p = s + 1; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzhour = (tzhour * 10) + (*p - '0');
+ if (*p == ':')
+ p++;
+ for (i = 0; i < 2 && ISDIGIT((unsigned char)*p); i++, p++)
+ tzmin = (tzmin * 10) + (*p - '0');
+ return ((tzhour * 3600) + (tzmin * 60)) * (s[0] == '-' ? -1 : 1);
+ default: /* timezone name */
+ break;
+ }
+ return 0;
+}
+
+/* Parse time string `s` into the UNIX timestamp `tp`.
+ Returns 0 on success or -1 on failure. */
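+/* For example "2023-04-04T18:39:13.000+02:00" is parsed to the timestamp 1680626353. */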
+static int
+parsetime(const char *s, long long *tp)
+{
+ int va[6] = { 0 }, i, v, vi;
+
+ for (; ISSPACE((unsigned char)*s); s++)
+ ;
+
+ if (!ISDIGIT((unsigned char)s[0]) ||
+ !ISDIGIT((unsigned char)s[1]) ||
+ !ISDIGIT((unsigned char)s[2]) ||
+ !ISDIGIT((unsigned char)s[3]))
+ return -1;
+
+ /* formats "%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S" or "%Y%m%d%H%M%S" */
+ vi = 0;
+
+ /* parse time parts (and possibly remaining date parts) */
+ for (; *s && vi < 6; vi++) {
+ for (i = 0, v = 0; i < ((vi == 0) ? 4 : 2) &&
+ ISDIGIT((unsigned char)*s); s++, i++) {
+ v = (v * 10) + (*s - '0');
+ }
+ va[vi] = v;
+
+ if ((vi < 2 && *s == '-') ||
+ (vi == 2 && (*s == 'T' || ISSPACE((unsigned char)*s))) ||
+ (vi > 2 && *s == ':'))
+ s++;
+ }
+
+ /* skip milliseconds in for example: "%Y-%m-%dT%H:%M:%S.000Z" */
+ if (*s == '.') {
+ for (s++; ISDIGIT((unsigned char)*s); s++)
+ ;
+ }
+
+ /* invalid range */
+ if (va[0] < 0 || va[0] > 9999 ||
+ va[1] < 1 || va[1] > 12 ||
+ va[2] < 1 || va[2] > 31 ||
+ va[3] < 0 || va[3] > 23 ||
+ va[4] < 0 || va[4] > 59 ||
+ va[5] < 0 || va[5] > 60) /* allow leap second */
+ return -1;
+
+ *tp = datetounix(va[0] - 1900, va[1] - 1, va[2], va[3], va[4], va[5]) -
+ gettzoffset(s);
+
+ return 0;
+}
+
+/* Handle read or write errors for a FILE * stream */
+static void
+checkfileerror(FILE *fp, const char *name, int mode)
+{
+ if (mode == 'r' && ferror(fp))
+ errx(1, "read error: %s", name);
+ else if (mode == 'w' && (fflush(fp) || ferror(fp)))
+ errx(1, "write error: %s", name);
+}
+
+/* Clear string only; don't free, prevents unnecessary reallocation. */
+static void
+string_clear(String *s)
+{
+ if (s->data)
+ s->data[0] = '\0';
+ s->len = 0;
+}
+
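+/* Grow the buffer to the next power of two greater than newlen (at least 64 bytes). */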
+static void
+string_buffer_realloc(String *s, size_t newlen)
+{
+ size_t alloclen;
+
+ if (newlen > SIZE_MAX / 2) {
+ alloclen = SIZE_MAX;
+ } else {
+ for (alloclen = 64; alloclen <= newlen; alloclen *= 2)
+ ;
+ }
+ if (!(s->data = realloc(s->data, alloclen)))
+ err(1, "realloc");
+ s->bufsiz = alloclen;
+}
+
+/* Append data to String, s->data and data may not overlap. */
+static void
+string_append(String *s, const char *data, size_t len)
+{
+ if (!len)
+ return;
+
+ if (s->len >= SIZE_MAX - len) {
+ errno = EOVERFLOW;
+ err(1, "realloc");
+ }
+
+ /* check if allocation is necessary, never shrink the buffer. */
+ if (s->len + len >= s->bufsiz)
+ string_buffer_realloc(s, s->len + len + 1);
+ memcpy(s->data + s->len, data, len);
+ s->len += len;
+ s->data[s->len] = '\0';
+}
+
+/* Clear and append string */
+static void
+string_set(String *s, const char *data, size_t len)
+{
+ string_clear(s);
+ string_append(s, data, len);
+}
+
+/* Print text, encode TABs, newlines and '\'.
+ * Other control characters are removed. */
+static void
+string_print_encoded(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = p + s->len;
+
+ for (; *p && p != e; p++) {
+ switch (*p) {
+ case '\n': putchar('\\'); putchar('n'); break;
+ case '\\': putchar('\\'); putchar('\\'); break;
+ case '\t': putchar('\\'); putchar('t'); break;
+ default:
+ /* ignore control chars */
+ if (!ISCNTRL((unsigned char)*p))
+ putchar(*p);
+ break;
+ }
+ }
+}
+
+/* Print text, replace TABs, carriage returns and other whitespace with ' '.
+ * Other control characters are removed. */
+static void
+string_print(String *s)
+{
+ const char *p, *e;
+
+ if (!s->data || !s->len)
+ return;
+
+ p = s->data;
+ e = s->data + s->len;
+ for (; *p && p != e; p++) {
+ if (ISSPACE((unsigned char)*p))
+ putchar(' '); /* any whitespace to space */
+ else if (!ISCNTRL((unsigned char)*p))
+ /* ignore other control chars */
+ putchar(*p);
+ }
+}
+
+/* Print as UNIX timestamp, print nothing if the time is empty or invalid. */
+static void
+string_print_timestamp(String *s)
+{
+ long long t;
+
+ if (!s->data || !s->len)
+ return;
+
+ if (parsetime(s->data, &t) != -1)
+ printf("%lld", t);
+}
+
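+/* Print the current item as one sfeed(5) TSV line with the fields:
+ * timestamp, title, link, content, content-type, id, author, enclosure, category. */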
+static void
+printfields(void)
+{
+ string_print_timestamp(&fields[FeedFieldTime]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldTitle]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldLink]);
+ putchar(FieldSeparator);
+ string_print_encoded(&fields[FeedFieldContent]);
+ putchar(FieldSeparator);
+ fputs(contenttypes[contenttype], stdout);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldId]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldAuthor]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldEnclosure]);
+ putchar(FieldSeparator);
+ string_print(&fields[FeedFieldCategory]);
+ putchar('\n');
+
+ if (ferror(stdout)) /* check for errors but do not flush */
+ checkfileerror(stdout, "<stdout>", 'w');
+}
+
+static void
+newitem(void)
+{
+ size_t i;
+
+ contenttype = ContentTypeNone;
+ for (i = 0; i < FeedFieldLast; i++)
+ string_clear(&fields[i]);
+
+}
+
+static void
+processnode(struct json_node *nodes, size_t depth, const char *value, size_t valuelen)
+{
+ /* item */
+ if (depth == 3) {
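+ /* a new object inside the top-level "items" array: print the previous item (if any) and reset the fields */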
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (itemisopen)
+ printfields();
+ newitem();
+ itemisopen = 1;
+ }
+ }
+
+ /* item attributes */
+ if (depth == 4) {
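+ /* values that are direct members of an item object, such as "title", "url", "id", "content_html" and "date_published" */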
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ !attrcmp(nodes[1].name, "items")) {
+ if (!attrcmp(nodes[3].name, "content_html")) {
+ string_set(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypeHTML;
+ } else if (!attrcmp(nodes[3].name, "content_text")) {
+ /* prefer content_html; content set from a summary is overridden */
+ if (contenttype != ContentTypeHTML) {
+ string_set(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "date_published")) {
+ /* date_published has higher priority than date_modified */
+ string_set(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "date_modified")) {
+ if (!fields[FeedFieldTime].len)
+ string_append(&fields[FeedFieldTime], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "id")) {
+ if (!fields[FeedFieldId].len)
+ string_append(&fields[FeedFieldId], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "summary")) {
+ /* only if content_html or content_text is not set yet. */
+ if (!fields[FeedFieldContent].len) {
+ string_append(&fields[FeedFieldContent], value, valuelen);
+ contenttype = ContentTypePlain;
+ }
+ } else if (!attrcmp(nodes[3].name, "title")) {
+ if (!fields[FeedFieldTitle].len)
+ string_set(&fields[FeedFieldTitle], value, valuelen);
+ } else if (!attrcmp(nodes[3].name, "url")) {
+ if (!fields[FeedFieldLink].len)
+ string_append(&fields[FeedFieldLink], value, valuelen);
+ }
+ }
+ }
+
+ if (depth == 5) {
+ /* 1.0 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_OBJECT &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "author") &&
+ !attrcmp(nodes[4].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* tags / categories */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "tags")) {
+ if (fields[FeedFieldCategory].len)
+ string_append(&fields[FeedFieldCategory], FieldMultiSeparator,
+ sizeof(FieldMultiSeparator) - 1);
+ string_append(&fields[FeedFieldCategory], value, valuelen);
+ }
+ }
+
+ if (depth == 6) {
+ /* 1.1 author name */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ nodes[5].type == JSON_TYPE_STRING &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "authors") &&
+ !attrcmp(nodes[5].name, "name")) {
+ if (!fields[FeedFieldAuthor].len)
+ string_append(&fields[FeedFieldAuthor], value, valuelen);
+ }
+
+ /* enclosure attributes */
+ if (nodes[0].type == JSON_TYPE_OBJECT &&
+ nodes[1].type == JSON_TYPE_ARRAY &&
+ nodes[2].type == JSON_TYPE_OBJECT &&
+ nodes[3].type == JSON_TYPE_ARRAY &&
+ nodes[4].type == JSON_TYPE_OBJECT &&
+ (nodes[5].type == JSON_TYPE_STRING || nodes[5].type == JSON_TYPE_NUMBER) &&
+ !attrcmp(nodes[1].name, "items") &&
+ !attrcmp(nodes[3].name, "attachments") &&
+ !attrcmp(nodes[5].name, "url")) {
+ if (!fields[FeedFieldEnclosure].len)
+ string_append(&fields[FeedFieldEnclosure], value, valuelen);
+ }
+ }
+
+ if (ferror(stdout)) {
+ fprintf(stderr, "write error: <stdout>\n");
+ exit(2);
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ if (pledge("stdio", NULL) == -1)
+ err(1, "pledge");
+
+ switch (parsejson(processnode)) {
+ case JSON_ERROR_MEM:
+ errx(2, "error: cannot allocate enough memory");
+ case JSON_ERROR_INVALID:
+ errx(1, "error: invalid JSON");
+ }
+
+ /* print the fields of the last item that is still open */
+ if (itemisopen)
+ printfields();
+
+ if (ferror(stdin))
+ errx(2, "read error: <stdin>");
+ if (fflush(stdout) || ferror(stdout))
+ errx(2, "write error: <stdout>");
+
+ return 0;
+}