codemadness.org

       improvements - json2tsv - JSON to TSV converter
 (HTM) git clone git://git.codemadness.org/json2tsv
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 2f12ae56971af8acaa52357fe1042d37f71ffbd4
 (DIR) parent 9a85d6c5f69749ac71c690e5b4d36b8e70b15d16
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sun, 20 Oct 2019 14:30:24 +0200
       
       improvements
       
       - split the primitive type into more precise JSON types: bool, null and number.
       - separate JSON code into a reusable "library": json.c.
       - remove errstr from parsejson(): just return an error code for out-of-memory
         and a JSON parse error. The tool returns exit code 2 when out-of-memory.
       - much more strict JSON parsing for incorrect input.
       - use a fixed-size buffer for primitives. The longest primitives are numbers,
         which cannot be long anyway.
       - improve README example (reddit).
       
       Diffstat:
         M Makefile                            |      26 +++++++++++++++++++++-----
         M README                              |      35 ++++++++++++++++---------------
         A json.c                              |     314 +++++++++++++++++++++++++++++++
         A json.h                              |      26 ++++++++++++++++++++++++++
         M json2tsv.c                          |     301 +------------------------------
       
       5 files changed, 386 insertions(+), 316 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -8,35 +8,51 @@ PREFIX = /usr/local
        MANPREFIX = ${PREFIX}/man
        DOCPREFIX = ${PREFIX}/share/doc/${NAME}
        
       +RANLIB = ranlib
       +
        BIN = ${NAME}
        SRC = ${BIN:=.c}
       +HDR = json.h
        MAN1 = ${BIN:=.1}
        DOC = \
                LICENSE\
                README
        
       +LIBJSON = libjson.a
       +LIBJSONSRC = json.c
       +LIBJSONOBJ = ${LIBJSONSRC:.c=.o}
       +
       +LIB = ${LIBJSON}
       +
        all: ${BIN}
        
       -${BIN}: ${@:=.o}
       +${BIN}: ${LIB} ${@:=.o}
        
       -OBJ = ${SRC:.c=.o}
       +OBJ = ${SRC:.c=.o} ${LIBJSONOBJ}
       +
       +${OBJ}: ${HDR}
        
        .o:
       -        ${CC} ${LDFLAGS} -o $@ $<
       +        ${CC} ${LDFLAGS} -o $@ $< ${LIB}
        
        .c.o:
                ${CC} ${CFLAGS} ${CPPFLAGS} -o $@ -c $<
        
       +${LIBJSON}: ${LIBJSONOBJ}
       +        ${AR} rc $@ $?
       +        ${RANLIB} $@
       +
        dist:
                rm -rf "${NAME}-${VERSION}"
                mkdir -p "${NAME}-${VERSION}"
       -        cp -f ${MAN1} ${DOC} ${SRC} Makefile "${NAME}-${VERSION}"
       +        cp -f ${MAN1} ${DOC} ${HDR} \
       +                ${SRC} ${LIBJSONSRC} Makefile "${NAME}-${VERSION}"
                # make tarball
                tar -cf - "${NAME}-${VERSION}" | gzip -c > "${NAME}-${VERSION}.tar.gz"
                rm -rf "${NAME}-${VERSION}"
        
        clean:
       -        rm -f ${BIN} ${OBJ}
       +        rm -f ${BIN} ${OBJ} ${LIB}
        
        install: all
                # installing executable files.
 (DIR) diff --git a/README b/README
       @@ -16,10 +16,16 @@ The output format per line is:
        The nodename and value are escaped (\n, \t and \\).  Control-characters are
        removed.
        
       -The type can be: o (for object), a (for array), p (for primitive such as true,
       -false, null, a number) or s (for string).
       +The type field is a single byte and can be:
        
       -Then filtering is easy using some awk script on the node "selector".
       +        a for array
       +        b for bool
       +        n for number
       +        o for object
       +        s for string
       +        ? for null
       +
       +Filtering on the first field "nodename" is easy using awk for example.
        
        See the json2tsv(1) man page for the full documentation.
        
       @@ -32,29 +38,24 @@ plain-text list using awk:
        
        
        #!/bin/sh
       -curl -s -H 'User-Agent:' 'https://old.reddit.com/.json' | \
       +curl -s -H 'User-Agent:' 'https://old.reddit.com/.json?raw_json=1&limit=100' | \
        json2tsv | \
       -awk 'BEGIN {
       -        FS = OFS = "\t";
       -        n = 0;
       -        title = author = subreddit = "";
       -}
       +awk -F '\t' '
        function show() {
       -        if (length(title) == 0)
       +        if (length(o["title"]) == 0)
                        return;
       -        print n ". " title " by " author " in r/" subreddit;
       -        print url;
       +        print n ". " o["title"] " by " o["author"] " in r/" o["subreddit"];
       +        print o["url"];
                print "";
        }
        $1 == ".data.children[].data" {
                show();
                n++;
       -        title = url = author = subreddit = "";
       +        delete o;
       +}
       +$1 ~ /^\.data\.children\[\]\.data\.[a-zA-Z0-9_]*$/ {
       +        o[substr($1, 23)] = $3;
        }
       -$1 == ".data.children[].data.url" { url = $3; }
       -$1 == ".data.children[].data.title" { title = $3; }
       -$1 == ".data.children[].data.author" { author = $3; }
       -$1 == ".data.children[].data.subreddit" { subreddit = $3; }
        END {
                show();
        }'
 (DIR) diff --git a/json.c b/json.c
       @@ -0,0 +1,314 @@
       +#include <ctype.h>
       +#include <errno.h>
       +#include <stdint.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +
       +#define GETNEXT getchar
       +
       +#include "json.h"
       +
       +/*
       + * Encode codepoint `r` as UTF-8 into `s` and return the number of bytes
       + * written.
       + *
       + * Returns 0 and writes nothing when r is 0, so a NUL codepoint is dropped.
       + * Otherwise 1 to 4 bytes are written; no NUL terminator is appended.
       + * `r` is not range-checked: every value above 0xFFFF takes the 4-byte
       + * branch and surrogate values are encoded as-is by the 3-byte branch.
       + * `s` must have room for the bytes written (up to 4).
       + */
       +int
       +codepointtoutf8(long r, char *s)
       +{
       +        if (r == 0) {
       +                return 0; /* NUL byte */
       +        } else if (r <= 0x7F) {
       +                /* 1 byte: 0aaaaaaa */
       +                s[0] = r;
       +                return 1;
       +        } else if (r <= 0x07FF) {
       +                /* 2 bytes: 00000aaa aabbbbbb */
       +                s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
       +                s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
       +                return 2;
       +        } else if (r <= 0xFFFF) {
       +                /* 3 bytes: aaaabbbb bbcccccc */
       +                s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
       +                s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
       +                s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
       +                return 3;
       +        } else {
       +                /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
       +                s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
       +                s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
       +                s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
       +                s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
       +                return 4;
       +        }
       +}
       +
       +/*
       + * Return the numeric value (0-15) of the hexadecimal digit character `c`.
       + * Both lower- and uppercase letters are accepted.
       + *
       + * Any other character yields 0, which cannot be told apart from '0', so
       + * the caller has to validate `c` first (parsejson() checks isxdigit()
       + * before calling this).
       + */
       +int
       +hexdigit(int c)
       +{
       +        if (c >= '0' && c <= '9')
       +                return c - '0';
       +        else if (c >= 'a' && c <= 'f')
       +                return 10 + (c - 'a');
       +        else if (c >= 'A' && c <= 'F')
       +                return 10 + (c - 'A');
       +        return 0;
       +}
       +
       +/*
       + * Make sure the buffer *value, whose allocated size is *sz, can hold
       + * `cur` + `inc` bytes, growing it with realloc() when it cannot.
       + *
       + * The new size starts at the current size (at least 64) and is doubled
       + * until it is strictly greater than the needed size; when more than
       + * SIZE_MAX / 2 bytes are needed, SIZE_MAX is requested instead.
       + *
       + * Returns 0 on success (*value and *sz are updated if the buffer grew).
       + * Returns -1 when cur + inc would overflow (errno is set to EOVERFLOW) or
       + * when realloc() fails; in both cases *value and *sz are left untouched
       + * and the caller still owns *value.
       + */
       +int
       +capacity(char **value, size_t *sz, size_t cur, size_t inc)
       +{
       +        size_t need, newsiz;
       +        char *newp;
       +
       +        /* check for addition overflow */
       +        if (cur > SIZE_MAX - inc) {
       +                errno = EOVERFLOW;
       +                return -1;
       +        }
       +        need = cur + inc;
       +
       +        if (need > *sz) {
       +                if (need > SIZE_MAX / 2) {
       +                        newsiz = SIZE_MAX;
       +                } else {
       +                        for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
       +                                ;
       +                }
       +                if (!(newp = realloc(*value, newsiz)))
       +                        return -1; /* up to caller to free *value */
       +                *value = newp;
       +                *sz = newsiz;
       +        }
       +        return 0;
       +}
       +
       +#define EXPECT_VALUE         "{[\"-0123456789tfn"
       +#define EXPECT_STRING        "\""
       +#define EXPECT_END           "}],"
       +#define EXPECT_NOTHING       ""
       +#define EXPECT_OBJECT_STRING EXPECT_STRING "}"
       +#define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]"
       +
       +#define JSON_INVALID()       do { ret = JSON_ERROR_INVALID; goto end; } while (0);
       +
       +/*
       + * Parse one JSON document read with GETNEXT() and report its values to `cb`.
       + *
       + * cb(nodes, depth, value) is called for every object, array, string, number,
       + * bool and null that is not an object key. `nodes` is the path from the
       + * root to the reported value (`depth` entries, each carrying its type, name
       + * and index). `value` is "" for objects and arrays, the decoded string for
       + * strings and the literal text for numbers, bools and null. Both `nodes`
       + * and `value` point into buffers that are reused and freed by this
       + * function, so they are only valid during the callback.
       + *
       + * Returns 0 on success, JSON_ERROR_INVALID on a parse error (this includes
       + * nesting deeper than JSON_MAX_NODE_DEPTH) and JSON_ERROR_MEM when memory
       + * could not be allocated.
       + */
       +int
       +parsejson(void (*cb)(struct json_node *, size_t, const char *))
       +{
       +        struct json_node nodes[JSON_MAX_NODE_DEPTH] = { 0 };
       +        size_t depth = 0, p = 0, len, sz = 0;
       +        long cp, hi, lo;
       +        char pri[128], *str = NULL;
       +        int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
       +        const char *expect = EXPECT_VALUE;
       +
       +        if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
       +                goto end;
       +        nodes[0].name[0] = '\0';
       +
       +        while (1) {
       +                c = GETNEXT();
       +handlechr:
       +                if (c == EOF)
       +                        break;
       +
       +                if (c && strchr(" \t\n\r", c)) /* (no \v, \f, \b etc) */
       +                        continue;
       +
       +                /* `expect` lists the only characters allowed at this point */
       +                if (!c || !strchr(expect, c))
       +                        JSON_INVALID();
       +
       +                switch (c) {
       +                case ':':
       +                        /* not in an object or key in object is not a string */
       +                        if (!depth || nodes[depth - 1].type != TYPE_OBJECT ||
       +                            nodes[depth].type != TYPE_STRING)
       +                                JSON_INVALID();
       +                        iskey = 0;
       +                        expect = EXPECT_VALUE;
       +                        break;
       +                case '"':
       +                        nodes[depth].type = TYPE_STRING;
       +                        escape = 0;
       +                        len = 0;
       +                        while (1) {
       +                                c = GETNEXT();
       +chr:
       +                                /* EOF or control char: 0x7f is not defined as a control char in RFC8259 */
       +                                if (c < 0x20)
       +                                        JSON_INVALID();
       +
       +                                if (escape) {
       +escchr:
       +                                        escape = 0;
       +                                        switch (c) {
       +                                        case '"': /* FALLTHROUGH */
       +                                        case '\\':
       +                                        case '/': break;
       +                                        case 'b': c = '\b'; break;
       +                                        case 'f': c = '\f'; break;
       +                                        case 'n': c = '\n'; break;
       +                                        case 'r': c = '\r'; break;
       +                                        case 't': c = '\t'; break;
       +                                        case 'u': /* hex hex hex hex */
       +                                                /* room for the longest UTF-8 sequence */
       +                                                if (capacity(&str, &sz, len, 4) == -1)
       +                                                        goto end;
       +                                                for (i = 12, cp = 0; i >= 0; i -= 4) {
       +                                                        if ((c = GETNEXT()) == EOF || !isxdigit(c))
       +                                                                JSON_INVALID(); /* invalid codepoint */
       +                                                        cp |= (hexdigit(c) << i);
       +                                                }
       +                                                /* RFC8259 - 7. Strings - surrogates.
       +                                                 * 0xd800 - 0xdbff - high surrogates: the whole
       +                                                 * range has to be paired, otherwise codepoints
       +                                                 * above U+EFFFF (up to U+10FFFF) are never
       +                                                 * combined. */
       +                                                if (cp >= 0xd800 && cp <= 0xdbff) {
       +                                                        if ((c = GETNEXT()) != '\\') {
       +                                                                len += codepointtoutf8(cp, &str[len]);
       +                                                                goto chr;
       +                                                        }
       +                                                        if ((c = GETNEXT()) != 'u') {
       +                                                                len += codepointtoutf8(cp, &str[len]);
       +                                                                goto escchr;
       +                                                        }
       +                                                        for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
       +                                                                if ((c = GETNEXT()) == EOF || !isxdigit(c))
       +                                                                        JSON_INVALID(); /* invalid codepoint */
       +                                                                lo |= (hexdigit(c) << i);
       +                                                        }
       +                                                        /* 0xdc00 - 0xdfff - low surrogates */
       +                                                        if (lo >= 0xdc00 && lo <= 0xdfff) {
       +                                                                cp = (hi << 10) + lo - 56613888; /* - offset */
       +                                                        } else {
       +                                                                /* handle graceful: raw invalid output bytes */
       +                                                                len += codepointtoutf8(hi, &str[len]);
       +                                                                if (capacity(&str, &sz, len, 4) == -1)
       +                                                                        goto end;
       +                                                                len += codepointtoutf8(lo, &str[len]);
       +                                                                continue;
       +                                                        }
       +                                                }
       +                                                len += codepointtoutf8(cp, &str[len]);
       +                                                continue;
       +                                        default:
       +                                                JSON_INVALID(); /* invalid escape char */
       +                                        }
       +                                        if (capacity(&str, &sz, len, 1) == -1)
       +                                                goto end;
       +                                        str[len++] = c;
       +                                } else if (c == '\\') {
       +                                        escape = 1;
       +                                } else if (c == '"') {
       +                                        if (capacity(&str, &sz, len, 1) == -1)
       +                                                goto end;
       +                                        str[len++] = '\0';
       +
       +                                        if (iskey) {
       +                                                /* object key: store as the node name, including the NUL */
       +                                                if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
       +                                                        goto end;
       +                                                memcpy(nodes[depth].name, str, len);
       +                                        } else {
       +                                                cb(nodes, depth + 1, str);
       +                                        }
       +                                        break;
       +                                } else {
       +                                        if (capacity(&str, &sz, len, 1) == -1)
       +                                                goto end;
       +                                        str[len++] = c;
       +                                }
       +                        }
       +                        if (iskey)
       +                                expect = ":";
       +                        else
       +                                expect = EXPECT_END;
       +                        break;
       +                case '[':
       +                case '{':
       +                        if (depth + 1 >= JSON_MAX_NODE_DEPTH)
       +                                JSON_INVALID(); /* too deep */
       +
       +                        nodes[depth].index = 0;
       +                        if (c == '{') {
       +                                iskey = 1;
       +                                nodes[depth].type = TYPE_OBJECT;
       +                                expect = EXPECT_OBJECT_STRING;
       +                        } else {
       +                                nodes[depth].type = TYPE_ARRAY;
       +                                expect = EXPECT_ARRAY_VALUE;
       +                        }
       +
       +                        cb(nodes, depth + 1, "");
       +
       +                        depth++;
       +                        nodes[depth].index = 0;
       +                        if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
       +                                goto end;
       +                        nodes[depth].name[0] = '\0';
       +                        break;
       +                case ']':
       +                case '}':
       +                        if (!depth ||
       +                           (c == ']' && nodes[depth - 1].type != TYPE_ARRAY) ||
       +                           (c == '}' && nodes[depth - 1].type != TYPE_OBJECT))
       +                                JSON_INVALID(); /* unbalanced nodes */
       +
       +                        /* an empty object "{}" would otherwise leave iskey set and
       +                         * make the next string value be parsed as a key */
       +                        iskey = 0;
       +                        nodes[--depth].index++;
       +                        if (!depth)
       +                                expect = EXPECT_NOTHING;
       +                        else
       +                                expect = EXPECT_END;
       +                        break;
       +                case ',':
       +                        /* a ',' after a top-level value: there is no parent node,
       +                         * nodes[depth - 1] would be out of bounds */
       +                        if (!depth)
       +                                JSON_INVALID();
       +                        nodes[depth - 1].index++;
       +                        if (nodes[depth - 1].type == TYPE_OBJECT) {
       +                                iskey = 1;
       +                                expect = EXPECT_STRING;
       +                        } else {
       +                                iskey = 0;
       +                                expect = EXPECT_VALUE;
       +                        }
       +                        break;
       +                case 't': /* true */
       +                        if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
       +                                JSON_INVALID();
       +                        nodes[depth].type = TYPE_BOOL;
       +                        cb(nodes, depth + 1, "true");
       +                        expect = EXPECT_END;
       +                        break;
       +                case 'f': /* false */
       +                        if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' || GETNEXT() != 'e')
       +                                JSON_INVALID();
       +                        nodes[depth].type = TYPE_BOOL;
       +                        cb(nodes, depth + 1, "false");
       +                        expect = EXPECT_END;
       +                        break;
       +                case 'n': /* null */
       +                        if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
       +                                JSON_INVALID();
       +                        nodes[depth].type = TYPE_NULL;
       +                        cb(nodes, depth + 1, "null");
       +                        expect = EXPECT_END;
       +                        break;
       +                default: /* number */
       +                        nodes[depth].type = TYPE_NUMBER;
       +                        p = 0;
       +                        pri[p++] = c;
       +                        expect = EXPECT_END;
       +                        while (1) {
       +                                c = GETNEXT();
       +                                /* stop at the first non-number char or when pri is full */
       +                                if (!c || !strchr("0123456789eE+-.", c) ||
       +                                    c == EOF || p + 1 >= sizeof(pri)) {
       +                                        pri[p] = '\0';
       +                                        cb(nodes, depth + 1, pri);
       +                                        goto handlechr; /* do not read next char, handle this */
       +                                } else {
       +                                        pri[p++] = c;
       +                                }
       +                        }
       +                }
       +        }
       +        if (depth)
       +                JSON_INVALID(); /* unbalanced nodes */
       +
       +        ret = 0; /* success */
       +end:
       +        for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
       +                free(nodes[depth].name);
       +        free(str);
       +
       +        return ret;
       +}
 (DIR) diff --git a/json.h b/json.h
       @@ -0,0 +1,26 @@
       +#include <stdint.h>
       +
       +/* Type of a JSON node. Each value is the single character used to
       + * represent that type. */
       +enum JSONType {
       +        TYPE_ARRAY     = 'a',
       +        TYPE_OBJECT    = 'o',
       +        TYPE_STRING    = 's',
       +        TYPE_BOOL      = 'b', /* true or false */
       +        TYPE_NULL      = '?',
       +        TYPE_NUMBER    = 'n'
       +};
       +
       +/* Negative error codes returned by parsejson(); it returns 0 on success. */
       +enum JSONError {
       +        JSON_ERROR_MEM     = -2, /* memory could not be allocated */
       +        JSON_ERROR_INVALID = -1  /* input is not accepted as JSON */
       +};
       +
       +#define JSON_MAX_NODE_DEPTH 64
       +
       +/* One level of the path from the root to the current value; parsejson()
       + * passes an array of these to its callback. */
       +struct json_node {
       +        enum JSONType type;
       +        char *name;     /* NUL-terminated name; allocated and freed by parsejson() */
       +        size_t namesiz; /* allocated size of the name buffer */
       +        size_t index; /* count/index for array or object type */
       +};
       +
       +int parsejson(void (*cb)(struct json_node *, size_t, const char *));
 (DIR) diff --git a/json2tsv.c b/json2tsv.c
       @@ -11,299 +11,10 @@
        #define pledge(a,b) 0
        #endif
        
       -#define GETNEXT getchar
       -
       -enum JSONType {
       -        TYPE_PRIMITIVE = 'p',
       -        TYPE_STRING    = 's',
       -        TYPE_ARRAY     = 'a',
       -        TYPE_OBJECT    = 'o'
       -};
       -
       -#define JSON_MAX_NODE_DEPTH 64
       -
       -struct json_node {
       -        enum JSONType type;
       -        char *name;
       -        size_t namesiz;
       -        size_t index; /* count/index for array or object type */
       -};
       -
       -const char *JSON_ERROR_ALLOC         = "cannot allocate enough memory";
       -const char *JSON_ERROR_BALANCE       = "unbalanced nodes";
       -const char *JSON_ERROR_CODEPOINT     = "invalid codepoint";
       -const char *JSON_ERROR_DEPTH         = "max node depth reached";
       -const char *JSON_ERROR_ESCAPE_CHAR   = "unknown escape character in string";
       -const char *JSON_ERROR_INVALID_CHAR  = "invalid character in string";
       -const char *JSON_ERROR_OBJECT_MEMBER = "object member, but not in an object";
       +#include "json.h"
        
        static int showindices = 0; /* -n flag: show indices count for arrays */
        
       -int
       -codepointtoutf8(long r, char *s)
       -{
       -        if (r == 0) {
       -                return 0; /* NUL byte */
       -        } else if (r <= 0x7F) {
       -                /* 1 byte: 0aaaaaaa */
       -                s[0] = r;
       -                return 1;
       -        } else if (r <= 0x07FF) {
       -                /* 2 bytes: 00000aaa aabbbbbb */
       -                s[0] = 0xC0 | ((r & 0x0007C0) >>  6); /* 110aaaaa */
       -                s[1] = 0x80 |  (r & 0x00003F);        /* 10bbbbbb */
       -                return 2;
       -        } else if (r <= 0xFFFF) {
       -                /* 3 bytes: aaaabbbb bbcccccc */
       -                s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
       -                s[1] = 0x80 | ((r & 0x000FC0) >>  6); /* 10bbbbbb */
       -                s[2] = 0x80 |  (r & 0x00003F);        /* 10cccccc */
       -                return 3;
       -        } else {
       -                /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
       -                s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
       -                s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
       -                s[2] = 0x80 | ((r & 0x000FC0) >>  6); /* 10cccccc */
       -                s[3] = 0x80 |  (r & 0x00003F);        /* 10dddddd */
       -                return 4;
       -        }
       -}
       -
       -int
       -hexdigit(int c)
       -{
       -        if (c >= '0' && c <= '9')
       -                return c - '0';
       -        else if (c >= 'a' && c <= 'f')
       -                return 10 + (c - 'a');
       -        else if (c >= 'A' && c <= 'F')
       -                return 10 + (c - 'A');
       -        return 0;
       -}
       -
       -int
       -capacity(char **value, size_t *sz, size_t cur, size_t inc)
       -{
       -        size_t need, newsiz;
       -        char *newp;
       -
       -        /* check for addition overflow */
       -        if (cur > SIZE_MAX - inc) {
       -                errno = EOVERFLOW;
       -                return -1;
       -        }
       -        need = cur + inc;
       -
       -        if (need > *sz) {
       -                if (need > SIZE_MAX / 2) {
       -                        newsiz = SIZE_MAX;
       -                } else {
       -                        for (newsiz = *sz < 64 ? 64 : *sz; newsiz <= need; newsiz *= 2)
       -                                ;
       -                }
       -                if (!(newp = realloc(*value, newsiz)))
       -                        return -1; /* up to caller to free *value */
       -                *value = newp;
       -                *sz = newsiz;
       -        }
       -        return 0;
       -}
       -
       -int
       -parsejson(void (*cb)(struct json_node *, size_t, const char *), const char **errstr)
       -{
       -        struct json_node nodes[JSON_MAX_NODE_DEPTH] = { 0 };
       -        size_t depth = 0, v = 0, vz = 0;
       -        long cp, hi, lo;
       -        int c, i, escape, ret = -1;
       -        char *value = NULL;
       -
       -        *errstr = JSON_ERROR_ALLOC;
       -        if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
       -                goto end;
       -        nodes[0].name[0] = '\0';
       -        nodes[depth].type = TYPE_PRIMITIVE;
       -
       -        while ((c = GETNEXT()) != EOF) {
       -                /* not whitespace or control character */
       -                if (c <= 0x20 || c == 0x7f)
       -                        continue;
       -
       -                switch (c) {
       -                case ':':
       -                        if (!depth || nodes[depth - 1].type != TYPE_OBJECT) {
       -                                *errstr = JSON_ERROR_OBJECT_MEMBER;
       -                                goto end;
       -                        }
       -
       -                        if (capacity(&value, &vz, v, 1) == -1)
       -                                goto end;
       -                        value[v] = '\0';
       -                        if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), v, 1) == -1)
       -                                goto end;
       -                        memcpy(nodes[depth].name, value, v);
       -                        nodes[depth].name[v] = '\0';
       -                        v = 0;
       -                        nodes[depth].type = TYPE_PRIMITIVE;
       -                        break;
       -                case '"':
       -                        nodes[depth].type = TYPE_STRING;
       -                        escape = 0;
       -                        for (;;) {
       -                                c = GETNEXT();
       -chr:
       -                                if (c < 0x20) {
       -                                        /* EOF or control char: 0x7f is not defined as a control char in RFC8259 */
       -                                        *errstr = JSON_ERROR_INVALID_CHAR;
       -                                        goto end;
       -                                }
       -
       -                                if (escape) {
       -escchr:
       -                                        escape = 0;
       -                                        switch (c) {
       -                                        case '"': /* FALLTHROUGH */
       -                                        case '\\':
       -                                        case '/': break;
       -                                        case 'b': c = '\b'; break;
       -                                        case 'f': c = '\f'; break;
       -                                        case 'n': c = '\n'; break;
       -                                        case 'r': c = '\r'; break;
       -                                        case 't': c = '\t'; break;
       -                                        case 'u': /* hex hex hex hex */
       -                                                if (capacity(&value, &vz, v, 4) == -1)
       -                                                        goto end;
       -                                                for (i = 12, cp = 0; i >= 0; i -= 4) {
       -                                                        if ((c = GETNEXT()) == EOF || !isxdigit(c)) {
       -                                                                *errstr = JSON_ERROR_CODEPOINT;
       -                                                                goto end;
       -                                                        }
       -                                                        cp |= (hexdigit(c) << i);
       -                                                }
       -                                                /* RFC8259 - 7. Strings - surrogates.
       -                                                 * 0xd800 - 0xdb7f - high surrogates */
       -                                                if (cp >= 0xd800 && cp <= 0xdb7f) {
       -                                                        if ((c = GETNEXT()) != '\\') {
       -                                                                v += codepointtoutf8(cp, &value[v]);
       -                                                                goto chr;
       -                                                        }
       -                                                        if ((c = GETNEXT()) != 'u') {
       -                                                                v += codepointtoutf8(cp, &value[v]);
       -                                                                goto escchr;
       -                                                        }
       -                                                        for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
       -                                                                if ((c = GETNEXT()) == EOF || !isxdigit(c)) {
       -                                                                        *errstr = JSON_ERROR_CODEPOINT;
       -                                                                        goto end;
       -                                                                }
       -                                                                lo |= (hexdigit(c) << i);
       -                                                        }
       -                                                        /* 0xdc00 - 0xdfff - low surrogates */
       -                                                        if (lo >= 0xdc00 && lo <= 0xdfff) {
       -                                                                cp = (hi << 10) + lo - 56613888; /* - offset */
       -                                                        } else {
       -                                                                /* handle graceful: raw invalid output bytes */
       -                                                                v += codepointtoutf8(hi, &value[v]);
       -                                                                if (capacity(&value, &vz, v, 4) == -1)
       -                                                                        goto end;
       -                                                                v += codepointtoutf8(lo, &value[v]);
       -                                                                continue;
       -                                                        }
       -                                                }
       -                                                v += codepointtoutf8(cp, &value[v]);
       -                                                continue;
       -                                        default:
       -                                                *errstr = JSON_ERROR_ESCAPE_CHAR;
       -                                                goto end;
       -                                        }
       -                                        if (capacity(&value, &vz, v, 1) == -1)
       -                                                goto end;
       -                                        value[v++] = c;
       -                                } else if (c == '\\') {
       -                                        escape = 1;
       -                                } else if (c == '"') {
       -                                        break;
       -                                } else {
       -                                        if (capacity(&value, &vz, v, 1) == -1)
       -                                                goto end;
       -                                        value[v++] = c;
       -                                }
       -                        }
       -                        if (capacity(&value, &vz, v, 1) == -1)
       -                                goto end;
       -                        value[v] = '\0';
       -                        break;
       -                case '[':
       -                case '{':
       -                        if (depth + 1 >= JSON_MAX_NODE_DEPTH) {
       -                                *errstr = JSON_ERROR_DEPTH;
       -                                goto end;
       -                        }
       -
       -                        nodes[depth].index = 0;
       -                        nodes[depth].type = c == '{' ? TYPE_OBJECT : TYPE_ARRAY;
       -
       -                        cb(nodes, depth + 1, "");
       -                        v = 0;
       -
       -                        depth++;
       -                        nodes[depth].index = 0;
       -                        nodes[depth].type = TYPE_PRIMITIVE;
       -                        if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), v, 1) == -1)
       -                                goto end;
       -                        nodes[depth].name[0] = '\0';
       -                        break;
       -                case ']':
       -                case '}':
       -                case ',':
       -                        if (v || nodes[depth].type == TYPE_STRING) {
       -                                if (capacity(&value, &vz, v, 1) == -1)
       -                                        goto end;
       -                                value[v] = '\0';
       -                                cb(nodes, depth + 1, value);
       -                                v = 0;
       -                        }
       -                        if (!depth ||
       -                            (c == ']' && nodes[depth - 1].type != TYPE_ARRAY) ||
       -                            (c == '}' && nodes[depth - 1].type != TYPE_OBJECT)) {
       -                                *errstr = JSON_ERROR_BALANCE;
       -                                goto end;
       -                        }
       -
       -                        if (c == ']' || c == '}') {
       -                                nodes[--depth].index++;
       -                        } else if (c == ',') {
       -                                nodes[depth - 1].index++;
       -                                nodes[depth].type = TYPE_PRIMITIVE;
       -                        }
       -                        break;
       -                default:
       -                        if (capacity(&value, &vz, v, 1) == -1)
       -                                goto end;
       -                        value[v++] = c;
       -                }
       -        }
       -        if (depth) {
       -                *errstr = JSON_ERROR_BALANCE;
       -                goto end;
       -        }
       -        if (v || nodes[depth].type == TYPE_STRING) {
       -                if (capacity(&value, &vz, v, 1) == -1)
       -                        goto end;
       -                value[v] = '\0';
       -                cb(nodes, depth + 1, value);
       -        }
       -
       -        ret = 0; /* success */
       -        *errstr = NULL;
       -end:
       -        for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
       -                free(nodes[depth].name);
       -        free(value);
       -
       -        return ret;
       -}
       -
        void
        printvalue(const char *s)
        {
       @@ -356,8 +67,6 @@ processnode(struct json_node *nodes, size_t depth, const char *value)
        int
        main(int argc, char *argv[])
        {
       -        const char *errstr;
       -
                if (pledge("stdio", NULL) == -1) {
                        fprintf(stderr, "pledge stdio: %s\n", strerror(errno));
                        return 1;
       @@ -366,8 +75,12 @@ main(int argc, char *argv[])
                if (argc > 1 && argv[1][0] == '-' && argv[1][1] == 'n')
                        showindices = 1;
        
       -        if (parsejson(processnode, &errstr) == -1) {
       -                fprintf(stderr, "error: %s\n", errstr);
       +        switch (parsejson(processnode)) {
       +        case JSON_ERROR_MEM:
       +                fputs("error: cannot allocate enough memory\n", stderr);
       +                return 2;
       +        case JSON_ERROR_INVALID:
       +                fputs("error: invalid JSON\n", stderr);
                        return 1;
                }