json.c - json2tsv - JSON to TSV converter
(HTM) git clone git://git.codemadness.org/json2tsv
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
json.c (8152B)
---
1 #include <errno.h>
2 #include <stdint.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #ifndef GETNEXT
8 #define GETNEXT getchar_unlocked
9 #endif
10
11 #include "json.h"
12
/*
 * ctype-like macros, but always compatible with ASCII / UTF-8.
 *
 * The argument is converted to unsigned so that a single comparison covers
 * both ends of the range: values below the range wrap around to a large
 * number. Negative values such as EOF therefore never match.
 *
 * The parameter is fully parenthesized so any expression (for example an
 * assignment) can be passed. NOTE: ISXDIGIT() may evaluate its argument
 * twice, so avoid arguments with side effects that must happen only once.
 */
#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
/* OR-ing with 32 folds ASCII 'A'..'F' onto 'a'..'f' */
#define ISXDIGIT(c) ((((unsigned)(c)) - '0' < 10) || (((unsigned)(c)) | 32) - 'a' < 6)
16
/*
 * Encode code point `r` as UTF-8 into `s` and return the number of bytes
 * written (0 to 4). `s` must have room for 4 bytes; no NUL terminator is
 * appended. Code point 0 writes nothing and returns 0, so NUL bytes are
 * never produced. No range or surrogate validation is done.
 */
static int
codepointtoutf8(long r, char *s)
{
	int nbytes, k;
	long lead;

	if (r == 0)
		return 0; /* NUL byte */

	if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = r;
		return 1;
	}

	/* pick the sequence length and the lead byte carrying the top bits */
	if (r <= 0x07FF) {
		nbytes = 2;
		lead = 0xC0 | ((r >> 6) & 0x1F);  /* 110aaaaa */
	} else if (r <= 0xFFFF) {
		nbytes = 3;
		lead = 0xE0 | ((r >> 12) & 0x0F); /* 1110aaaa */
	} else {
		nbytes = 4;
		lead = 0xF0 | ((r >> 18) & 0x07); /* 11110aaa */
	}
	s[0] = lead;

	/* continuation bytes (10xxxxxx), filled from last to first,
	 * each taking the next 6 low bits */
	for (k = nbytes - 1; k > 0; k--) {
		s[k] = 0x80 | (r & 0x3F);
		r >>= 6;
	}

	return nbytes;
}
46
/*
 * Return the numeric value (0-15) of the hexadecimal digit `c`.
 * Both lowercase and uppercase letters are accepted. Any character that is
 * not a hexadecimal digit yields 0, so callers that need to tell "0" apart
 * from "invalid" must validate the character first.
 */
static int
hexdigit(int c)
{
	int folded;

	if (c >= '0' && c <= '9')
		return c - '0';

	/* setting bit 0x20 maps ASCII 'A'..'F' onto 'a'..'f' */
	folded = c | 0x20;
	if (folded >= 'a' && folded <= 'f')
		return folded - 'a' + 10;

	return 0;
}
58
/*
 * Make sure the buffer *value (currently *sz bytes) can hold `cur` + `inc`
 * bytes, growing it with realloc() when it cannot.
 *
 * The new size starts at the current size (at least 64) and is doubled until
 * it is strictly greater than the required size; requests above SIZE_MAX / 2
 * ask for SIZE_MAX bytes. On success *value and *sz are updated.
 *
 * Returns 0 on success and -1 on failure. When `cur` + `inc` overflows,
 * errno is set to ENOMEM. On failure *value and *sz are left untouched and
 * it is up to the caller to free *value.
 */
static int
capacity(char **value, size_t *sz, size_t cur, size_t inc)
{
	size_t required, grown;
	char *p;

	/* check for addition overflow */
	if (inc > SIZE_MAX - cur) {
		errno = ENOMEM;
		return -1;
	}
	required = cur + inc;

	/* already large enough: nothing to do */
	if (required <= *sz)
		return 0;

	if (required > SIZE_MAX / 2) {
		/* doubling could overflow, request the maximum instead */
		grown = SIZE_MAX;
	} else {
		grown = *sz < 64 ? 64 : *sz;
		while (grown <= required)
			grown *= 2;
	}

	p = realloc(*value, grown);
	if (p == NULL)
		return -1; /* up to caller to free *value */

	*value = p;
	*sz = grown;
	return 0;
}
86
/*
 * Sets of characters that are allowed to start the next token. The parser
 * keeps one of these in `expect` and rejects any other non-whitespace
 * character.
 */
#define EXPECT_VALUE         "{[\"-0123456789tfn"
#define EXPECT_STRING        "\""
#define EXPECT_END           "}],"
#define EXPECT_OBJECT_STRING EXPECT_STRING "}"
#define EXPECT_OBJECT_KEY    ":"
#define EXPECT_ARRAY_VALUE   EXPECT_VALUE "]"

/*
 * Abort parsing with JSON_ERROR_INVALID. Expects a variable `ret` and a
 * label `end` in the calling function. There is deliberately no trailing
 * semicolon, so `JSON_INVALID();` is exactly one statement and can be used
 * safely in an unbraced if/else.
 */
#define JSON_INVALID() do { ret = JSON_ERROR_INVALID; goto end; } while (0)
95
96 int
97 parsejson(void (*cb)(struct json_node *, size_t, const char *, size_t))
98 {
99 struct json_node nodes[JSON_MAX_NODE_DEPTH] = { { 0 } };
100 size_t depth = 0, p = 0, len, sz = 0;
101 long cp, hi, lo;
102 char pri[128], *str = NULL;
103 int c, i, escape, iskey = 0, ret = JSON_ERROR_MEM;
104 const char *expect = EXPECT_VALUE;
105
106 if (capacity(&(nodes[0].name), &(nodes[0].namesiz), 0, 1) == -1)
107 goto end;
108 nodes[0].name[0] = '\0';
109
110 while (1) {
111 c = GETNEXT();
112 handlechr:
113 if (c == EOF)
114 break;
115
116 /* skip JSON white-space, (NOTE: no \v, \f, \b etc) */
117 if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
118 continue;
119
120 if (!c || !strchr(expect, c))
121 JSON_INVALID();
122
123 switch (c) {
124 case ':':
125 iskey = 0;
126 expect = EXPECT_VALUE;
127 break;
128 case '"':
129 nodes[depth].type = JSON_TYPE_STRING;
130 escape = 0;
131 len = 0;
132 while (1) {
133 c = GETNEXT();
134 chr:
135 /* EOF or control char: 0x7f is not defined as a control char in RFC 8259 */
136 if (c < 0x20)
137 JSON_INVALID();
138
139 if (escape) {
140 escchr:
141 escape = 0;
142 switch (c) {
143 case '"': /* FALLTHROUGH */
144 case '\\':
145 case '/': break;
146 case 'b': c = '\b'; break;
147 case 'f': c = '\f'; break;
148 case 'n': c = '\n'; break;
149 case 'r': c = '\r'; break;
150 case 't': c = '\t'; break;
151 case 'u': /* hex hex hex hex */
152 if (capacity(&str, &sz, len, 4) == -1)
153 goto end;
154 for (i = 12, cp = 0; i >= 0; i -= 4) {
155 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
156 JSON_INVALID(); /* invalid code point */
157 cp |= (hexdigit(c) << i);
158 }
159 /* RFC 8259 - 7. Strings - surrogates.
160 * 0xd800 - 0xdbff - high surrogates */
161 if (cp >= 0xd800 && cp <= 0xdbff) {
162 if ((c = GETNEXT()) != '\\') {
163 len += codepointtoutf8(cp, &str[len]);
164 goto chr;
165 }
166 if ((c = GETNEXT()) != 'u') {
167 len += codepointtoutf8(cp, &str[len]);
168 goto escchr;
169 }
170 for (hi = cp, i = 12, lo = 0; i >= 0; i -= 4) {
171 if ((c = GETNEXT()) == EOF || !ISXDIGIT(c))
172 JSON_INVALID(); /* invalid code point */
173 lo |= (hexdigit(c) << i);
174 }
175 /* 0xdc00 - 0xdfff - low surrogates */
176 if (lo >= 0xdc00 && lo <= 0xdfff) {
177 cp = (hi << 10) + lo - 56613888; /* - offset */
178 } else {
179 /* handle graceful: raw invalid output bytes */
180 len += codepointtoutf8(hi, &str[len]);
181 if (capacity(&str, &sz, len, 4) == -1)
182 goto end;
183 len += codepointtoutf8(lo, &str[len]);
184 continue;
185 }
186 }
187 len += codepointtoutf8(cp, &str[len]);
188 continue;
189 default:
190 JSON_INVALID(); /* invalid escape char */
191 }
192 if (capacity(&str, &sz, len, 1) == -1)
193 goto end;
194 str[len++] = c;
195 } else if (c == '\\') {
196 escape = 1;
197 } else if (c == '"') {
198 if (capacity(&str, &sz, len, 1) == -1)
199 goto end;
200 str[len++] = '\0';
201
202 if (iskey) {
203 /* copy string as key, including NUL byte */
204 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), len, 1) == -1)
205 goto end;
206 memcpy(nodes[depth].name, str, len);
207 } else {
208 cb(nodes, depth + 1, str, len - 1); /* length excluding NUL byte */
209 }
210 break;
211 } else {
212 if (capacity(&str, &sz, len, 1) == -1)
213 goto end;
214 str[len++] = c;
215 }
216 }
217 if (iskey)
218 expect = EXPECT_OBJECT_KEY;
219 else
220 expect = EXPECT_END;
221 break;
222 case '[':
223 case '{':
224 if (depth + 1 >= JSON_MAX_NODE_DEPTH)
225 JSON_INVALID(); /* too deep */
226
227 nodes[depth].index = 0;
228 if (c == '[') {
229 nodes[depth].type = JSON_TYPE_ARRAY;
230 expect = EXPECT_ARRAY_VALUE;
231 } else if (c == '{') {
232 iskey = 1;
233 nodes[depth].type = JSON_TYPE_OBJECT;
234 expect = EXPECT_OBJECT_STRING;
235 }
236
237 cb(nodes, depth + 1, "", 0);
238
239 depth++;
240 nodes[depth].index = 0;
241 if (capacity(&(nodes[depth].name), &(nodes[depth].namesiz), 0, 1) == -1)
242 goto end;
243 nodes[depth].name[0] = '\0';
244 break;
245 case ']':
246 case '}':
247 if (!depth ||
248 (c == ']' && nodes[depth - 1].type != JSON_TYPE_ARRAY) ||
249 (c == '}' && nodes[depth - 1].type != JSON_TYPE_OBJECT))
250 JSON_INVALID(); /* unbalanced nodes */
251
252 depth--;
253 nodes[depth].index++;
254 expect = EXPECT_END;
255 break;
256 case ',':
257 if (!depth)
258 JSON_INVALID(); /* unbalanced nodes */
259
260 nodes[depth - 1].index++;
261 if (nodes[depth - 1].type == JSON_TYPE_OBJECT) {
262 iskey = 1;
263 expect = EXPECT_STRING;
264 } else {
265 iskey = 0;
266 expect = EXPECT_VALUE;
267 }
268 break;
269 case 't': /* true */
270 if (GETNEXT() != 'r' || GETNEXT() != 'u' || GETNEXT() != 'e')
271 JSON_INVALID();
272 nodes[depth].type = JSON_TYPE_BOOL;
273 cb(nodes, depth + 1, "true", 4);
274 expect = EXPECT_END;
275 break;
276 case 'f': /* false */
277 if (GETNEXT() != 'a' || GETNEXT() != 'l' || GETNEXT() != 's' ||
278 GETNEXT() != 'e')
279 JSON_INVALID();
280 nodes[depth].type = JSON_TYPE_BOOL;
281 cb(nodes, depth + 1, "false", 5);
282 expect = EXPECT_END;
283 break;
284 case 'n': /* null */
285 if (GETNEXT() != 'u' || GETNEXT() != 'l' || GETNEXT() != 'l')
286 JSON_INVALID();
287 nodes[depth].type = JSON_TYPE_NULL;
288 cb(nodes, depth + 1, "null", 4);
289 expect = EXPECT_END;
290 break;
291 default: /* number */
292 nodes[depth].type = JSON_TYPE_NUMBER;
293 p = 0;
294 pri[p++] = c;
295 expect = EXPECT_END;
296 while (1) {
297 c = GETNEXT();
298 if (c == EOF ||
299 (!ISDIGIT(c) && c != 'e' && c != 'E' &&
300 c != '+' && c != '-' && c != '.') ||
301 p + 1 >= sizeof(pri)) {
302 pri[p] = '\0';
303 cb(nodes, depth + 1, pri, p);
304 goto handlechr; /* do not read next char, handle this */
305 } else {
306 pri[p++] = c;
307 }
308 }
309 }
310 }
311 if (depth)
312 JSON_INVALID(); /* unbalanced nodes */
313
314 ret = 0; /* success */
315 end:
316 for (depth = 0; depth < sizeof(nodes) / sizeof(nodes[0]); depth++)
317 free(nodes[depth].name);
318 free(str);
319
320 return ret;
321 }