util.c - sfeed - RSS and Atom parser
(HTM) git clone git://git.codemadness.org/sfeed
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
util.c (9946B)
---
1 #include <errno.h>
2 #include <stdarg.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <wchar.h>
7
8 #include "util.h"
9
10 /* print to stderr, print error message of errno and exit().
11 * Unlike BSD err() it does not prefix __progname */
12 __dead void
13 err(int exitstatus, const char *fmt, ...)
14 {
15 va_list ap;
16 int saved_errno;
17
18 saved_errno = errno;
19
20 if (fmt) {
21 va_start(ap, fmt);
22 vfprintf(stderr, fmt, ap);
23 va_end(ap);
24 fputs(": ", stderr);
25 }
26 fprintf(stderr, "%s\n", strerror(saved_errno));
27
28 exit(exitstatus);
29 }
30
31 /* print to stderr and exit().
32 * Unlike BSD errx() it does not prefix __progname */
33 __dead void
34 errx(int exitstatus, const char *fmt, ...)
35 {
36 va_list ap;
37
38 if (fmt) {
39 va_start(ap, fmt);
40 vfprintf(stderr, fmt, ap);
41 va_end(ap);
42 }
43 fputs("\n", stderr);
44
45 exit(exitstatus);
46 }
47
/* Handle read or write errors for a FILE * stream.
 * `mode` is 'r' to check for a read error or 'w' to flush the stream and check
 * for a write error; any other value does nothing. On error a message naming
 * `name` is printed using errx() with exit status 1. */
void
checkfileerror(FILE *fp, const char *name, int mode)
{
	switch (mode) {
	case 'r':
		if (ferror(fp))
			errx(1, "read error: %s", name);
		break;
	case 'w':
		/* flush first so a failure writing buffered data is noticed */
		if (fflush(fp) || ferror(fp))
			errx(1, "write error: %s", name);
		break;
	default:
		break;
	}
}
57
/* strcasestr() included for portability.
 * Find the first occurrence of needle `n` in haystack `h`, comparing bytes
 * through TOLOWER(). Returns a pointer into `h` at the start of the match,
 * `h` itself when `n` is empty, or NULL when there is no match. */
char *
strcasestr(const char *h, const char *n)
{
	const char *start;
	size_t k;

	if (n[0] == '\0')
		return (char *)h;

	for (start = h; *start != '\0'; start++) {
		/* advance while both strings agree; a NUL in the haystack
		 * never equals a non-NUL needle byte, so this stops in bounds */
		k = 0;
		while (n[k] != '\0' &&
		    TOLOWER((unsigned char)n[k]) == TOLOWER((unsigned char)start[k]))
			k++;
		if (n[k] == '\0')
			return (char *)start; /* whole needle matched */
	}

	return NULL;
}
77
/* Check if string has a non-empty scheme / protocol part.
 * Returns 1 when `s` starts with one or more of the characters
 * [A-Za-z0-9+.-] directly followed by ':', otherwise 0. */
int
uri_hasscheme(const char *s)
{
	const char *end = s;

	while (ISALPHA((unsigned char)*end) || ISDIGIT((unsigned char)*end) ||
	    *end == '+' || *end == '-' || *end == '.')
		end++;

	/* an empty scheme (string starting with ":") is a path, not a scheme */
	if (end == s)
		return 0;
	return *end == ':';
}
90
91 /* Parse URI string `s` into an uri structure `u`.
92 * Returns 0 on success or -1 on failure */
93 int
94 uri_parse(const char *s, struct uri *u)
95 {
96 const char *p = s;
97 char *endptr;
98 size_t i;
99 long l;
100
101 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
102 u->path[0] = u->query[0] = u->fragment[0] = '\0';
103
104 /* protocol-relative */
105 if (*p == '/' && *(p + 1) == '/') {
106 p += 2; /* skip "//" */
107 goto parseauth;
108 }
109
110 /* scheme / protocol part */
111 for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
112 *p == '+' || *p == '-' || *p == '.'; p++)
113 ;
114 /* scheme, except if empty and starts with ":" then it is a path */
115 if (*p == ':' && p != s) {
116 if (*(p + 1) == '/' && *(p + 2) == '/')
117 p += 3; /* skip "://" */
118 else
119 p++; /* skip ":" */
120
121 if ((size_t)(p - s) >= sizeof(u->proto))
122 return -1; /* protocol too long */
123 memcpy(u->proto, s, p - s);
124 u->proto[p - s] = '\0';
125
126 if (*(p - 1) != '/')
127 goto parsepath;
128 } else {
129 p = s; /* no scheme format, reset to start */
130 goto parsepath;
131 }
132
133 parseauth:
134 /* userinfo (username:password) */
135 i = strcspn(p, "@/?#");
136 if (p[i] == '@') {
137 if (i >= sizeof(u->userinfo))
138 return -1; /* userinfo too long */
139 memcpy(u->userinfo, p, i);
140 u->userinfo[i] = '\0';
141 p += i + 1;
142 }
143
144 /* IPv6 address */
145 if (*p == '[') {
146 /* bracket not found, host too short or too long */
147 i = strcspn(p, "]");
148 if (p[i] != ']' || i < 3)
149 return -1;
150 i++; /* including "]" */
151 } else {
152 /* domain / host part, skip until port, path or end. */
153 i = strcspn(p, ":/?#");
154 }
155 if (i >= sizeof(u->host))
156 return -1; /* host too long */
157 memcpy(u->host, p, i);
158 u->host[i] = '\0';
159 p += i;
160
161 /* port */
162 if (*p == ':') {
163 p++;
164 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
165 return -1; /* port too long */
166 memcpy(u->port, p, i);
167 u->port[i] = '\0';
168 /* check for valid port: range 1 - 65535, may be empty */
169 errno = 0;
170 l = strtol(u->port, &endptr, 10);
171 if (i && (errno || *endptr || l <= 0 || l > 65535))
172 return -1;
173 p += i;
174 }
175
176 parsepath:
177 /* path */
178 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
179 return -1; /* path too long */
180 memcpy(u->path, p, i);
181 u->path[i] = '\0';
182 p += i;
183
184 /* query */
185 if (*p == '?') {
186 p++;
187 if ((i = strcspn(p, "#")) >= sizeof(u->query))
188 return -1; /* query too long */
189 memcpy(u->query, p, i);
190 u->query[i] = '\0';
191 p += i;
192 }
193
194 /* fragment */
195 if (*p == '#') {
196 p++;
197 if ((i = strlen(p)) >= sizeof(u->fragment))
198 return -1; /* fragment too long */
199 memcpy(u->fragment, p, i);
200 u->fragment[i] = '\0';
201 }
202
203 return 0;
204 }
205
206 /* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
207 * Follows some of the logic from "RFC 3986 - 5.2.2. Transform References".
208 * Returns 0 on success, -1 on error or truncation. */
209 int
210 uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
211 {
212 char *p;
213 int c;
214
215 strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
216
217 if (u->proto[0] || u->host[0]) {
218 strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
219 strlcpy(a->host, u->host, sizeof(a->host));
220 strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
221 strlcpy(a->host, u->host, sizeof(a->host));
222 strlcpy(a->port, u->port, sizeof(a->port));
223 strlcpy(a->path, u->path, sizeof(a->path));
224 strlcpy(a->query, u->query, sizeof(a->query));
225 return 0;
226 }
227
228 strlcpy(a->proto, b->proto, sizeof(a->proto));
229 strlcpy(a->host, b->host, sizeof(a->host));
230 strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
231 strlcpy(a->host, b->host, sizeof(a->host));
232 strlcpy(a->port, b->port, sizeof(a->port));
233
234 if (!u->path[0]) {
235 strlcpy(a->path, b->path, sizeof(a->path));
236 } else if (u->path[0] == '/') {
237 strlcpy(a->path, u->path, sizeof(a->path));
238 } else {
239 a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
240 a->path[1] = '\0';
241
242 if ((p = strrchr(b->path, '/'))) {
243 c = *(++p);
244 *p = '\0'; /* temporary NUL-terminate */
245 if (strlcat(a->path, b->path, sizeof(a->path)) >= sizeof(a->path))
246 return -1;
247 *p = c; /* restore */
248 }
249 if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
250 return -1;
251 }
252
253 if (u->path[0] || u->query[0])
254 strlcpy(a->query, u->query, sizeof(a->query));
255 else
256 strlcpy(a->query, b->query, sizeof(a->query));
257
258 return 0;
259 }
260
261 int
262 uri_format(char *buf, size_t bufsiz, struct uri *u)
263 {
264 return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
265 u->proto,
266 u->userinfo[0] ? u->userinfo : "",
267 u->userinfo[0] ? "@" : "",
268 u->host,
269 u->port[0] ? ":" : "",
270 u->port,
271 u->host[0] && u->path[0] && u->path[0] != '/' ? "/" : "",
272 u->path,
273 u->query[0] ? "?" : "",
274 u->query,
275 u->fragment[0] ? "#" : "",
276 u->fragment);
277 }
278
279 /* Splits fields in the line buffer by replacing TAB separators with NUL ('\0')
280 * terminators and assign these fields as pointers. If there are less fields
281 * than expected then the field is an empty string constant. */
282 void
283 parseline(char *line, char *fields[FieldLast])
284 {
285 char *prev, *s;
286 size_t i;
287
288 for (prev = line, i = 0;
289 (s = strchr(prev, '\t')) && i < FieldLast - 1;
290 i++) {
291 *s = '\0';
292 fields[i] = prev;
293 prev = s + 1;
294 }
295 fields[i++] = prev;
296 /* make non-parsed fields empty. */
297 for (; i < FieldLast; i++)
298 fields[i] = "";
299 }
300
/* Parse the decimal number in string `s` to a time_t, assumes time_t is
 * signed. The number is parsed using strtoll(). When `t` is not NULL the
 * value is stored in `*t`.
 * Returns 0 on success or -1 when the string is empty, has characters after
 * the number (such as a fraction) or strtoll() set errno. */
int
strtotime(const char *s, time_t *t)
{
	char *end;
	long long value;

	errno = 0;
	value = strtoll(s, &end, 10);
	if (errno != 0)
		return -1;
	if (*s == '\0' || *end != '\0')
		return -1;

	/* NOTE: the type long long supports the 64-bit range. If time_t is
	 * 64-bit it is "2038-ready", otherwise it is truncated/wrapped. */
	if (t != NULL)
		*t = (time_t)value;

	return 0;
}
320
/* Get the time used to decide if an item is new: the current time minus an
 * age in seconds. The age is read from the environment variable
 * SFEED_NEW_AGE when it is set, otherwise it is 86400 (1 day).
 * Returns (time_t)-1 when time() fails or SFEED_NEW_AGE can not be parsed by
 * strtotime(). */
time_t
getcomparetime(void)
{
	const char *env;
	time_t age = 86400; /* 1 day is old news */
	time_t now;

	now = time(NULL);
	if (now == (time_t)-1)
		return (time_t)-1;

	env = getenv("SFEED_NEW_AGE");
	if (env != NULL && strtotime(env, &age) == -1)
		return (time_t)-1;

	return now - age;
}
338
/* Escape characters below as HTML 2.0 / XML 1.0.
 * Writes string `s` to stream `fp`, replacing '<', '>', '\'', '&' and '"'
 * with the entities "&lt;", "&gt;", "&#39;", "&amp;" and "&quot;". All other
 * bytes are written unchanged. */
void
xmlencode(const char *s, FILE *fp)
{
	for (; *s; ++s) {
		switch (*s) {
		case '<':  fputs("&lt;",   fp); break;
		case '>':  fputs("&gt;",   fp); break;
		case '\'': fputs("&#39;",  fp); break;
		case '&':  fputs("&amp;",  fp); break;
		case '"':  fputs("&quot;", fp); break;
		default:   putc(*s, fp);
		}
	}
}
354
355 /* print `len` columns of characters. If string is shorter pad the rest with
356 * characters `pad`. */
357 void
358 printutf8pad(FILE *fp, const char *s, size_t len, int pad)
359 {
360 wchar_t wc;
361 size_t col = 0, i, slen;
362 int inc, rl, w;
363
364 if (!len)
365 return;
366
367 slen = strlen(s);
368 for (i = 0; i < slen; i += inc) {
369 inc = 1; /* next byte */
370 if ((unsigned char)s[i] < 32) {
371 continue; /* skip control characters */
372 } else if ((unsigned char)s[i] >= 127) {
373 rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4);
374 inc = rl;
375 if (rl < 0) {
376 mbtowc(NULL, NULL, 0); /* reset state */
377 inc = 1; /* invalid, seek next byte */
378 w = 1; /* replacement char is one width */
379 } else if ((w = wcwidth(wc)) == -1) {
380 continue;
381 }
382
383 if (col + w > len || (col + w == len && s[i + inc])) {
384 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
385 col++;
386 break;
387 } else if (rl < 0) {
388 fputs(UTF_INVALID_SYMBOL, fp); /* replacement */
389 col++;
390 continue;
391 }
392 fwrite(&s[i], 1, rl, fp);
393 col += w;
394 } else {
395 /* optimization: simple ASCII character */
396 if (col + 1 > len || (col + 1 == len && s[i + 1])) {
397 fputs(PAD_TRUNCATE_SYMBOL, fp); /* ellipsis */
398 col++;
399 break;
400 }
401 putc(s[i], fp);
402 col++;
403 }
404
405 }
406 for (; col < len; ++col)
407 putc(pad, fp);
408 }
409
/* Counts column width of a character string.
 * Bytes below 32 and characters for which wcwidth() returns -1 count as zero
 * columns, bytes in the range 32 - 126 count as one column and a byte that
 * mbtowc() can not decode counts as one column. */
size_t
colw(const char *s)
{
	wchar_t wc;
	size_t cols = 0, pos, total, remain;
	int step, nbytes, width;
	unsigned char ch;

	total = strlen(s);
	for (pos = 0; pos < total; pos += step) {
		step = 1; /* next byte */
		ch = (unsigned char)s[pos];

		if (ch < 32)
			continue; /* control characters take no columns */
		if (ch < 127) {
			cols++; /* simple ASCII character */
			continue;
		}

		/* possibly a multibyte character: decode at most 4 bytes */
		remain = total - pos;
		nbytes = mbtowc(&wc, &s[pos], remain < 4 ? remain : 4);
		step = nbytes;
		if (nbytes < 0) {
			mbtowc(NULL, NULL, 0); /* reset state */
			step = 1; /* invalid, seek next byte */
			width = 1; /* replacement char is one width */
		} else if ((width = wcwidth(wc)) == -1) {
			continue;
		}
		cols += width;
	}

	return cols;
}