initial repo - uriparser - URI parser
(HTM) git clone git://git.codemadness.org/uriparser
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit f003f99bc853675e14235c2750a31571c988543b
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Wed, 22 Nov 2023 19:21:03 +0100
initial repo
Diffstat:
A LICENSE | 15 +++++++++++++++
A Makefile | 5 +++++
A README | 1 +
A example.c | 65 +++++++++++++++++++++++++++++++
A strlcat.c | 54 +++++++++++++++++++++++++++++++
A strlcpy.c | 49 +++++++++++++++++++++++++++++++
A util.c | 207 ++++++++++++++++++++++++++++++
A util.h | 26 ++++++++++++++++++++++++++
8 files changed, 422 insertions(+), 0 deletions(-)
---
(DIR) diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,15 @@
+ISC License
+
+Copyright (c) 2023 Hiltjo Posthuma <hiltjo@codemadness.org>
+
+Permission to use, copy, modify, and/or distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
(DIR) diff --git a/Makefile b/Makefile
@@ -0,0 +1,5 @@
+# First (and therefore default) target. It depends on "clean", so the
+# example binary is always rebuilt from scratch out of all four sources.
+build: clean
+ ${CC} -o example example.c util.c strlcat.c strlcpy.c -Wall
+
+# Remove the example binary; -f keeps this quiet when it does not exist.
+clean:
+ rm -f example
(DIR) diff --git a/README b/README
@@ -0,0 +1 @@
+Small URI parser written in C
(DIR) diff --git a/example.c b/example.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+
+#include "util.h"
+
+/* Write every component of the URI `u` to stdout, one labelled line each. */
+void
+printfields(struct uri *u)
+{
+	const struct uri *f = u; /* the fields are only read, never changed */
+
+	/* authority related parts */
+	printf("* proto: %s\n", f->proto);
+	printf("* userinfo: %s\n", f->userinfo);
+	printf("* host: %s\n", f->host);
+	printf("* port: %s\n", f->port);
+
+	/* resource related parts */
+	printf("* path: %s\n", f->path);
+	printf("* query: %s\n", f->query);
+	printf("* fragment: %s\n", f->fragment);
+}
+
+/*
+ * Example driver: parse the URI in argv[1] and print its fields. When a
+ * base URI is given in argv[2] it is parsed and printed as well, argv[1]
+ * is made absolute against it and the result is printed and formatted.
+ * Returns 1 when no URI argument was given and 0 otherwise, also when one
+ * of the steps reported a failure.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct uri base, ref, resolved;
+	char out[4096];
+	int rc;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <url> [baseurl]\n", argv[0]);
+		return 1;
+	}
+
+	/* step 1: the URI itself */
+	rc = uri_parse(argv[1], &ref);
+	printf("uri_parse() for %s returned: %d\n", argv[1], rc);
+	if (rc == -1)
+		goto fail;
+	printf("success!\n\nfields for %s:\n", argv[1]);
+	printfields(&ref);
+	printf("\n");
+
+	/* without a base URI there is nothing left to do */
+	if (argc <= 2)
+		return 0;
+
+	/* step 2: the base URI */
+	rc = uri_parse(argv[2], &base);
+	printf("uri_parse() for %s returned: %d\n", argv[2], rc);
+	if (rc == -1)
+		goto fail;
+	printf("success!\n\nfields for %s:\n", argv[2]);
+	printfields(&base);
+	printf("\n");
+
+	/* step 3: make the URI absolute using the base */
+	rc = uri_makeabs(&resolved, &ref, &base);
+	printf("uri_makeabs() for %s and %s returned: %d\n", argv[1], argv[2], rc);
+	if (rc == -1)
+		goto fail;
+	printf("success!\n\nfields for %s and %s:\n", argv[1], argv[2]);
+	printfields(&resolved);
+
+	/* step 4: only show the formatted string when it fit completely */
+	rc = uri_format(out, sizeof(out), &resolved);
+	printf("uri_format() for absolute URI returned: %d\n", rc);
+	if (rc > 0 && (size_t)rc < sizeof(out))
+		printf("formatted URI: %s\n", out);
+
+	return 0;
+
+fail:
+	printf("failure!\n");
+	return 0;
+}
(DIR) diff --git a/strlcat.c b/strlcat.c
@@ -0,0 +1,54 @@
+/* $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Appends src to string dst of size dsize (unlike strncat, dsize is the
+ * full size of dst, not space left). At most dsize-1 characters
+ * will be copied. Always NUL terminates (unless dsize <= strlen(dst)).
+ * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
+ * If retval >= dsize, truncation occurred.
+ */
+size_t
+strlcat(char *dst, const char *src, size_t dsize)
+{
+ const char *odst = dst;
+ const char *osrc = src;
+ size_t n = dsize;
+ size_t dlen;
+
+ /* Find the end of dst and adjust bytes left but don't go past end. */
+ while (n-- != 0 && *dst != '\0')
+ dst++;
+ dlen = dst - odst;
+ n = dsize - dlen;
+
+ /*
+  * n == 0 means no NUL was found within the first dsize bytes of dst
+  * (or dsize is 0): nothing can be appended, only report the length.
+  * Otherwise the decrement reserves one byte for the terminating NUL.
+  */
+ if (n-- == 0)
+ return(dlen + strlen(src));
+ /* Copy while room is left, but always walk all of src to measure it. */
+ while (*src != '\0') {
+ if (n != 0) {
+ *dst++ = *src;
+ n--;
+ }
+ src++;
+ }
+ *dst = '\0';
+
+ return(dlen + (src - osrc)); /* count does not include NUL */
+}
(DIR) diff --git a/strlcpy.c b/strlcpy.c
@@ -0,0 +1,49 @@
+/* $OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $ */
+
+/*
+ * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <string.h>
+
+/*
+ * Copy string src to buffer dst of size dsize. At most dsize-1
+ * chars will be copied. Always NUL terminates (unless dsize == 0).
+ * Returns strlen(src); if retval >= dsize, truncation occurred.
+ */
+size_t
+strlcpy(char *dst, const char *src, size_t dsize)
+{
+ const char *osrc = src;
+ size_t nleft = dsize;
+
+ /* Copy as many bytes as will fit. */
+ if (nleft != 0) {
+ /* The pre-decrement keeps the last byte of dst free for the NUL. */
+ while (--nleft != 0) {
+ /* Leaving here means the NUL of src was copied too: all of src fit. */
+ if ((*dst++ = *src++) == '\0')
+ break;
+ }
+ }
+
+ /* Not enough room in dst, add NUL and traverse rest of src. */
+ if (nleft == 0) {
+ if (dsize != 0)
+ *dst = '\0'; /* NUL-terminate dst */
+ while (*src++)
+ ;
+ }
+
+ /* On both paths src now points one past the NUL of the source string. */
+ return(src - osrc - 1); /* count does not include NUL */
+}
(DIR) diff --git a/util.c b/util.c
@@ -0,0 +1,207 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+/* Test whether the string `s` begins with a non-empty scheme / protocol
+   part: a run of letters, digits, '+', '-' or '.' that is followed by ':'.
+   Returns 1 if it does and 0 if it does not. */
+int
+uri_hasscheme(const char *s)
+{
+	const char *end;
+	unsigned char ch;
+
+	/* advance `end` to the first character that cannot be part of a scheme */
+	for (end = s; ; end++) {
+		ch = (unsigned char)*end;
+		if (ISALPHA(ch) || ISDIGIT(ch))
+			continue;
+		if (ch == '+' || ch == '-' || ch == '.')
+			continue;
+		break;
+	}
+
+	/* nothing before the ":" means it is a path, not a scheme */
+	if (end == s)
+		return 0;
+	return *end == ':';
+}
+
+/* Parse URI string `s` into an uri structure `u`.
+   All fields of `u` are reset to the empty string first and then filled in
+   the order: protocol, userinfo, host, port, path, query, fragment.
+   The protocol is stored including its trailing ":" or "://", an IPv6 host
+   is stored including its brackets, and the "@", "?" and "#" separators are
+   not stored. A port may be empty, otherwise it has to consist of decimal
+   digits only and be in the range 1 - 65535.
+   Returns 0 on success or -1 on failure: a component does not fit in its
+   field, the port is invalid or an IPv6 host is malformed. On failure the
+   fields that were parsed before the error remain filled in. */
+int
+uri_parse(const char *s, struct uri *u)
+{
+	const char *p = s;
+	char *endptr;
+	size_t i, j;
+	long l;
+
+	u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
+	u->path[0] = u->query[0] = u->fragment[0] = '\0';
+
+	/* protocol-relative */
+	if (*p == '/' && *(p + 1) == '/') {
+		p += 2; /* skip "//" */
+		goto parseauth;
+	}
+
+	/* scheme / protocol part */
+	for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
+	    *p == '+' || *p == '-' || *p == '.'; p++)
+		;
+	/* scheme, except if empty and starts with ":" then it is a path */
+	if (*p == ':' && p != s) {
+		if (*(p + 1) == '/' && *(p + 2) == '/')
+			p += 3; /* skip "://" */
+		else
+			p++; /* skip ":" */
+
+		if ((size_t)(p - s) >= sizeof(u->proto))
+			return -1; /* protocol too long */
+		memcpy(u->proto, s, p - s);
+		u->proto[p - s] = '\0';
+
+		/* only "scheme://" is followed by an authority part */
+		if (*(p - 1) != '/')
+			goto parsepath;
+	} else {
+		p = s; /* no scheme format, reset to start */
+		goto parsepath;
+	}
+
+parseauth:
+	/* userinfo (username:password) */
+	i = strcspn(p, "@/?#");
+	if (p[i] == '@') {
+		if (i >= sizeof(u->userinfo))
+			return -1; /* userinfo too long */
+		memcpy(u->userinfo, p, i);
+		u->userinfo[i] = '\0';
+		p += i + 1;
+	}
+
+	/* IPv6 address */
+	if (*p == '[') {
+		/* bracket not found, host too short or too long */
+		i = strcspn(p, "]");
+		if (p[i] != ']' || i < 3)
+			return -1;
+		i++; /* including "]" */
+	} else {
+		/* domain / host part, skip until port, path or end. */
+		i = strcspn(p, ":/?#");
+	}
+	if (i >= sizeof(u->host))
+		return -1; /* host too long */
+	memcpy(u->host, p, i);
+	u->host[i] = '\0';
+	p += i;
+
+	/* port */
+	if (*p == ':') {
+		p++;
+		if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
+			return -1; /* port too long */
+		memcpy(u->port, p, i);
+		u->port[i] = '\0';
+		/* digits only: strtol() on its own would skip leading
+		   whitespace and accept a sign, so " 80" and "+80" would
+		   pass as a port */
+		for (j = 0; j < i; j++) {
+			if (!ISDIGIT((unsigned char)u->port[j]))
+				return -1;
+		}
+		/* check for valid port: range 1 - 65535, may be empty */
+		errno = 0;
+		l = strtol(u->port, &endptr, 10);
+		if (i && (errno || *endptr || l <= 0 || l > 65535))
+			return -1;
+		p += i;
+	}
+
+parsepath:
+	/* path */
+	if ((i = strcspn(p, "?#")) >= sizeof(u->path))
+		return -1; /* path too long */
+	memcpy(u->path, p, i);
+	u->path[i] = '\0';
+	p += i;
+
+	/* query */
+	if (*p == '?') {
+		p++;
+		if ((i = strcspn(p, "#")) >= sizeof(u->query))
+			return -1; /* query too long */
+		memcpy(u->query, p, i);
+		u->query[i] = '\0';
+		p += i;
+	}
+
+	/* fragment */
+	if (*p == '#') {
+		p++;
+		if ((i = strlen(p)) >= sizeof(u->fragment))
+			return -1; /* fragment too long */
+		memcpy(u->fragment, p, i);
+		u->fragment[i] = '\0';
+	}
+
+	return 0;
+}
+
+/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
+   Follows some of the logic from "RFC 3986 - 5.2.2. Transform References":
+   - the fragment is always taken from `u`.
+   - if `u` has a protocol or a host, its authority, path and query are used
+     as they are and only a missing protocol is taken from `b`.
+   - otherwise protocol and authority come from `b`; an empty path in `u`
+     keeps the path of `b`, a path starting with "/" replaces it and any
+     other path is appended to the path of `b` up to and including its last
+     "/". Dot segments ("." and "..") are not removed.
+   - the query of `b` is only kept when `u` has neither a path nor a query.
+   `b` is modified temporarily while merging paths, but is always restored
+   before returning.
+   Returns 0 on success, -1 on error or truncation. On -1 the contents of
+   `a` are incomplete. */
+int
+uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
+{
+	char *p;
+	size_t n;
+	int c;
+
+	strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
+
+	if (u->proto[0] || u->host[0]) {
+		strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
+		strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
+		strlcpy(a->host, u->host, sizeof(a->host));
+		strlcpy(a->port, u->port, sizeof(a->port));
+		strlcpy(a->path, u->path, sizeof(a->path));
+		strlcpy(a->query, u->query, sizeof(a->query));
+		return 0;
+	}
+
+	strlcpy(a->proto, b->proto, sizeof(a->proto));
+	strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
+	strlcpy(a->host, b->host, sizeof(a->host));
+	strlcpy(a->port, b->port, sizeof(a->port));
+
+	if (!u->path[0]) {
+		strlcpy(a->path, b->path, sizeof(a->path));
+	} else if (u->path[0] == '/') {
+		strlcpy(a->path, u->path, sizeof(a->path));
+	} else {
+		/* a base with a host always gets a path starting with "/" */
+		a->path[0] = (b->host[0] && b->path[0] != '/') ? '/' : '\0';
+		a->path[1] = '\0';
+
+		if ((p = strrchr(b->path, '/'))) {
+			c = *(++p);
+			*p = '\0'; /* temporary NUL-terminate */
+			n = strlcat(a->path, b->path, sizeof(a->path));
+			*p = c; /* restore, also when truncated below */
+			if (n >= sizeof(a->path))
+				return -1;
+		}
+		if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
+			return -1;
+	}
+
+	if (u->path[0] || u->query[0])
+		strlcpy(a->query, u->query, sizeof(a->query));
+	else
+		strlcpy(a->query, b->query, sizeof(a->query));
+
+	return 0;
+}
+
+/* Format the URI `u` as a string into `buf`, which has room for `bufsiz`
+   bytes. The "@", ":", "?" and "#" separators are only written when the
+   component they belong to is not empty and a "/" is inserted between a
+   host and a path that does not start with one.
+   Returns the result of snprintf(): the length of the complete string
+   excluding the NUL byte, so a value >= `bufsiz` means the output was
+   truncated, or a negative value on an output error. */
+int
+uri_format(char *buf, size_t bufsiz, struct uri *u)
+{
+	const char *at = "", *colon = "", *slash = "";
+	const char *qmark = "", *hash = "";
+
+	/* pick the separators that are needed */
+	if (u->userinfo[0])
+		at = "@";
+	if (u->port[0])
+		colon = ":";
+	if (u->host[0] && u->path[0] && u->path[0] != '/')
+		slash = "/";
+	if (u->query[0])
+		qmark = "?";
+	if (u->fragment[0])
+		hash = "#";
+
+	return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
+	    u->proto,
+	    u->userinfo, at,
+	    u->host,
+	    colon, u->port,
+	    slash, u->path,
+	    qmark, u->query,
+	    hash, u->fragment);
+}
(DIR) diff --git a/util.h b/util.h
@@ -0,0 +1,26 @@
+#include <stdio.h>
+
+/* ctype-like macros, but always compatible with ASCII / UTF-8.
+   The argument is converted to unsigned so that a value below the start of
+   the range wraps around to a large number and one comparison checks both
+   bounds. It is parenthesized so that the conversion applies to the whole
+   argument, also when it is a compound expression. */
+#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
+#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
+
+/* Size-bounded string append and copy, implemented in strlcat.c and
+   strlcpy.c. The #undef removes a macro of the same name, if one is
+   defined at this point, before the function is declared. */
+#undef strlcat
+size_t strlcat(char *, const char *, size_t);
+#undef strlcpy
+size_t strlcpy(char *, const char *, size_t);
+
+/* URI split into its components. Every field is a NUL-terminated string
+   in a fixed-size buffer and is empty when the component is absent.
+   uri_parse() fails when a component does not fit in its field. */
+struct uri {
+ char proto[48]; /* scheme including ":" or "://" */
+ char userinfo[256]; /* username [:password] */
+ char host[256]; /* an IPv6 address is stored including the "[" and "]" */
+ char port[6]; /* numeric port */
+ char path[1024];
+ char query[1024]; /* without the leading "?" */
+ char fragment[1024]; /* without the leading "#" */
+};
+
+/* Write the URI as a string into a buffer of the given size, returns the
+   result of snprintf(). */
+int uri_format(char *, size_t, struct uri *);
+/* Returns non-zero if the string starts with a non-empty scheme and ":". */
+int uri_hasscheme(const char *);
+/* Make the second URI absolute using the third (base) URI and store the
+   result in the first. Returns 0 on success, -1 on error or truncation. */
+int uri_makeabs(struct uri *, struct uri *, struct uri *);
+/* Parse a URI string into the structure. Returns 0 on success, -1 on
+   failure. */
+int uri_parse(const char *, struct uri *);