codemadness.org

       initial repo - uriparser - URI parser
 (HTM) git clone git://git.codemadness.org/uriparser
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit f003f99bc853675e14235c2750a31571c988543b
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Wed, 22 Nov 2023 19:21:03 +0100
       
       initial repo
       
       Diffstat:
         A LICENSE                             |      15 +++++++++++++++
         A Makefile                            |       5 +++++
         A README                              |       1 +
         A example.c                           |      65 +++++++++++++++++++++++++++++++
         A strlcat.c                           |      54 +++++++++++++++++++++++++++++++
         A strlcpy.c                           |      49 +++++++++++++++++++++++++++++++
         A util.c                              |     207 ++++++++++++++++++++++++++++++
         A util.h                              |      26 ++++++++++++++++++++++++++
       
       8 files changed, 422 insertions(+), 0 deletions(-)
       ---
 (DIR) diff --git a/LICENSE b/LICENSE
       @@ -0,0 +1,15 @@
       +ISC License
       +
       +Copyright (c) 2023 Hiltjo Posthuma <hiltjo@codemadness.org>
       +
       +Permission to use, copy, modify, and/or distribute this software for any
       +purpose with or without fee is hereby granted, provided that the above
       +copyright notice and this permission notice appear in all copies.
       +
       +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 (DIR) diff --git a/Makefile b/Makefile
       @@ -0,0 +1,5 @@
       +build: clean
       +        ${CC} -o example example.c util.c strlcat.c strlcpy.c -Wall
       +
       +clean:
       +        rm -f example
 (DIR) diff --git a/README b/README
       @@ -0,0 +1 @@
       +Small URI parser written in C
 (DIR) diff --git a/example.c b/example.c
       @@ -0,0 +1,65 @@
       +#include <stdio.h>
       +
       +#include "util.h"
       +
       +/* Print each component of the URI `u` to stdout, one labelled line per
       +   field, in the order: proto, userinfo, host, port, path, query,
       +   fragment. Empty components are printed as an empty value. */
       +void
       +printfields(struct uri *u)
       +{
       +        printf("* proto:    %s\n"
       +               "* userinfo: %s\n"
       +               "* host:     %s\n"
       +               "* port:     %s\n"
       +               "* path:     %s\n"
       +               "* query:    %s\n"
       +               "* fragment: %s\n",
       +               u->proto, u->userinfo, u->host, u->port,
       +               u->path, u->query, u->fragment);
       +}
       +
       +/* Example driver: parse the URI in argv[1] and print its fields. When a
       +   base URI is given in argv[2] it is parsed and printed as well, after
       +   which argv[1] is made absolute against it and the result is printed
       +   and formatted back into a string.
       +   Returns 1 on a usage error, otherwise 0 (also when a step failed, in
       +   which case "failure!" is printed). */
       +int
       +main(int argc, char *argv[])
       +{
       +        struct uri base, ref, res;
       +        char buf[4096];
       +        int r;
       +
       +        if (argc < 2) {
       +                fprintf(stderr, "usage: %s <url> [baseurl]\n", argv[0]);
       +                return 1;
       +        }
       +
       +        /* the URI itself */
       +        r = uri_parse(argv[1], &ref);
       +        printf("uri_parse() for %s returned: %d\n", argv[1], r);
       +        if (r == -1) {
       +                printf("failure!\n");
       +                return 0;
       +        }
       +        printf("success!\n\nfields for %s:\n", argv[1]);
       +        printfields(&ref);
       +        printf("\n");
       +
       +        /* without a base URI there is nothing more to do */
       +        if (argc <= 2)
       +                return 0;
       +
       +        /* the base URI */
       +        r = uri_parse(argv[2], &base);
       +        printf("uri_parse() for %s returned: %d\n", argv[2], r);
       +        if (r == -1) {
       +                printf("failure!\n");
       +                return 0;
       +        }
       +        printf("success!\n\nfields for %s:\n", argv[2]);
       +        printfields(&base);
       +        printf("\n");
       +
       +        /* resolve the URI against the base */
       +        r = uri_makeabs(&res, &ref, &base);
       +        printf("uri_makeabs() for %s and %s returned: %d\n", argv[1], argv[2], r);
       +        if (r == -1) {
       +                printf("failure!\n");
       +                return 0;
       +        }
       +        printf("success!\n\nfields for %s and %s:\n", argv[1], argv[2]);
       +        printfields(&res);
       +
       +        /* only print the formatted string when it fit into buf completely */
       +        r = uri_format(buf, sizeof(buf), &res);
       +        printf("uri_format() for absolute URI returned: %d\n", r);
       +        if (r > 0 && (size_t)r < sizeof(buf))
       +                printf("formatted URI: %s\n", buf);
       +
       +        return 0;
       +}
 (DIR) diff --git a/strlcat.c b/strlcat.c
       @@ -0,0 +1,54 @@
       +/*        $OpenBSD: strlcat.c,v 1.15 2015/03/02 21:41:08 millert Exp $        */
       +
       +/*
       + * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
       + *
       + * Permission to use, copy, modify, and distribute this software for any
       + * purpose with or without fee is hereby granted, provided that the above
       + * copyright notice and this permission notice appear in all copies.
       + *
       + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       + */
       +
       +#include <string.h>
       +
       +/*
       + * Concatenate src onto the NUL-terminated string in dst, where dsize is
       + * the size of the whole dst buffer (not the room that is left, as with
       + * strncat).  At most dsize-1 bytes end up in dst.  The result is always
       + * NUL-terminated, except when dst holds no NUL within its first dsize
       + * bytes: in that case dst is left untouched.
       + * Returns strlen(src) + MIN(dsize, strlen(initial dst)).
       + * A return value >= dsize means the result was truncated.
       + */
       +size_t
       +strlcat(char *dst, const char *src, size_t dsize)
       +{
       +        size_t dlen = 0;
       +        size_t slen = 0;
       +        size_t room;
       +
       +        /* Length of dst, without ever looking past its dsize bytes. */
       +        while (dlen < dsize && dst[dlen] != '\0')
       +                dlen++;
       +
       +        /* No terminator inside the buffer: nothing can be appended. */
       +        if (dlen == dsize)
       +                return(dlen + strlen(src));
       +
       +        /* Bytes available for src, the final NUL is already accounted for. */
       +        room = dsize - dlen - 1;
       +
       +        /* Copy what fits, but walk all of src to learn its full length. */
       +        for (; src[slen] != '\0'; slen++) {
       +                if (slen < room)
       +                        dst[dlen + slen] = src[slen];
       +        }
       +        dst[dlen + (slen < room ? slen : room)] = '\0';
       +
       +        return(dlen + slen);        /* count does not include NUL */
       +}
 (DIR) diff --git a/strlcpy.c b/strlcpy.c
       @@ -0,0 +1,49 @@
       +/*        $OpenBSD: strlcpy.c,v 1.12 2015/01/15 03:54:12 millert Exp $        */
       +
       +/*
       + * Copyright (c) 1998, 2015 Todd C. Miller <Todd.Miller@courtesan.com>
       + *
       + * Permission to use, copy, modify, and distribute this software for any
       + * purpose with or without fee is hereby granted, provided that the above
       + * copyright notice and this permission notice appear in all copies.
       + *
       + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       + */
       +
       +#include <string.h>
       +
       +/*
       + * Copy the string src into the buffer dst which is dsize bytes large.
       + * No more than dsize-1 bytes are copied and dst is always NUL-terminated,
       + * except when dsize is 0: then dst is not written to at all.
       + * Returns strlen(src); a return value >= dsize means src was truncated.
       + */
       +size_t
       +strlcpy(char *dst, const char *src, size_t dsize)
       +{
       +        size_t i = 0;
       +
       +        if (dsize != 0) {
       +                /* Copy as many bytes as will fit, keeping room for the NUL. */
       +                for (; i < dsize - 1 && src[i] != '\0'; i++)
       +                        dst[i] = src[i];
       +                dst[i] = '\0';
       +        }
       +
       +        /* Traverse the rest of src, if any, to find its total length. */
       +        while (src[i] != '\0')
       +                i++;
       +
       +        return(i);        /* count does not include NUL */
       +}
 (DIR) diff --git a/util.c b/util.c
       @@ -0,0 +1,207 @@
       +#include <errno.h>
       +#include <stdio.h>
       +#include <stdlib.h>
       +#include <string.h>
       +
       +#include "util.h"
       +
       +/* Check if the string `s` starts with a non-empty scheme / protocol part:
       +   one or more of the characters a-z, A-Z, 0-9, "+", "-" or "." which are
       +   directly followed by ":".
       +   Returns 1 if so, otherwise 0. */
       +int
       +uri_hasscheme(const char *s)
       +{
       +        size_t n = 0;
       +        unsigned char c;
       +
       +        /* count the leading characters that are allowed in a scheme */
       +        for (;;) {
       +                c = (unsigned char)s[n];
       +                if (!(ISALPHA(c) || ISDIGIT(c) ||
       +                      c == '+' || c == '-' || c == '.'))
       +                        break;
       +                n++;
       +        }
       +
       +        /* a string starting with ":" has an empty scheme: it is a path */
       +        return (n > 0 && s[n] == ':');
       +}
       +
       +/* Parse URI string `s` into the uri structure `u`.
       +   All fields of `u` are reset first; a component that is not present in
       +   `s` is left as an empty string. The `proto` field keeps its trailing
       +   ":" or "://" and an IPv6 host keeps its brackets.
       +   The port may be empty, otherwise it must consist of decimal digits only
       +   and be in the range 1 - 65535.
       +   Returns 0 on success or -1 on failure: a component does not fit in its
       +   field, the port is invalid or the IPv6 host is malformed. On failure
       +   the fields parsed so far remain set in `u`. */
       +int
       +uri_parse(const char *s, struct uri *u)
       +{
       +        const char *p = s;
       +        char *endptr;
       +        size_t i;
       +        long l;
       +
       +        u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
       +        u->path[0] = u->query[0] = u->fragment[0] = '\0';
       +
       +        /* protocol-relative */
       +        if (*p == '/' && *(p + 1) == '/') {
       +                p += 2; /* skip "//" */
       +                goto parseauth;
       +        }
       +
       +        /* scheme / protocol part */
       +        for (; ISALPHA((unsigned char)*p) || ISDIGIT((unsigned char)*p) ||
       +                       *p == '+' || *p == '-' || *p == '.'; p++)
       +                ;
       +        /* scheme, except if empty and starts with ":" then it is a path */
       +        if (*p == ':' && p != s) {
       +                if (*(p + 1) == '/' && *(p + 2) == '/')
       +                        p += 3; /* skip "://" */
       +                else
       +                        p++; /* skip ":" */
       +
       +                if ((size_t)(p - s) >= sizeof(u->proto))
       +                        return -1; /* protocol too long */
       +                memcpy(u->proto, s, p - s);
       +                u->proto[p - s] = '\0';
       +
       +                /* no "//" after the scheme: no authority, the rest is a path */
       +                if (*(p - 1) != '/')
       +                        goto parsepath;
       +        } else {
       +                p = s; /* no scheme format, reset to start */
       +                goto parsepath;
       +        }
       +
       +parseauth:
       +        /* userinfo (username:password) */
       +        i = strcspn(p, "@/?#");
       +        if (p[i] == '@') {
       +                if (i >= sizeof(u->userinfo))
       +                        return -1; /* userinfo too long */
       +                memcpy(u->userinfo, p, i);
       +                u->userinfo[i] = '\0';
       +                p += i + 1;
       +        }
       +
       +        /* IPv6 address */
       +        if (*p == '[') {
       +                /* bracket not found, host too short or too long */
       +                i = strcspn(p, "]");
       +                if (p[i] != ']' || i < 3)
       +                        return -1;
       +                i++; /* including "]" */
       +        } else {
       +                /* domain / host part, skip until port, path or end. */
       +                i = strcspn(p, ":/?#");
       +        }
       +        if (i >= sizeof(u->host))
       +                return -1; /* host too long */
       +        memcpy(u->host, p, i);
       +        u->host[i] = '\0';
       +        p += i;
       +
       +        /* port */
       +        if (*p == ':') {
       +                p++;
       +                if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
       +                        return -1; /* port too long */
       +                memcpy(u->port, p, i);
       +                u->port[i] = '\0';
       +                /* check for valid port: may be empty, otherwise digits only
       +                   and in the range 1 - 65535. The digits are checked
       +                   separately because strtol() also accepts leading
       +                   whitespace and a sign, for example "+80". */
       +                if (i) {
       +                        if (strspn(u->port, "0123456789") != i)
       +                                return -1;
       +                        errno = 0;
       +                        l = strtol(u->port, &endptr, 10);
       +                        if (errno || *endptr || l <= 0 || l > 65535)
       +                                return -1;
       +                }
       +                p += i;
       +        }
       +
       +parsepath:
       +        /* path */
       +        if ((i = strcspn(p, "?#")) >= sizeof(u->path))
       +                return -1; /* path too long */
       +        memcpy(u->path, p, i);
       +        u->path[i] = '\0';
       +        p += i;
       +
       +        /* query, stored without the leading "?" */
       +        if (*p == '?') {
       +                p++;
       +                if ((i = strcspn(p, "#")) >= sizeof(u->query))
       +                        return -1; /* query too long */
       +                memcpy(u->query, p, i);
       +                u->query[i] = '\0';
       +                p += i;
       +        }
       +
       +        /* fragment, stored without the leading "#" */
       +        if (*p == '#') {
       +                p++;
       +                if ((i = strlen(p)) >= sizeof(u->fragment))
       +                        return -1; /* fragment too long */
       +                memcpy(u->fragment, p, i);
       +                u->fragment[i] = '\0';
       +        }
       +
       +        return 0;
       +}
       +
       +/* Transform and try to make the URI `u` absolute using base URI `b` into `a`.
       +   Follows some of the logic from "RFC 3986 - 5.2.2. Transform References":
       +   - when `u` has a scheme or a host its components are copied as is, only
       +     a missing scheme is taken from `b`.
       +   - otherwise the scheme, userinfo, host and port come from `b` and the
       +     path is merged: an empty path uses the path of `b`, a path starting
       +     with "/" is used as is and any other path is appended to the path of
       +     `b` up to and including its last "/".
       +   - the query of `b` is only used when `u` has neither a path nor a query.
       +   The fragment is always taken from `u`. Dot segments ("." and "..") are
       +   not resolved.
       +   Neither `u` nor `b` is modified.
       +   Returns 0 on success, -1 on error or truncation. When -1 is returned the
       +   contents of `a` are incomplete and must not be used. */
       +int
       +uri_makeabs(struct uri *a, struct uri *u, struct uri *b)
       +{
       +        const char *p;
       +        size_t len, n;
       +
       +        strlcpy(a->fragment, u->fragment, sizeof(a->fragment));
       +
       +        /* `u` has its own scheme or authority: the base only provides a
       +           missing scheme (protocol-relative URI). */
       +        if (u->proto[0] || u->host[0]) {
       +                strlcpy(a->proto, u->proto[0] ? u->proto : b->proto, sizeof(a->proto));
       +                strlcpy(a->userinfo, u->userinfo, sizeof(a->userinfo));
       +                strlcpy(a->host, u->host, sizeof(a->host));
       +                strlcpy(a->port, u->port, sizeof(a->port));
       +                strlcpy(a->path, u->path, sizeof(a->path));
       +                strlcpy(a->query, u->query, sizeof(a->query));
       +                return 0;
       +        }
       +
       +        strlcpy(a->proto, b->proto, sizeof(a->proto));
       +        strlcpy(a->userinfo, b->userinfo, sizeof(a->userinfo));
       +        strlcpy(a->host, b->host, sizeof(a->host));
       +        strlcpy(a->port, b->port, sizeof(a->port));
       +
       +        if (!u->path[0]) {
       +                strlcpy(a->path, b->path, sizeof(a->path));
       +        } else if (u->path[0] == '/') {
       +                strlcpy(a->path, u->path, sizeof(a->path));
       +        } else {
       +                /* a base with a host but without an absolute path: start
       +                   the merged path with "/" */
       +                len = 0;
       +                if (b->host[0] && b->path[0] != '/')
       +                        a->path[len++] = '/';
       +                a->path[len] = '\0';
       +
       +                /* append the base path up to and including its last "/".
       +                   It is copied by length, so `b` does not have to be
       +                   modified (and restored) to cut off its last segment. */
       +                if ((p = strrchr(b->path, '/'))) {
       +                        n = (size_t)(p - b->path) + 1;
       +                        if (len + n >= sizeof(a->path))
       +                                return -1;
       +                        memcpy(a->path + len, b->path, n);
       +                        len += n;
       +                        a->path[len] = '\0';
       +                }
       +                if (strlcat(a->path, u->path, sizeof(a->path)) >= sizeof(a->path))
       +                        return -1;
       +        }
       +
       +        if (u->path[0] || u->query[0])
       +                strlcpy(a->query, u->query, sizeof(a->query));
       +        else
       +                strlcpy(a->query, b->query, sizeof(a->query));
       +
       +        return 0;
       +}
       +
       +/* Format the URI `u` as a string into `buf` which is `bufsiz` bytes large.
       +   The separators "@", ":", "?" and "#" are only written when the
       +   component they belong to is not empty. A "/" is inserted between the
       +   host and a non-empty path that does not start with "/".
       +   Returns the value of snprintf(): the length the complete string has,
       +   not counting the NUL byte, so a value >= `bufsiz` means the output was
       +   truncated. A negative value indicates an error. */
       +int
       +uri_format(char *buf, size_t bufsiz, struct uri *u)
       +{
       +        const char *at, *colon, *slash, *qmark, *hash;
       +
       +        at = u->userinfo[0] ? "@" : "";
       +        colon = u->port[0] ? ":" : "";
       +        if (u->host[0] && u->path[0] && u->path[0] != '/')
       +                slash = "/";
       +        else
       +                slash = "";
       +        qmark = u->query[0] ? "?" : "";
       +        hash = u->fragment[0] ? "#" : "";
       +
       +        return snprintf(buf, bufsiz, "%s%s%s%s%s%s%s%s%s%s%s%s",
       +                u->proto,
       +                u->userinfo, at,
       +                u->host,
       +                colon, u->port,
       +                slash, u->path,
       +                qmark, u->query,
       +                hash, u->fragment);
       +}
 (DIR) diff --git a/util.h b/util.h
       @@ -0,0 +1,26 @@
       +#ifndef UTIL_H
       +#define UTIL_H
       +
       +#include <stdio.h>
       +
       +/* ctype-like macros, but always compatible with ASCII / UTF-8.
       +   The argument is converted to unsigned, so one unsigned comparison covers
       +   both the lower and the upper bound of the range. The argument is
       +   parenthesized, so any expression can be passed. */
       +#define ISALPHA(c) ((((unsigned)(c)) | 32) - 'a' < 26)
       +#define ISDIGIT(c) (((unsigned)(c)) - '0' < 10)
       +
       +/* always use the bundled strlcat() and strlcpy(), also when the system
       +   headers define them as macros */
       +#undef strlcat
       +size_t strlcat(char *dst, const char *src, size_t dsize);
       +#undef strlcpy
       +size_t strlcpy(char *dst, const char *src, size_t dsize);
       +
       +/* URI: each component is a NUL-terminated string, empty when not present */
       +struct uri {
       +        char proto[48];     /* scheme including ":" or "://" */
       +        char userinfo[256]; /* username [:password] */
       +        char host[256];     /* an IPv6 address includes the "[" and "]" */
       +        char port[6];       /* numeric port */
       +        char path[1024];
       +        char query[1024];   /* without the leading "?" */
       +        char fragment[1024]; /* without the leading "#" */
       +};
       +
       +int uri_format(char *buf, size_t bufsiz, struct uri *u);
       +int uri_hasscheme(const char *s);
       +int uri_makeabs(struct uri *a, struct uri *u, struct uri *b);
       +int uri_parse(const char *s, struct uri *u);
       +
       +#endif