Ignore queries and fragments in URIs - quark - quark web server
 (HTM) git clone git://git.suckless.org/quark
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit 319ba7083fdde836d6614c6b8b228bf3a9849e95
 (DIR) parent c6a9055e5a30be570e30da8d216c39662c3a3f99
 (HTM) Author: Laslo Hunhold <dev@frign.de>
       Date:   Sat, 30 Jan 2021 12:53:00 +0100
       
       Ignore queries and fragments in URIs
       
       Previously, a request for "/index.html" would yield a 200, while a
       request for "/index.html?foo=bar" would yield a 404, as quark would
       look for the file "index.html?foo=bar" in the serve directory.
       
       To accommodate this behaviour, it's no longer sufficient to just compare
       realuri and req->uri. Instead, we set a "dirty" flag every time we
       change the URI in such a way that it requires a redirect.
       
       According to RFC 3986 section 3, queries and fragments are there
       to (further) "identify a resource within the scope of the URI's scheme
       and naming authority (if any)". However, it's perfectly legitimate to
       just ignore this further specification when the URI itself is already
       pointing at a unique resource (i.e. "/index.html").
       
       This behaviour is consistent with dynamic web applications which usually
       ignore parameters they don't care about. Quark is too much Zen to care
       about any parameters. This has the added bonus that you can now clone
       repositories (read-only) via the "dumb" HTTP git-protocol, so
       
               git clone https://example.org/git/project.git
       
       is now possible (provided you run update-server-info during the
       post-update-hook). This wouldn't work previously because git, when
       asked to clone via HTTP, would first probe the server with a request for
       
               project.git/info/refs?service=git-upload-pack
       
       (i.e. asking for the "smart" HTTP git-protocol to confirm). Quark would
       return a 404, though, while git only gracefully "downgrades" to the
       "dumb" HTTP git-protocol if the request succeeds but only yields a basic
       200 response without special git-headers.
       
       This way, it is now trivial to also share git-repositories (and other
       gracefully-downgrading protocols). While the "dumb" HTTP git-protocol
       only supports read-only-access, I don't think that's much of an overall
       loss (to the contrary!).
       
       HTTP authentication is broken and it makes much more sense to enable
       ssh-access to contributors and make them push changes via ssh. The key
       advantage of HTTP-cloning over git://-cloning is the fact that the git
       protocol can be tampered with, while the HTTP-protocol can be encapsulated
       into a secure TLS connection.
       
       Signed-off-by: Laslo Hunhold <dev@frign.de>
       
       Diffstat:
         M http.c                              |      69 +++++++++++++++++++++++--------
       
       1 file changed, 51 insertions(+), 18 deletions(-)
       ---
 (DIR) diff --git a/http.c b/http.c
       @@ -368,12 +368,12 @@ static int
        normabspath(char *path)
        {
                size_t len;
       -        int last = 0;
       +        int dirty = 0, last = 0;
                char *p, *q;
        
                /* require and skip first slash */
                if (path[0] != '/') {
       -                return 1;
       +                return -1;
                }
                p = path + 1;
        
       @@ -387,7 +387,9 @@ normabspath(char *path)
                                last = 1;
                        }
        
       -                if (p == q || (q - p == 1 && p[0] == '.')) {
       +                if (*p == '\0') {
       +                        break;
       +                } else if (p == q || (q - p == 1 && p[0] == '.')) {
                                /* "/" or "./" */
                                goto squash;
                        } else if (q - p == 2 && p[0] == '.' && p[1] == '.') {
       @@ -412,9 +414,10 @@ squash:
                                memmove(p, q + 1, len - ((q + 1) - path) + 2);
                                len -= (q + 1) - p;
                        }
       +                dirty = 1;
                }
        
       -        return 0;
       +        return dirty;
        }
        
        static enum status
       @@ -562,7 +565,7 @@ http_prepare_response(const struct request *req, struct response *res,
                struct tm tm = { 0 };
                struct vhost *vhost;
                size_t len, i;
       -        int hasport, ipv6host;
       +        int dirty = 0, hasport, ipv6host;
                static char realuri[PATH_MAX], tmpuri[PATH_MAX];
                char *p, *mime;
                const char *targethost;
       @@ -570,11 +573,29 @@ http_prepare_response(const struct request *req, struct response *res,
                /* empty all response fields */
                memset(res, 0, sizeof(*res));
        
       -        /* make a working copy of the URI and normalize it */
       +        /*
       +         * make a working copy of the URI, strip queries and fragments
       +         * (ignorable according to RFC 3986 section 3) and normalize it
       +         */
                memcpy(realuri, req->uri, sizeof(realuri));
       -        if (normabspath(realuri)) {
       +
       +        if ((p = strchr(realuri, '?'))) {
       +                *p = '\0';
       +        } else if ((p = strchr(realuri, '#'))) {
       +                *p = '\0';
       +        }
       +
       +        switch (normabspath(realuri)) {
       +        case -1:
                        s = S_BAD_REQUEST;
                        goto err;
       +        case 0:
       +                /* string is unchanged */
       +                break;
       +        case 1:
       +                /* string was changed */
       +                dirty = 1;
       +                break;
                }
        
                /* match vhost */
       @@ -594,10 +615,12 @@ http_prepare_response(const struct request *req, struct response *res,
                        }
        
                        /* if we have a vhost prefix, prepend it to the URI */
       -                if (vhost->prefix &&
       -                    prepend(realuri, LEN(realuri), vhost->prefix)) {
       -                        s = S_REQUEST_TOO_LARGE;
       -                        goto err;
       +                if (vhost->prefix) {
       +                        if (prepend(realuri, LEN(realuri), vhost->prefix)) {
       +                                s = S_REQUEST_TOO_LARGE;
       +                                goto err;
       +                        }
       +                        dirty = 1;
                        }
                }
        
       @@ -618,14 +641,23 @@ http_prepare_response(const struct request *req, struct response *res,
                                        s = S_REQUEST_TOO_LARGE;
                                        goto err;
                                }
       +                        dirty = 1;
                                break;
                        }
                }
        
                /* normalize URI again, in case we introduced dirt */
       -        if (normabspath(realuri)) {
       +        switch (normabspath(realuri)) {
       +        case -1:
                        s = S_BAD_REQUEST;
                        goto err;
       +        case 0:
       +                /* string is unchanged */
       +                break;
       +        case 1:
       +                /* string was changed */
       +                dirty = 1;
       +                break;
                }
        
                /* stat the relative path derived from the URI */
       @@ -644,6 +676,7 @@ http_prepare_response(const struct request *req, struct response *res,
                        if (len > 0 && realuri[len - 1] != '/') {
                                realuri[len] = '/';
                                realuri[len + 1] = '\0';
       +                        dirty = 1;
                        }
                }
        
       @@ -658,10 +691,10 @@ http_prepare_response(const struct request *req, struct response *res,
                }
        
                /*
       -         * redirect if the original URI and the "real" URI differ or if
       -         * the requested host is non-canonical
       +         * redirect if the URI needs to be redirected or the requested
       +         * host is non-canonical
                 */
       -        if (strcmp(req->uri, realuri) || (srv->vhost && vhost &&
       +        if (dirty || (srv->vhost && vhost &&
                    strcmp(req->field[REQ_HOST], vhost->chost))) {
                        res->status = S_MOVED_PERMANENTLY;
        
       @@ -716,12 +749,12 @@ http_prepare_response(const struct request *req, struct response *res,
                         * (optionally including the vhost servedir as a prefix)
                         * into the actual response-path
                         */
       -                if (esnprintf(res->uri, sizeof(res->uri), "%s", req->uri)) {
       +                if (esnprintf(res->uri, sizeof(res->uri), "%s", realuri)) {
                                s = S_REQUEST_TOO_LARGE;
                                goto err;
                        }
                        if (esnprintf(res->path, sizeof(res->path), "%s%s",
       -                    vhost ? vhost->dir : "", RELPATH(req->uri))) {
       +                    vhost ? vhost->dir : "", RELPATH(realuri))) {
                                s = S_REQUEST_TOO_LARGE;
                                goto err;
                        }
       @@ -733,7 +766,7 @@ http_prepare_response(const struct request *req, struct response *res,
                         * the URI
                         */
                        if (esnprintf(tmpuri, sizeof(tmpuri), "%s%s",
       -                              req->uri, srv->docindex)) {
       +                              realuri, srv->docindex)) {
                                s = S_REQUEST_TOO_LARGE;
                                goto err;
                        }