sync xml.c improvements - frontends - front-ends for some sites (experiment)
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 42fa84b70552cbd5338298050a35536132622eb9
 (DIR) parent d7b1c9d7714418bc5857fc83d5c7161a4d2f0bbb
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Thu, 11 Dec 2025 21:02:17 +0100
       
       sync xml.c improvements
       
       Diffstat:
         M xml.c                               |      41 ++++++++++++++++++++++---------
         M xml.h                               |       4 ++--
       
       2 files changed, 32 insertions(+), 13 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -6,7 +6,9 @@
        #include "xml.h"
        
        #define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
       +#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
        #define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
       +#define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || (((unsigned)c) | 32) - 'a' < 6)
        
        /* data buffers, size and offset used for parsing XML, see getnext() */
        static const unsigned char *xml_data_buf;
       @@ -301,22 +303,39 @@ static int
        numericentitytostr(const char *e, char *buf, size_t bufsiz)
        {
                long l;
       -        int len;
       +        int base, len;
       +        const char *s;
                char *end;
        
                /* buffer is too small */
                if (bufsiz < 5)
                        return -1;
        
       +        /* hex (base 16) or decimal (base 10) */
       +        if (*e == 'x') {
       +                e++;
       +                for (s = e; *s && *s != ';'; s++) {
       +                        if (!ISXDIGIT((unsigned char)*s))
       +                                return -1; /* invalid: no hex */
       +                }
       +                base = 16;
       +
       +        } else {
       +                for (s = e; *s && *s != ';'; s++) {
       +                        if (!ISDIGIT((unsigned char)*s))
       +                                return -1; /* invalid: no digits */
       +                }
       +                base = 10;
       +        }
       +        if (*s != ';' || *(s + 1) != '\0')
       +                return -1; /* must end with ';' NUL */
       +
                errno = 0;
       -        /* hex (16) or decimal (10) */
       -        if (*e == 'x')
       -                l = strtol(++e, &end, 16);
       -        else
       -                l = strtol(e, &end, 10);
       +        l = strtol(e, &end, base);
       +
                /* invalid value or not a well-formed entity or invalid code point */
                if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
       -            (l >= 0xd800 && l <= 0xdfff))
       +            (l >= 0xd800 && l <= 0xdfff)) /* surrogate range */
                        return -1;
                len = codepointtoutf8(l, buf);
                buf[len] = '\0';
       @@ -353,7 +372,7 @@ xml_parse(XMLParser *x)
                                if ((c = GETNEXT()) == EOF)
                                        return;
        
       -                        if (c == '!') { /* cdata and comments */
       +                        if (c == '!') { /* CDATA and comments */
                                        for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
                                                /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
                                                if (tagdatalen <= sizeof("[CDATA[") - 1)
       @@ -378,7 +397,7 @@ xml_parse(XMLParser *x)
                                        x->taglen = 1;
                                        x->isshorttag = isend = 0;
        
       -                                /* treat processing instruction as shorttag, don't strip "?" prefix. */
       +                                /* treat processing instruction as short tag, don't strip "?" prefix. */
                                        if (c == '?') {
                                                x->isshorttag = 1;
                                        } else if (c == '/') {
       @@ -409,7 +428,7 @@ xml_parse(XMLParser *x)
                                                                if (x->xmltagstartparsed)
                                                                        x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
                                                        }
       -                                                /* call tagend for shortform or processing instruction */
       +                                                /* call tagend for short tag or processing instruction */
                                                        if (x->isshorttag) {
                                                                if (x->xmltagend)
                                                                        x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
       @@ -427,7 +446,7 @@ xml_parse(XMLParser *x)
                                if (x->xmldatastart)
                                        x->xmldatastart(x);
                                while ((c = GETNEXT()) != EOF) {
       -                                if (c == '&') {
       +                                if (c == '&') { /* entities */
                                                if (datalen) {
                                                        x->data[datalen] = '\0';
                                                        if (x->xmldata)
 (DIR) diff --git a/xml.h b/xml.h
       @@ -34,11 +34,11 @@ typedef struct xmlparser {
                /* current tag */
                char tag[1024];
                size_t taglen;
       -        /* current tag is in short form ? <tag /> */
       +        /* current tag is a short tag ? <tag /> */
                int isshorttag;
                /* current attribute name */
                char name[1024];
       -        /* data buffer used for tag data, cdata and attribute data */
       +        /* data buffer used for tag data, CDATA and attribute data */
                char data[BUFSIZ];
        } XMLParser;