xml.c: more strictly check numeric entities from the "library" side - sfeed - RSS and Atom parser
 (HTM) git clone git://git.codemadness.org/sfeed
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit a16377b91f996a3cc9772c23cc8aadd48a13651c
 (DIR) parent 8cf6e3f18ddfaa0d71a3d0a5535a175502b1d276
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Thu, 11 Dec 2025 20:21:38 +0100
       
       xml.c: more strictly check numeric entities from the "library" side
       
       For sfeed there is no functional difference, because of the specific way it
       parses entities.
       
       The xml.{c,h} code is reusable and some programs might use xml_entitytostr()
       with less validation beforehand.
       
       These cases are now also tested in the sfeed_tests repository (coverage for
       xml.c went from 98% to 100%).
       
       Diffstat:
         M xml.c                               |      15 ++++++++++-----
       
       1 file changed, 10 insertions(+), 5 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -242,7 +242,7 @@ static int
        numericentitytostr(const char *e, char *buf, size_t bufsiz)
        {
                long l;
       -        int len;
       +        int base, len;
                const char *s;
                char *end;
        
       @@ -250,22 +250,27 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
                if (bufsiz < 5)
                        return -1;
        
       -        /* hex (16) or decimal (10) */
       -        errno = 0; /* for strtol() */
       +        /* hex (base 16) or decimal (base 10) */
                if (*e == 'x') {
                        e++;
                        for (s = e; *s && *s != ';'; s++) {
                                if (!ISXDIGIT((unsigned char)*s))
                                        return -1; /* invalid: no hex */
                        }
       -                l = strtol(e, &end, 16);
       +                base = 16;
       +
                } else {
                        for (s = e; *s && *s != ';'; s++) {
                                if (!ISDIGIT((unsigned char)*s))
                                        return -1; /* invalid: no digits */
                        }
       -                l = strtol(e, &end, 10);
       +                base = 10;
                }
       +        if (*s != ';' || *(s + 1) != '\0')
       +                return -1; /* must end with ';' NUL */
       +
       +        errno = 0;
       +        l = strtol(e, &end, base);
        
                /* invalid value or not a well-formed entity or invalid code point */
                if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||