xml.c: validate numeric entities more strictly - sfeed - RSS and Atom parser
 (HTM) git clone git://git.codemadness.org/sfeed
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit bfa2642d1e469839d55060e820cf0073cabaf270
 (DIR) parent cbd70903033be0b9f3a9790ef6c61fb3f5769e66
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sun, 16 Nov 2025 22:04:42 +0100
       
       xml.c: validate numeric entities more strictly
       
       validate numeric entities more strictly before passing them to strtol().
       
       Examples:
       - "&# 65;" (spaces).
       - "&#-65;" (negative number).
       - "&#af;" (hex digits without the hexadecimal "x" prefix; "&#xaf;" is the correct form).
       
       Diffstat:
         M xml.c                               |      24 +++++++++++++++++++-----
       
       1 file changed, 19 insertions(+), 5 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -6,7 +6,9 @@
        #include "xml.h"
        
        #define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
       +#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
        #define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
       +#define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || (((unsigned)c) | 32) - 'a' < 6)
        
        static void
        xml_parseattrs(XMLParser *x)
       @@ -241,18 +243,30 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
        {
                long l;
                int len;
       +        const char *s;
                char *end;
        
                /* buffer is too small */
                if (bufsiz < 5)
                        return -1;
        
       -        errno = 0;
                /* hex (16) or decimal (10) */
       -        if (*e == 'x')
       -                l = strtol(++e, &end, 16);
       -        else
       +        errno = 0; /* for strtol() */
       +        if (*e == 'x') {
       +                e++;
       +                for (s = e; *s && *s != ';'; s++) {
       +                        if (!ISXDIGIT((unsigned char)*s))
       +                                return -1; /* invalid: no hex */
       +                }
       +                l = strtol(e, &end, 16);
       +        } else {
       +                for (s = e; *s && *s != ';'; s++) {
       +                        if (!ISDIGIT((unsigned char)*s))
       +                                return -1; /* invalid: no digits */
       +                }
                        l = strtol(e, &end, 10);
       +        }
       +
                /* invalid value or not a well-formed entity or invalid code point */
                if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
                    (l >= 0xd800 && l <= 0xdfff))
       @@ -364,7 +378,7 @@ xml_parse(XMLParser *x)
                                /* parse tag data */
                                datalen = 0;
                                while ((c = GETNEXT()) != EOF) {
       -                                if (c == '&') {
       +                                if (c == '&') { /* entities */
                                                if (datalen) {
                                                        x->data[datalen] = '\0';
                                                        if (x->xmldata)