xml.c: more strictly check numeric entities from the "library" side - sfeed - RSS and Atom parser
(HTM) git clone git://git.codemadness.org/sfeed
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit a16377b91f996a3cc9772c23cc8aadd48a13651c
(DIR) parent 8cf6e3f18ddfaa0d71a3d0a5535a175502b1d276
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 11 Dec 2025 20:21:38 +0100
xml.c: more strictly check numeric entities from the "library" side
For sfeed itself there is no functional difference, because of the particular
way sfeed parses entities.
The xml.{c,h} code is reusable and some programs might use xml_entitytostr()
with less validation beforehand.
These cases are now also tested in the sfeed_tests repository (coverage for
xml.c went from 98% to 100%).
Diffstat:
M xml.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -242,7 +242,7 @@ static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
long l;
- int len;
+ int base, len;
const char *s;
char *end;
@@ -250,22 +250,27 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
if (bufsiz < 5)
return -1;
- /* hex (16) or decimal (10) */
- errno = 0; /* for strtol() */
+ /* hex (base 16) or decimal (base 10) */
if (*e == 'x') {
e++;
for (s = e; *s && *s != ';'; s++) {
if (!ISXDIGIT((unsigned char)*s))
return -1; /* invalid: no hex */
}
- l = strtol(e, &end, 16);
+ base = 16;
+
} else {
for (s = e; *s && *s != ';'; s++) {
if (!ISDIGIT((unsigned char)*s))
return -1; /* invalid: no digits */
}
- l = strtol(e, &end, 10);
+ base = 10;
}
+ if (*s != ';' || *(s + 1) != '\0')
+ return -1; /* must end with ';' NUL */
+
+ errno = 0;
+ l = strtol(e, &end, base);
/* invalid value or not a well-formed entity or invalid code point */
if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||