xml.c: validate numeric entities more strictly - sfeed - RSS and Atom parser
(HTM) git clone git://git.codemadness.org/sfeed
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit bfa2642d1e469839d55060e820cf0073cabaf270
(DIR) parent cbd70903033be0b9f3a9790ef6c61fb3f5769e66
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 16 Nov 2025 22:04:42 +0100
xml.c: validate numeric entities more strictly
validate numeric entities more strictly before passing them to strtol().
Examples of malformed entities that are now rejected:
- "&# 65;" (spaces).
- "&#-65;" (negative number).
- "&#af;" (hex but no hexadecimal x prefix): "&#xaf;" (correct)
Diffstat:
M xml.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -6,7 +6,9 @@
#include "xml.h"
#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+#define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || (((unsigned)c) | 32) - 'a' < 6)
static void
xml_parseattrs(XMLParser *x)
@@ -241,18 +243,30 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
long l;
int len;
+ const char *s;
char *end;
/* buffer is too small */
if (bufsiz < 5)
return -1;
- errno = 0;
/* hex (16) or decimal (10) */
- if (*e == 'x')
- l = strtol(++e, &end, 16);
- else
+ errno = 0; /* for strtol() */
+ if (*e == 'x') {
+ e++;
+ for (s = e; *s && *s != ';'; s++) {
+ if (!ISXDIGIT((unsigned char)*s))
+ return -1; /* invalid: no hex */
+ }
+ l = strtol(e, &end, 16);
+ } else {
+ for (s = e; *s && *s != ';'; s++) {
+ if (!ISDIGIT((unsigned char)*s))
+ return -1; /* invalid: no digits */
+ }
l = strtol(e, &end, 10);
+ }
+
/* invalid value or not a well-formed entity or invalid code point */
if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
(l >= 0xd800 && l <= 0xdfff))
@@ -364,7 +378,7 @@ xml_parse(XMLParser *x)
/* parse tag data */
datalen = 0;
while ((c = GETNEXT()) != EOF) {
- if (c == '&') {
+ if (c == '&') { /* entities */
if (datalen) {
x->data[datalen] = '\0';
if (x->xmldata)