sync xml.c improvements - frontends - front-ends for some sites (experiment)
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 42fa84b70552cbd5338298050a35536132622eb9
(DIR) parent d7b1c9d7714418bc5857fc83d5c7161a4d2f0bbb
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 11 Dec 2025 21:02:17 +0100
sync xml.c improvements
Diffstat:
M xml.c | 41 ++++++++++++++++++++++---------
M xml.h | 4 ++--
2 files changed, 32 insertions(+), 13 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -6,7 +6,9 @@
#include "xml.h"
#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
+#define ISDIGIT(c) (((unsigned)c) - '0' < 10)
#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
+#define ISXDIGIT(c) ((((unsigned)c) - '0' < 10) || (((unsigned)c) | 32) - 'a' < 6)
/* data buffers, size and offset used for parsing XML, see getnext() */
static const unsigned char *xml_data_buf;
@@ -301,22 +303,39 @@ static int
numericentitytostr(const char *e, char *buf, size_t bufsiz)
{
long l;
- int len;
+ int base, len;
+ const char *s;
char *end;
/* buffer is too small */
if (bufsiz < 5)
return -1;
+ /* hex (base 16) or decimal (base 10) */
+ if (*e == 'x') {
+ e++;
+ for (s = e; *s && *s != ';'; s++) {
+ if (!ISXDIGIT((unsigned char)*s))
+ return -1; /* invalid: no hex */
+ }
+ base = 16;
+
+ } else {
+ for (s = e; *s && *s != ';'; s++) {
+ if (!ISDIGIT((unsigned char)*s))
+ return -1; /* invalid: no digits */
+ }
+ base = 10;
+ }
+ if (*s != ';' || *(s + 1) != '\0')
+ return -1; /* must end with ';' NUL */
+
errno = 0;
- /* hex (16) or decimal (10) */
- if (*e == 'x')
- l = strtol(++e, &end, 16);
- else
- l = strtol(e, &end, 10);
+ l = strtol(e, &end, base);
+
/* invalid value or not a well-formed entity or invalid code point */
if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
- (l >= 0xd800 && l <= 0xdfff))
+ (l >= 0xd800 && l <= 0xdfff)) /* surrogate range */
return -1;
len = codepointtoutf8(l, buf);
buf[len] = '\0';
@@ -353,7 +372,7 @@ xml_parse(XMLParser *x)
if ((c = GETNEXT()) == EOF)
return;
- if (c == '!') { /* cdata and comments */
+ if (c == '!') { /* CDATA and comments */
for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
/* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
if (tagdatalen <= sizeof("[CDATA[") - 1)
@@ -378,7 +397,7 @@ xml_parse(XMLParser *x)
x->taglen = 1;
x->isshorttag = isend = 0;
- /* treat processing instruction as shorttag, don't strip "?" prefix. */
+ /* treat processing instruction as short tag, don't strip "?" prefix. */
if (c == '?') {
x->isshorttag = 1;
} else if (c == '/') {
@@ -409,7 +428,7 @@ xml_parse(XMLParser *x)
if (x->xmltagstartparsed)
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
}
- /* call tagend for shortform or processing instruction */
+ /* call tagend for short tag or processing instruction */
if (x->isshorttag) {
if (x->xmltagend)
x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
@@ -427,7 +446,7 @@ xml_parse(XMLParser *x)
if (x->xmldatastart)
x->xmldatastart(x);
while ((c = GETNEXT()) != EOF) {
- if (c == '&') {
+ if (c == '&') { /* entities */
if (datalen) {
x->data[datalen] = '\0';
if (x->xmldata)
(DIR) diff --git a/xml.h b/xml.h
@@ -34,11 +34,11 @@ typedef struct xmlparser {
/* current tag */
char tag[1024];
size_t taglen;
- /* current tag is in short form ? <tag /> */
+ /* current tag is a short tag ? <tag /> */
int isshorttag;
/* current attribute name */
char name[1024];
- /* data buffer used for tag data, cdata and attribute data */
+ /* data buffer used for tag data, CDATA and attribute data */
char data[BUFSIZ];
} XMLParser;