do not convert UTF-16 surrogate pairs to an invalid sequence - grabtitle - stupid HTML title grabber
(HTM) git clone git://git.codemadness.org/grabtitle
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit efe5e8763fcc364f504198009d79f841c48bf7dc
(DIR) parent 375166031e3942890db414e46937ae485986a2fa
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Thu, 22 Apr 2021 20:19:06 +0200
do not convert UTF-16 surrogate code points (U+D800..U+DFFF) to an invalid UTF-8 sequence
Diffstat:
M xml.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -199,8 +199,9 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
l = strtol(++e, &end, 16);
else
l = strtol(e, &end, 10);
- /* invalid value or not a well-formed entity or invalid codepoint */
- if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
+ /* invalid value or not a well-formed entity or invalid code point */
+ if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
+ (l >= 0xd800 && l <= 0xdfff))
return -1;
len = codepointtoutf8(l, buf);
buf[len] = '\0';