sync XML improvements - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 5df58d27f557292778cdc5dee306f18db8c980f7
 (DIR) parent f8629e681a16fc3af086355a44c942df57291b4b
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Sat,  1 Feb 2020 15:02:27 +0100
       
       sync XML improvements
       
       Diffstat:
         M tscrape.c                           |       8 ++++----
         M xml.c                               |      24 ++++++++----------------
         M xml.h                               |       2 ++
       
       3 files changed, 14 insertions(+), 20 deletions(-)
       ---
 (DIR) diff --git a/tscrape.c b/tscrape.c
       @@ -107,10 +107,10 @@ isclassmatch(const char *classes, const char *clss, size_t len)
        }
        
        /* convert XML and some HTML entities */
       -static ssize_t
       +static int
        html_entitytostr(const char *s, char *buf, size_t bufsiz)
        {
       -        ssize_t len;
       +        int len;
        
                if ((len = xml_entitytostr(s, buf, bufsiz)) > 0)
                        return len;
       @@ -244,7 +244,7 @@ xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
                      const char *v, size_t vl)
        {
                char buf[16];
       -        ssize_t len;
       +        int len;
        
                if (!state)
                        return;
       @@ -267,7 +267,7 @@ static void
        xmldataentity(XMLParser *x, const char *d, size_t dl)
        {
                char buf[16];
       -        ssize_t len;
       +        int len;
        
                if (!(state & Text))
                        return;
 (DIR) diff --git a/xml.c b/xml.c
       @@ -1,8 +1,5 @@
       -#include <sys/types.h>
       -
        #include <ctype.h>
        #include <errno.h>
       -#include <limits.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
       @@ -255,11 +252,6 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
                        { "gt;",   '>'  },
                        { "apos;", '\'' },
                        { "quot;", '"'  },
       -                { "AMP;",  '&'  },
       -                { "LT;",   '<'  },
       -                { "GT;",   '>'  },
       -                { "APOS;", '\'' },
       -                { "QUOT;", '"'  }
                };
                size_t i;
        
       @@ -274,7 +266,7 @@ namedentitytostr(const char *e, char *buf, size_t bufsiz)
                                return 1;
                        }
                }
       -        return 0;
       +        return -1;
        }
        
        static int
       @@ -291,12 +283,12 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
                errno = 0;
                /* hex (16) or decimal (10) */
                if (*e == 'x')
       -                l = strtoul(e + 1, &end, 16);
       +                l = strtol(++e, &end, 16);
                else
       -                l = strtoul(e, &end, 10);
       -        /* invalid value or not a well-formed entity or too high codepoint */
       -        if (errno || *end != ';' || l > 0x10FFFF)
       -                return 0;
       +                l = strtol(e, &end, 10);
       +        /* invalid value or not a well-formed entity or invalid codepoint */
       +        if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff)
       +                return -1;
                len = codepointtoutf8(l, buf);
                buf[len] = '\0';
        
       @@ -304,13 +296,13 @@ numericentitytostr(const char *e, char *buf, size_t bufsiz)
        }
        
        /* convert named- or numeric entity string to buffer string
       - * returns byte-length of string. */
       + * returns byte-length of string or -1 on failure. */
        int
        xml_entitytostr(const char *e, char *buf, size_t bufsiz)
        {
                /* doesn't start with & */
                if (e[0] != '&')
       -                return 0;
       +                return -1;
                /* numeric entity */
                if (e[1] == '#')
                        return numericentitytostr(e + 2, buf, bufsiz);
 (DIR) diff --git a/xml.h b/xml.h
       @@ -1,6 +1,8 @@
        #ifndef _XML_H
        #define _XML_H
        
       +#include <stdio.h>
       +
        typedef struct xmlparser {
                /* handlers */
                void (*xmlattr)(struct xmlparser *, const char *, size_t,