sync XML improvements - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 2872a29d4f44afbfa4f439ba1f3d84c22114b0d4
 (DIR) parent bd299de160e8f56d6f88538d9d4d4ded4775038d
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Mon, 22 Apr 2019 14:46:30 +0200
       
       sync XML improvements
       
       Diffstat:
         M xml.c                               |      26 ++++++++++++--------------
         M xml.h                               |       7 +++++++
       
       2 files changed, 19 insertions(+), 14 deletions(-)
       ---
 (DIR) diff --git a/xml.c b/xml.c
       @@ -15,7 +15,7 @@ xml_parseattrs(XMLParser *x)
                size_t namelen = 0, valuelen;
                int c, endsep, endname = 0, valuestart = 0;
        
       -        while ((c = x->getnext()) != EOF) {
       +        while ((c = GETNEXT()) != EOF) {
                        if (isspace(c)) {
                                if (namelen)
                                        endname = 1;
       @@ -51,7 +51,7 @@ xml_parseattrs(XMLParser *x)
                                        goto startvalue;
                                }
        
       -                        while ((c = x->getnext()) != EOF) {
       +                        while ((c = GETNEXT()) != EOF) {
        startvalue:
                                        if (c == '&') { /* entities */
                                                x->data[valuelen] = '\0';
       @@ -60,7 +60,7 @@ startvalue:
                                                        x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
                                                x->data[0] = c;
                                                valuelen = 1;
       -                                        while ((c = x->getnext()) != EOF) {
       +                                        while ((c = GETNEXT()) != EOF) {
                                                        if (c == endsep || (endsep == ' ' && (c == '>' || isspace(c))))
                                                                break;
                                                        if (valuelen < sizeof(x->data) - 1)
       @@ -124,7 +124,7 @@ xml_parsecomment(XMLParser *x)
        
                if (x->xmlcommentstart)
                        x->xmlcommentstart(x);
       -        while ((c = x->getnext()) != EOF) {
       +        while ((c = GETNEXT()) != EOF) {
                        if (c == '-' || c == '>') {
                                if (x->xmlcomment) {
                                        x->data[datalen] = '\0';
       @@ -173,7 +173,7 @@ xml_parsecdata(XMLParser *x)
        
                if (x->xmlcdatastart)
                        x->xmlcdatastart(x);
       -        while ((c = x->getnext()) != EOF) {
       +        while ((c = GETNEXT()) != EOF) {
                        if (c == ']' || c == '>') {
                                if (x->xmlcdata) {
                                        x->data[datalen] = '\0';
       @@ -324,18 +324,16 @@ xml_parse(XMLParser *x)
                size_t datalen, tagdatalen;
                int c, isend;
        
       -        if (!x->getnext)
       -                return;
       -        while ((c = x->getnext()) != EOF && c != '<')
       +        while ((c = GETNEXT()) != EOF && c != '<')
                        ; /* skip until < */
        
                while (c != EOF) {
                        if (c == '<') { /* parse tag */
       -                        if ((c = x->getnext()) == EOF)
       +                        if ((c = GETNEXT()) == EOF)
                                        return;
        
                                if (c == '!') { /* cdata and comments */
       -                                for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
       +                                for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
                                                /* NOTE: sizeof(x->data) must be atleast sizeof("[CDATA[") */
                                                if (tagdatalen <= sizeof("[CDATA[") - 1)
                                                        x->data[tagdatalen++] = c;
       @@ -363,13 +361,13 @@ xml_parse(XMLParser *x)
                                        if (c == '?') {
                                                x->isshorttag = 1;
                                        } else if (c == '/') {
       -                                        if ((c = x->getnext()) == EOF)
       +                                        if ((c = GETNEXT()) == EOF)
                                                        return;
                                                x->tag[0] = c;
                                                isend = 1;
                                        }
        
       -                                while ((c = x->getnext()) != EOF) {
       +                                while ((c = GETNEXT()) != EOF) {
                                                if (c == '/')
                                                        x->isshorttag = 1; /* short tag */
                                                else if (c == '>' || isspace(c)) {
       @@ -405,7 +403,7 @@ xml_parse(XMLParser *x)
                                datalen = 0;
                                if (x->xmldatastart)
                                        x->xmldatastart(x);
       -                        while ((c = x->getnext()) != EOF) {
       +                        while ((c = GETNEXT()) != EOF) {
                                        if (c == '&') {
                                                if (datalen) {
                                                        x->data[datalen] = '\0';
       @@ -414,7 +412,7 @@ xml_parse(XMLParser *x)
                                                }
                                                x->data[0] = c;
                                                datalen = 1;
       -                                        while ((c = x->getnext()) != EOF) {
       +                                        while ((c = GETNEXT()) != EOF) {
                                                        if (c == '<')
                                                                break;
                                                        if (datalen < sizeof(x->data) - 1)
 (DIR) diff --git a/xml.h b/xml.h
       @@ -1,3 +1,6 @@
       +#ifndef _XML_H
       +#define _XML_H
       +
        typedef struct xmlparser {
                /* handlers */
                void (*xmlattr)(struct xmlparser *, const char *, size_t,
       @@ -23,7 +26,10 @@ typedef struct xmlparser {
                void (*xmltagstartparsed)(struct xmlparser *, const char *,
                      size_t, int);
        
       +#ifndef GETNEXT
       +        #define GETNEXT (x)->getnext
                int (*getnext)(void);
       +#endif
        
                /* current tag */
                char tag[1024];
       @@ -38,3 +44,4 @@ typedef struct xmlparser {
        
        int xml_entitytostr(const char *, char *, size_t);
        void xml_parse(XMLParser *);
       +#endif