sync XML parser and some improvements - sub - subscene.com subtitle search
(HTM) git clone git://git.codemadness.org/sub
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 919b13a33a111b5f946652c2e2ce0a07200a3fe3
(DIR) parent 6ef7f7e85bfb08f37166b9c8c450afb43bc7fc50
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sun, 11 Mar 2018 18:51:49 +0100
sync XML parser and some improvements
Diffstat:
M sub.c | 8 +++++---
M xml.c | 439 ++++++++++++++++++++-----------
M xml.h | 79 +++++++++++++++----------------
3 files changed, 322 insertions(+), 204 deletions(-)
---
(DIR) diff --git a/sub.c b/sub.c
@@ -1,3 +1,5 @@
+#include <sys/types.h>
+
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
@@ -165,14 +167,14 @@ main(void)
return 1;
}
- xmlparser_init(&parser, stdin);
-
parser.xmltagstart = xml_handler_start_element;
parser.xmltagend = xml_handler_end_element;
parser.xmlattr = xml_handler_attr;
parser.xmldata = xml_handler_data;
- xmlparser_parse(&parser);
+ parser.getnext = getchar;
+
+ xml_parse(&parser);
return 0;
}
(DIR) diff --git a/xml.c b/xml.c
@@ -1,110 +1,104 @@
+#include <sys/types.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
#include <stdio.h>
-#include <string.h>
#include <stdlib.h>
-#include <ctype.h>
+#include <string.h>
#include "xml.h"
-static __inline__ int /* like getc(), but do some smart buffering */
-xmlparser_getnext(XMLParser *x) {
- return fgetc(x->fp);
-#if 0
- if(x->readoffset >= x->readlastbytes) {
- x->readoffset = 0;
- if(!(x->readlastbytes = fread(x->readbuf, 1, sizeof(x->readbuf), x->fp)))
- return EOF; /* 0 bytes read, assume EOF */
- }
- return (int)x->readbuf[x->readoffset++];
-#endif
-}
-
-static __inline__ void
-xmlparser_parseattrs(XMLParser *x) {
+static void
+xml_parseattrs(XMLParser *x)
+{
size_t namelen = 0, valuelen;
int c, endsep, endname = 0;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(isspace(c)) { /* TODO: simplify endname ? */
- if(namelen)
+ while ((c = x->getnext()) != EOF) {
+ if (isspace(c)) { /* TODO: simplify endname ? */
+ if (namelen)
endname = 1;
continue;
}
- if(c == '?')
+ if (c == '?')
; /* ignore */
- else if(c == '=') {
+ else if (c == '=') {
x->name[namelen] = '\0';
- } else if(namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
+ } else if (namelen && ((endname && isalpha(c)) || (c == '>' || c == '/'))) {
/* attribute without value */
x->name[namelen] = '\0';
- if(x->xmlattrstart)
+ if (x->xmlattrstart)
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
- if(x->xmlattr)
+ if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
- if(x->xmlattrend)
+ if (x->xmlattrend)
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
endname = 0;
x->name[0] = c;
namelen = 1;
- } else if(namelen && (c == '\'' || c == '"')) {
+ } else if (namelen && (c == '\'' || c == '"')) {
/* attribute with value */
endsep = c; /* c is end separator */
- if(x->xmlattrstart)
+ if (x->xmlattrstart)
x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
- for(valuelen = 0; (c = xmlparser_getnext(x)) != EOF;) {
- if(c == '&' && x->xmlattrentity) { /* entities */
+ for (valuelen = 0; (c = x->getnext()) != EOF;) {
+ if (c == '&') { /* entities */
x->data[valuelen] = '\0';
/* call data function with data before entity if there is data */
- if(valuelen && x->xmlattr)
+ if (valuelen && x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
x->data[0] = c;
valuelen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == endsep)
+ while ((c = x->getnext()) != EOF) {
+ if (c == endsep)
break;
- if(valuelen < sizeof(x->data) - 1)
+ if (valuelen < sizeof(x->data) - 1)
x->data[valuelen++] = c;
else {
/* TODO: entity too long? this should be very strange. */
x->data[valuelen] = '\0';
- if(x->xmlattr)
+ if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
valuelen = 0;
break;
}
- if(c == ';') {
+ if (c == ';') {
x->data[valuelen] = '\0';
- x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
+ if (x->xmlattrentity)
+ x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
valuelen = 0;
break;
}
}
- } else if(c != endsep) {
- if(valuelen < sizeof(x->data) - 1) {
+ } else if (c != endsep) {
+ if (valuelen < sizeof(x->data) - 1) {
x->data[valuelen++] = c;
} else {
x->data[valuelen] = '\0';
- if(x->xmlattr)
+ if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
x->data[0] = c;
valuelen = 1;
}
}
- if(c == endsep) {
+ if (c == endsep) {
x->data[valuelen] = '\0';
- if(x->xmlattr)
+ if (x->xmlattr)
x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- if(x->xmlattrend)
+ if (x->xmlattrend)
x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
break;
}
}
- namelen = 0;
- endname = 0;
- } else if(namelen < sizeof(x->name) - 1)
+ namelen = endname = 0;
+ } else if (namelen < sizeof(x->name) - 1) {
x->name[namelen++] = c;
- if(c == '>') {
+ }
+ if (c == '>') {
break;
- } else if(c == '/') {
+ } else if (c == '/') {
x->isshorttag = 1;
namelen = 0;
x->name[0] = '\0';
@@ -112,37 +106,48 @@ xmlparser_parseattrs(XMLParser *x) {
}
}
-static __inline__ void
-xmlparser_parsecomment(XMLParser *x) {
+static void
+xml_parsecomment(XMLParser *x)
+{
size_t datalen = 0, i = 0;
int c;
- if(x->xmlcommentstart)
+ if (x->xmlcommentstart)
x->xmlcommentstart(x);
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '-' && i < 2)
- i++;
- else if(c == '>') {
- if(i == 2) { /* -- */
- if(datalen >= 2) {
- datalen -= 2;
- x->data[datalen] = '\0';
- if(x->xmlcomment)
- x->xmlcomment(x, x->data, datalen);
- }
- if(x->xmlcommentend)
- x->xmlcommentend(x);
- break;
+ while ((c = x->getnext()) != EOF) {
+ if (c == '-' || c == '>') {
+ if (x->xmlcomment) {
+ x->data[datalen] = '\0';
+ x->xmlcomment(x, x->data, datalen);
+ datalen = 0;
+ }
+ }
+
+ if (c == '-') {
+ if (++i > 2) {
+ if (x->xmlcomment)
+ for (; i > 2; i--)
+ x->xmlcomment(x, "-", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcommentend)
+ x->xmlcommentend(x);
+ return;
+ } else if (i) {
+ if (x->xmlcomment) {
+ for (; i > 0; i--)
+ x->xmlcomment(x, "-", 1);
}
i = 0;
}
- /* || (c == '-' && d >= sizeof(x->data) - 4)) { */
- /* TODO: what if the end has --, and it's cut on the boundary, test this. */
- if(datalen < sizeof(x->data) - 1)
+
+ if (datalen < sizeof(x->data) - 1) {
x->data[datalen++] = c;
- else {
+ } else {
x->data[datalen] = '\0';
- if(x->xmlcomment)
+ if (x->xmlcomment)
x->xmlcomment(x, x->data, datalen);
x->data[0] = c;
datalen = 1;
@@ -150,43 +155,47 @@ xmlparser_parsecomment(XMLParser *x) {
}
}
-/* TODO:
- * <test><![CDATA[1234567dddd8]]]>
- *
- * with x->data of sizeof(15) gives 2 ] at end of cdata, should be 1
- * test comment function too for similar bug?
- *
- */
-static __inline__ void
-xmlparser_parsecdata(XMLParser *x) {
+static void
+xml_parsecdata(XMLParser *x)
+{
size_t datalen = 0, i = 0;
int c;
- if(x->xmlcdatastart)
+ if (x->xmlcdatastart)
x->xmlcdatastart(x);
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == ']' && i < 2) {
- i++;
- } else if(c == '>') {
- if(i == 2) { /* ]] */
- if(datalen >= 2) {
- datalen -= 2;
- x->data[datalen] = '\0';
- if(x->xmlcdata && datalen)
- x->xmlcdata(x, x->data, datalen);
- }
- if(x->xmlcdataend)
- x->xmlcdataend(x);
- break;
+ while ((c = x->getnext()) != EOF) {
+ if (c == ']' || c == '>') {
+ if (x->xmlcdata) {
+ x->data[datalen] = '\0';
+ x->xmlcdata(x, x->data, datalen);
+ datalen = 0;
}
+ }
+
+ if (c == ']') {
+ if (++i > 2) {
+ if (x->xmlcdata)
+ for (; i > 2; i--)
+ x->xmlcdata(x, "]", 1);
+ i = 2;
+ }
+ continue;
+ } else if (c == '>' && i == 2) {
+ if (x->xmlcdataend)
+ x->xmlcdataend(x);
+ return;
+ } else if (i) {
+ if (x->xmlcdata)
+ for (; i > 0; i--)
+ x->xmlcdata(x, "]", 1);
i = 0;
}
- /* TODO: what if the end has ]>, and it's cut on the boundary */
- if(datalen < sizeof(x->data) - 1) {
+
+ if (datalen < sizeof(x->data) - 1) {
x->data[datalen++] = c;
} else {
x->data[datalen] = '\0';
- if(x->xmlcdata)
+ if (x->xmlcdata)
x->xmlcdata(x, x->data, datalen);
x->data[0] = c;
datalen = 1;
@@ -194,128 +203,240 @@ xmlparser_parsecdata(XMLParser *x) {
}
}
-void
-xmlparser_init(XMLParser *x, FILE *fp) {
- memset(x, 0, sizeof(XMLParser));
- x->fp = fp;
+int
+xml_codepointtoutf8(uint32_t cp, uint32_t *utf)
+{
+ if (cp >= 0x10000) {
+ /* 4 bytes */
+ *utf = 0xf0808080 | ((cp & 0xfc0000) << 6) |
+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
+ (cp & 0x3f);
+ return 4;
+ } else if (cp >= 0x00800) {
+ /* 3 bytes */
+ *utf = 0xe08080 |
+ ((cp & 0x3f000) << 4) | ((cp & 0xfc0) << 2) |
+ (cp & 0x3f);
+ return 3;
+ } else if (cp >= 0x80) {
+ /* 2 bytes */
+ *utf = 0xc080 |
+ ((cp & 0xfc0) << 2) | (cp & 0x3f);
+ return 2;
+ }
+ *utf = cp & 0xff;
+ return *utf ? 1 : 0; /* 1 byte */
+}
+
+ssize_t
+xml_namedentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ static const struct {
+ char *entity;
+ int c;
+ } entities[] = {
+ { .entity = "&amp;", .c = '&' },
+ { .entity = "&lt;", .c = '<' },
+ { .entity = "&gt;", .c = '>' },
+ { .entity = "&apos;", .c = '\'' },
+ { .entity = "&quot;", .c = '"' },
+ { .entity = "&AMP;", .c = '&' },
+ { .entity = "&LT;", .c = '<' },
+ { .entity = "&GT;", .c = '>' },
+ { .entity = "&APOS;", .c = '\'' },
+ { .entity = "&QUOT;", .c = '"' }
+ };
+ size_t i;
+
+ /* buffer is too small */
+ if (bufsiz < 2)
+ return -1;
+
+ /* doesn't start with &: can't match */
+ if (*e != '&')
+ return 0;
+
+ for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
+ if (!strcmp(e, entities[i].entity)) {
+ buf[0] = entities[i].c;
+ buf[1] = '\0';
+ return 1;
+ }
+ }
+ return 0;
+}
+
+ssize_t
+xml_numericentitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ uint32_t l = 0, cp = 0;
+ size_t b, len;
+ char *end;
+
+ /* buffer is too small */
+ if (bufsiz < 5)
+ return -1;
+
+ /* not a numeric entity */
+ if (e[0] != '&' || e[1] != '#')
+ return 0;
+
+ /* e[1] == '#', numeric / hexadecimal entity */
+ e += 2; /* skip "&#" */
+ errno = 0;
+ /* hex (16) or decimal (10) */
+ if (*e == 'x')
+ l = strtoul(e + 1, &end, 16);
+ else
+ l = strtoul(e, &end, 10);
+ /* invalid value or not a well-formed entity */
+ if (errno || *end != ';')
+ return 0;
+ len = xml_codepointtoutf8(l, &cp);
+ /* make string */
+ for (b = 0; b < len; b++)
+ buf[b] = (cp >> (8 * (len - 1 - b))) & 0xff;
+ buf[len] = '\0';
+
+ return (ssize_t)len;
+}
+
+/* convert named- or numeric entity string to buffer string
+ * returns byte-length of string. */
+ssize_t
+xml_entitytostr(const char *e, char *buf, size_t bufsiz)
+{
+ /* buffer is too small */
+ if (bufsiz < 5)
+ return -1;
+ /* doesn't start with & */
+ if (e[0] != '&')
+ return 0;
+ /* named entity */
+ if (e[1] != '#')
+ return xml_namedentitytostr(e, buf, bufsiz);
+ else /* numeric entity */
+ return xml_numericentitytostr(e, buf, bufsiz);
}
void
-xmlparser_parse(XMLParser *x) {
+xml_parse(XMLParser *x)
+{
int c, ispi;
size_t datalen, tagdatalen, taglen;
- while((c = xmlparser_getnext(x)) != EOF && c != '<'); /* skip until < */
+ if (!x->getnext)
+ return;
+ while ((c = x->getnext()) != EOF && c != '<')
+ ; /* skip until < */
- while(c != EOF) {
- if(c == '<') { /* parse tag */
- if((c = xmlparser_getnext(x)) == EOF)
+ while (c != EOF) {
+ if (c == '<') { /* parse tag */
+ if ((c = x->getnext()) == EOF)
return;
x->tag[0] = '\0';
x->taglen = 0;
- if(c == '!') { /* cdata and comments */
- for(tagdatalen = 0; (c = xmlparser_getnext(x)) != EOF;) {
- if(tagdatalen <= strlen("[CDATA[")) /* if(d < sizeof(x->data)) */
+ if (c == '!') { /* cdata and comments */
+ for (tagdatalen = 0; (c = x->getnext()) != EOF;) {
+ if (tagdatalen <= sizeof("[CDATA[") - 1) /* if (d < sizeof(x->data)) */
x->data[tagdatalen++] = c; /* TODO: prevent overflow */
- if(c == '>')
+ if (c == '>')
break;
- else if(c == '-' && tagdatalen == strlen("--") &&
- (x->data[0] == '-')) { /* comment */
- xmlparser_parsecomment(x);
+ else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
+ (x->data[0] == '-')) {
+ xml_parsecomment(x);
break;
- } else if(c == '[') {
- if(tagdatalen == strlen("[CDATA[") &&
- x->data[1] == 'C' && x->data[2] == 'D' &&
- x->data[3] == 'A' && x->data[4] == 'T' &&
- x->data[5] == 'A' && x->data[6] == '[') { /* cdata */
- xmlparser_parsecdata(x);
+ } else if (c == '[') {
+ if (tagdatalen == sizeof("[CDATA[") - 1 &&
+ !strncmp(x->data, "[CDATA[", tagdatalen)) {
+ xml_parsecdata(x);
break;
- #if 0
- } else {
- /* TODO ? */
- /* markup declaration section */
- while((c = xmlparser_getnext(x)) != EOF && c != ']');
- #endif
}
}
}
- } else { /* normal tag (open, short open, close), processing instruction. */
- if(isspace(c))
- while((c = xmlparser_getnext(x)) != EOF && isspace(c));
- if(c == EOF)
+ } else {
+ /* normal tag (open, short open, close), processing instruction. */
+ if (isspace(c))
+ while ((c = x->getnext()) != EOF && isspace(c))
+ ;
+ if (c == EOF)
return;
x->tag[0] = c;
ispi = (c == '?') ? 1 : 0;
x->isshorttag = ispi;
taglen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '/') /* TODO: simplify short tag? */
+ while ((c = x->getnext()) != EOF) {
+ if (c == '/') /* TODO: simplify short tag? */
x->isshorttag = 1; /* short tag */
- else if(c == '>' || isspace(c)) {
+ else if (c == '>' || isspace(c)) {
x->tag[taglen] = '\0';
- if(x->tag[0] == '/') { /* end tag, starts with </ */
+ if (x->tag[0] == '/') { /* end tag, starts with </ */
x->taglen = --taglen; /* len -1 because of / */
- if(taglen && x->xmltagend)
+ if (taglen && x->xmltagend)
x->xmltagend(x, &(x->tag)[1], x->taglen, 0);
} else {
x->taglen = taglen;
- if(x->xmltagstart)
- x->xmltagstart(x, x->tag, x->taglen); /* start tag */
- if(isspace(c))
- xmlparser_parseattrs(x);
- if(x->xmltagstartparsed)
+ /* start tag */
+ if (x->xmltagstart)
+ x->xmltagstart(x, x->tag, x->taglen);
+ if (isspace(c))
+ xml_parseattrs(x);
+ if (x->xmltagstartparsed)
x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
}
- if((x->isshorttag || ispi) && x->xmltagend) /* call tagend for shortform or processing instruction */
+ /* call tagend for shortform or processing instruction */
+ if ((x->isshorttag || ispi) && x->xmltagend)
x->xmltagend(x, x->tag, x->taglen, 1);
break;
- } else if(taglen < sizeof(x->tag) - 1)
+ } else if (taglen < sizeof(x->tag) - 1)
x->tag[taglen++] = c;
}
}
} else {
- /* parse data */
+ /* parse tag data */
datalen = 0;
- if(x->xmldatastart)
+ if (x->xmldatastart)
x->xmldatastart(x);
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '&' && x->xmldataentity) {
- if(datalen) {
+ while ((c = x->getnext()) != EOF) {
+ if (c == '&') {
+ if (datalen) {
x->data[datalen] = '\0';
- x->xmldata(x, x->data, datalen);
+ if (x->xmldata)
+ x->xmldata(x, x->data, datalen);
}
x->data[0] = c;
datalen = 1;
- while((c = xmlparser_getnext(x)) != EOF) {
- if(c == '<')
+ while ((c = x->getnext()) != EOF) {
+ if (c == '<')
break;
- if(datalen < sizeof(x->data) - 1)
+ if (datalen < sizeof(x->data) - 1)
x->data[datalen++] = c;
- if(isspace(c))
+ if (isspace(c))
break;
- else if(c == ';') {
+ else if (c == ';') {
x->data[datalen] = '\0';
- x->xmldataentity(x, x->data, datalen);
+ if (x->xmldataentity)
+ x->xmldataentity(x, x->data, datalen);
datalen = 0;
break;
}
}
- } else if(c != '<') {
- if(datalen < sizeof(x->data) - 1) {
+ } else if (c != '<') {
+ if (datalen < sizeof(x->data) - 1) {
x->data[datalen++] = c;
} else {
x->data[datalen] = '\0';
- if(x->xmldata)
+ if (x->xmldata)
x->xmldata(x, x->data, datalen);
x->data[0] = c;
datalen = 1;
}
}
- if(c == '<') {
+ if (c == '<') {
x->data[datalen] = '\0';
- if(x->xmldata && datalen)
+ if (x->xmldata && datalen)
x->xmldata(x, x->data, datalen);
- if(x->xmldataend)
+ if (x->xmldataend)
x->xmldataend(x);
break;
}
(DIR) diff --git a/xml.h b/xml.h
@@ -1,49 +1,44 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
typedef struct xmlparser {
/* handlers */
- void (*xmltagstart)(struct xmlparser *p, const char *tag, size_t taglen);
- void (*xmltagstartparsed)(struct xmlparser *p, const char *tag,
- size_t taglen, int isshort);
- void (*xmltagend)(struct xmlparser *p, const char *tag, size_t taglen,
- int isshort);
- void (*xmldatastart)(struct xmlparser *p);
- void (*xmldata)(struct xmlparser *p, const char *data, size_t datalen);
- void (*xmldataend)(struct xmlparser *p);
- void (*xmldataentity)(struct xmlparser *p, const char *data,
- size_t datalen);
- void (*xmlattrstart)(struct xmlparser *p, const char *tag, size_t taglen,
- const char *name, size_t namelen);
- void (*xmlattr)(struct xmlparser *p, const char *tag, size_t taglen,
- const char *name, size_t namelen, const char *value,
- size_t valuelen);
- void (*xmlattrend)(struct xmlparser *p, const char *tag, size_t taglen,
- const char *name, size_t namelen);
- void (*xmlattrentity)(struct xmlparser *p, const char *tag, size_t taglen,
- const char *name, size_t namelen, const char *value,
- size_t valuelen);
- void (*xmlcdatastart)(struct xmlparser *p);
- void (*xmlcdata)(struct xmlparser *p, const char *data, size_t datalen);
- void (*xmlcdataend)(struct xmlparser *p);
- void (*xmlcommentstart)(struct xmlparser *p);
- void (*xmlcomment)(struct xmlparser *p, const char *comment,
- size_t commentlen);
- void (*xmlcommentend)(struct xmlparser *p);
+ void (*xmlattr)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ void (*xmlattrend)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
+ const char *, size_t);
+ void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
+ const char *, size_t, const char *, size_t);
+ void (*xmlcdatastart)(struct xmlparser *);
+ void (*xmlcdata)(struct xmlparser *, const char *, size_t);
+ void (*xmlcdataend)(struct xmlparser *);
+ void (*xmlcommentstart)(struct xmlparser *);
+ void (*xmlcomment)(struct xmlparser *, const char *, size_t);
+ void (*xmlcommentend)(struct xmlparser *);
+ void (*xmldata)(struct xmlparser *, const char *, size_t);
+ void (*xmldataend)(struct xmlparser *);
+ void (*xmldataentity)(struct xmlparser *, const char *, size_t);
+ void (*xmldatastart)(struct xmlparser *);
+ void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
+ void (*xmltagstart)(struct xmlparser *, const char *, size_t);
+ void (*xmltagstartparsed)(struct xmlparser *, const char *,
+ size_t, int);
- FILE *fp; /* file stream to read from */
+ int (*getnext)(void);
- /* private; internal state */
- char tag[1024]; /* current tag */
- int isshorttag; /* current tag is in short form ? */
+ /* current tag */
+ char tag[1024];
size_t taglen;
- char name[256]; /* current attribute name */
- char data[BUFSIZ]; /* data buffer used for tag and attribute data */
- size_t readoffset;
- size_t readlastbytes;
- unsigned char readbuf[BUFSIZ]; /* read buffer used by xmlparser_getnext */
+ /* current tag is in short form ? <tag /> */
+ int isshorttag;
+ /* current attribute name */
+ char name[256];
+ /* data buffer used for tag data, cdata and attribute data */
+ char data[BUFSIZ];
} XMLParser;
-void xmlparser_init(XMLParser *x, FILE *fp);
-void xmlparser_parse(XMLParser *x);
+int xml_codepointtoutf8(uint32_t, uint32_t *);
+ssize_t xml_entitytostr(const char *, char *, size_t);
+ssize_t xml_namedentitytostr(const char *, char *, size_t);
+ssize_t xml_numericentitytostr(const char *, char *, size_t);
+
+void xml_parse(XMLParser *);