remove unneeded xml.{c,h}: this was integrated into parse.c - bag - BAG Kadaster Extract parser (subset)
(HTM) git clone git://git.codemadness.org/bag
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 9065ded7ce4b1d4dc515ae92ba33d92505f497d5
(DIR) parent bc7bd116af0cada05627c574f5b0f6c69a82da36
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Sat, 18 Nov 2023 23:27:24 +0100
remove unneeded xml.{c,h}: this was integrated into parse.c
Diffstat:
D xml.c | 480 -------------------------------
D xml.h | 44 -------------------------------
2 files changed, 0 insertions(+), 524 deletions(-)
---
(DIR) diff --git a/xml.c b/xml.c
@@ -1,480 +0,0 @@
-#include <errno.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "xml.h"
-
-#define ISALPHA(c) ((((unsigned)c) | 32) - 'a' < 26)
-#define ISSPACE(c) ((c) == ' ' || ((((unsigned)c) - '\t') < 5))
-
-static int roffset, rtotal;
-static char rbuf[4096*4];
-
-int
-getnext(void)
-{
- ssize_t n;
-
- if (roffset >= rtotal) {
- n = fread(rbuf, 1, sizeof(rbuf), stdin);
- if (ferror(stdin))
- exit(1);
- if (feof(stdin) || n == 0) {
- roffset = 0;
- rtotal = 0;
- return EOF;
- }
- roffset = 0;
- rtotal = n;
- }
- return rbuf[roffset++];
-}
-
-//#define GETNEXT getnext
-#define GETNEXT getchar_unlocked
-
-static void
-xml_parseattrs(XMLParser *x)
-{
- size_t namelen = 0, valuelen;
- int c, endsep, endname = 0, valuestart = 0;
-
- while ((c = GETNEXT()) != EOF) {
- if (ISSPACE(c)) {
- if (namelen)
- endname = 1;
- continue;
- } else if (c == '?')
- ; /* ignore */
- else if (c == '=') {
- x->name[namelen] = '\0';
- valuestart = 1;
- endname = 1;
- } else if (namelen && ((endname && !valuestart && ISALPHA(c)) || (c == '>' || c == '/'))) {
- /* attribute without value */
- x->name[namelen] = '\0';
- if (x->xmlattrstart)
- x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
- if (x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, x->name, namelen, "", 0);
- if (x->xmlattrend)
- x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
- endname = 0;
- x->name[0] = c;
- namelen = 1;
- } else if (namelen && valuestart) {
- /* attribute with value */
- if (x->xmlattrstart)
- x->xmlattrstart(x, x->tag, x->taglen, x->name, namelen);
-
- valuelen = 0;
- if (c == '\'' || c == '"') {
- endsep = c;
- } else {
- endsep = ' '; /* ISSPACE() */
- goto startvalue;
- }
-
- while ((c = GETNEXT()) != EOF) {
-startvalue:
- if (c == '&') { /* entities */
- x->data[valuelen] = '\0';
- /* call data function with data before entity if there is data */
- if (valuelen && x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- x->data[0] = c;
- valuelen = 1;
- while ((c = GETNEXT()) != EOF) {
- if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c))))
- break;
- if (valuelen < sizeof(x->data) - 1)
- x->data[valuelen++] = c;
- else {
- /* entity too long for buffer, handle as normal data */
- x->data[valuelen] = '\0';
- if (x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- x->data[0] = c;
- valuelen = 1;
- break;
- }
- if (c == ';') {
- x->data[valuelen] = '\0';
- if (x->xmlattrentity)
- x->xmlattrentity(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- valuelen = 0;
- break;
- }
- }
- } else if (c != endsep && !(endsep == ' ' && (c == '>' || ISSPACE(c)))) {
- if (valuelen < sizeof(x->data) - 1) {
- x->data[valuelen++] = c;
- } else {
- x->data[valuelen] = '\0';
- if (x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- x->data[0] = c;
- valuelen = 1;
- }
- }
- if (c == endsep || (endsep == ' ' && (c == '>' || ISSPACE(c)))) {
- x->data[valuelen] = '\0';
- if (x->xmlattr)
- x->xmlattr(x, x->tag, x->taglen, x->name, namelen, x->data, valuelen);
- if (x->xmlattrend)
- x->xmlattrend(x, x->tag, x->taglen, x->name, namelen);
- break;
- }
- }
- namelen = endname = valuestart = 0;
- } else if (namelen < sizeof(x->name) - 1) {
- x->name[namelen++] = c;
- }
- if (c == '>') {
- break;
- } else if (c == '/') {
- x->isshorttag = 1;
- x->name[0] = '\0';
- namelen = 0;
- }
- }
-}
-
-static void
-xml_parsecomment(XMLParser *x)
-{
- size_t datalen = 0, i = 0;
- int c;
-
- if (x->xmlcommentstart)
- x->xmlcommentstart(x);
- while ((c = GETNEXT()) != EOF) {
- if (c == '-' || c == '>') {
- if (x->xmlcomment && datalen) {
- x->data[datalen] = '\0';
- x->xmlcomment(x, x->data, datalen);
- datalen = 0;
- }
- }
-
- if (c == '-') {
- if (++i > 2) {
- if (x->xmlcomment)
- for (; i > 2; i--)
- x->xmlcomment(x, "-", 1);
- i = 2;
- }
- continue;
- } else if (c == '>' && i == 2) {
- if (x->xmlcommentend)
- x->xmlcommentend(x);
- return;
- } else if (i) {
- if (x->xmlcomment) {
- for (; i > 0; i--)
- x->xmlcomment(x, "-", 1);
- }
- i = 0;
- }
-
- if (datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if (x->xmlcomment)
- x->xmlcomment(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
- }
-}
-
-static void
-xml_parsecdata(XMLParser *x)
-{
- size_t datalen = 0, i = 0;
- int c;
-
- if (x->xmlcdatastart)
- x->xmlcdatastart(x);
- while ((c = GETNEXT()) != EOF) {
- if (c == ']' || c == '>') {
- if (x->xmlcdata && datalen) {
- x->data[datalen] = '\0';
- x->xmlcdata(x, x->data, datalen);
- datalen = 0;
- }
- }
-
- if (c == ']') {
- if (++i > 2) {
- if (x->xmlcdata)
- for (; i > 2; i--)
- x->xmlcdata(x, "]", 1);
- i = 2;
- }
- continue;
- } else if (c == '>' && i == 2) {
- if (x->xmlcdataend)
- x->xmlcdataend(x);
- return;
- } else if (i) {
- if (x->xmlcdata)
- for (; i > 0; i--)
- x->xmlcdata(x, "]", 1);
- i = 0;
- }
-
- if (datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if (x->xmlcdata)
- x->xmlcdata(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
- }
-}
-
-static int
-codepointtoutf8(long r, char *s)
-{
- if (r == 0) {
- return 0; /* NUL byte */
- } else if (r <= 0x7F) {
- /* 1 byte: 0aaaaaaa */
- s[0] = r;
- return 1;
- } else if (r <= 0x07FF) {
- /* 2 bytes: 00000aaa aabbbbbb */
- s[0] = 0xC0 | ((r & 0x0007C0) >> 6); /* 110aaaaa */
- s[1] = 0x80 | (r & 0x00003F); /* 10bbbbbb */
- return 2;
- } else if (r <= 0xFFFF) {
- /* 3 bytes: aaaabbbb bbcccccc */
- s[0] = 0xE0 | ((r & 0x00F000) >> 12); /* 1110aaaa */
- s[1] = 0x80 | ((r & 0x000FC0) >> 6); /* 10bbbbbb */
- s[2] = 0x80 | (r & 0x00003F); /* 10cccccc */
- return 3;
- } else {
- /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
- s[0] = 0xF0 | ((r & 0x1C0000) >> 18); /* 11110aaa */
- s[1] = 0x80 | ((r & 0x03F000) >> 12); /* 10bbbbbb */
- s[2] = 0x80 | ((r & 0x000FC0) >> 6); /* 10cccccc */
- s[3] = 0x80 | (r & 0x00003F); /* 10dddddd */
- return 4;
- }
-}
-
-static int
-namedentitytostr(const char *e, char *buf, size_t bufsiz)
-{
- static const struct {
- const char *entity;
- int c;
- } entities[] = {
- { "amp;", '&' },
- { "lt;", '<' },
- { "gt;", '>' },
- { "apos;", '\'' },
- { "quot;", '"' },
- };
- size_t i;
-
- /* buffer is too small */
- if (bufsiz < 2)
- return -1;
-
- for (i = 0; i < sizeof(entities) / sizeof(*entities); i++) {
- if (!strcmp(e, entities[i].entity)) {
- buf[0] = entities[i].c;
- buf[1] = '\0';
- return 1;
- }
- }
- return -1;
-}
-
-static int
-numericentitytostr(const char *e, char *buf, size_t bufsiz)
-{
- long l;
- int len;
- char *end;
-
- /* buffer is too small */
- if (bufsiz < 5)
- return -1;
-
- errno = 0;
- /* hex (16) or decimal (10) */
- if (*e == 'x')
- l = strtol(++e, &end, 16);
- else
- l = strtol(e, &end, 10);
- /* invalid value or not a well-formed entity or invalid code point */
- if (errno || e == end || *end != ';' || l < 0 || l > 0x10ffff ||
- (l >= 0xd800 && l <= 0xdfff))
- return -1;
- len = codepointtoutf8(l, buf);
- buf[len] = '\0';
-
- return len;
-}
-
-/* convert named- or numeric entity string to buffer string
- * returns byte-length of string or -1 on failure. */
-int
-xml_entitytostr(const char *e, char *buf, size_t bufsiz)
-{
- /* doesn't start with & */
- if (e[0] != '&')
- return -1;
- /* numeric entity */
- if (e[1] == '#')
- return numericentitytostr(e + 2, buf, bufsiz);
- else /* named entity */
- return namedentitytostr(e + 1, buf, bufsiz);
-}
-
-void
-xml_parse(XMLParser *x)
-{
- size_t datalen, tagdatalen;
- int c, isend;
-
- while ((c = GETNEXT()) != EOF && c != '<')
- ; /* skip until < */
-
- while (c != EOF) {
- if (c == '<') { /* parse tag */
- if ((c = GETNEXT()) == EOF)
- return;
-
- if (c == '!') { /* CDATA and comments */
- for (tagdatalen = 0; (c = GETNEXT()) != EOF;) {
- /* NOTE: sizeof(x->data) must be at least sizeof("[CDATA[") */
- if (tagdatalen <= sizeof("[CDATA[") - 1)
- x->data[tagdatalen++] = c;
- if (c == '>')
- break;
- else if (c == '-' && tagdatalen == sizeof("--") - 1 &&
- (x->data[0] == '-')) {
- xml_parsecomment(x);
- break;
- } else if (c == '[') {
- if (tagdatalen == sizeof("[CDATA[") - 1 &&
- !strncmp(x->data, "[CDATA[", tagdatalen)) {
- xml_parsecdata(x);
- break;
- }
- }
- }
- } else {
- /* normal tag (open, short open, close), processing instruction. */
- x->tag[0] = c;
- x->taglen = 1;
- x->isshorttag = isend = 0;
-
- /* treat processing instruction as short tag, don't strip "?" prefix. */
- if (c == '?') {
- x->isshorttag = 1;
- } else if (c == '/') {
- if ((c = GETNEXT()) == EOF)
- return;
- x->tag[0] = c;
- isend = 1;
- }
-
- while ((c = GETNEXT()) != EOF) {
- if (c == '/')
- x->isshorttag = 1; /* short tag */
- else if (c == '>' || ISSPACE(c)) {
- x->tag[x->taglen] = '\0';
- if (isend) { /* end tag, starts with </ */
- if (x->xmltagend)
- x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
- x->tag[0] = '\0';
- x->taglen = 0;
- } else {
- /* start tag */
- if (x->xmltagstart)
- x->xmltagstart(x, x->tag, x->taglen);
- if (ISSPACE(c))
- xml_parseattrs(x);
- if (x->xmltagstartparsed)
- x->xmltagstartparsed(x, x->tag, x->taglen, x->isshorttag);
- }
- /* call tagend for short tag or processing instruction */
- if (x->isshorttag) {
- if (x->xmltagend)
- x->xmltagend(x, x->tag, x->taglen, x->isshorttag);
- x->tag[0] = '\0';
- x->taglen = 0;
- }
- break;
- } else if (x->taglen < sizeof(x->tag) - 1)
- x->tag[x->taglen++] = c; /* NOTE: tag name truncation */
- }
- }
- } else {
- /* parse tag data */
- datalen = 0;
- if (x->xmldatastart)
- x->xmldatastart(x);
- while ((c = GETNEXT()) != EOF) {
- if (c == '&') {
- if (datalen) {
- x->data[datalen] = '\0';
- if (x->xmldata)
- x->xmldata(x, x->data, datalen);
- }
- x->data[0] = c;
- datalen = 1;
- while ((c = GETNEXT()) != EOF) {
- if (c == '<')
- break;
- if (datalen < sizeof(x->data) - 1)
- x->data[datalen++] = c;
- else {
- /* entity too long for buffer, handle as normal data */
- x->data[datalen] = '\0';
- if (x->xmldata)
- x->xmldata(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- break;
- }
- if (c == ';') {
- x->data[datalen] = '\0';
- if (x->xmldataentity)
- x->xmldataentity(x, x->data, datalen);
- datalen = 0;
- break;
- }
- }
- } else if (c != '<') {
- if (datalen < sizeof(x->data) - 1) {
- x->data[datalen++] = c;
- } else {
- x->data[datalen] = '\0';
- if (x->xmldata)
- x->xmldata(x, x->data, datalen);
- x->data[0] = c;
- datalen = 1;
- }
- }
- if (c == '<') {
- x->data[datalen] = '\0';
- if (x->xmldata && datalen)
- x->xmldata(x, x->data, datalen);
- if (x->xmldataend)
- x->xmldataend(x);
- break;
- }
- }
- }
- }
-}
(DIR) diff --git a/xml.h b/xml.h
@@ -1,44 +0,0 @@
-#ifndef XML_H
-#define XML_H
-
-#include <stdio.h>
-
-typedef struct xmlparser {
- /* handlers */
- void (*xmlattr)(struct xmlparser *, const char *, size_t,
- const char *, size_t, const char *, size_t);
- void (*xmlattrend)(struct xmlparser *, const char *, size_t,
- const char *, size_t);
- void (*xmlattrstart)(struct xmlparser *, const char *, size_t,
- const char *, size_t);
- void (*xmlattrentity)(struct xmlparser *, const char *, size_t,
- const char *, size_t, const char *, size_t);
- void (*xmlcdatastart)(struct xmlparser *);
- void (*xmlcdata)(struct xmlparser *, const char *, size_t);
- void (*xmlcdataend)(struct xmlparser *);
- void (*xmlcommentstart)(struct xmlparser *);
- void (*xmlcomment)(struct xmlparser *, const char *, size_t);
- void (*xmlcommentend)(struct xmlparser *);
- void (*xmldata)(struct xmlparser *, const char *, size_t);
- void (*xmldataend)(struct xmlparser *);
- void (*xmldataentity)(struct xmlparser *, const char *, size_t);
- void (*xmldatastart)(struct xmlparser *);
- void (*xmltagend)(struct xmlparser *, const char *, size_t, int);
- void (*xmltagstart)(struct xmlparser *, const char *, size_t);
- void (*xmltagstartparsed)(struct xmlparser *, const char *,
- size_t, int);
-
- /* current tag */
- char tag[1024];
- size_t taglen;
- /* current tag is a short tag ? <tag /> */
- int isshorttag;
- /* current attribute name */
- char name[1024];
- /* data buffer used for tag data, CDATA and attribute data */
- char data[BUFSIZ];
-} XMLParser;
-
-int xml_entitytostr(const char *, char *, size_t);
-void xml_parse(XMLParser *);
-#endif