xml2tsv.c - xml2tsv - a simple xml-to-tsv converter, based on xmlparser
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
(DIR) LICENSE
---
xml2tsv.c (4522B)
---
1 /*
2 * (c) 2020 Vincenzo "KatolaZ" Nicosia <katolaz@freaknet.org>
3 *
4 * A simple xml-to-tsv converter, based on xmlparser by Hiltjo Posthuma
5 * http://codemadness.org/git/xmlparser/
6 *
7 * You can use, distribute, modify, and/or redistribute this program under
8 * the terms of the ISC LICENSE. See LICENSE for details.
9 *
10 */
11
12
13 #include <sys/types.h>
14
15 #include <stdio.h>
16 #include <string.h>
17 #include <ctype.h>
18
19 #include "xml.h"
20 #include "config.h"
21
22
/* Boolean values used for the emitsep flag. */
#define FALSE 0
#define TRUE 1
25
26 /* tag stack */
27
28 typedef struct {
29 char s[STR_MAX];
30 int ref;
31 } taginfo_t;
32
33
34
35 typedef struct {
36 int top;
37 taginfo_t st[DEPTH_MAX];
38 } tstack_t;
39
40 int stack_empty(tstack_t *t){
41 return (t->top < 0);
42 }
43
44 int stack_push(tstack_t *t, const char *c){
45 if (t->top < DEPTH_MAX){
46 t->top ++;
47 strncpy(t->st[t->top].s, c, STR_MAX - 1);
48 t->st[t->top].s[STR_MAX - 1] = '\0';
49 t->st[t->top].ref = 0;
50 if (t->top){
51 /* Increment the ref counter of the parent tag*/
52 t->st[t->top-1].ref += 1;
53 }
54 return 0;
55 }
56 return -1;
57 }
58
59 taginfo_t* stack_pop(tstack_t *t){
60 if (!stack_empty(t))
61 return t->st + t->top--;
62 return NULL;
63 }
64
65 taginfo_t* stack_peek(tstack_t *t){
66 if (!stack_empty(t))
67 return t->st + t->top;
68 return NULL;
69 }
70
71 void stack_init(tstack_t *t){
72 t->top = -1;
73 }
74
75
76 /* utility functions */
77
/*
 * quote_print: write s to stdout in quoted form.
 *
 * A backslash is written as two backslashes, a tab as backslash-t and
 * a newline as backslash-n. A newline found at the very start of a run
 * (that is, right after another quoted character or at the start of s)
 * is dropped. Any other control character, including '\r', is
 * stripped.
 */
void quote_print(const char *s){
	const char *p = s;
	size_t len;
	size_t i;

	while (*p != '\0'){
		/* length of the run before the next character that needs quoting */
		len = strcspn(p, "\\\n\t");
		for (i = 0; i < len; i++, p++){
			if (!iscntrl((unsigned char)*p)){
				putchar(*p);
			}
		}
		/* p now points at a backslash, '\n', '\t' or the final NUL */
		switch (*p){
		case '\n':
			if (len > 0){
				fputs("\\n", stdout);
			}
			p++;
			break;
		case '\t':
			fputs("\\t", stdout);
			p++;
			break;
		case '\\':
			fputs("\\\\", stdout);
			p++;
			break;
		default:
			/* NUL: nothing to emit, the loop condition ends the scan */
			break;
		}
	}
}
112
113 void print_cur_str(tstack_t *t){
114 int i;
115 for (i=0; i<=t->top; i++){
116 putchar('/');
117 fputs(t->st[i].s, stdout);
118 }
119 }
120
121 void print_cur_str_fp(FILE *f, tstack_t *t){
122 int i;
123 for (i=0; i<=t->top; i++){
124 fputc('/', f);
125 fputs(t->st[i].s, f);
126 }
127 }
128
129 /* global variables */
130
131 tstack_t st;
132 char emitsep;
133
134 /* XML callbacks */
135
136 void
137 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
138 const char *v, size_t vl)
139 {
140 fputs(v, stdout);
141 }
142
143 void
144 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
145 const char *v, size_t vl)
146 {
147 char buf[16];
148 int n;
149
150 if ((n = xml_entitytostr(v, buf, sizeof(buf))) > 0)
151 xmlattr(x, t, tl, a, al, buf, (size_t)n);
152 else
153 xmlattr(x, t, tl, a, al, v, vl);
154 }
155
156 void
157 xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
158 {
159 putchar(SEP);
160 fputs(a, stdout);
161 putchar(SATTR);
162 }
163
164 void
165 xmlcdatastart(XMLParser *x)
166 {
167 putchar(SEP);
168 }
169
170 void
171 xmlcdata(XMLParser *x, const char *d, size_t dl)
172 {
173 quote_print(d);
174 }
175
176 void
177 xmldata(XMLParser *x, const char *d, size_t dl)
178 {
179 if ((strcspn(d, " \t\n") || (strspn(d, " \t\n")<strlen(d) && !stack_peek(&st)->ref)) && emitsep){
180 putchar(SEP);
181 emitsep = FALSE;
182 }
183 quote_print(d);
184 }
185
186 void
187 xmldataend(XMLParser *x)
188 {
189 emitsep = FALSE;
190 }
191
192 void
193 xmldataentity(XMLParser *x, const char *d, size_t dl)
194 {
195 char buf[16];
196 int n;
197
198 if ((n = xml_entitytostr(d, buf, sizeof(buf))) > 0)
199 xmldata(x, buf, (size_t)n);
200 else
201 xmldata(x, d, dl);
202 }
203
204 void
205 xmldatastart(XMLParser *x)
206 {
207 emitsep = TRUE;
208 }
209
210 void
211 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
212 {
213 char *tag;
214 if (stack_empty(&st)){
215 fprintf(stderr, "Error: tag-end '%s' before any open tag\n", t);
216 }
217 tag = stack_pop(&st)->s;
218 if (strcmp(t, tag)){
219 fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, tag);
220 }
221 }
222
223 void
224 xmltagstart(XMLParser *x, const char *t, size_t tl)
225 {
226 if (stack_push(&st, t)){
227 fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent tag: '%s')\n", t, stack_peek(&st)->s);
228 return;
229 }
230 putchar('\n');
231 print_cur_str(&st);
232 }
233
234 int
235 main(void)
236 {
237 stack_init(&st);
238 emitsep = FALSE;
239 XMLParser x = { 0 };
240
241 x.xmlattr = xmlattr;
242 x.xmlattrstart = xmlattrstart;
243 x.xmlattrentity = xmlattrentity;
244 x.xmlcdatastart = xmlcdatastart;
245 x.xmlcdata = xmlcdata;
246 x.xmldata = xmldata;
247 x.xmldataend = xmldataend;
248 x.xmldataentity = xmldataentity;
249 x.xmldatastart = xmldatastart;
250 x.xmltagend = xmltagend;
251 x.xmltagstart = xmltagstart;
252
253 xml_parse(&x);
254 putchar('\n');
255 if (! stack_empty(&st)) {
256 fprintf(stderr, "Error: tags still open at EOF: ");
257 print_cur_str_fp(stderr, &st);
258 fprintf(stderr, "\n");
259 }
260 return 0;
261 }