xml2tsv.c - xml2tsv - a simple xml-to-tsv converter, based on xmlparser
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Tags
 (DIR) README
 (DIR) LICENSE
       ---
       xml2tsv.c (4522B)
       ---
            1 /*
            2 * (c) 2020 Vincenzo "KatolaZ" Nicosia <katolaz@freaknet.org>
            3 * 
            4 * A simple xml-to-tsv converter, based on xmlparser by Hiltjo Posthuma
            5 * http://codemadness.org/git/xmlparser/
            6 *
            7 * You can use, distribute, modify, and/or redistribute this program under
            8 * the terms of the ISC LICENSE. See LICENSE for details.
            9 *
           10 */
           11 
           12 
           13 #include <sys/types.h>
           14 
           15 #include <stdio.h>
           16 #include <string.h>
           17 #include <ctype.h>
           18 
           19 #include "xml.h"
           20 #include "config.h"
           21 
           22 
           23 #define FALSE 0
           24 #define TRUE 1
           25 
           26 /* tag stack */
           27 
           28 typedef struct {
           29         char s[STR_MAX];
           30         int ref;
           31 } taginfo_t;
           32 
           33 
           34 
           35 typedef struct {
           36         int top;
           37         taginfo_t st[DEPTH_MAX];
           38 } tstack_t;
           39 
           40 int stack_empty(tstack_t *t){
           41         return (t->top < 0);
           42 }
           43 
           44 int stack_push(tstack_t *t, const char *c){
           45         if (t->top < DEPTH_MAX){
           46                 t->top ++;
           47                 strncpy(t->st[t->top].s, c, STR_MAX - 1);
           48                 t->st[t->top].s[STR_MAX - 1] = '\0';
           49                 t->st[t->top].ref = 0;
           50                 if (t->top){
           51                         /* Increment the ref counter of the parent tag*/
           52                         t->st[t->top-1].ref += 1;
           53                 }
           54                 return 0;
           55         }
           56         return -1;
           57 }
           58 
           59 taginfo_t* stack_pop(tstack_t *t){
           60         if (!stack_empty(t))
           61                 return t->st + t->top--;
           62         return NULL;
           63 }
           64 
           65 taginfo_t* stack_peek(tstack_t *t){
           66         if (!stack_empty(t))
           67                 return t->st + t->top;
           68         return NULL;
           69 }
           70 
           71 void stack_init(tstack_t *t){
           72         t->top = -1;
           73 }
           74 
           75 
           76 /* utility functions */
           77 
           78 /* quote_print: quote \\, \n, \t, and strip other ctrl chars */
           79 void quote_print(const char *s){
           80         const char *tmp = s;
           81         size_t len;
           82         int i;
           83         while (*tmp != '\0'){
           84                 len = strcspn(tmp, "\\\n\t");
           85                 for(i=0; i<len; i++, tmp++){
           86                         if (!iscntrl((unsigned char)*tmp)){
           87                                 putchar(*tmp);
           88                         }
           89                 }
           90                 switch (*tmp){
           91                         case '\n':
           92                                 if (len > 0){
           93                                         fputs("\\n", stdout);
           94                                 }
           95                                 tmp ++;
           96                                 break;
           97                         case '\t':
           98                                 fputs("\\t", stdout);
           99                                 tmp ++;
          100                                 break;
          101                         case '\r':
          102                                 fputs("\\r", stdout);
          103                                 tmp ++;
          104                                 break;
          105                         case '\\':
          106                                 fputs("\\\\", stdout);
          107                                 tmp ++;
          108                                 break;
          109                 }
          110         }
          111 }
          112 
          113 void print_cur_str(tstack_t *t){
          114         int i;
          115         for (i=0; i<=t->top; i++){
          116                 putchar('/');
          117                 fputs(t->st[i].s, stdout);
          118         }
          119 }
          120 
          121 void print_cur_str_fp(FILE *f, tstack_t *t){
          122         int i;
          123         for (i=0; i<=t->top; i++){
          124                 fputc('/', f);
          125                 fputs(t->st[i].s, f);
          126         }
          127 }
          128 
          129 /* global variables */
          130 
          131 tstack_t st;
          132 char emitsep;
          133 
          134 /* XML callbacks */
          135 
          136 void
          137 xmlattr(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
          138         const char *v, size_t vl)
          139 {
          140         fputs(v, stdout);
          141 }
          142 
          143 void
          144 xmlattrentity(XMLParser *x, const char *t, size_t tl, const char *a, size_t al,
          145               const char *v, size_t vl)
          146 {
          147         char buf[16];
          148         int n;
          149 
          150         if ((n = xml_entitytostr(v, buf, sizeof(buf))) > 0)
          151                 xmlattr(x, t, tl, a, al, buf, (size_t)n);
          152         else
          153                 xmlattr(x, t, tl, a, al, v, vl);
          154 }
          155 
          156 void
          157 xmlattrstart(XMLParser *x, const char *t, size_t tl, const char *a, size_t al)
          158 {
          159         putchar(SEP);
          160         fputs(a, stdout);
          161         putchar(SATTR);
          162 }
          163 
          164 void
          165 xmlcdatastart(XMLParser *x)
          166 {
          167         putchar(SEP);
          168 }
          169 
          170 void
          171 xmlcdata(XMLParser *x, const char *d, size_t dl)
          172 {
          173         quote_print(d);
          174 }
          175 
          176 void
          177 xmldata(XMLParser *x, const char *d, size_t dl)
          178 {
          179         if ((strcspn(d, " \t\n") || (strspn(d, " \t\n")<strlen(d) && !stack_peek(&st)->ref)) && emitsep){
          180                  putchar(SEP);
          181                 emitsep = FALSE;
          182         }
          183         quote_print(d);
          184 }
          185 
          186 void
          187 xmldataend(XMLParser *x)
          188 {
          189         emitsep = FALSE;
          190 }
          191 
          192 void
          193 xmldataentity(XMLParser *x, const char *d, size_t dl)
          194 {
          195         char buf[16];
          196         int n;
          197 
          198         if ((n = xml_entitytostr(d, buf, sizeof(buf))) > 0)
          199                 xmldata(x, buf, (size_t)n);
          200         else
          201                 xmldata(x, d, dl);
          202 }
          203 
          204 void
          205 xmldatastart(XMLParser *x)
          206 {
          207         emitsep = TRUE;
          208 }
          209 
          210 void
          211 xmltagend(XMLParser *x, const char *t, size_t tl, int isshort)
          212 {
          213         char *tag;
          214         if (stack_empty(&st)){
          215                 fprintf(stderr, "Error: tag-end '%s' before any open tag\n", t);
          216         }
          217         tag = stack_pop(&st)->s;
          218         if (strcmp(t, tag)){
          219                 fprintf(stderr, "Error: tag-end '%s' closes tag '%s'\n", t, tag);
          220         }
          221 }
          222 
          223 void
          224 xmltagstart(XMLParser *x, const char *t, size_t tl)
          225 {
          226         if (stack_push(&st, t)){
          227                 fprintf(stderr, "Error: stack full. Ignoring tag '%s' (parent tag: '%s')\n", t, stack_peek(&st)->s);
          228                 return;
          229         }
          230         putchar('\n');
          231         print_cur_str(&st);
          232 }
          233 
          234 int
          235 main(void)
          236 {
          237         stack_init(&st);
          238         emitsep = FALSE;
          239         XMLParser x = { 0 };
          240 
          241         x.xmlattr = xmlattr;
          242         x.xmlattrstart = xmlattrstart;
          243         x.xmlattrentity = xmlattrentity;
          244         x.xmlcdatastart = xmlcdatastart;
          245         x.xmlcdata = xmlcdata;
          246         x.xmldata = xmldata;
          247         x.xmldataend = xmldataend;
          248         x.xmldataentity = xmldataentity;
          249         x.xmldatastart = xmldatastart;
          250         x.xmltagend = xmltagend;
          251         x.xmltagstart = xmltagstart;
          252 
          253         xml_parse(&x);
          254         putchar('\n');
          255         if (! stack_empty(&st)) {
          256                 fprintf(stderr, "Error: tags still open at EOF: ");
          257                 print_cur_str_fp(stderr, &st);
          258                 fprintf(stderr, "\n");
          259         }
          260         return 0;
          261 }