#!/usr/bin/awk -f
# xml2tsv.awk version 8 by Ben Collver <bencollver@tilde.pink>
# reads XML from stdin and writes TSV to stdout

# append(str): add a piece of text to the global accumulator "text".
# Empty pieces are ignored; non-empty pieces are joined to what is already
# accumulated with the global "eolstr" separator.
function append(str) {
    if (length(str) == 0) {
        # nothing to add
        return
    }
    text = (length(text) == 0) ? str : text eolstr str
    return
}

# hex_decode_print(str): write the characters encoded by the hex string "str"
# to stdout, consuming it two hex digits at a time.
#   str - string of hex digit pairs; upper- and lower-case digits are accepted
#   d   - local: the pair currently being decoded
# Tab (09) and line feed (0A) are written as the global escape strings
# "tabstr" and "eolstr" so that the decoded text cannot break a TSV row.
# Every other pair is looked up in the global table hex_decode_ord[].
# NOTE(review): in a multibyte locale some awk implementations may print
# values above 127 with "%c" as a multibyte character instead of a single
# byte -- confirm how this script is invoked.
function hex_decode_print(str,     d) {
    while (length(str) > 0) {
        # The lookup table is keyed by upper-case pairs, so fold case first;
        # otherwise "0a" or "ff" would miss both the table and the checks below.
        d = toupper(substr(str, 1, 2))
        str = substr(str, 3)
        if (d == "09") {
            printf "%s", tabstr
        } else if (d == "0A") {
            printf "%s", eolstr
        } else {
            printf "%c", hex_decode_ord[d]
        }
    }
    return
}

# hex_init(): fill the global table hex_decode_ord[] so that every two-digit
# upper-case hex string "00".."FF" maps to its numeric value 0..255.
#   code - local: the value currently being registered
function hex_init(     code) {
    code = 0
    while (code <= 255) {
        hex_decode_ord[sprintf("%02X", code)] = code
        code++
    }
    return
}

# normalize(str): return "str" prepared for use as a TSV field.
#   str    - raw text taken from the XML input
#   retval - local: working copy (declared as a parameter so it does not
#            leak into, or clobber, a global of the same name)
# Leading and trailing runs of newline, carriage return and space are
# removed; remaining line breaks (CRLF or LF) become the global "eolstr" and
# remaining tabs become the global "tabstr".
# NOTE(review): tabs are not part of the trimmed set, so tab-only indentation
# survives as escaped tabs -- confirm whether that is intended.
function normalize(str,     retval) {
    retval = str
    gsub(/^[\n\r ][\n\r ]*/, "", retval)
    gsub(/[\n\r ]*[\n\r ]$/, "", retval)
    # CRLF must be handled before bare LF so it yields a single separator.
    gsub(/\r\n/, eolstr, retval)
    gsub(/\n/, eolstr, retval)
    gsub(/\t/, tabstr, retval)
    return retval
}

# trimleft(str): return "str" without its leading whitespace.
#   str    - text to trim
#   retval - local: working copy (declared as a parameter so it does not
#            leak into, or clobber, a global of the same name)
# Whitespace here means space, newline, carriage return and tab, so that
# attributes separated by CRLF line breaks or tabs are trimmed as well.
function trimleft(str,     retval) {
    retval = str
    sub(/^[ \t\r\n][ \t\r\n]*/, "", retval)
    return retval
}

# One-time setup before any input is read.
BEGIN {
    # Records end at ">", so each record is "text before a tag" plus the tag
    # itself without its closing ">".
    RS = ">"

    # Parser state: current element path and the text collected so far.
    path = ""
    text = ""
    is_hex = 0

    # Two-character escapes (backslash + letter) written in place of real
    # tabs and line breaks.
    tabstr = "\\t"
    eolstr = "\\n"

    # Build the hex-pair lookup table.
    hex_init()
}

# A record that starts with the XML declaration ("<?xml ...") carries no
# data: skip it.
$0 ~ /^<\?xml/ {
    next
}

# A record holding the "awk:ok" element with its awk namespace declaration
# is skipped without producing output.
$0 ~ /<awk:ok xmlns:awk=/ {
    next
}

# Close tag.  The record is "<text before the tag></name" (the ">" was
# consumed as the record separator).  Any pending text is written as
# "<path>/text()", the element is popped from the global "path", and an
# end-of-element line "<path><TAB>" is written.
/<\// {
    closed_path = path
    count = split($0, tokens, /[<]/)
    if (count != 2) {
        print "Apparently malformed close tag: " $0 ">"
        exit(1)
    }
    # Text preceding the tag belongs to the element being closed.
    str = normalize(tokens[1])
    append(str)
    # Drop the leading "/" and any whitespace before the (removed) ">",
    # which XML allows in an end tag.
    name = substr(tokens[2], 2)
    sub(/[ \t\r\n][ \t\r\n]*$/, "", name)
    if (name == "awk:cdata") {
        # The accumulated text is hex encoded: decode it while printing.
        if (length(text) > 0) {
            printf "%s/text()\t", path
            hex_decode_print(text)
            print ""
        }
        is_hex = 0
    } else {
        namelen = length(name)
        pathlen = length(path)
        # Compare against the whole last path component.  Checking only the
        # last length(name) characters would accept "</bar>" for "<foobar>"
        # and then truncate the path in the wrong place.
        expected = path
        sub(/^.*\//, "", expected)
        if (name == "" || name != expected) {
            printf "Error: Expected \"%s\" closing tag, got \"%s\"\n",
            expected, name
            exit(1)
        }
        if (length(text) > 0) {
            printf "%s/text()\t%s\n", path, text
        }
        # Pop "/name" from the end of the path.
        path = substr(path, 1, pathlen - namelen - 1)
    }
    # NOTE(review): this end-of-element line is also written after an
    # awk:cdata close, where the path is left unchanged -- confirm intended.
    printf "%s\t\n", closed_path
    text = ""
    next
}

# Open tag (also CDATA sections, DOCTYPE and self-closing tags).  The record
# is "<text before the tag><tag contents" (the ">" was consumed as the record
# separator).  The element name is pushed onto the global "path" and one line
# "<path>[@attr]<TAB>value" is written per attribute.  A self-closing tag
# additionally writes the end-of-element line and pops the path again.
/</ {
    is_open = 1
    count = split($0, tokens, /</)
    if (count != 2) {
        print "Error: Apparently malformed open tag: " $0 ">"
        exit(1)
    }
    # Text preceding the tag belongs to the enclosing element.
    str = normalize(tokens[1])
    append(str)
    str = tokens[2]
    if (str ~ /^!\[CDATA\[/) {
        # Skip the 8-character "![CDATA[" prefix and the trailing "]]".
        str = normalize(substr(str, 9, length(str) - 10))
        append(str)
        next
    } else if (str ~ /^!DOCTYPE /) {
        # ignore DOCTYPE
        next
    }
    if (match(str, /[\n ]*\/$/)) {
        # discard slash from self-closing tag
        str = substr(str, 1, length(str) - RLENGTH)
        is_open = 0
    }
    if (match(str, /^awk:cdata type="awk:hexBinary"/)) {
        is_hex = 1
        next
    }
    # The element name runs up to the first space or newline.  Without a
    # name an empty component would be pushed onto the path, so reject it.
    if (!match(str, /^[^\n ][^\n ]*/)) {
        print "Error: Apparently malformed open tag: " $0 ">"
        exit(1)
    }
    name = substr(str, 1, RLENGTH)
    # Trim here so that whitespace after the name with no attributes
    # ("<foo >") is not mistaken for a malformed attribute.
    str = trimleft(substr(str, RLENGTH+1))

    oldpath = path
    path = path "/" name
    while (length(str) > 0) {
        if (match(str, /^[^=]*="[^"]*"/) || match(str, /^[^=]*='[^']*'/)) {
            # double- or single-quoted value
            quoted = 1
        } else if (match(str, /^[^=]*=[^\n ]*/)) {
            # unquoted value: runs up to the next space or newline
            quoted = 0
        } else {
            printf "Apparently malformed attribute: \"%s\"\n", str
            # An error must not be reported as success.
            exit(1)
        }
        pair = substr(str, 1, RLENGTH)
        str = trimleft(substr(str, RLENGTH+1))
        # Split at the FIRST "=" only; the value itself may contain "=".
        eq = index(pair, "=")
        attr = substr(pair, 1, eq - 1)
        if (quoted) {
            # drop the opening and closing quote characters
            value = substr(pair, eq + 2, length(pair) - eq - 2)
        } else {
            value = substr(pair, eq + 1)
        }
        # Escape line breaks and tabs like element text, so that a value
        # cannot break the TSV row.
        gsub(/\r\n/, eolstr, value)
        gsub(/\n/, eolstr, value)
        gsub(/\t/, tabstr, value)
        printf "%s[@%s]\t%s\n", path, attr, value
    }
    if (!is_open) {
        # self-closing: emit the end-of-element line and pop the element
        printf "%s\t\n", path
        path = oldpath
    }
    next
}

# Any record that reaches this point contains no "<" (every tag rule above
# ends with next or exit): treat it as plain text and accumulate it.
{
    append(str = normalize($0))
}
