-- libxml ver. 1.2
-- lua ver. 5.2 compliant
-- general functions to parse XML documents
-- needs only the regular Lua binary
-- update: 2014/09/07
-- coded by Kenar (Kenji arisawa)

gen=require("gen")
str=require("str")
split = str.split
sub = string.sub
gsub = string.gsub
find = string.find
match = string.match
format = string.format
printf=gen.printf
trim=str.trim

function encode(s)
  -- Escape the five XML special characters (& < > ' ") in s with their
  -- predefined entities and return the escaped string.
  -- One pass is enough: each character is looked up on its own, so the "&"
  -- that starts a freshly written entity is never escaped a second time.
  local entity = {
    ["&"] = "&amp;",
    ["<"] = "&lt;",
    [">"] = "&gt;",
    ["'"] = "&apos;",
    ['"'] = "&quot;",
  }
  local escaped = gsub(s, "[&<>'\"]", entity)
  return escaped
end

function decode(s)
  -- Replace the five predefined XML entities (&amp; &lt; &gt; &apos; &quot;)
  -- in s with the characters they stand for and return the decoded string.
  -- Any other "&...;" sequence is left untouched.
  -- Only the string is returned (gsub's match count is dropped), so the
  -- result can be used directly as a function argument, like encode().
  local tab = { ["&amp;"] = "&", ["&lt;"] = "<", ["&gt;"] = ">",
                ["&apos;"] = "'", ["&quot;"] = '"'}
  -- "[^&;]+" rather than "[^;]+": a stray "&" must not swallow the text up to
  -- the ";" of a later, real entity (e.g. "A & B &lt; C").
  local decoded = gsub(s, "&[^&;]+;", tab)
  return decoded
end

function collect(s)
  -- Parse the XML string s into a tree of nested tables.
  -- Returns an unnamed root table whose array part holds the top-level items.
  -- Every element is a table {name=..., attrs=...} (attrs is the raw
  -- attribute string, or nil when the tag has none) whose array part holds
  -- its children in document order: element tables and non-blank text strings.
  -- Raises an error on a mismatched end tag, an end tag with nothing open,
  -- an element left unclosed, or an unterminated comment.
  -- tags have the form: "<NAME ATTRS/?>", "<NAME/?>", "</NAME>"
  local TAG = "<(/?)([^%s/>]+) *(.-)(/?)>"
  local root = {}
  local stack = { root }
  local top = root   -- always the innermost open element, i.e. stack[#stack]
  local _, declend = find(s, "<%?xml *(.-)%?>")
  local i = declend and declend + 1 or 1

  -- append text to the current element unless it is only whitespace
  local function addtext(text)
    if not find(text, "^%s*$") then
      table.insert(top, text)
    end
  end

  while true do
    local ni, j, c, name, attrs, empty = find(s, TAG, i)
    if not ni then
      break
    end
    -- text between the previous tag and this one (also when this one is a
    -- comment, so text in front of a comment is not lost)
    addtext(sub(s, i, ni-1))
    if sub(s, ni, ni+3) == "<!--" then
      -- a comment ends at "-->", not at the first ">", so it may contain
      -- markup or ">" characters; its content is skipped
      local _, cend = find(s, "-->", ni+4, true)
      if not cend then
        error("unterminated comment")
      end
      j = cend
    else
      -- here attrs == "" if no attr but we make it nil
      if attrs == "" then
        attrs = nil
      end
      if empty == "/" then  -- empty element tag
        table.insert(top, {name=name, attrs=attrs})
      elseif c == "" then   -- start tag
        top = {name=name, attrs=attrs}
        table.insert(stack, top)   -- new level
      else  -- end tag
        local toclose = table.remove(stack)  -- remove top
        top = stack[#stack]
        if #stack < 1 then
          error("nothing to close with "..name)
        end
        if toclose.name ~= name then
          error("trying to close "..toclose.name.." with "..name)
        end
        table.insert(top, toclose)
      end
    end
    i = j+1
  end
  addtext(sub(s, i))   -- text after the last tag
  if #stack > 1 then
    error("unclosed "..stack[#stack].name)
  end
  return stack[1]
end

function decomp(attrs)
  -- Decompose a raw attribute string such as  a="1" b='2'  into a table
  -- {a="1", b="2"}.
  -- Returns nil when attrs is nil.
  -- Names and the "=" sign may be separated by any whitespace (spaces, tabs,
  -- newlines), so attributes spread over several lines are accepted.
  -- Values must be quoted with ' or "; the quotes are removed, the value is
  -- otherwise returned verbatim (entities are not decoded).
  -- Raises an error when a name has no quoted value or a quote is not closed.
  if attrs == nil then
    return nil
  end
  local t = {}
  local pos = 1
  while true do
    local ns, ne = find(attrs, "[^=%s>]+", pos)
    if ns == nil then
      break
    end
    local name = sub(attrs, ns, ne)
    -- "^" anchors at the init position: the "=" must directly follow the name
    local _, qpos = find(attrs, "^%s*=%s*['\"]", ne+1)
    if qpos == nil then
      error("malformed attribute '"..name.."' in: "..attrs)
    end
    local q = sub(attrs, qpos, qpos)   -- the opening quote character
    local vend = find(attrs, q, qpos+1, true)   -- plain search for its mate
    if vend == nil then
      error("unterminated value of attribute '"..name.."' in: "..attrs)
    end
    t[name] = sub(attrs, qpos+1, vend-1)
    pos = vend + 1
  end
  return t
end

function putdb(e,names,ids)
  -- Register element e in the two lookup tables kept by mktree():
  --   names[tag name] -> list of elements carrying that tag name
  --   ids[id]         -> the element whose attr.id is that id
  -- Raises "same id: ..." when the id has already been registered; the
  -- element has been appended to names by then.
  local tag = e.name
  local bucket = names[tag]
  if bucket == nil then
    bucket = {}
    names[tag] = bucket
  end
  table.insert(bucket, e)

  local id = e.attr and e.attr.id
  if not id then
    return
  end
  if ids[id] then
    error("same id: "..id)
  end
  ids[id] = e
end

function mktree(s)
  -- Parse the XML string s into a DOM-like tree (a variant of collect()).
  -- Returns three values: root, names, ids
  --   root  : the root element
  --   names : tag name -> list of elements with that name (filled by putdb)
  --   ids   : id attribute -> element (filled by putdb)
  -- Every element is a table with the fields
  --   name, attrs (raw attribute string or nil), attr (decomp(attrs)),
  --   parent (enclosing element, nil for the root), no (index among the
  --   parent's children), text (trimmed text found directly inside the
  --   element; a later text run replaces an earlier one)
  -- and its child elements in the array part.
  -- Parsing stops as soon as the root element is complete.
  -- Raises an error on a mismatched or stray end tag, on elements left
  -- unclosed, and on an unterminated comment.
  -- tags have the form: "<NAME ATTRS/?>", "<NAME/?>", "</NAME>"
  local TAG = "<(/?)([^%s/>]+) *(.-)(/?)>"
  local stack = {}   -- currently open elements, innermost last
  local top = {}     -- receives text and children; a throwaway table until the root opens
  local ids = {} -- id table
  local names = {} -- tag name table
  local root         -- kept local so nothing leaks between calls
  local _, declend = find(s, "<%?xml *(.-)%?>")
  local i = declend and declend + 1 or 1
  while true do
    local ni, j, c, name, attrs, empty = find(s, TAG, i)
    if not ni then
      break
    end
    local text = sub(s, i, ni-1)
    if not find(text, "^%s*$") then
      top.text = match(text,"^%s*(.-)%s*$")
    end
    if sub(s, ni, ni+3) == "<!--" then
      -- a comment ends at "-->", not at the first ">"; its content is skipped
      local _, cend = find(s, "-->", ni+4, true)
      if not cend then
        error("unterminated comment")
      end
      j = cend
    else
      -- here attrs == "" if no attr but we make it nil
      if attrs == "" then
        attrs = nil
      end
      if empty == "/" then  -- empty element tag
        local e = {name=name, attrs=attrs, attr=decomp(attrs), parent=stack[#stack], no=#top+1}
        table.insert(top,e)
        putdb(e,names,ids)
        if #stack == 0 then
          -- the document root itself is an empty element
          root = e
          break
        end
      elseif c == "" then   -- start tag
        local e = {name=name, attrs=attrs, attr=decomp(attrs), parent=stack[#stack], no=#top+1}
        table.insert(stack, e)   -- new level
        putdb(e,names,ids)
        top = e
      else  -- end tag
        local closed = table.remove(stack)  -- remove top
        if closed == nil then
          error("nothing to close with "..name)
        end
        if closed.name ~= name then
          error("inconsitent open and close tag: "..closed.name.." with "..name)
        end
        if #stack < 1 then
          root = closed   -- the root element is complete
          break
        end
        top = stack[#stack]
        table.insert(top, closed)
      end
    end
    i = j+1
  end
  -- the stack is empty once the root has been closed; anything left is unclosed
  if #stack > 0 then
    error("unclosed "..stack[#stack].name)
  end
  return root,names,ids
end

function printstack(t, Ntab)
  -- Dump the structure of the tree returned by collect(), one line per item.
  -- Usage: t = collect(content); printstack(t)
  -- Each line starts with the nesting level, followed by
  --   #children, name, attrs   when an element (or the root) opens
  --   "T", text                for a text item
  --   "/", name                when the element closes
  -- Ntab is the level of the enclosing item; leave it out on the first call.
  local depth = Ntab
  if depth == nil then
    depth = 0
  end
  if t == nil then
    return
  end
  if type(t) ~= "string" then
    depth = depth + 1
    local count = #t   -- 0 for an empty element tag
    print(depth, count, t.name, t.attrs)
    local k = 1
    while k <= count do
      printstack(t[k], depth)
      k = k + 1
    end
    print(depth, "/", t.name)  -- closing tag
  else
    print(depth, 'T', t)   -- text outside of tags
  end
end

function mksp(n)
  -- indent string for nesting level n: n tab characters ("" when n <= 0)
  local tab = "\t"
  return tab:rep(n)
end

function viewt(t,Ntab)
  -- Print the tree built by mktree() in indented data format (idf), which
  -- shows the logical structure of an xml file.
  -- Usage: t = mktree(content); viewt(t)
  -- Per element, at one tab per nesting level:
  --   the tag name
  --   "- " lines, one per piece of split(t.attrs,nil,true)
  --   "* " lines, one per line of the element's text
  -- followed by its child elements one level deeper.
  -- Ntab is the level of the parent; leave it out on the first call, which
  -- also prints the "# format" header.
  if t == nil then
    return
  end
  local level
  if Ntab == nil then
    print("# format: idf-1.0")
    level = 0
  else
    level = Ntab + 1
  end
  local pad = mksp(level)
  print(pad..t.name)
  if t.attrs then
    -- the raw attrs string is used instead of t.attr so that the original
    -- attribute order is kept
    local pieces = split(t.attrs,nil,true)
    for k=1,#pieces do
      print(pad.."- "..pieces[k])
    end
  end
  if t.text then
    local rows = split(t.text,"\n")
    for k=1,#rows do
      print(pad.."* "..rows[k])
    end
  end
  for _,child in ipairs(t) do
    viewt(child,level)
  end
end

function mkidf(file)
  -- Print an xml file in indented data format (idf), which shows the logical
  -- structure of the file.
  -- file: path of the xml file; standard input is read when file is nil.
  -- Prints "no such file: ..." and returns when the file cannot be opened.
  -- Output, one tab per nesting level:
  --   "!..." lines for a leading <!DOCTYPE ...> declaration
  --   "# "   lines for comments
  --   the tag name for every start tag or empty-element tag
  --   "- "   lines for its attributes (pieces of split(attrs,nil,true))
  --   "* "   lines for text
  -- Raises an error on a mismatched or stray end tag and on an unterminated
  -- comment.
  -- 2009/04/05
  -- 2012/04/23
  local TAG = "<(/?)([^%s/>]+) *(.-)(/?)>"
  local stack = {}   -- names of the currently open elements
  local f
  if file == nil then
    f = io.stdin
  else
    f = io.open(file)
    if f == nil then
      print("no such file: "..file)
      return
    end
  end
  local s = f:read("*a")
  if f ~= io.stdin then
    f:close()   -- only close what was opened here
  end
  print("# format: idf-1.1")
  local i, j
  do
    local _
    _, j = find(s, "<%?xml *(.-)%?>%s*")
  end
  if j == nil then
    i = 1
  else
    i = j + 1
  end

  if sub(s,i,i+9) == "<!DOCTYPE " then
    local _, t, c
    _,j,t,c = find(s,"<!([^>%[]+)(.)%s*",i)
    if c == "[" then
      -- DOCTYPE with an internal subset: print each <!...> declaration
      print("!"..t..c)
      i = j + 1
      while true do
        _,j,t = find(s,"<!(%w[^>]-)%s*>%s*",i)
        if t == nil then
          -- we should sweep out trailing "]>"
          _,j,t = find(s,"([^>]*)>",i)
          print("!"..t)
          i = j + 1
          break
        end
        print("!"..t)
        i = j + 1
      end
    else
      print("!"..t)
      i = j + 1
    end
  end

  while true do
    -- tags have the form: "<NAME ATTRS/?>", "<NAME/?>", "</NAME>",
    local ni, c, name, attrs, empty
    ni,j,c,name,attrs,empty = find(s, TAG, i)
    if not ni then
      break
    end
    -- text in front of this tag (or comment)
    local text = sub(s, i, ni-1)
    if not find(text, "^%s*$") then
      local indent=mksp(#stack - 1)
      print(indent.."* "..gsub(trim(text),"(%s+)","\n"..indent.."  "))
    end
    if sub(s, ni, ni+3) == "<!--" then
      -- <!-- comments -->
      -- a comment ends at "-->", not at the first ">"
      local _, cend = find(s, "-->", ni+4, true)
      if not cend then
        error("unterminated comment")
      end
      -- drop the single space that conventionally pads the comment text
      local body = match(sub(s, ni+4, cend-3), "^ ?(.-) ?$")
      print(mksp(#stack).."# "..gsub(body,"\n","\n#"))
      j = cend
    elseif empty == "/" or c == "" then -- empty element tag or starting tag
      print(mksp(#stack)..name)
      -- attrs == "" means the tag has no attributes
      if attrs ~= "" then
        local t=split(attrs,nil,true)
        for k=1,#t do
          print(mksp(#stack).."- "..t[k])
        end
      end
      if empty ~= "/" then   -- start tag
        table.insert(stack,name)
      end
    else  -- end tag
      local toclose = table.remove(stack)  -- remove top
      if toclose == nil then
        error("nothing to close with "..name)
      end
      if toclose ~= name then
        error("trying to close "..toclose.." with "..name)
      end
    end
    i = j+1
  end
end


function mkxml(file,indent)
  -- Make xml from an idf file and write it to standard output
  -- (reverse of mkidf()).
  -- file:   path of the idf file, read line by line with io.lines()
  -- indent: number of spaces per nesting level in the output (default 0)
  -- idf lines are nested with leading tabs; after the tabs a line is
  --   "- ..."  an attribute/value pair of the current element
  --   "* ..."  a data value of the current element
  --   "# ..."  a comment
  --   NAME     a new element
  -- On a malformed line a message is printed and the process exits.
  local function sp(n)
    return string.rep(" ", math.floor(n))
  end
  local s,t,i,stack,t0,t00
  -- name of the element whose start tag is currently open; it must be local:
  -- as a global it survived into the next call, where it was pushed on the
  -- stack as a bogus ancestor of the first element
  local name
  if indent == nil then
    indent = 0
  end
  stack = {}   -- names of the ancestors of the current element
  io.write('<?xml version="1.0" encoding="utf-8"?>\n')
  i = -1       -- nesting level of the current element, -1 before the first
  for line in io.lines(file) do
    s = match(line,"^(\t*)")
    t = sub(line,#s + 1)
    if sub(t,1,1) == " " then
      print("\n### use tabs for indent")
      os.exit()
    end
    t0 = sub(t,1,2)
    if t0 == "- " then -- an attribute/value pair
      io.write(" "..sub(t,3))
    elseif t0 == "* " then -- a data value
      if t00 and t0 ~= t00 then
        io.write(">")   -- first data line: finish the start tag
      end
      io.write(sub(t,3))
    elseif sub(t,1,2) == "# " then -- comments
      io.write(sp(#stack * indent).."<!-- "..sub(t,3).." -->\n")
    elseif match(t,"^%w") then -- a name
      -- we have 9 cases:
      --   t00 = "- "; t00 = "* "; others
      --   #s > i; #s == i; #s < i
      if #s > i then -- new level, t must be a name
        -- note that t00 ~= "* " here
        if i ~= -1 then
          io.write(">\n")
        end
        if name ~= nil then   -- nothing to push before the first element
          table.insert(stack,name)
        end
        if #s ~= #stack then
          print("# invalid indent")
          os.exit()
        end
        io.write(sp(#s * indent).."<"..t)
      else
        -- close the previous element first
        if t00 == "* " then
          io.write("</"..name..">\n")
        else
          io.write("/>\n")
        end
        if #s < i then -- back to low level until #s == i
          repeat
            name = table.remove(stack)
            io.write(sp(#stack * indent).."</"..name..">\n")
          until #s == #stack
        end
        io.write(sp(#s * indent).."<"..t)
      end
      name = t
      i = #s
    else
      print("# invalid format")
      os.exit()
    end
    t00 = t0
  end
  -- close the last element and then all of its ancestors
  if t00 == "* " then
    io.write("</"..name..">\n")
  else
    io.write("/>\n")
  end
  while #stack > 0 do
    name = table.remove(stack)
    io.write(sp(#stack * indent).."</"..name..">\n")
  end
end

function pack(t,Ntab)
  -- Write the tree returned by collect() as packed xml (no added whitespace)
  -- through printf().
  -- Usage: t = collect(content); pack(t)
  -- Called without Ntab it prints the xml declaration and then packs t[1],
  -- the first item of the root; recursive calls pass the nesting level.
  if t == nil then
    return
  end
  if Ntab == nil then
    printf('<?xml version="1.0" encoding="utf-8"?>')
    pack(t[1],0)
    return
  end
  if type(t) == "string" then
    printf("%s",t)   -- text is written as is
    return
  end
  local childless = (#t == 0)
  local slash = ''
  if childless then
    slash = '/'      -- written as an empty-element tag
  end
  if t.attrs then
    printf("<%s %s%s>",t.name, t.attrs,slash)
  else
    printf("<%s%s>",t.name,slash)
  end
  if childless then
    return
  end
  local level = Ntab + 1
  for k=1, #t do
    pack(t[k],level)
  end
  printf("</%s>",t.name)
end

function namepath(t,r,h)
  -- Flatten the tree returned by collect() into a table keyed by name path;
  -- handy for simple xml.
  -- Usage: s=collect(content); t=namepath(s)
  --        for k,v in pairs(t) do print(k,v) end
  -- t["foo bar"]  is the text of <foo><bar>...</bar></foo>
  --               ("" for an element without content, e.g. <bar/>)
  -- t["foo bar "] (trailing space) is the raw attribute string of <bar>
  -- r and h are the path so far and the result table of the recursion;
  -- leave them out on the first call.
  if r == nil then
    r = ""
    h = {}
  end
  if t == nil then -- should not happen
    return h
  end
  if t.attrs then
    h[r.." "] = t.attrs
  end
  local count = #t
  if count == 0 then
    -- <foo/> is semantically equivalent to <foo></foo>
    h[r] = ""
    return h
  end
  if count == 1 and type(t[1]) == "string" then
    -- <name>alice</name>
    h[r] = t[1]
    return h
  end
  for _,child in ipairs(t) do
    local tag = child.name   -- nil for text items, which are skipped
    if tag ~= nil then
      local path
      if r == "" then
        path = tag
      else
        path = r .. " ".. tag
      end
      h = namepath(child,path,h)
    end
  end
  return h
end

-- Sometimes the last element of a name path is the actual value, so pullout()
-- moves it from the key into the value. Given
--   hash = {["lockinfo lockscope exclusive"]="", ["lockinfo locktype write"]=""}
-- pullout(hash) returns
--   {["lockinfo lockscope"]="exclusive", ["lockinfo locktype"]="write"}
-- Entries with a non-empty value are copied unchanged; an entry with an empty
-- value whose key contains no space is left out of the result.
function pullout(hash)
  local out = {}
  for path,value in pairs(hash) do
    if value ~= "" then
      out[path] = value
    else
      -- split off the last space-separated word of the key
      local head,last = match(path,"^(.-) ([^%s]+)$")
      if head then
        out[head] = last
      end
    end
  end
  return out
end

--[=[
content = [[
<?xml version="1.0" encoding="utf-8"?>
<D:lockinfo xmlns:D="DAV:">
<D:lockscope><D:exclusive/></D:lockscope>
<D:locktype><D:write/></D:locktype>
<D:owner>
<D:href>http://www.apple.com/webdav_fs/</D:href>
</D:owner>
</D:lockinfo>
]]

s = collect(content)
-- printstack(s)

print(s[1].name)
print(s[1][1].name)
print(s[1][1][1].name)

t = mknametree(s)
print(type(t))
print(t)
print(t["D:lockinfo"]["D:locktype"]["D:write"])
print(t["D:lockinfo"]["D:owner"]["D:href"])




-- then the result is
term% lua parse.lua
1	1	nil	nil
2	3	D:lockinfo	xmlns:D="DAV:"
3	1	D:lockscope	
4	0	D:exclusive	
4	/	D:exclusive
3	/	D:lockscope
3	1	D:locktype	
4	0	D:write	
4	/	D:write
3	/	D:locktype
3	1	D:owner	
4	1	D:href	
4	T	http://www.apple.com/webdav_fs/
4	/	D:href
3	/	D:owner
2	/	D:lockinfo
1	/	nil
D:lockinfo
D:lockscope
D:exclusive
term% 
]=]

-- module table
return {
decode=decode,
encode=encode,
collect=collect,
mkidf=mkidf,
mktree=mktree,
mkxml=mkxml,
-- this file defines no function named `view`; the tree viewer is viewt()
view=viewt,
namepath=namepath,
pullout=pullout
}
