/* UTF8-UCS4-String/lib/xp/utf8stringtree.cpp
 * 
 * Copyright (C) 2002 Francis James Franklin <fjf@alinameridon.com>
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <cstdlib>
#include <cstring>

#include <new>
#include <stdexcept>

#include "utf8stringtree.h"

/* The expat-based parser entry point is only declared (and only used, see
 * UTF8StringTree::import) when the build defines HAVE_EXPAT; its definition
 * is not in this file.
 */
#ifdef HAVE_EXPAT
extern bool xml_expat_parse (UTF8StringTree & tree, const char * buffer, int length, UTF8String & error);
#endif

/* File-local helpers for namespace handling; all four are defined below.
 */
static void             s_xmlns_remap (UTF8StringMap & xmlns, const char * const * atts);
static const UTF8String s_xmlns (const char * const * atts, const UTF8String & xmlns_default);
static bool             s_xmlns_prefixed (const char * name, UTF8String & tag_prefix, UTF8String & tag_name);
static void             s_xmlns_guess (const UTF8String & tag_prefix, UTF8String & tag_uri);

/* Return whether ucs4 is a white-space code point.
 *
 * The set tested is the Unicode White_Space property.  Earlier only the last
 * member of each run was listed, so tab, line feed, form feed, no-break space,
 * U+2000..U+2009, U+2028 and U+205F were not treated as white space.
 */
static inline bool s_white_space (UTF8String::UCS4Char ucs4)
{
  switch (ucs4)
    {
    case 0x0009: // character tabulation
    case 0x000a: // line feed
    case 0x000b: // line tabulation
    case 0x000c: // form feed
    case 0x000d: // carriage return
    case 0x0020: // space
    case 0x0085: // next line
    case 0x00a0: // no-break space
    case 0x1680: // ogham space mark
    case 0x2000: // en quad
    case 0x2001: // em quad
    case 0x2002: // en space
    case 0x2003: // em space
    case 0x2004: // three-per-em space
    case 0x2005: // four-per-em space
    case 0x2006: // six-per-em space
    case 0x2007: // figure space
    case 0x2008: // punctuation space
    case 0x2009: // thin space
    case 0x200a: // hair space
    case 0x2028: // line separator
    case 0x2029: // paragraph separator
    case 0x202f: // narrow no-break space
    case 0x205f: // medium mathematical space
    case 0x3000: // ideographic space
      return true;
    default:
      return false;
    }
}

/* Update the namespace map from an element's attribute list.
 *
 * atts is a null-terminated array of alternating name/value C strings (may be
 * null, in which case nothing happens).  Every attribute whose name starts
 * with "xmlns:" is inserted into xmlns, keyed by the part of the name after
 * the colon, with the attribute value as the uri.
 *
 * A trailing name without a value ends the scan; previously such an array
 * caused the walk to step over the terminating null.
 */
static void s_xmlns_remap (UTF8StringMap & xmlns, const char * const * atts)
{
  if (atts == 0) return;

  UTF8String xmlns_name;
  UTF8String xmlns_uri;

  for (const char * const * attr = atts; attr[0] != 0; attr += 2)
    {
      /* malformed list: name with no value - stop before reading past the end
       */
      if (attr[1] == 0) break;

      if (strncmp (attr[0], "xmlns:", 6) == 0)
        {
          xmlns_name = attr[0] + 6; // skip the "xmlns:" part
          xmlns_uri = attr[1];
          xmlns.ins (xmlns_name, xmlns_uri);
        }
    }
}

/* Return the new default namespace.
 *
 * atts is a null-terminated array of alternating name/value C strings (may be
 * null).  If it holds an attribute named exactly "xmlns", a string built from
 * that attribute's value is returned; otherwise a copy of xmlns_default.
 *
 * A trailing name without a value ends the scan (and yields the default);
 * previously such an array could make the walk step over the terminating null.
 */
static const UTF8String s_xmlns (const char * const * atts, const UTF8String & xmlns_default)
{
  if (atts)
    {
      for (const char * const * attr = atts; attr[0] != 0; attr += 2)
        {
          /* malformed list: name with no value
           */
          if (attr[1] == 0) break;

          if (strcmp (attr[0], "xmlns") == 0) return UTF8String(attr[1]);
        }
    }
  return UTF8String(xmlns_default);
}

/* separate namespace prefix from element name; return whether a prefix is there
 */
static bool s_xmlns_prefixed (const char * name, UTF8String & tag_prefix, UTF8String & tag_name)
{
  if (name == 0) return false;

  UTF8String tag(name);

  tag_prefix = "";
  tag_name = tag;

  UTF8String::const_iterator ci_start = tag.begin ();
  UTF8String::const_iterator ci_end = tag.end ();
  UTF8String::const_iterator ci = tag.begin ();

  while (ci != ci_end)
    {
      if (*(&ci) == ':') // operator& returns a string from the iterator
	{
	  tag_prefix = UTF8String(ci_start,ci);
	  ++ci;
	  tag_name = UTF8String(ci,ci_end);
	  break;
	}
      ++ci;
    }
  return (tag_prefix.utf8_length () > 0);
}

/* Fallback for documents lacking a namespace declaration: if tag_prefix is one
 * of a few well-known names, set tag_uri to the matching namespace uri.
 * tag_uri is left untouched for any other prefix.
 */
static void s_xmlns_guess (const UTF8String & tag_prefix, UTF8String & tag_uri)
{
  static const struct
  {
    const char * prefix;
    const char * uri;
  } s_known[] = {
    { "math",    "http://www.w3.org/1998/Math/MathML" },
    { "html",    "http://www.w3.org/1999/xhtml" },
    { "svg",     "http://www.w3.org/2000/svg" },
    { "abiword", "http://www.abisource.com/awml.dtd" } // AWML
  };

  for (unsigned long i = 0; i < sizeof (s_known) / sizeof (s_known[0]); i++)
    {
      if (tag_prefix == s_known[i].prefix)
        {
          tag_uri = s_known[i].uri;
          return;
        }
    }
}

void UTF8StringTree::xmlns_query (const UTF8StringMap & map, const UTF8String & name, UTF8String & prefix, UTF8String & uri)
{
  UTF8String::const_iterator ci_start = name.begin ();
  UTF8String::const_iterator ci_end = name.end ();
  UTF8String::const_iterator ci = name.begin ();

  while (ci != ci_end)
    {
      if (*(&ci) == ':') // operator& returns a string from the iterator
	{
	  prefix = UTF8String(ci_start,ci);
	  break;
	}
      ++ci;
    }
  if (prefix.utf8_length () == 0) return;

  const UTF8StringPair * pair = map.lookup (prefix);
  if (pair)
    {
      uri = pair->value ();
    }
  else uri = "";
}

/* Parse a "name: value; name: value" style string and insert each pair into map.
 *
 * map   - receives one entry per declaration whose name and value are both
 *         non-empty
 * style - the text to parse; an empty string is a no-op
 *
 * White space (as defined by s_white_space) around names and values is
 * skipped.  Inside a value, single and double quotes are tracked so that
 * ';' and white space within quotes do not end the value, and a backslash
 * inside quotes escapes the following quote or backslash.
 */
void UTF8StringTree::style_parse (UTF8StringMap & map, const UTF8String & style)
{
  if (style.utf8_length () == 0) return;

  UTF8String name;
  UTF8String value;
  UTF8String entity; // not used anywhere in this function

  /* ci_start marks the beginning of the span currently being collected
   */
  UTF8String::const_iterator ci_start = style.begin ();
  UTF8String::const_iterator ci_end   = style.end ();

  const UTF8String::UCS4Char colon     = UTF8String::UCS4Cast(':');
  const UTF8String::UCS4Char semicolon = UTF8String::UCS4Cast(';');
  const UTF8String::UCS4Char dquote    = UTF8String::UCS4Cast('"');
  const UTF8String::UCS4Char squote    = UTF8String::UCS4Cast('\'');
  const UTF8String::UCS4Char backslash = UTF8String::UCS4Cast('\\');

  /* one pass of this loop handles (at most) one declaration
   */
  while (true)
    {
      UTF8String::UCS4Char ucs4;

      /* skip white space before the name
       */
      while (ci_start != ci_end)
	{
	  ucs4 = *ci_start;
	  if (!s_white_space (ucs4)) break;
	  ++ci_start;
	}
      if (ci_start == ci_end) break;

      UTF8String::const_iterator ci = ci_start;

      /* scan the name: it must run straight up to a colon
       */
      while (ci != ci_end)
	{
	  ucs4 = *ci;
	  if ((ucs4 == semicolon) || (s_white_space (ucs4))) break; // malformed: name ended without a colon
	  if (ucs4 == colon) break; // well-formed: end of name
	  ++ci;
	}
      if (ci == ci_end) break; // ran out of input before any delimiter
      if (ucs4 != colon)
	{
	  /* drop the malformed fragment: restart just past the offending character
	   */
	  ci_start = ++ci;
	  continue;
	}
      name = UTF8String(ci_start,ci);

      ci_start = ++ci; // step over the colon

      /* skip white space before the value
       */
      while (ci_start != ci_end)
	{
	  ucs4 = *ci_start;
	  if (!s_white_space (ucs4)) break;
	  ++ci_start;
	}
      if (ci_start == ci_end) break; // name with no value at end of input

      bool is_escape = false; // previous character was an unescaped backslash inside quotes
      bool in_squote = false;
      bool in_dquote = false;

      value = "";

      ci = ci_start;

      /* scan the value; text is appended to value in spans [ci_start,ci)
       */
      while (ci != ci_end)
	{
	  ucs4 = *ci;
	  if (s_white_space (ucs4))
	    {
	      if (in_squote || in_dquote)
		{
		  /* quoted white space is part of the value
		   */
		  is_escape = false;
		  ++ci;
		  continue;
		}
	      else
		{
		  /* unquoted white space: take what we have, skip the run of
		   * white space, and leave the value loop
		   */
		  value.append (ci_start, ci);
		  while (ci != ci_end)
		    {
		      ucs4 = *ci;
		      if (!s_white_space (ucs4)) break;
		      ++ci;
		    }
		  ci_start = ci;
		  break;
		}
	    }
	  if (ucs4 == semicolon)
	    {
	      if (!in_dquote && !in_squote)
		{
		  /* unquoted semicolon ends the declaration
		   */
		  if (ci != ci_start)
		    {
		      value.append (ci_start, ci);
		      ci_start = ci;
		    }
		  break;
		}
	      is_escape = false;
	    }
	  else if (ucs4 == dquote)
	    {
	      /* for an escaped quote, restart the pending span at the quote so
	       * the preceding backslash is left out
	       */
	      if (is_escape) ci_start = ci;
	      if (in_dquote)
		{
		  if (!is_escape) in_dquote = false;
		}
	      else if (!in_squote)
		{
		  in_dquote = true;
		}
	      is_escape = false;
	    }
	  else if (ucs4 == squote)
	    {
	      /* same handling as for double quotes, with the roles swapped
	       */
	      if (is_escape) ci_start = ci;
	      if (in_squote)
		{
		  if (!is_escape) in_squote = false;
		}
	      else if (!in_dquote)
		{
		  in_squote = true;
		}
	      is_escape = false;
	    }
	  else if (ucs4 == backslash)
	    {
	      if (is_escape)
		{
		  /* escaped backslash: keep this one, drop the one before it
		   */
		  ci_start = ci;
		  is_escape = false;
		}
	      else if (in_squote || in_dquote) is_escape = true;

	      /* flush the text collected before the backslash; the pending
	       * span now starts at the backslash
	       */
	      if (ci != ci_start)
		{
		  value.append (ci_start, ci);
		  ci_start = ci;
		}
	    }
	  ++ci;
	}
      /* NOTE(review): if the value loop was left on unquoted white space that
       * is followed by something other than ';' or end of input, this test is
       * false and nothing is inserted; the next pass starts a new declaration
       * at that text - confirm this is the intended handling of multi-word
       * values.
       */
      if ((ucs4 == semicolon) || (ci == ci_end))
	{
	  if (ci_start != ci)
	    {
	      value.append (ci_start, ci);
	      ci_start = ci;
	    }
	  if (name.utf8_length () && value.utf8_length ()) map.ins (name, value);
	  if (ucs4 == semicolon)
	    ci_start = ++ci; // continue after the semicolon
	  else
	    ci_start = ci;
	  continue;
	}
    }
}

/* Build an element node and copy its attributes.
 *
 * atts, when non-null, is a null-terminated array of alternating name/value
 * C strings; each pair is stored in m_attrs.  A missing value, an empty name,
 * a failed insertion or any exception raised while copying clears m_attrs and
 * makes the constructor throw std::bad_alloc.
 *
 * After the attributes are stored, the "props" attribute (for the AWML
 * namespace uri) or the "style" attribute (for any other) is parsed into
 * m_style, which starts out as a copy of the style map passed in.
 */
UTF8StringTree::ElementNode::ElementNode (const UTF8String & ns_default, // default namespace uri
                                          const UTF8String & prefix,     // namespace prefix of element
                                          const UTF8String & uri,        // namespace uri of element
                                          const char * name,             // element name
                                          const char * const * atts,     // element attributes
                                          const UTF8StringMap & xmlns,   // namespace prefix->uri map
                                          const UTF8StringMap & style) : // CSS style map
  m_ns_default(ns_default),
  m_prefix(prefix),
  m_uri(uri),
  m_name(name),
  m_node(0),
  m_node_count(0),
  m_node_max(0),
  m_xmlns(xmlns),
  m_style(style)
{
  if (atts == 0) return;

  bool stored_all = true;
  try
    {
      UTF8String key;
      UTF8String val;

      for (const char * const * item = atts; *item; item += 2)
        {
          key = item[0];
          if (item[1] == 0) // name without a value
            {
              stored_all = false;
              break;
            }
          val = item[1];

          if (key.utf8_length () == 0)
            {
              stored_all = false;
              break;
            }
          if (!m_attrs.ins (key, val))
            {
              stored_all = false;
              break;
            }
        }
    }
  catch (...)
    {
      stored_all = false;
    }
  if (!stored_all)
    {
      m_attrs.clear ();
      throw std::bad_alloc();
    }

  /* TODO: remove this - this should be done only by namespaces that support it...
   */
  const char * style_attr = "style";
  if (m_uri == "http://www.abisource.com/awml.dtd") style_attr = "props";

  const UTF8StringPair * pair = m_attrs.lookup (style_attr);
  if (pair)
    {
      UTF8StringTree::style_parse (m_style, pair->value ());
    }
}

/* Delete the child nodes, last one first, then release the pointer array.
 */
UTF8StringTree::ElementNode::~ElementNode ()
{
  while (m_node_count != 0)
    {
      --m_node_count;
      delete m_node[m_node_count];
    }
  if (m_node != 0)
    {
      free (m_node);
    }
}

/* Node type tag for elements.
 */
UTF8StringTree::NodeType UTF8StringTree::ElementNode::type () const
{
  const NodeType kind = nt_element;
  return kind;
}

/* Add node as the last child of this element.
 *
 * ElementNode is responsible for node from here on - also on failure, when
 * node is deleted before returning; do not delete it elsewhere!
 *
 * The child array is allocated on first use and grows 32 slots at a time.
 * Returns false for a null node or when memory cannot be obtained.
 */
bool UTF8StringTree::ElementNode::append (Node * node)
{
  if (!node) return false;

  if (m_node == 0)
    {
      Node ** fresh = static_cast<Node **>(malloc (32 * sizeof (Node *)));
      if (fresh == 0)
        {
          delete node;
          return false;
        }
      m_node = fresh;
      m_node_max = 32;
      m_node_count = 0;
    }
  if (m_node_count == m_node_max)
    {
      Node ** grown = static_cast<Node **>(realloc (m_node, (m_node_max + 32) * sizeof (Node *)));
      if (grown == 0)
        {
          delete node;
          return false;
        }
      m_node = grown;
      m_node_max += 32;
    }
  m_node[m_node_count] = node;
  ++m_node_count;
  return true;
}

/* Nothing to set up beyond default construction of the members.
 */
UTF8StringTree::TextNode::TextNode ()
{
}

/* Nothing to release explicitly.
 */
UTF8StringTree::TextNode::~TextNode ()
{
}

/* Node type tag for text.
 */
UTF8StringTree::NodeType UTF8StringTree::TextNode::type () const
{
  const NodeType kind = nt_text;
  return kind;
}

/* Reset the stored text to the empty string.
 */
void UTF8StringTree::TextNode::clear ()
{
  m_text = "";
}

/* Add extra_text to the end of the stored text.
 */
void UTF8StringTree::TextNode::append (const UTF8String & extra_text)
{
  m_text += extra_text;
}

/* Nothing to set up beyond default construction.
 */
UTF8StringTree::CDATANode::CDATANode ()
{
}

/* Nothing to release explicitly.
 */
UTF8StringTree::CDATANode::~CDATANode ()
{
}

/* Node type tag for CDATA sections.
 */
UTF8StringTree::NodeType UTF8StringTree::CDATANode::type () const
{
  const NodeType kind = nt_cdata;
  return kind;
}

/* Remember the processing instruction's target; nothing else to set up.
 */
UTF8StringTree::PINode::PINode (const char * target) :
  m_target(target)
{
}

/* Nothing to release explicitly.
 */
UTF8StringTree::PINode::~PINode ()
{
}

/* Node type tag for processing instructions.
 */
UTF8StringTree::NodeType UTF8StringTree::PINode::type () const
{
  const NodeType kind = nt_pi;
  return kind;
}

/* Nothing to set up beyond default construction.
 */
UTF8StringTree::CommentNode::CommentNode ()
{
}

/* Nothing to release explicitly.
 */
UTF8StringTree::CommentNode::~CommentNode ()
{
}

/* Node type tag for comments.
 */
UTF8StringTree::NodeType UTF8StringTree::CommentNode::type () const
{
  const NodeType kind = nt_comment;
  return kind;
}

/* Nothing to set up beyond default construction.
 */
UTF8StringTree::DefaultNode::DefaultNode ()
{
}

/* Nothing to release explicitly.
 */
UTF8StringTree::DefaultNode::~DefaultNode ()
{
}

/* Node type tag for data delivered through the Default handler.
 */
UTF8StringTree::NodeType UTF8StringTree::DefaultNode::type () const
{
  const NodeType kind = nt_default;
  return kind;
}

/* Start with no uri->delegate table; it is allocated by registerNamespace.
 */
UTF8StringTree::ElementFactory::ElementFactory () :
  m_map(0),
  m_map_count(0),
  m_map_max(0)
{
}

/* Delete the uri string of every table entry, then release the table.
 * The delegates themselves are not deleted here.
 */
UTF8StringTree::ElementFactory::~ElementFactory ()
{
  unsigned long entry = 0;
  while (entry < m_map_count)
    {
      delete m_map[entry].uri;
      entry++;
    }
  if (m_map != 0)
    {
      free (m_map);
    }
}

/* The shared factory, created on the first call to instance().
 */
static UTF8StringTree::ElementFactory * s_ElementFactory = 0;

/* Return the shared factory, creating it if it does not exist yet.
 * Returns 0 if creation throws; a later call will try again.
 */
UTF8StringTree::ElementFactory * UTF8StringTree::ElementFactory::instance ()
{
  if (s_ElementFactory != 0) return s_ElementFactory;

  try
    {
      s_ElementFactory = new ElementFactory;
    }
  catch (...)
    {
      s_ElementFactory = 0;
    }
  return s_ElementFactory;
}

bool UTF8StringTree::ElementFactory::registerNamespace (const UTF8String & uri, const XMLNS * delegate)
{
  if (delegate == 0) return false;
  if (uri.utf8_length () == 0) return false;

  bool matched = false;
  for (unsigned long i = 0; i < m_map_count; i++)
    if ((*(m_map[i].uri)) == uri)
      {
	m_map[i].delegate = delegate;
	matched = true;
	break;
      }
  if (matched) return true;

  if (m_map == 0)
    {
      m_map = (URI_Map *) malloc (32 * sizeof (URI_Map));
      if (m_map == 0) return false;
      m_map_max = 32;
      m_map_count = 0;
    }
  if (m_map_count == m_map_max)
    {
      URI_Map * more = (URI_Map *) realloc (m_map, (m_map_max + 32) * sizeof (URI_Map));
      if (more == 0) return false;
      m_map_max += 32;
      m_map = more;
    }
  try
    {
      m_map[m_map_count].uri = new UTF8String(uri);
    }
  catch (...)
    {
      m_map[m_map_count].uri = 0;
    }
  if (m_map[m_map_count].uri == 0) return false;

  m_map[m_map_count++].delegate = delegate;
  return true;
}

/* Create the element node for a start tag.
 *
 * If a delegate has been registered for uri (see registerNamespace), the node
 * it creates is returned.  Previously the delegate was called but its result
 * was thrown away and a plain ElementNode was always built as well.
 *
 * Without a delegate a plain ElementNode is constructed; 0 is returned if
 * that construction throws.
 */
UTF8StringTree::ElementNode * UTF8StringTree::ElementFactory::createElement (const UTF8String & ns_default,
                                                                             const UTF8String & prefix,
                                                                             const UTF8String & uri,
                                                                             const char * name,
                                                                             const char * const * atts,
                                                                             const UTF8StringMap & xmlns,
                                                                             const UTF8StringMap & style) const
{
  const XMLNS * delegate = 0;
  for (unsigned long i = 0; i < m_map_count; i++)
    if ((*(m_map[i].uri)) == uri)
      {
        delegate = m_map[i].delegate;
        break;
      }

  /* a registered namespace supplies its own node
   */
  if (delegate) return delegate->createElement (ns_default, prefix, uri, name, atts, xmlns, style);

  ElementNode * node = 0;
  try
    {
      node = new ElementNode(ns_default,prefix,uri,name,atts,xmlns,style);
    }
  catch (...)
    {
      node = 0;
    }
  return node;
}

/* Start with no storage; push() allocates it on first use.
 */
UTF8StringTree::ElementStack::ElementStack () :
  m_stack(0),
  m_stack_size(0),
  m_stack_max(0)
{
}

/* Release the pointer array; the nodes it refers to are not deleted here.
 */
UTF8StringTree::ElementStack::~ElementStack ()
{
  if (m_stack != 0)
    {
      free (m_stack);
    }
}

/* Forget all entries; the allocated storage is kept.
 */
void UTF8StringTree::ElementStack::clear ()
{
  m_stack_size = 0;
}

/* Put node on top of the stack.
 *
 * Storage is allocated on first use and grows 32 slots at a time.  Returns
 * false for a null node or when memory cannot be obtained; node itself is
 * never deleted by this function.
 */
bool UTF8StringTree::ElementStack::push (ElementNode * node)
{
  if (!node) return false;

  if (m_stack == 0)
    {
      ElementNode ** fresh = static_cast<ElementNode **>(malloc (32 * sizeof (ElementNode *)));
      if (fresh == 0) return false;
      m_stack = fresh;
      m_stack_max = 32;
      m_stack_size = 0;
    }
  if (m_stack_size == m_stack_max)
    {
      ElementNode ** grown = static_cast<ElementNode **>(realloc (m_stack, (m_stack_max + 32) * sizeof (ElementNode *)));
      if (grown == 0) return false;
      m_stack = grown;
      m_stack_max += 32;
    }
  m_stack[m_stack_size] = node;
  ++m_stack_size;
  return true;
}

/* Remove and return the topmost node, or 0 if the stack is empty.
 */
UTF8StringTree::ElementNode * UTF8StringTree::ElementStack::pop ()
{
  if (!m_stack || !(m_stack_size > 0)) return 0;

  --m_stack_size;
  return m_stack[m_stack_size];
}

/* Return the topmost node without removing it, or 0 if the stack is empty.
 */
UTF8StringTree::ElementNode * UTF8StringTree::ElementStack::top ()
{
  ElementNode * uppermost = 0;

  if (m_stack && (m_stack_size > 0))
    {
      uppermost = m_stack[m_stack_size-1];
    }
  return uppermost;
}

/* Set up an empty tree.
 *
 * uri     - if non-null, copied as the initial default namespace uri
 * factory - used to create element nodes; when null the shared
 *           ElementFactory::instance () is used instead
 *
 * The tree starts in the stopped state (m_stop is true), so the parser
 * callbacks do nothing until import () clears the flag.
 */
UTF8StringTree::UTF8StringTree (UTF8String * uri, ElementFactory * factory) :
  m_buffer(0),
  m_buffer_length(0),
  m_buffer_max(0),
  m_stop(true),
  m_factory(factory),
  m_node(0),
  m_node_count(0),
  m_node_max(0),
  m_active(0),
  m_tree(0),
  m_current(0)
{
  if (uri != 0)
    {
      m_uri = *uri;
    }
  if (m_factory == 0)
    {
      m_factory = ElementFactory::instance ();
    }
}

/* All clean-up is done by clear ().
 */
UTF8StringTree::~UTF8StringTree ()
{
  clear ();
}

void UTF8StringTree::clear ()
{
  if (m_buffer)
    {
      free (m_buffer);
      m_buffer = 0;
    }
  m_buffer_length = 0;
  m_buffer_max = 0;

  while (m_node_count) delete m_node[--m_node_count];
  if (m_node) free (m_node);
  m_node = 0;
  m_node_max = 0;

  m_xmlns.clear ();
  m_style.clear ();
  m_stack.clear ();
}

/* Parse an XML document from buffer into this tree.
 *
 * buffer, length - passed unchanged to xml_expat_parse
 * error          - preset to a "no parser implemented" message; replaced by
 *                  the parser, by a generic message if an exception escapes
 *                  the parser, or by m_error if a callback stopped the parse
 *
 * Returns false at once if a tree already exists or there is no factory.
 * Returns true only if the parser reported success, no callback set m_stop,
 * and a root element was created; otherwise clear () is called.  Without
 * HAVE_EXPAT no parser is run and the call always fails.
 */
bool UTF8StringTree::import (const char * buffer, int length, UTF8String & error)
{
  if (m_tree || (m_factory == 0)) return false;

  /* built up front so that the catch handler below does not need to create it
   */
  UTF8String exceptionCaught("UTF8StringTree::UTF8StringTree: exception caught while parsing");

  error = UTF8String("UTF8StringTree::UTF8StringTree: no parser implemented!");

  m_stop = false; // enable the parser callbacks

  m_buffer_length = 0;

  bool parsed = false;
#ifdef HAVE_EXPAT
  try
    {
      parsed = xml_expat_parse (*this, buffer, length, error);
    }
  catch (...)
    {
      parsed = false;
      error = exceptionCaught;
    }
#endif
  const bool failed = m_stop || (m_tree == 0) || !parsed;
  if (failed)
    {
      if (m_stop) error = m_error;
      clear ();
    }
  else if (m_buffer)
    {
      /* the scratch buffer is not needed once parsing is over
       */
      free (m_buffer);
      m_buffer = 0;
      m_buffer_length = 0;
      m_buffer_max = 0;
    }

  m_stop = true; // callbacks are inert again

  return !failed;
}

/* Add node to the tree's list of top-level nodes.
 *
 * The tree takes responsibility for node - also on failure, when node is
 * deleted before returning.  The list is allocated on first use and grows
 * 32 slots at a time.  Returns false for a null node or when memory cannot
 * be obtained.
 */
bool UTF8StringTree::_append (Node * node)
{
  if (!node) return false;

  if (m_node == 0)
    {
      Node ** fresh = static_cast<Node **>(malloc (32 * sizeof (Node *)));
      if (fresh == 0)
        {
          delete node;
          return false;
        }
      m_node = fresh;
      m_node_max = 32;
      m_node_count = 0;
    }
  if (m_node_count == m_node_max)
    {
      Node ** grown = static_cast<Node **>(realloc (m_node, (m_node_max + 32) * sizeof (Node *)));
      if (grown == 0)
        {
          delete node;
          return false;
        }
      m_node = grown;
      m_node_max += 32;
    }
  m_node[m_node_count] = node;
  ++m_node_count;
  return true;
}

/* Parser callback: an element start tag.
 *
 * name - element name as it appears in the document (prefix included)
 * atts - null-terminated array of alternating attribute names and values,
 *        or null
 *
 * Pending character/default data is flushed first.  The first element seen
 * becomes the root: it updates the tree-wide namespace map and default
 * namespace and is added to the top-level node list.  Every later element
 * starts from copies of the current element's namespace and style maps and
 * is appended to the current element.  In both cases the new element is
 * pushed on the element stack and becomes the current element.
 *
 * On failure m_stop is set and m_error describes the problem.  The messages
 * for the root and the nested case used to be the wrong way round.
 */
void UTF8StringTree::StartElement (const char * name, const char * const * atts)
{
  if (m_stop) return;

  if (!_flushCharacterData ()) return;
  if (!_flushDefaultData ()) return;

  if (m_tree == 0)
    {
      // this is the first and only element of the tree

      /* update namespace map
       */
      s_xmlns_remap (m_xmlns, atts);

      /* update default namespace
       */
      m_uri = s_xmlns (atts, m_uri);

      /* separate out namespace prefix, if any
       */
      UTF8String tag_prefix;
      UTF8String tag_name;
      UTF8String tag_uri(m_uri);
      if (s_xmlns_prefixed (name, tag_prefix, tag_name))
        {
          const UTF8StringPair * pair = m_xmlns.lookup (tag_prefix);
          if (pair)
            {
              tag_uri = pair->value ();
            }
          else
            {
              // this is bad :-( - undeclared prefix, fall back to well-known ones
              s_xmlns_guess (tag_prefix, tag_uri);
            }
        }
      else if (tag_uri.utf8_length () == 0)
        {
          // this is bad :-(
          /* I have a whole bunch of test SVG images and not all of them have a proper
           * namespace declaration, and I daresay there's bad XHTML and bad MathML out
           * there in the wild also...
           */
          s_xmlns_guess (tag_name, tag_uri);
        }
      if (m_uri.utf8_length () == 0) m_uri = tag_uri;

      /* create root element
       */
      ElementNode * node = m_factory->createElement (m_uri, tag_prefix, tag_uri, name, atts, m_xmlns, m_style);
      if (!_append (node)) // also fails (returns false) when node is null
        {
          m_stop = true;
          m_error = "UTF8StringTree::StartElement: failed to create&append root-node";
          return;
        }
      else
        {
          /* push root element onto stack
           */
          if (!m_stack.push (node))
            {
              m_stop = true;
              m_error = "UTF8StringTree::StartElement: failed to push root-node onto stack";
            }
          else
            {
              /* set tree/current to root element
               */
              m_tree = node;
              m_current = node;
            }
          return;
        }
    }

  if (m_current == 0) // huh??
    {
      m_stop = true;
      m_error = "UTF8StringTree::StartElement: no current node - something's gone wrong";
      return;
    }

  /* nested element: work on copies of the parent's maps
   */
  UTF8StringMap xmlns = m_current->xmlns ();
  UTF8StringMap style = m_current->style ();

  s_xmlns_remap (xmlns, atts);

  UTF8String xmlns_default = m_current->ns_default ();

  xmlns_default = s_xmlns (atts, xmlns_default);

  UTF8String tag_prefix;
  UTF8String tag_name;
  UTF8String tag_uri(xmlns_default);
  if (s_xmlns_prefixed (name, tag_prefix, tag_name))
    {
      const UTF8StringPair * pair = xmlns.lookup (tag_prefix);
      if (pair)
        {
          tag_uri = pair->value ();
        }
      else
        {
          // this is bad :-(
          s_xmlns_guess (tag_prefix, tag_uri);
        }
    }

  ElementNode * node = m_factory->createElement (xmlns_default, tag_prefix, tag_uri, name, atts, xmlns, style);
  if (!m_current->append (node)) // also fails (returns false) when node is null
    {
      m_stop = true;
      m_error = "UTF8StringTree::StartElement: failed to create&append node";
    }
  else
    {
      if (!m_stack.push (node))
        {
          m_stop = true;
          m_error = "UTF8StringTree::StartElement: failed to push node onto stack";
        }
      else m_current = node;
    }
}

/* Parser callback: an element end tag.
 *
 * Flushes pending data, pops the element stack and makes the new stack top
 * (0 once the root has been closed) the current element.  name is not
 * examined.  Popping an empty stack stops the parse with an error.
 */
void UTF8StringTree::EndElement (const char * name)
{
  if (m_stop) return;

  if (!_flushCharacterData () || !_flushDefaultData ()) return;

  if (m_stack.pop () != 0)
    {
      m_current = m_stack.top ();
      return;
    }
  m_stop = true;
  m_error = "UTF8StringTree::EndElement: failed to pop node from stack";
}

void UTF8StringTree::CharacterData (const char * s, int len)
{
  if (m_stop) return;

  if (!_flushDefaultData ()) return;

  if (m_active == 0)
    {
      TextNode * text_node = 0;
      try
	{
	  text_node = new TextNode;
	}
      catch (...)
	{
	  text_node = 0;
	}
      if (text_node == 0)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::CharacterData: failed to create text node";
	  return;
	}
      bool success = true;
      if (m_current)
	success = m_current->append (text_node);
      else
	success = _append (text_node);
      if (!success)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::CharacterData: failed to append node to tree/current";
	  return;
	}
      m_active = text_node;
    }
  _appendData (s, len);
}

/* Flush the buffer if the active node is a text or CDATA node.
 * Returns true when there was nothing to do, otherwise the result of
 * _flushData ().
 */
bool UTF8StringTree::_flushCharacterData ()
{
  if (m_active == 0) return true;

  const bool holds_text = (m_active->type () == nt_text) || (m_active->type () == nt_cdata);
  if (!holds_text) return true;

  return _flushData ();
}

/* Move the buffered data into the active node and deactivate that node.
 *
 * Returns true on success.  If there is buffered data but no active node, or
 * appending throws, m_stop and m_error are set and false is returned.
 *
 * An empty buffer also deactivates the node.  Previously the node stayed
 * active in that case, so e.g. text following an empty CDATA section, or
 * default data following a zero-length text run, was added to the old node.
 */
bool UTF8StringTree::_flushData ()
{
  if (m_buffer_length == 0)
    {
      m_active = 0; // nothing to hand over, but this node is finished with
      return true;
    }

  if (m_active == 0)
    {
      m_stop = true;
      m_error = "UTF8StringTree::_flushData: no active node - something wrong here";
      return false;
    }
  bool success = true;
  try
    {
      /* _appendData keeps the buffer zero-terminated
       */
      m_active->append (UTF8String(m_buffer));
    }
  catch (...)
    {
      success = false;
    }
  if (!success)
    {
      m_stop = true;
      m_error = "UTF8StringTree::_flushData: failed to append data to active node";
    }
  else
    {
      m_buffer_length = 0;
      m_active = 0;
    }
  return success;
}

/* Add len bytes from s to the data buffer, keeping it zero-terminated.
 *
 * A null s or non-positive len is ignored (returns true).  The buffer is
 * allocated on first use and enlarged in steps of at least 128 bytes.
 * Returns false, with m_stop and m_error set, if there is no active node or
 * memory cannot be obtained.
 */
bool UTF8StringTree::_appendData (const char * s, int len)
{
  if ((s == 0) || (len <= 0)) return true; // weird, but let's ignore it

  if (m_active == 0)
    {
      m_stop = true;
      m_error = "UTF8StringTree::_appendData: no active node - something wrong here";
      return false;
    }

  /* allocation step: room for the data plus the terminator, 128 at the least
   */
  const unsigned long step = static_cast<unsigned long>((len < 128) ? 128 : (len + 1));

  if (m_buffer == 0)
    {
      char * fresh = static_cast<char *>(malloc (step));
      if (fresh == 0)
        {
          m_stop = true;
          m_error = "UTF8StringTree::_appendData: insufficient memory";
          return false;
        }
      m_buffer = fresh;
      m_buffer_length = 0;
      m_buffer_max = step;
    }
  if (m_buffer_length + len + 1 > m_buffer_max)
    {
      char * grown = static_cast<char *>(realloc (m_buffer, m_buffer_max + step));
      if (grown == 0)
        {
          m_stop = true;
          m_error = "UTF8StringTree::_appendData: insufficient memory";
          return false;
        }
      m_buffer = grown;
      m_buffer_max += step;
    }

  char * tail = m_buffer + m_buffer_length;
  memcpy (tail, s, len);

  m_buffer_length += len;
  m_buffer[m_buffer_length] = 0;

  return true;
}

void UTF8StringTree::ProcessingInstruction (const char * target, const char * data)
{
  if (m_stop) return;

  if (!_flushCharacterData ()) return;
  if (!_flushDefaultData ()) return;

  PINode * pi_node = 0;
  try
    {
      pi_node = new PINode(target);
    }
  catch (...)
    {
      pi_node = 0;
    }
  if (pi_node == 0)
    {
      m_stop = true;
      m_error = "UTF8StringTree::ProcessingInstruction: failed to create PI node";
      return;
    }
  bool success = true;
  try
    {
      pi_node->append (UTF8String(data));
    }
  catch (...)
    {
      success = false;
    }
  if (!success)
    {
      delete pi_node;
      m_stop = true;
      m_error = "UTF8StringTree::ProcessingInstruction: failed to append data/text to PI node";
      return;
    }
  if (m_current)
    success = m_current->append (pi_node);
  else
    success = _append (pi_node);
  if (!success)
    {
      m_stop = true;
      m_error = "UTF8StringTree::ProcessingInstruction: failed to append node to tree/current";
    }
}

void UTF8StringTree::Comment (const char * data)
{
  if (m_stop) return;

  if (!_flushCharacterData ()) return;
  if (!_flushDefaultData ()) return;

  CommentNode * comment_node = 0;
  try
    {
      comment_node = new CommentNode;
    }
  catch (...)
    {
      comment_node = 0;
    }
  if (comment_node == 0)
    {
      m_stop = true;
      m_error = "UTF8StringTree::Comment: failed to create comment node";
      return;
    }
  bool success = true;
  try
    {
      comment_node->append (UTF8String(data));
    }
  catch (...)
    {
      success = false;
    }
  if (!success)
    {
      delete comment_node;
      m_stop = true;
      m_error = "UTF8StringTree::Comment: failed to append data/text to comment node";
      return;
    }
  if (m_current)
    success = m_current->append (comment_node);
  else
    success = _append (comment_node);
  if (!success)
    {
      m_stop = true;
      m_error = "UTF8StringTree::Comment: failed to append node to tree/current";
    }
}

void UTF8StringTree::StartCdataSection ()
{
  if (m_stop) return;

  if (!_flushCharacterData ()) return;
  if (!_flushDefaultData ()) return;

  if (m_active == 0)
    {
      CDATANode * cdata_node = 0;
      try
	{
	  cdata_node = new CDATANode;
	}
      catch (...)
	{
	  cdata_node = 0;
	}
      if (cdata_node == 0)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::StartCdataSection: failed to create cdata node";
	  return;
	}
      bool success = true;
      if (m_current)
	success = m_current->append (cdata_node);
      else
	success = _append (cdata_node);
      if (!success)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::StartCdataSection: failed to append node to tree/current";
	  return;
	}
      m_active = cdata_node;
    }
}

/* Parser callback: end of a CDATA section - hand the collected data over to
 * the active text/CDATA node.  Errors are recorded by the flush itself.
 */
void UTF8StringTree::EndCdataSection ()
{
  if (!m_stop)
    {
      _flushCharacterData ();
    }
}

void UTF8StringTree::Default (const char * s, int len)
{
  if (m_stop) return;

  if (!_flushCharacterData ()) return;

  if (m_active == 0)
    {
      DefaultNode * default_node = 0;
      try
	{
	  default_node = new DefaultNode;
	}
      catch (...)
	{
	  default_node = 0;
	}
      if (default_node == 0)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::Default: failed to create default node";
	  return;
	}
      bool success = true;
      if (m_current)
	success = m_current->append (default_node);
      else
	success = _append (default_node);
      if (!success)
	{
	  m_stop = true;
	  m_error = "UTF8StringTree::Default: failed to append node to tree/current";
	  return;
	}
      m_active = default_node;
    }
  _appendData (s, len);
}

/* Flush the buffer if the active node is a default node.
 * Returns true when there was nothing to do, otherwise the result of
 * _flushData ().
 */
bool UTF8StringTree::_flushDefaultData ()
{
  if (m_active == 0) return true;

  if (m_active->type () != nt_default) return true;

  return _flushData ();
}

/* Parser callback: an unparsed entity declaration.
 * Not implemented yet - the declaration is ignored.
 */
void UTF8StringTree::UnparsedEntityDecl (const char * entityName,
                                         const char * base,
                                         const char * systemId,
                                         const char * publicId,
                                         const char * notationName)
{
  // TODO
  if (m_stop) return;
}

/* Parser callback: a notation declaration.
 * Not implemented yet - the declaration is ignored.
 */
void UTF8StringTree::NotationDecl (const char * notationName,
                                   const char * base,
                                   const char * systemId,
                                   const char * publicId)
{
  // TODO
  if (m_stop) return;
}

/* Parser callback for a document that is not standalone: answers 1 while the
 * parse is running and 0 once it has been stopped.
 */
int UTF8StringTree::NotStandalone ()
{
  return m_stop ? 0 : 1;
}
