/* libeXtra/utils/ucs4string.h
 *
 * Copyright (C) 2002 Francis James Franklin <fjf@alinameridon.com>
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef LIBEXTRA_UCS4STRING_H
#define LIBEXTRA_UCS4STRING_H

#ifndef LIBEXTRA_UTF8STRING_H
#include "utf8string.h"
#endif

/* UCS4String - a string of UCS-4 code points.
 *
 * The string data is held by a UCS4StringValue object (which carries a
 * reference count) reached through m_value. Positions within the string
 * are expressed with UCS4String::iterator objects.
 */
class UCS4String
{
 public:
  typedef UTF8String::UCS4Char UCS4Char;

  /* forwards to UTF8String::UCS4Cast()
   */
  static UCS4Char UCS4Cast (char c) { return UTF8String::UCS4Cast (c); }

  class iterator; // forward declaration - see below

 private:
  /* Holder for the UCS-4 buffer, its lengths and a reference count.
   *
   * NOTE(review): this class has a destructor and a raw buffer pointer but
   * declares no copy constructor or assignment operator - presumably it is
   * never copied; confirm against the implementation.
   */
  class UCS4StringValue
    {
    public:
      UCS4StringValue (const char * utf8str);
      UCS4StringValue (const UCS4Char * ucs4str, int length); // length=0 indicates 0-termination
      UCS4StringValue (const iterator & ci_start, const iterator & ci_end);

      ~UCS4StringValue ();

      void clear (); // set ucs4/bytelength to 0

      /* watch out for multiple references!
       */
      bool append (const char * utf8str);
      bool append (const UCS4Char * ucs4str, int length); // length=0 indicates 0-termination
      bool append (const iterator & ci_start, const iterator & ci_end);

      /* watch out for multiple references!
       * 
       * since ins() may need to realloc() the internal UCS-4 string buffer, the reference
       * insertion iterator (ci) may need to be updated - ins() will therefore update this.
       */
      bool ins (iterator & ci, const iterator & ci_start, const iterator & ci_end);

      /* watch out for multiple references!
       * 
       * although the reference deletion iterator (ci) will remain valid, other iterators may
       * be affected and perhaps even invalidated - use with extreme caution!
       */
      bool del (const iterator & ci, int ucs4length);

      /* watch out for multiple references!
       */
      bool set (const iterator & ci, UCS4Char ucs4);

      /* current reference count
       */
      int count () const { return m_count; }

      /* ref() increments and unref() decrements the reference count;
       * both return the new count.
       */
      int ref ()   { return ++m_count; }
      int unref () { return --m_count; }

      const UCS4Char * ucs4_str () const { return m_ucs4str; }

      int ucs4_length () const { return m_ucs4length; }
      int byte_length () const { return m_bytelength; }

      /* non-const: presumably refreshes m_utf8str before returning it - confirm
       * against the implementation
       */
      const char * utf8_str ();

    private:
      int m_count; // reference count

      int m_ucs4length; // number of code points
      int m_bytelength; // length in bytes
      int m_bytemaxlen; // buffer size

      UCS4Char * m_ucs4str;

      UTF8String m_utf8str;

      bool grow (int size = 16);
    };

  UCS4StringValue * m_value;

 public:
  /* WARNING: Do *not* perform any non-const operations on a string while
   *          using iterators. The results are undefined and probably
   *          catastrophic!
   */
  class iterator
    {
    private:
      const UCS4StringValue * m_value;

      UCS4Char * m_ucs4ptr;

      /* The iterator belongs to a particular string; set_ucs4ptr()
       * sets pointer position, which must lie within the current string.
       * The terminating 0 is valid.
       */
      void set_ucs4ptr (const UCS4Char * ucs4ptr); // throws std::out_of_range

    public:
      /* calls set_ucs4ptr() - throws std::out_of_range
       */
      iterator (const UCS4StringValue * value, const UCS4Char * ucs4ptr);

      /* Contents of UCS4String should be validated UCS-4: direct writing is not allowed!
       *
       * operator*() yields the code point at the current position (by value).
       * operator&() is overloaded to yield the current position within the string;
       * consequently "&it" is NOT the address of the iterator object itself.
       */
      inline UCS4Char operator* () const { return *m_ucs4ptr; }
      inline const UCS4Char * operator& () const { return m_ucs4ptr; }

      iterator & operator+= (int adv); // throws std::out_of_range
      iterator & operator-= (int ret); // throws std::out_of_range

      iterator & operator++ (); // throws std::out_of_range
      iterator & operator-- (); // throws std::out_of_range

      /* The postfix forms step the iterator via the prefix operators (declared
       * above as throwing std::out_of_range) and return a copy taken before
       * the step.
       */
      inline const iterator operator++ (int /* */) // postfix operator
        {
          const iterator ci = *this;
          ++(*this);
          return ci;
        }
      inline const iterator operator-- (int /* */) // postfix operator
        {
          const iterator ci = *this;
          --(*this);
          return ci;
        }

      /* compare the string sequence between ci_start and (optionally inclusive) ci_end
       * with *this; returns the difference using memcmp().
       * 
       * throws std::out_of_range
       */
    private:
      int string_compare (const iterator & ci_start, const iterator & ci_end, bool end_inclusive) const;
    public:
      /* both compare against the whole of str, i.e. str.begin () to str.end ();
       * ucs4_strncmp() excludes the end position, ucs4_strcmp() includes it
       */
      inline int ucs4_strncmp (const UCS4String & str) const { return string_compare (str.begin (), str.end (), false); }
      inline int ucs4_strcmp  (const UCS4String & str) const { return string_compare (str.begin (), str.end (), true ); }

      /* NOTE: unlike the int-returning overload above, this one returns a match flag
       */
      bool ucs4_strcmp (const UCS4Char * str) const; // returns true if the strings match

      /* iterator advances self to str; or to end() if str not found
       * returns self-reference
       */
      const iterator & ucs4_strstr (const UCS4String & str);
    };
  /* The official way to get an iterator is to ask the string in question for
   * one, using begin(), end(), or offset():
   * 
   *   UCS4String ucs4 = "Hello, World!";
   *   UCS4String::iterator ci = ucs4.begin ();
   */
  inline const iterator begin () const
    {
      return iterator (m_value, m_value->ucs4_str ());
    }
  /* end() refers to the position ucs4_length () code points past the start
   */
  inline const iterator end () const
    {
      return iterator (m_value, m_value->ucs4_str () + m_value->ucs4_length ());
    }
  inline const iterator offset (const UCS4Char * ucs4ptr) const // throws std::out_of_range
    {
      return iterator (m_value, ucs4ptr);
    }
  /* NOTE: yields an iterator at code point index i, not the code point itself
   */
  inline const iterator operator[] (int i) const // throws std::out_of_range
    {
      return iterator (m_value, m_value->ucs4_str () + i);
    }

  UCS4String (const char * utf8str = 0);                 // if 0, create empty string ""
  UCS4String (const UCS4Char * ucs4str, int length = 0); // length=0 indicates 0-termination
  UCS4String (const UCS4String & rhs);                   // copy constructor

  ~UCS4String ();

  UCS4String & operator= (const char * utf8str);   // if 0, create empty string ""
  UCS4String & operator= (const UTF8String & rhs);
  UCS4String & operator= (const UCS4String & rhs);
  UCS4String & operator= (const UCS4Char * ucs4str);

  UCS4String & operator+= (const char * utf8str);  // if 0, assume empty string ""
  UCS4String & operator+= (const UTF8String & rhs);
  UCS4String & operator+= (const UCS4String & rhs);
  UCS4String & operator+= (const UCS4Char * ucs4str);

  UCS4String & operator+= (UCS4Char ucs4);

  /* NOTE: Best to append using iterator, if it's an option.
   *
   * The private (const UCS4Char *, bool) overload deliberately has no default
   * argument: if both it and the public (const UCS4Char *, int) overload
   * defaulted their second parameter, a one-argument append (ucs4str) call
   * would be ambiguous. With only the public default in place, such a call
   * selects the length overload with length=0 (0-terminated input).
   */
 private:
  bool append (const char * utf8str, bool clear = false); // internal use only
  bool append (const UCS4Char * ucs4str, bool clear);     // internal use only
 public:
  bool append (const UCS4Char * ucs4str, int length = 0); // length=0 indicates 0-termination
  bool append (const iterator & ci_start, const iterator & ci_end);

  /* the reference insertion iterator (ci) will be updated if necessary;
   * other iterators may be affected - use with extreme caution!
   */
  bool ins (iterator & ci, const iterator & ci_start, const iterator & ci_end);
  bool ins (iterator & ci, const UCS4String & str) { return ins (ci, str.begin (), str.end ()); }

  /* the reference deletion iterator (ci) will be updated if necessary;
   * other iterators may be affected - use with extreme caution!
   */
  bool del (iterator & ci, int ucs4length = 1);

  /* the reference iterator (ci) will be updated if necessary;
   * other iterators may be affected - use with extreme caution!
   */
  bool set (iterator & ci, UCS4Char ucs4);

  /* The returned pointer is valid until the next non-const operation.
   * You will _always_ get a legal pointer back, even if to an empty (0) string.
   * (Shouldn't be 0 - unless we're suffering major memory-allocation problems!)
   */
  const UCS4Char * ucs4_str () const { return m_value->ucs4_str (); }

  /* length of string in codes/sequences
   */
  int ucs4_length () const { return m_value->ucs4_length (); }

  /* length of string in bytes
   */
  int byte_length () const { return m_value->byte_length (); }

  /* The returned pointer is valid until the next non-const operation.
   * You will _always_ get a legal pointer back, even if to an empty (0) string.
   * (Shouldn't be 0 - unless we're suffering major memory-allocation problems!)
   */
  const char * utf8_str () { return m_value->utf8_str (); }
};

/* concatenation: returns a new string holding s1 followed by s2
 */
inline UCS4String operator+ (const UCS4String & s1, const UCS4String & s2)
{
  UCS4String joined(s1); // start from a copy of the left operand

  joined += s2;

  return joined;
}

/* concatenation: returns a new string holding s1 followed by the UTF-8 string s2
 */
inline UCS4String operator+ (const UCS4String & s1, const char * s2)
{
  UCS4String joined(s1); // start from a copy of the left operand

  joined += s2;

  return joined;
}

/* concatenation: returns a new string built from the UTF-8 string s1, followed by s2
 */
inline UCS4String operator+ (const char * s1, const UCS4String & s2)
{
  UCS4String joined(s1); // construct the left operand from UTF-8

  joined += s2;

  return joined;
}

/* ordering: true if comparing s1 (from its start) with s2 gives a negative result
 */
inline bool operator< (const UCS4String & s1, const UCS4String & s2)
{
  const int difference = s1.begin ().ucs4_strcmp (s2);

  return difference < 0;
}

/* equality: true if comparing s1 (from its start) with s2 gives no difference
 */
inline bool operator== (const UCS4String & s1, const UCS4String & s2)
{
  const int difference = s1.begin ().ucs4_strcmp (s2);

  return difference == 0;
}
/* inequality: true if comparing s1 (from its start) with s2 gives a non-zero difference
 */
inline bool operator!= (const UCS4String & s1, const UCS4String & s2)
{
  const int difference = s1.begin ().ucs4_strcmp (s2);

  return difference != 0;
}

/* equality against a raw UCS-4 string; the pointer overload of
 * iterator::ucs4_strcmp() reports a match as true
 */
inline bool operator== (const UCS4String & s1, const UCS4String::UCS4Char * s2)
{
  const bool matches = s1.begin ().ucs4_strcmp (s2);

  return matches;
}
/* inequality against a raw UCS-4 string; the pointer overload of
 * iterator::ucs4_strcmp() reports a match as true, so negate it
 */
inline bool operator!= (const UCS4String & s1, const UCS4String::UCS4Char * s2)
{
  const bool matches = s1.begin ().ucs4_strcmp (s2);

  return !matches;
}

/* equality with the raw UCS-4 string on the left; the comparison is
 * driven from the start of the UCS4String operand (s2)
 */
inline bool operator== (const UCS4String::UCS4Char * s1, const UCS4String & s2)
{
  const bool matches = s2.begin ().ucs4_strcmp (s1);

  return matches;
}
/* inequality with the raw UCS-4 string on the left; the comparison is
 * driven from the start of the UCS4String operand (s2)
 */
inline bool operator!= (const UCS4String::UCS4Char * s1, const UCS4String & s2)
{
  const bool matches = s2.begin ().ucs4_strcmp (s1);

  return !matches;
}

/* iterators differ when they refer to different positions; iterator's
 * overloaded operator&() supplies the position pointer being compared
 * (only the pointers are compared here, not the owning strings)
 */
inline bool operator!= (const UCS4String::iterator & ci1, const UCS4String::iterator & ci2)
{
  const UCS4String::UCS4Char * lhs_position = ci1.operator& ();
  const UCS4String::UCS4Char * rhs_position = ci2.operator& ();

  return lhs_position != rhs_position;
}
/* iterators are equal when they refer to the same position; iterator's
 * overloaded operator&() supplies the position pointer being compared
 * (only the pointers are compared here, not the owning strings)
 */
inline bool operator== (const UCS4String::iterator & ci1, const UCS4String::iterator & ci2)
{
  const UCS4String::UCS4Char * lhs_position = ci1.operator& ();
  const UCS4String::UCS4Char * rhs_position = ci2.operator& ();

  return lhs_position == rhs_position;
}

#endif /* ! LIBEXTRA_UCS4STRING_H */
