/* libeXtra/utils/utf8test.cpp
 *
 * Copyright (C) 2002 Francis James Franklin <fjf@alinameridon.com>
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <cstdio>
#include <cstring>

#include "utf8string.h"
#include "ucs4string.h"

// Test drivers defined later in this file; main() calls them in this order.
void utf8test ();
void ucs4test ();

/* Program entry point: runs the UTF-8 tests, then the UCS-4 tests.
 * Command-line arguments are ignored; the exit status is always 0.
 */
int main (int, char **)
{
  utf8test ();
  ucs4test ();
  return 0;
}

void utf8test ()
{
  fprintf (stdout, "===========\n");
  fprintf (stdout, "UTF-8 Tests\n");
  fprintf (stdout, "===========\n");
  fprintf (stdout, "\n");

  UTF8String empty;

  UTF8String hello("Hello,");
  UTF8String world("world!");

  UTF8String h2 = hello;
  h2 += " ";

  UTF8String h2w = h2 + world;

  UTF8String am = "Add before: " + h2w;
  UTF8String pm = h2w + " - add after.";

  UTF8String ampm = "Add before: " + h2w + " - and also add after.";

  UTF8String um(""); // iso not utf
  UTF8String laut = "\"P en grn ng\" means 'On a green meadow'"; // iso not utf
  UTF8String utf8a("äöü");
  UTF8String utf8b = "På en grön äng";

  fprintf (stdout, "empty: '%s'\n", empty.utf8_str ());
  fprintf (stdout, "hello: '%s'\n", hello.utf8_str ());
  fprintf (stdout, "world: '%s'\n", world.utf8_str ());
  fprintf (stdout, "   h2: '%s'\n",    h2.utf8_str ());
  fprintf (stdout, "  h2w: '%s'\n",   h2w.utf8_str ());
  fprintf (stdout, "   am: '%s'\n",    am.utf8_str ());
  fprintf (stdout, "   pm: '%s'\n",    pm.utf8_str ());
  fprintf (stdout, " ampm: '%s'\n",  ampm.utf8_str ());
  fprintf (stdout, "   um: '%s'\n",    um.utf8_str ());
  fprintf (stdout, " laut: '%s'\n",  laut.utf8_str ());
  fprintf (stdout, "utf8a: '%s'\n", utf8a.utf8_str ());
  fprintf (stdout, "utf8b: '%s'\n", utf8b.utf8_str ());

  fprintf (stdout, "\n");

  const UTF8String::const_iterator ci_end = utf8b.end ();
  for (UTF8String::const_iterator ci = utf8b.begin (); ci != ci_end; ++ci)
    {
      const char * utf8str = &ci;
      int seql = UTF8String::sequence_length (utf8str);
      for (int i = 0; i < seql; i++) fprintf (stdout, "'%c'", utf8str[i]);
      fprintf (stdout, "[%d]", seql);
    }
  fprintf (stdout, "\n");

  fprintf (stdout, "\n");

  UTF8String::UCS4Char junk[] = {
    0x00000043,
    0x00000430,
    0x000049f1,
    0x0007be33,
    0x00cb0d37,
    0x24b21f3a,
    0x6f8f6fab,
    0x00000000
  };
  UTF8String ucs4(junk);
  fprintf (stdout, " ucs4: '%s'\n",  ucs4.utf8_str ());
  fprintf (stdout, "codes: '%d'\n",  ucs4.utf8_length ());
  fprintf (stdout, "bytes: '%d'\n",  ucs4.byte_length ());

  const UTF8String::UCS4Char * back = ucs4.ucs4_str ();
  for (int i = 0; i < ucs4.utf8_length (); i++)
    {
      fprintf (stdout, " [0x%08lx,0x%08lx]", (unsigned long) junk[i], (unsigned long) back[i]);
      fprintf (stdout, " (%d)\n", UTF8String::sequence_length (back[i]));
    }

  fprintf (stdout, "\n");

  UTF8String i_lt = "<";
  UTF8String o_lt = "&lt;";
  UTF8String i_gt = ">";
  UTF8String o_gt = "&gt;";
  UTF8String i_amp = "&";
  UTF8String o_amp = "&amp;";
  UTF8String::UTF8StringPair pairs[4];
  pairs[0].str1 = &i_lt;
  pairs[0].str2 = &o_lt;
  pairs[1].str1 = &i_gt;
  pairs[1].str2 = &o_gt;
  pairs[2].str1 = &i_amp;
  pairs[2].str2 = &o_amp;
  pairs[3].str1 = 0;
  pairs[3].str2 = 0;
  UTF8String in = "<?xml version=\"1.0\"?>\n<html>\n<body>\n<h1>Search &amp; Replace</h1>\n</body>\n</html>\n";
  UTF8String out = UTF8String::substring_replace (in, pairs);

  fprintf (stdout, "   in: '%s'\n",    in.utf8_str ());
  fprintf (stdout, "  out: '%s'\n",   out.utf8_str ());

  fprintf (stdout, "\n");
}

void ucs4test ()
{
  fprintf (stdout, "===========\n");
  fprintf (stdout, "UCS-4 Tests\n");
  fprintf (stdout, "===========\n");
  fprintf (stdout, "\n");

  UCS4String s1("Once upon a time");
  UCS4String s2("in a galaxy populated by a race of beings called dochits (pron. doe-chits not do-kits)");
  UCS4String s3 = "who looked basically human but they averaged only 1m tall.";
  UCS4String s4 = s1 + ", " + s2 + " " + s3;

  fprintf (stdout, "s1: '%s'\n", s1.utf8_str ());
  fprintf (stdout, "s2: '%s'\n", s2.utf8_str ());
  fprintf (stdout, "s3: '%s'\n", s3.utf8_str ());
  fprintf (stdout, "s4: '%s'\n", s4.utf8_str ());

  UCS4String s5 = s3;
  UCS4String::iterator ci = s5.begin ();
  while (ci != s5.end ())
    {
      if (*ci == UCS4String::UCS4Cast ('u'))
	{
	  s5.set (ci, UCS4String::UCS4Cast ('v'));
	}
      ++ci;
    }
  fprintf (stdout, "s3: '%s'\n", s3.utf8_str ());
  fprintf (stdout, "s5: '%s'\n", s5.utf8_str ());

  UCS4String s6 = s5;
  ci = s6.begin ();
  while (ci != s6.end ())
    {
      if (*ci == UCS4String::UCS4Cast ('v'))
	{
	  s6.del (ci, 1);
	}
      else ++ci;
    }
  fprintf (stdout, "s6: '%s'\n", s6.utf8_str ());

  UCS4String s8 = "Ah!";
  UCS4String s7 = s5;
  ci = s7.begin ();
  while (ci != s7.end ())
    {
      if (*ci == UCS4String::UCS4Cast ('a'))
	{
	  s7.del (ci, 1);
	  s7.ins (ci, s8);
	  ci += s8.ucs4_length ();
	}
      else ++ci;
    }
  fprintf (stdout, "s7: '%s'\n", s7.utf8_str ());

  UCS4String s9 = s7;
  ci = s9.begin ();
  while (ci.ucs4_strstr (s8) != s9.end ())
    {
      s9.del (ci, 2);
    }
  fprintf (stdout, "s9: '%s'\n", s9.utf8_str ());
  fprintf (stdout, "s7: '%s'\n", s7.utf8_str ());
  fprintf (stdout, "s5: '%s'\n", s5.utf8_str ());

  UCS4String v1 = "few";
  UCS4String v2 = "many";
  if (v1 < v2) fprintf (stdout, "vv: few < many\n");
  if (v2 < v1) fprintf (stdout, "vv: many < few\n");
}
