/*
 * charset.c  -  Character mapping routines
 *
 * Copyright (C) 2006-2007 Gero Kuhlmann <gero@gkminix.han.de>
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: charset.c,v 1.5 2007/01/06 18:31:38 gkminix Exp $
 */

#include <common.h>
#include <nblib.h>
#include "privlib.h"



/*
 * Maximum character number in a conversion table. The full-size tables
 * in this file have CONV_MAX + 1 entries, and the lookup macros treat
 * any character code above CONV_MAX as not convertible.
 */
#define CONV_MAX	255



/*
 * Character to represent invalid chars in input stream. There is one
 * replacement code per character set; each value is the code of the
 * named glyph within that particular set.
 */
#define INVALID_PC	168		/* inverted question mark */
#define INVALID_ASCII	126		/* tilde */
#define INVALID_LATIN1	191		/* inverted question mark */
#define INVALID_LATIN9	191		/* inverted question mark */
#define INVALID_EBCDIC	171		/* inverted question mark */



/*
 * Storage to collect multibyte characters. This is the state kept by
 * charcollect() between calls:
 *   collchar  - payload bits gathered so far from the current sequence
 *   collcount - number of follow-bytes still expected (0 = idle)
 */
static hchar_t collchar = 0;
static int collcount = 0;




/*
 ****************************************************************************
 *
 *	Latin-1 conversion tables
 *
 ****************************************************************************
 */


/*
 * Convert a latin1 character into the PC character set
 *
 * The table is indexed by the latin1 code (0 .. CONV_MAX) and yields the
 * PC code. Codes 0 - 127 map onto themselves except for 20 and 21.
 * Together with pc_latin1_table below it forms a pair of inverse
 * permutations, so latin1 codes without a PC glyph still get a unique
 * PC code. (The PC set is presumably IBM code page 437 - TODO confirm.)
 */
static pcchar_t const latin1_pc_table[CONV_MAX + 1] = {
      0,   1,   2,   3,   4,   5,   6,   7,	/*   0 -   7  */
      8,   9,  10,  11,  12,  13,  14,  15,	/*   8 -  15  */
     16,  17,  18,  19, 182, 186,  22,  23,	/*  16 -  23  */
     24,  25,  26,  27,  28,  29,  30,  31,	/*  24 -  31  */
     32,  33,  34,  35,  36,  37,  38,  39,	/*  32 -  39  */
     40,  41,  42,  43,  44,  45,  46,  47,	/*  40 -  47  */
     48,  49,  50,  51,  52,  53,  54,  55,	/*  48 -  55  */
     56,  57,  58,  59,  60,  61,  62,  63,	/*  56 -  63  */
     64,  65,  66,  67,  68,  69,  70,  71,	/*  64 -  71  */
     72,  73,  74,  75,  76,  77,  78,  79,	/*  72 -  79  */
     80,  81,  82,  83,  84,  85,  86,  87,	/*  80 -  87  */
     88,  89,  90,  91,  92,  93,  94,  95,	/*  88 -  95  */
     96,  97,  98,  99, 100, 101, 102, 103,	/*  96 - 103  */
    104, 105, 106, 107, 108, 109, 110, 111,	/* 104 - 111  */
    112, 113, 114, 115, 116, 117, 118, 119,	/* 112 - 119  */
    120, 121, 122, 123, 124, 125, 126, 127,	/* 120 - 127  */
    199, 252, 233, 226, 228, 224, 229, 231,	/* 128 - 135  */
    234, 235, 232, 239, 238, 236, 196, 197,	/* 136 - 143  */
    201, 181, 198, 244, 247, 242, 251, 249,	/* 144 - 151  */
    223, 214, 220, 243, 183, 209, 158, 159,	/* 152 - 159  */
    255, 173, 155, 156, 177, 157, 188,  21,	/* 160 - 167  */
    191, 169, 166, 174, 170, 237, 189, 187,	/* 168 - 175  */
    248, 241, 253, 179, 180, 230,  20, 250,	/* 176 - 183  */
    184, 185, 167, 175, 172, 171, 190, 168,	/* 184 - 191  */
    192, 193, 194, 195, 142, 143, 146, 128,	/* 192 - 199  */
    200, 144, 202, 203, 204, 205, 206, 207,	/* 200 - 207  */
    208, 165, 210, 211, 212, 213, 153, 215,	/* 208 - 215  */
    216, 217, 218, 219, 154, 221, 222, 225,	/* 216 - 223  */
    133, 160, 131, 227, 132, 134, 145, 135,	/* 224 - 231  */
    138, 130, 136, 137, 141, 161, 140, 139,	/* 232 - 239  */
    240, 164, 149, 162, 147, 245, 148, 246,	/* 240 - 247  */
    176, 151, 163, 150, 129, 178, 254, 152 	/* 248 - 255  */
};

/* Look up a latin1 code in latin1_pc_table; codes above CONV_MAX give INVALID_PC */
#define latin1_pc(c)	((c) <= CONV_MAX ? \
				(latin1_pc_table[(c)] & 0xff) : INVALID_PC)



/*
 * Convert a PC character into the latin1 character set
 *
 * The table is indexed by the PC code (0 .. CONV_MAX) and yields the
 * latin1 code. It is the inverse permutation of latin1_pc_table above,
 * so converting back and forth returns the original code.
 */
static hchar_t const pc_latin1_table[CONV_MAX + 1] = {
      0,   1,   2,   3,   4,   5,   6,   7,	/*   0 -   7  */
      8,   9,  10,  11,  12,  13,  14,  15,	/*   8 -  15  */
     16,  17,  18,  19, 182, 167,  22,  23,	/*  16 -  23  */
     24,  25,  26,  27,  28,  29,  30,  31,	/*  24 -  31  */
     32,  33,  34,  35,  36,  37,  38,  39,	/*  32 -  39  */
     40,  41,  42,  43,  44,  45,  46,  47,	/*  40 -  47  */
     48,  49,  50,  51,  52,  53,  54,  55,	/*  48 -  55  */
     56,  57,  58,  59,  60,  61,  62,  63,	/*  56 -  63  */
     64,  65,  66,  67,  68,  69,  70,  71,	/*  64 -  71  */
     72,  73,  74,  75,  76,  77,  78,  79,	/*  72 -  79  */
     80,  81,  82,  83,  84,  85,  86,  87,	/*  80 -  87  */
     88,  89,  90,  91,  92,  93,  94,  95,	/*  88 -  95  */
     96,  97,  98,  99, 100, 101, 102, 103,	/*  96 - 103  */
    104, 105, 106, 107, 108, 109, 110, 111,	/* 104 - 111  */
    112, 113, 114, 115, 116, 117, 118, 119,	/* 112 - 119  */
    120, 121, 122, 123, 124, 125, 126, 127,	/* 120 - 127  */
    199, 252, 233, 226, 228, 224, 229, 231,	/* 128 - 135  */
    234, 235, 232, 239, 238, 236, 196, 197,	/* 136 - 143  */
    201, 230, 198, 244, 246, 242, 251, 249,	/* 144 - 151  */
    255, 214, 220, 162, 163, 165, 158, 159,	/* 152 - 159  */
    225, 237, 243, 250, 241, 209, 170, 186,	/* 160 - 167  */
    191, 169, 172, 189, 188, 161, 171, 187,	/* 168 - 175  */
    248, 164, 253, 179, 180, 145,  20, 156,	/* 176 - 183  */
    184, 185,  21, 175, 166, 174, 190, 168,	/* 184 - 191  */
    192, 193, 194, 195, 142, 143, 146, 128,	/* 192 - 199  */
    200, 144, 202, 203, 204, 205, 206, 207,	/* 200 - 207  */
    208, 157, 210, 211, 212, 213, 153, 215,	/* 208 - 215  */
    216, 217, 218, 219, 154, 221, 222, 152,	/* 216 - 223  */
    133, 223, 131, 227, 132, 134, 181, 135,	/* 224 - 231  */
    138, 130, 136, 137, 141, 173, 140, 139,	/* 232 - 239  */
    240, 177, 149, 155, 147, 245, 247, 148,	/* 240 - 247  */
    176, 151, 183, 150, 129, 178, 254, 160 	/* 248 - 255  */
};

/*
 * Look up a PC code in pc_latin1_table. The argument is masked to eight
 * bits, so the index can never leave the table. The parameter is fully
 * parenthesized so that compound argument expressions are masked as a whole.
 */
#define pc_latin1(c)	(pc_latin1_table[(int)((c) & 0xff)])



/*
 * Convert a latin1 character into upper case. Only the codes 97 - 122
 * and 224 - 254 (without 247, the division sign) are changed; they are
 * moved down by 32. Everything else is returned as it is. The argument
 * is evaluated more than once.
 */
#define latin1_upper(c)	((c) < 97 || ((c) > 122 && (c) < 224) || \
				(c) > 254 || (c) == 247 ? \
					(c) : (hchar_t)((c) - 32))



/*
 * Convert a latin1 character into lower case. The codes 65 - 90 and
 * 192 - 222 are moved up by 32. Code 215 (the multiplication sign) lies
 * inside the second range but is not a letter, so it is left alone;
 * without that exception it would turn into 247, the division sign.
 * Everything else is returned as it is. The argument is evaluated more
 * than once.
 */
#define latin1_lower(c)	(((c) >= 65 && (c) <= 90) || \
				((c) >= 192 && (c) <= 222 && (c) != 215) ? \
					(hchar_t)((c) + 32) : (c))




/*
 ****************************************************************************
 *
 *	Latin-9 conversion tables
 *
 ****************************************************************************
 */


/*
 * Convert a latin9 character into the PC character set. This uses the
 * latin1 table, but the eight codes at which latin9 differs from latin1
 * (164, 166, 168, 180, 184, 188, 189, 190) and all codes above CONV_MAX
 * are reported as INVALID_PC.
 */
#define latin9_pc(c)	((c) > CONV_MAX || \
			 (c) == 164 || (c) == 166 || (c) == 168 || \
			 (c) == 180 || (c) == 184 || (c) == 188 || \
			 (c) == 189 || (c) == 190 ? INVALID_PC : \
					(latin1_pc_table[(c)] & 0xff))



/*
 * Convert a PC character into the latin9 character set. The eight PC
 * codes listed here are those which pc_latin1_table maps onto latin1
 * codes that carry a different glyph in latin9; they are reported as
 * INVALID_LATIN9. All other codes use the latin1 table. The parameter
 * is fully parenthesized so compound arguments are masked as a whole.
 */
#define pc_latin9(c)	((c) == 177 || (c) == 188 || (c) == 191 || \
			 (c) == 180 || (c) == 184 || (c) == 172 || \
			 (c) == 171 || (c) == 190 ? INVALID_LATIN9 : \
					pc_latin1_table[(int)((c) & 0xff)])



/*
 * Convert a latin9 character into UCS-2
 *
 * The table covers only the latin9 codes 160 - 191 and is indexed by
 * (code - 160). Eight entries differ from the plain code value: 164,
 * 166, 168, 180, 184, 188, 189 and 190.
 */
static ucs_t const latin9_ucs_table[192 - 160] = {
    160,  161,  162,  163, 8364,  165,  352,  167,	/* 160 - 167  */
    353,  169,  170,  171,  172,  173,  174,  175,	/* 168 - 175  */
    176,  177,  178,  179,  381,  181,  182,  183,	/* 176 - 183  */
    382,  185,  186,  187,  338,  339,  376,  191	/* 184 - 191  */
};

/*
 * Map a latin9 code to UCS-2: codes above CONV_MAX give INVALID_LATIN9,
 * codes 160 - 191 are looked up in latin9_ucs_table, and all remaining
 * codes are returned unchanged. The parameter is fully parenthesized so
 * compound arguments are masked as a whole before the index is computed.
 */
#define latin9_ucs(c)	((c) > CONV_MAX ? INVALID_LATIN9 : \
			 ((c) < 160 || (c) > 191 ? (c) : \
				latin9_ucs_table[(int)(((c) & 0xff) - 160)]))



/*
 * Convert a latin9 character into upper case. The codes 97 - 122 and
 * 224 - 254 (without 247, the division sign) are moved down by 32 as in
 * latin1. In addition the letters which exist only in latin9 are
 * handled: 168 -> 166, 184 -> 180, 189 -> 188 and 255 -> 190 (these
 * are the code points which latin9_ucs_table maps to 353/352, 382/381,
 * 339/338 and 255/376). Everything else is returned as it is. The
 * argument is evaluated more than once.
 */
#define latin9_upper(c)	(((c) >= 97 && (c) <= 122) || \
				((c) >= 224 && (c) <= 254 && (c) != 247) ? \
					(hchar_t)((c) - 32) : \
			 (c) == 168 ? (hchar_t)166 : \
			 (c) == 184 ? (hchar_t)180 : \
			 (c) == 189 ? (hchar_t)188 : \
			 (c) == 255 ? (hchar_t)190 : (c))



/*
 * Convert a latin9 character into lower case. The codes 65 - 90 and
 * 192 - 222 are moved up by 32 as in latin1; code 215 (the
 * multiplication sign) lies inside the second range but is not a letter
 * and is left alone. In addition the letters which exist only in latin9
 * are handled: 166 -> 168, 180 -> 184, 188 -> 189 and 190 -> 255.
 * Everything else is returned as it is. The argument is evaluated more
 * than once.
 */
#define latin9_lower(c)	(((c) >= 65 && (c) <= 90) || \
				((c) >= 192 && (c) <= 222 && (c) != 215) ? \
					(hchar_t)((c) + 32) : \
			 (c) == 166 ? (hchar_t)168 : \
			 (c) == 180 ? (hchar_t)184 : \
			 (c) == 188 ? (hchar_t)189 : \
			 (c) == 190 ? (hchar_t)255 : (c))




/*
 ****************************************************************************
 *
 *	EBCDIC conversion tables
 *
 ****************************************************************************
 */


/*
 * Convert an EBCDIC character into the latin1 character set
 *
 * The table is indexed by the EBCDIC code (0 .. CONV_MAX). Positions
 * without a latin1 equivalent hold INVALID_LATIN1 (written INV here).
 *
 * This table and latin1_ebcdic_table below have to agree with each
 * other, so always change them together. Entries 72 (231), 73 (241)
 * and 121 (96) each used to duplicate another entry, which made the
 * latin1 codes 231, 241 and 96 unreachable.
 *
 * NOTE(review): entry 141 yields 13 although 13 is already produced by
 * entry 13, and the entries 142/174 (222/254) are the opposite of what
 * the EBCDIC case tables in this file imply - confirm against the
 * intended EBCDIC code page before touching them.
 */
#define INV	INVALID_LATIN1
static hchar_t const ebcdic_latin1_table[CONV_MAX + 1] = {
      0,   1,   2,   3, INV,   9, INV, 127,	/*   0 -   7  */
    INV, INV, INV,  11,  12,  13,  14,  15,	/*   8 -  15  */
     16,  17,  18,  19, INV,  10,   8, INV,	/*  16 -  23  */
     24,  25, INV, INV,  28,  29,  30,  31,	/*  24 -  31  */
    INV, INV,  28, INV, INV,  10,  23,  27,	/*  32 -  39  */
    INV, INV, INV, INV, INV,   5,   6,   7,	/*  40 -  47  */
    INV, INV,  22, INV, INV, INV, INV,   4,	/*  48 -  55  */
    INV, INV, INV, INV,  20,  21, INV,  26,	/*  56 -  63  */
     32, INV, 226, 228, 224, 225, 227, 229,	/*  64 -  71  */
    231, 241, 162,  46,  60,  40,  43, 124,	/*  72 -  79  */
     38, 233, 234, 235, 232, 237, 238, 239,	/*  80 -  87  */
    236, 223,  33,  36,  42,  41,  59,  94,	/*  88 -  95  */
     45,  47, 194, 196, 192, 193, 195, 197,	/*  96 - 103  */
    199, 209, 166,  44,  37,  95,  62,  63,	/* 104 - 111  */
    248, 201, 202, 203, 200, 205, 206, 207,	/* 112 - 119  */
    204,  96,  58,  35,  64,  39,  61,  34,	/* 120 - 127  */
    216,  97,  98,  99, 100, 101, 102, 103,	/* 128 - 135  */
    104, 105, 171, 187, 240,  13, 222, 177,	/* 136 - 143  */
    176, 106, 107, 108, 109, 110, 111, 112,	/* 144 - 151  */
    113, 114, 170, 186, 230, 184, 198, 164,	/* 152 - 159  */
    181, 126, 115, 116, 117, 118, 119, 120,	/* 160 - 167  */
    121, 122, 161, 191, 208,  91, 254, 174,	/* 168 - 175  */
    172, 163, 165, 149, 169, 167, 182, 188,	/* 176 - 183  */
    189, 190, 221, 168, 175,  93, 146, 215,	/* 184 - 191  */
    123,  65,  66,  67,  68,  69,  70,  71,	/* 192 - 199  */
     72,  73, 155, 244, 246, 242, 243, 245,	/* 200 - 207  */
    125,  74,  75,  76,  77,  78,  79,  80,	/* 208 - 215  */
     81,  82, 185, 251, 252, 249, 250, 255,	/* 216 - 223  */
     92, 247,  83,  84,  85,  86,  87,  88,	/* 224 - 231  */
     89,  90, 178, 212, 214, 210, 211, 213,	/* 232 - 239  */
     48,  49,  50,  51,  52,  53,  54,  55,	/* 240 - 247  */
     56,  57, 179, 219, 220, 217, 218, INV 	/* 248 - 255  */
};
#undef INV

/* EBCDIC -> latin1 lookup; codes above CONV_MAX give INVALID_LATIN1 */
#define ebcdic_latin1(c)	((c) > CONV_MAX ? INVALID_LATIN1 : \
						ebcdic_latin1_table[c])
/* EBCDIC -> PC: first EBCDIC -> latin1, then latin1 -> PC */
#define ebcdic_pc(c)		(latin1_pc(ebcdic_latin1(c)))



/*
 * Convert a latin1 character into the EBCDIC character set
 *
 * The table is indexed by the latin1 code (0 .. CONV_MAX). Positions
 * without an EBCDIC equivalent hold INVALID_EBCDIC (written INV here).
 * Each valid entry is the index at which ebcdic_latin1_table above
 * holds the same latin1 code. The entries for 89, 90, 96, 176, 178,
 * 240 and 241 used to contradict that table; among other things the
 * letters 'Y' and 'Z' could not be converted correctly.
 */
#define INV	INVALID_EBCDIC
static hchar_t const latin1_ebcdic_table[CONV_MAX + 1] = {
      0,   1,   2,   3,  55,  45,  46,  47,	/*   0 -   7  */
     22,   5,  37,  11,  12,  13,  14,  15,	/*   8 -  15  */
     16,  17,  18,  19,  60,  61,  50,  38,	/*  16 -  23  */
     24,  25,  63,  39,  28,  29,  30,  31,	/*  24 -  31  */
     64,  90, 127, 123,  91, 108,  80, 125,	/*  32 -  39  */
     77,  93,  92,  78, 107,  96,  75,  97,	/*  40 -  47  */
    240, 241, 242, 243, 244, 245, 246, 247,	/*  48 -  55  */
    248, 249, 122,  94,  76, 126, 110, 111,	/*  56 -  63  */
    124, 193, 194, 195, 196, 197, 198, 199,	/*  64 -  71  */
    200, 201, 209, 210, 211, 212, 213, 214,	/*  72 -  79  */
    215, 216, 217, 226, 227, 228, 229, 230,	/*  80 -  87  */
    231, 232, 233, 173, 224, 189,  95, 109,	/*  88 -  95  */
    121, 129, 130, 131, 132, 133, 134, 135,	/*  96 - 103  */
    136, 137, 145, 146, 147, 148, 149, 150,	/* 104 - 111  */
    151, 152, 153, 162, 163, 164, 165, 166,	/* 112 - 119  */
    167, 168, 169, 192,  79, 208, 161,   7,	/* 120 - 127  */
    INV, INV, INV, INV, INV, INV, INV, INV,	/* 128 - 135  */
    INV, INV, INV, INV, INV, INV, INV, INV,	/* 136 - 143  */
    INV, INV, 190, INV, INV, 179, INV, INV,	/* 144 - 151  */
    INV, INV, INV, 202, INV, INV, INV, INV,	/* 152 - 159  */
    INV, 170,  74, 177, 159, 178, 106, 181,	/* 160 - 167  */
    187, 180, 154, 138, 176, INV, 175, 188,	/* 168 - 175  */
    144, 143, 234, 250, INV, 160, 182, INV,	/* 176 - 183  */
    157, 218, 155, 139, 183, 184, 185, 171,	/* 184 - 191  */
    100, 101,  98, 102,  99, 103, 158, 104,	/* 192 - 199  */
    116, 113, 114, 115, 120, 117, 118, 119,	/* 200 - 207  */
    172, 105, 237, 238, 235, 239, 236, 191,	/* 208 - 215  */
    128, 253, 254, 251, 252, 186, 142,  89,	/* 216 - 223  */
     68,  69,  66,  70,  67,  71, 156,  72,	/* 224 - 231  */
     84,  81,  82,  83,  88,  85,  86,  87,	/* 232 - 239  */
    140,  73, 205, 206, 203, 207, 204, 225,	/* 240 - 247  */
    112, 221, 222, 219, 220, INV, 174, 223 	/* 248 - 255  */
};
#undef INV

/* latin1 -> EBCDIC lookup; codes above CONV_MAX give INVALID_EBCDIC */
#define latin1_ebcdic(c)	((c) > CONV_MAX ? INVALID_EBCDIC : \
						latin1_ebcdic_table[c])
/*
 * PC -> EBCDIC: first PC -> latin1, then latin1 -> EBCDIC. The order
 * matters: the inner conversion has to be the one that understands the
 * PC code which is passed in.
 */
#define pc_ebcdic(c)		(latin1_ebcdic(pc_latin1(c)))



/*
 * Convert an EBCDIC character to upper case
 *
 * The table starts at EBCDIC code 64 and is indexed by (code - 64);
 * codes without an upper case form map onto themselves. Accented
 * lower case letters at 66 - 73 and 81 - 88 are moved up by 32; the
 * entries for 71, 72 and 73 are therefore 103, 104 and 105, matching
 * the reverse mapping in ebcdic_lower_table.
 *
 * NOTE(review): entry 141 yields 173, while ebcdic_latin1_table
 * translates 173 to '[' - confirm which EBCDIC code page is intended.
 */
static hchar_t const ebcdic_upper_table[CONV_MAX + 1 - 64] = {
     64,  65,  98,  99, 100, 101, 102, 103,	/*  64 -  71  */
    104, 105,  74,  75,  76,  77,  78,  79,	/*  72 -  79  */
     80, 113, 114, 115, 116, 117, 118, 119,	/*  80 -  87  */
    120,  89,  90,  91,  92,  93,  94,  95,	/*  88 -  95  */
     96,  97,  98,  99, 100, 101, 102, 103,	/*  96 - 103  */
    104, 105, 106, 107, 108, 109, 110, 111,	/* 104 - 111  */
    128, 113, 114, 115, 116, 117, 118, 119,	/* 112 - 119  */
    120, 121, 122, 123, 124, 125, 126, 127,	/* 120 - 127  */
    128, 193, 194, 195, 196, 197, 198, 199,	/* 128 - 135  */
    200, 201, 138, 139, 172, 173, 174, 143,	/* 136 - 143  */
    144, 209, 210, 211, 212, 213, 214, 215,	/* 144 - 151  */
    216, 217, 154, 155, 158, 157, 158, 159,	/* 152 - 159  */
    160, 161, 226, 227, 228, 229, 230, 231,	/* 160 - 167  */
    232, 233, 170, 171, 172, 173, 174, 175,	/* 168 - 175  */
    176, 177, 178, 179, 180, 181, 182, 183,	/* 176 - 183  */
    184, 185, 186, 187, 188, 189, 190, 191,	/* 184 - 191  */
    192, 193, 194, 195, 196, 197, 198, 199,	/* 192 - 199  */
    200, 201, 202, 235, 236, 237, 238, 239,	/* 200 - 207  */
    208, 209, 210, 211, 212, 213, 214, 215,	/* 208 - 215  */
    216, 217, 218, 251, 252, 253, 254, 223,	/* 216 - 223  */
    224, 225, 226, 227, 228, 229, 230, 231,	/* 224 - 231  */
    232, 233, 234, 235, 236, 237, 238, 239,	/* 232 - 239  */
    240, 241, 242, 243, 244, 245, 246, 247,	/* 240 - 247  */
    248, 249, 250, 251, 252, 253, 254, 255 	/* 248 - 255  */
};

/*
 * Upper-case an EBCDIC code through ebcdic_upper_table. Codes below 64
 * or above CONV_MAX are outside the table and are returned unchanged.
 */
#define ebcdic_upper(c)	((c) >= 64 && (c) <= CONV_MAX ? \
				ebcdic_upper_table[(c) - 64] : (c))



/*
 * Convert an EBCDIC character to lower case
 *
 * The table starts at EBCDIC code 64 and is indexed by (code - 64);
 * codes without a lower case form map onto themselves. It is the
 * reverse of ebcdic_upper_table: every pair found there (for example
 * 156 -> 158) has its counterpart here (158 -> 156).
 *
 * NOTE(review): entry 173 yields 141, while ebcdic_latin1_table
 * translates 173 to '[' - confirm which EBCDIC code page is intended.
 */
static hchar_t const ebcdic_lower_table[CONV_MAX + 1 - 64] = {
     64,  65,  66,  67,  68,  69,  70,  71,	/*  64 -  71  */
     72,  73,  74,  75,  76,  77,  78,  79,	/*  72 -  79  */
     80,  81,  82,  83,  84,  85,  86,  87,	/*  80 -  87  */
     88,  89,  90,  91,  92,  93,  94,  95,	/*  88 -  95  */
     96,  97,  66,  67,  68,  69,  70,  71,	/*  96 - 103  */
     72,  73, 106, 107, 108, 109, 110, 111,	/* 104 - 111  */
    112,  81,  82,  83,  84,  85,  86,  87,	/* 112 - 119  */
     88, 121, 122, 123, 124, 125, 126, 127,	/* 120 - 127  */
    112, 129, 130, 131, 132, 133, 134, 135,	/* 128 - 135  */
    136, 137, 138, 139, 140, 141, 142, 143,	/* 136 - 143  */
    144, 145, 146, 147, 148, 149, 150, 151,	/* 144 - 151  */
    152, 153, 154, 155, 156, 157, 156, 159,	/* 152 - 159  */
    160, 161, 162, 163, 164, 165, 166, 167,	/* 160 - 167  */
    168, 169, 170, 171, 140, 141, 142, 175,	/* 168 - 175  */
    176, 177, 178, 179, 180, 181, 182, 183,	/* 176 - 183  */
    184, 185, 186, 187, 188, 189, 190, 191,	/* 184 - 191  */
    192, 129, 130, 131, 132, 133, 134, 135,	/* 192 - 199  */
    136, 137, 202, 203, 204, 205, 206, 207,	/* 200 - 207  */
    208, 145, 146, 147, 148, 149, 150, 151,	/* 208 - 215  */
    152, 153, 218, 219, 220, 221, 222, 223,	/* 216 - 223  */
    224, 225, 162, 163, 164, 165, 166, 167,	/* 224 - 231  */
    168, 169, 234, 203, 204, 205, 206, 207,	/* 232 - 239  */
    240, 241, 242, 243, 244, 245, 246, 247,	/* 240 - 247  */
    248, 249, 250, 219, 220, 221, 222, 255 	/* 248 - 255  */
};

/*
 * Lower-case an EBCDIC code through ebcdic_lower_table. Codes below 64
 * or above CONV_MAX are outside the table and are returned unchanged.
 */
#define ebcdic_lower(c)	((c) >= 64 && (c) <= CONV_MAX ? \
				ebcdic_lower_table[(c) - 64] : (c))




/*
 ****************************************************************************
 *
 *	Public conversion routines
 *
 ****************************************************************************
 */


/*
 * Translate one host character into the PC (target) character set,
 * depending on the currently selected host character set. With a
 * UTF-8 host the argument is taken to be a UCS-2 code and handled like
 * a latin1 character. Any unknown character set value is treated like
 * the native set of the build (EBCDIC or ASCII).
 */
pcchar_t chartotarget __F((c), hchar_t c)
{
  pcchar_t target;

  switch (nbcharset) {
	case CHARSET_LATIN9:
		target = latin9_pc(c);
		break;
	case CHARSET_LATIN1:
	case CHARSET_UTF8:
		target = latin1_pc(c);
		break;
	case CHARSET_ASCII:
#if !EBCDIC
	default:
#endif
		/* Only the lower seven bits are used with ASCII */
		target = latin1_pc(c & 0x7f);
		break;
	case CHARSET_EBCDIC:
#if EBCDIC
	default:
#endif
		target = ebcdic_pc(c);
		break;
  }
  return(target);
}



/*
 * Translate one character of the target system (PC) character set
 * into the currently selected host character set. With a UTF-8 host
 * the result is a UCS-2 code. With an ASCII host everything which does
 * not fit into seven bits becomes INVALID_ASCII. Any unknown character
 * set value is treated like the native set of the build.
 */
hchar_t chartohost __F((c), pcchar_t c)
{
  hchar_t host;

  switch (nbcharset) {
	case CHARSET_LATIN9:
		host = (hchar_t)pc_latin9(c);
		break;
	case CHARSET_LATIN1:
	case CHARSET_UTF8:
		host = (hchar_t)pc_latin1(c);
		break;
	case CHARSET_ASCII:
#if !EBCDIC
	default:
#endif
		host = (hchar_t)pc_latin1(c);
		if (host > 0x7f)
			host = INVALID_ASCII;
		break;
	case CHARSET_EBCDIC:
#if EBCDIC
	default:
#endif
		/* Hand a host-sized copy to the macro; this only avoids warnings */
		host = c;
		host = (hchar_t)pc_ebcdic(host);
		break;
  }
  return(host);
}



/*
 * Convert a host character into UCS-2, depending on the currently
 * selected host character set. Characters which are outside the range
 * of an ASCII, latin1 or latin9 host are replaced by the invalid
 * character of that set. Any unknown character set value is treated
 * like the native set of the build.
 */
ucs_t chartoucs2 __F((c), hchar_t c)
{
  ucs_t ucs;

  switch(nbcharset) {
	case CHARSET_LATIN9:
		ucs = (ucs_t)latin9_ucs(c);
		break;
	case CHARSET_LATIN1:
		if (c > 0x00ff)
			ucs = INVALID_LATIN1;
		else
			ucs = (ucs_t)(c & 0x00ff);
		break;
	case CHARSET_UTF8:
		/* The host character is a UCS-2 code already */
		if (c > 0x00ff)
			ucs = (ucs_t)c;
		else
			ucs = (ucs_t)(c & 0x00ff);
		break;
	case CHARSET_ASCII:
#if !EBCDIC
	default:
#endif
		if (c > 0x007f)
			ucs = INVALID_ASCII;
		else
			ucs = (ucs_t)(c & 0x007f);
		break;
	case CHARSET_EBCDIC:
#if EBCDIC
	default:
#endif
		ucs = (ucs_t)(ebcdic_latin1(c) & 0x00ff);
		break;
  }
  return(ucs);
}



/*
 * Convert a host character to upper case according to the currently
 * selected host character set. With an ASCII host, characters above
 * 0x7f are left untouched. UTF-8 hosts use the latin1 rules. Any
 * unknown character set value is treated like the native set of the
 * build.
 */
hchar_t chartoupper __F((c), hchar_t c)
{
  hchar_t upper;

  switch(nbcharset) {
	case CHARSET_LATIN9:
		upper = latin9_upper(c);
		break;
	case CHARSET_LATIN1:
	case CHARSET_UTF8:
		upper = latin1_upper(c);
		break;
	case CHARSET_ASCII:
#if !EBCDIC
	default:
#endif
		upper = c;
		if (!(c > 0x007f))
			upper = latin1_upper(c);
		break;
	case CHARSET_EBCDIC:
#if EBCDIC
	default:
#endif
		upper = ebcdic_upper(c);
		break;
  }
  return(upper);
}



/*
 * Convert a host character to lower case according to the currently
 * selected host character set. With an ASCII host, characters above
 * 0x7f are left untouched. UTF-8 hosts use the latin1 rules. Any
 * unknown character set value is treated like the native set of the
 * build.
 */
hchar_t chartolower __F((c), hchar_t c)
{
  hchar_t lower;

  switch(nbcharset) {
	case CHARSET_LATIN9:
		lower = latin9_lower(c);
		break;
	case CHARSET_LATIN1:
	case CHARSET_UTF8:
		lower = latin1_lower(c);
		break;
	case CHARSET_ASCII:
#if !EBCDIC
	default:
#endif
		lower = c;
		if (!(c > 0x007f))
			lower = latin1_lower(c);
		break;
	case CHARSET_EBCDIC:
#if EBCDIC
	default:
#endif
		lower = ebcdic_lower(c);
		break;
  }
  return(lower);
}



/*
 * Set or query the current host character set.
 *
 * When newcharset is CHARSET_UNKNOWN nothing is changed and the
 * routine just reports the current setting. Otherwise the value is
 * stored in nbcharset and the matching name is copied into nbcharname;
 * a value which is none of the known sets gets the name of the native
 * set of the build (EBCDIC or ASCII).
 *
 * Returns the character set which is in effect after the call.
 *
 * The identifier list handed to __F has to name the parameter exactly
 * as the declaration does (presumably it is used for old-style
 * function definitions - see the definition of __F).
 */
int setcharset __F((newcharset), int newcharset)
{
  if (newcharset != CHARSET_UNKNOWN) {
	nbcharset = newcharset;
	switch (nbcharset) {
		case CHARSET_EBCDIC:
#if EBCDIC
		default:
#endif
			copystr(&nbcharname, "EBCDIC");
			break;
		case CHARSET_ASCII:
#if !EBCDIC
		default:
#endif
			copystr(&nbcharname, "ASCII");
			break;
		case CHARSET_UTF8:
			copystr(&nbcharname, "UTF-8");
			break;
		case CHARSET_LATIN1:
			copystr(&nbcharname, "ISO-8859-1");
			break;
		case CHARSET_LATIN9:
			copystr(&nbcharname, "ISO-8859-15");
			break;
	}
  }
  return(nbcharset);
}




/*
 ****************************************************************************
 *
 *	Multibyte support routines
 *
 ****************************************************************************
 */


/*
 * Collect the next character from a standard character stream. If
 * the host uses a single-byte character encoding, this routine
 * returns the byte value (0 - 255) unmodified. Otherwise, if the host
 * uses UTF-8, it collects all bytes defining one character. This
 * routine returns 0 if more bytes have to get collected. Therefore it
 * is necessary that the caller checks for the terminating-zero byte
 * first.
 *
 * Only sequences of two bytes produce a character code. Longer
 * sequences are consumed completely and then reported as
 * INVALID_LATIN1, and so are misplaced start or follow bytes.
 *
 * The state between calls is kept in collchar and collcount.
 */
hchar_t charcollect __F((c), char c)
{
  hchar_t ret;

  /* With everything else but UTF-8, we have only single-byte characters */
  if (nbcharset != CHARSET_UTF8 || (unsigned char)c < 0x80) {
	collcount = 0;
	collchar = 0;
	/* Mask the value so that a signed char does not get sign-extended */
	return((hchar_t)(c & 0xff));
  }

  /* Check if we have a start character */
  if ((c & 0xc0) != 0x80) {
	if (collcount == 0) {
		if ((c & 0xe0) == 0xc0) {
			collchar = (hchar_t)(c & 0x1f);
			collcount = 1;
			return(0);
		}
		if ((c & 0xf0) == 0xe0) {
			collchar = (hchar_t)(c & 0x0f);
			collcount = 2;
			return(0);
		}
		if ((c & 0xf8) == 0xf0) {
			collchar = (hchar_t)(c & 0x07);
			collcount = 3;
			return(0);
		}
		if ((c & 0xfc) == 0xf8) {
			collchar = (hchar_t)(c & 0x03);
			collcount = 4;
			return(0);
		}
	}
	/* Start byte within a sequence, or no valid start byte at all */
	collcount = 0;
	collchar = 0;
	return(INVALID_LATIN1);
  }

  /* Collect follow-byte */
  if (collcount == 0) {
	/* Follow-byte without a preceding start byte */
	collchar = 0;
	return(INVALID_LATIN1);
  }
  if (collcount > 1) {
	/*
	 * More than one follow-byte means a sequence of three or more
	 * bytes. Clearing the collected bits makes the last byte of the
	 * sequence report an invalid character below.
	 */
	collchar = 0;
	collcount--;
	return(0);
  } else if (collchar == 0) {
	collcount = 0;
	return(INVALID_LATIN1);
  }

  /* Build the result before the state gets cleared for the next character */
  ret = (hchar_t)((collchar << 6) + ((hchar_t)(c & 0x3f)));
  collchar = 0;
  collcount = 0;
  return(ret);
}



/*
 * Save a (possibly multibyte) character into a destination string. It returns
 * the number of bytes saved into the destination string.
 *
 *   c - host character (a UCS-2 code if the host uses UTF-8)
 *   s - destination buffer, which is not zero-terminated by this routine
 *   n - number of bytes available at s
 *
 * With a UTF-8 host, characters from 0x80 up to 0x07ff are written as
 * a two-byte sequence. Nothing is written and 0 is returned if the
 * buffer is too small, or if the character is above 0x07ff: such a
 * code has more than the 11 bits which fit into two bytes, and writing
 * it would silently produce a different character.
 */
size_t savechar __F((c, s, n), hchar_t c AND char *s AND size_t n)
{
  /* With everything else but UTF-8 just save the char into the string */
  if (nbcharset != CHARSET_UTF8 || c < 0x80) {
	if (n < 1)
		return(0);
	*s = (char)(c & 0xff);
	return(1);
  }

  /* Bail out if the character or the string does not allow two bytes */
  if (c > 0x07ff || n < 2)
	return(0);
  s[0] = (char)(((c >> 6) & 0x1f) | 0xc0);
  s[1] = (char)((c & 0x3f) | 0x80);
  return(2);
}



/*
 * Determine the length of a host character as processed by the
 * savechar() routine above: 1 for every character of a single-byte
 * host and for characters below 0x80, 2 for UTF-8 characters up to
 * 0x07ff, and 0 for characters which can not be saved at all because
 * they do not fit into a two-byte UTF-8 sequence.
 */
size_t charlen __F((c), hchar_t c)
{
  /* With everything but UTF-8 we just have one character byte */
  if (nbcharset != CHARSET_UTF8 || c < 0x80)
	return(1);

  /* Otherwise check if the character fits into a two-byte sequence */
  return(c > 0x07ff ? 0 : 2);
}



/*
 * Convert a (possibly multibyte) string into a string of host
 * characters.
 *
 * The string is run through charcollect() twice: once to count the
 * resulting characters and once to store them. The returned string is
 * allocated with nbmalloc() and terminated by a zero character.
 *
 * charcollect() keeps its state in collchar and collcount. That state
 * is cleared before each pass, so neither an unfinished sequence left
 * by an earlier caller nor one at the end of the first pass can change
 * what the second pass produces.
 */
hchar_t *strtohost __F((s), const char *s)
{
  size_t len;
  const char *cp;
  hchar_t c, *ret, *hp;

  /* First scan the string to determine its length */
  collchar = 0;
  collcount = 0;
  cp = s;
  len = 0;
  while (*cp) {
	if (charcollect(*cp) != 0)
		len++;
	cp++;
  }

  /* Allocate enough memory for the destination string and copy it */
  ret = hp = (hchar_t *)nbmalloc((len + 1) * sizeof(hchar_t));
  collchar = 0;
  collcount = 0;
  cp = s;
  while (*cp && len > 0) {
	if ((c = charcollect(*cp)) != 0) {
		*hp++ = c;
		len--;
	}
	cp++;
  }
  *hp = 0;
  return(ret);
}

