
/* romakana.c  EOUE Project, 1997 by Michael Mangelsdorf */
/* THIS FILE IS PUBLIC DOMAIN.
   LEAVE THIS MESSAGE IN PLACE AS COURTESY TO THE AUTHOR */


#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Shorthand for unsigned char; used below for the romaji input characters
   and for the status value returned by kanatransl. */
typedef unsigned char  byte;

/*  Romaji lookup table for the KANA.16 font.

    'typog' is the romaji spelling of a syllable and 'charord' is the
    ordinal of that syllable in the KANA.16 font.  Upper-case spellings
    are the katakana entries, lower-case spellings the hiragana entries.

    A 'charord' above 0xFF marks a compound kana made of two glyphs:
    the high byte is the first glyph, the low byte the second.
    The table ends with a sentinel entry whose 'charord' is 0xFFFF.

    Structure could be useful for romaji-interfacing to Japanese.
    Combine with JIS information in CONVERT.C for use with other fonts.
    Feel free to cut and paste.
*/
struct {
   char *typog;
   unsigned charord;
} kana_info []=  /* high byte nonzero: compound kana, low byte is 2nd glyph */
  {
    /* ---- katakana, single glyph ---- */
    {"A",0x05}, {"I",0x06}, {"U",0x07}, {"E",0x08}, {"O",0x09},
    {"KA",0x12}, {"KI",0x13}, {"KU",0x14}, {"KE",0x15}, {"KO",0x16},
    {"SA",0x30}, {"SHI",0x31}, {"SU",0x32}, {"SE",0x33}, {"SO",0x34},
    {"TA",0x44}, {"CHI",0x45}, {"TSU",0x46}, {"TE",0x47}, {"TO",0x48},
    {"NA",0x60}, {"NI",0x61}, {"NU",0x62}, {"NE",0x63}, {"NO",0x64},
    {"HA",0x74}, {"HI",0x75}, {"FU",0x76}, {"HE",0x77}, {"HO",0x78},
    {"MA",0x6A}, {"MI",0x6B}, {"MU",0x6C}, {"ME",0x6D}, {"MO",0x6E},
    {"YA",0x90}, {"YU",0x91}, {"YO",0x92},
    {"RA",0x4E}, {"RI",0x4F}, {"RU",0x50}, {"RE",0x51}, {"RO",0x52},
    {"WA",0x97}, {"WI",0x98}, {"WE",0x99}, {"WO",0x9A},
    /* NOTE(review): "N" has the same ordinal as the hiragana "n" entry
       further down -- confirm against the KANA.16 font. */
    {"N",0x9B},
    {"GA",0x21}, {"GI",0x22}, {"GU",0x23}, {"GE",0x24}, {"GO",0x25},
    {"ZA",0x3A}, {"JI",0x3B}, {"ZU",0x3C}, {"ZE",0x3D}, {"ZO",0x3E},
    {"DA",0x58}, {"DE",0x59}, {"DO",0x5A}, {"DI",0xA6}, {"DU",0xA7},
    {"BA",0x7E}, {"BI",0x7F}, {"BU",0x80}, {"BE",0x81}, {"BO",0x82},
    {"PA",0x88}, {"PI",0x89}, {"PU",0x8A}, {"PE",0x8B}, {"PO",0x8C},
    /* ---- katakana, compound: first glyph + 0xA3/0xA4/0xA5 ---- */
    {"KYA",0x13A3}, {"KYU",0x13A4}, {"KYO",0x13A5},
    {"SHA",0x31A3}, {"SHU",0x31A4}, {"SHO",0x31A5},
    {"CHA",0x45A3}, {"CHU",0x45A4}, {"CHO",0x45A5},
    {"NYA",0x61A3}, {"NYU",0x61A4}, {"NYO",0x61A5},
    {"HYA",0x75A3}, {"HYU",0x75A4}, {"HYO",0x75A5},
    {"MYA",0x6BA3}, {"MYU",0x6BA4}, {"MYO",0x6BA5},
    {"RYA",0x4FA3}, {"RYU",0x4FA4}, {"RYO",0x4FA5},
    {"GYA",0x22A3}, {"GYU",0x22A4}, {"GYO",0x22A5},
    {"JA",0x3BA3}, {"JU",0x3BA4}, {"JO",0x3BA5},
    {"BYA",0x7FA3}, {"BYU",0x7FA4}, {"BYO",0x7FA5},
    {"PYA",0x89A3}, {"PYU",0x89A4}, {"PYO",0x89A5},

    /* ---- hiragana, single glyph ---- */
    {"a",0x00}, {"i",0x01}, {"u",0x02}, {"e",0x03}, {"o",0x04},
    {"ka",0x0D}, {"ki",0x0E}, {"ku",0x0F}, {"ke",0x10}, {"ko",0x11},
    {"sa",0x2B}, {"shi",0x2C}, {"su",0x2D}, {"se",0x2E}, {"so",0x2F},
    {"ta",0x3F}, {"chi",0x40}, {"tsu",0x41}, {"te",0x42}, {"to",0x43},
    {"na",0x5B}, {"ni",0x5C}, {"nu",0x5D}, {"ne",0x5E}, {"no",0x5F},
    {"ha",0x6F}, {"hi",0x70}, {"fu",0x71}, {"he",0x72}, {"ho",0x73},
    {"ma",0x65}, {"mi",0x66}, {"mu",0x67}, {"me",0x68}, {"mo",0x69},
    {"ya",0x8D}, {"yu",0x8E}, {"yo",0x8F},
    {"ra",0x49}, {"ri",0x4A}, {"ru",0x4B}, {"re",0x4C}, {"ro",0x4D},
    {"wa",0x93}, {"wi",0x94}, {"we",0x95}, {"wo",0x96},
    {"n",0x9B},
    {"ga",0x1C}, {"gi",0x1D}, {"gu",0x1E}, {"ge",0x1F}, {"go",0x20},
    {"za",0x35}, {"ji",0x36}, {"zu",0x37}, {"ze",0x38}, {"zo",0x39},
    {"da",0x53}, {"di",0x54}, {"du",0x55}, {"de",0x56}, {"do",0x57},
    {"ba",0x79}, {"bi",0x7A}, {"bu",0x7B}, {"be",0x7C}, {"bo",0x7D},
    {"pa",0x83}, {"pi",0x84}, {"pu",0x85}, {"pe",0x86}, {"po",0x87},
    /* ---- hiragana, compound: first glyph + 0x0A/0x0B/0x0C ---- */
    {"kya",0x0E0A}, {"kyu",0x0E0B}, {"kyo",0x0E0C},
    /* NOTE(review): "sha", "shu", "sho" and "cho" break the 0x0A/0x0B/0x0C
       pattern used by every other hiragana compound.  Their low bytes
       (0x7D, 0x7E, 0x2F, 0x8f) equal the ordinals listed in this table for
       "bo", "BA", "so" and "yo" -- looks like a data error; confirm
       against the KANA.16 font before relying on these four entries. */
    {"sha",0x2C7D}, {"shu",0x2C7E}, {"sho",0x2C2F},
    {"cha",0x400A}, {"chu",0x400B}, {"cho",0x408f},
    {"nya",0x5C0A}, {"nyu",0x5C0B}, {"nyo",0x5C0C},
    {"hya",0x700A}, {"hyu",0x700B}, {"hyo",0x700C},
    {"mya",0x660A}, {"myu",0x660B}, {"myo",0x660C},
    {"rya",0x4A0A}, {"ryu",0x4A0B}, {"ryo",0x4A0C},
    {"gya",0x1D0A}, {"gyu",0x1D0B}, {"gyo",0x1D0C},
    {"ja",0x360A}, {"ju",0x360B}, {"jo",0x360C},
    {"bya",0x7A0A}, {"byu",0x7A0B}, {"byo",0x7A0C},
    /* the last entry, charord 0xFFFF, is the end-of-table sentinel */
    {"pya",0x840A}, {"pyu",0x840B}, {"pyo",0x840C}, {"MM",0xFFFF} };


/* Parses a romaji string into a string of two-byte Japanese character codes.
   See WRCHAR.C for how Kana/Joyo codes are unified. In a nutshell:
   Codes 0-1945 enumerate Joyos, 1946-1999 are not used, 2000 and
   following are kana codes.

   input:  linstr, an ordinary NUL-terminated character string containing
	   a valid romaji kana sequence.
	   KATAKANA capitalized. Hiragana lower-case.
	   An apostrophe in front of a syllable is a sokuon mark
	   ('k yields kk); it must be followed by a syllable.
   output: codebuf[] holds the result. codebuf[0] is the array length,
	   counting codebuf[0] itself, so an empty input yields
	   codebuf[0]==1. The caller must provide room for up to
	   1 + 2*strlen(linstr) elements (a compound kana produces two
	   codes).
   return: 0 on success, 1 on a syntactic error in the romaji string
	   (unknown syllable, or a sokuon mark not followed by a
	   syllable). On error the contents of codebuf[] are unspecified.

   At every position the LONGEST matching table entry wins, so "NYA" is
   read as the compound NYA rather than as "N" followed by "YA".
*/
byte kanatransl (byte *linstr, unsigned *codebuf) {
    enum {
	KANA_BASE    = 2000,   /* first kana code, see header comment      */
	SOKUON_LOWER = 0x9D,   /* sokuon glyph in front of lower-case kana */
	SOKUON_UPPER = 0x9E,   /* sokuon glyph in front of upper-case kana */
	TABLE_END    = 0xFFFF  /* charord of the kana_info sentinel        */
    };
    const char *text = (const char *) linstr;
    size_t length  = strlen (text);
    size_t current = 0;   /* read position inside linstr                  */
    size_t j       = 1;   /* next free slot of codebuf; slot 0 = length   */
    size_t ahead   = 0;   /* slot reserved for a pending sokuon, 0 = none */

    while (current < length) {
	size_t   i, best_len = 0;
	unsigned k = 0;

	if (text[current] == '\'') {
	    /* Sokuon: reserve the output slot now and fill it once the
	       case of the following syllable is known. */
	    if (ahead) return 1;      /* two marks in a row: slot would stay unset */
	    ahead = j++;
	    current++;
	    continue;
	}

	/* Longest-prefix search. strncmp stops at the terminating NUL, so
	   no byte behind the end of linstr is ever read. */
	for (i = 0; kana_info[i].charord != TABLE_END; i++) {
	    size_t n = strlen (kana_info[i].typog);
	    if (n > best_len
		&& strncmp (kana_info[i].typog, text + current, n) == 0) {
		best_len = n;
		k = kana_info[i].charord;
	    }
	}
	if (best_len == 0) return 1;  /* no table entry matches here */

	if (ahead) {                  /* handle sokuon */
	    codebuf[ahead] = KANA_BASE
		+ (islower (linstr[current]) ? SOKUON_LOWER : SOKUON_UPPER);
	    ahead = 0;
	}

	if (k > 255) {                /* compound kana: two glyphs */
	    codebuf[j++] = KANA_BASE + (k >> 8);
	    codebuf[j++] = KANA_BASE + (k & 0xFF);
	} else {
	    codebuf[j++] = KANA_BASE + k;
	}
	current += best_len;
    }

    if (ahead) return 1;              /* sokuon mark at the very end */

    codebuf[0] = (unsigned) j;
    return 0;
}


