1	/* --------------------------------------------------------------------- */
    2	/*
    3	 * Copyright 2001-2004 Unicode, Inc.
    4	 * 
    5	 * Disclaimer
    6	 * 
    7	 * This source code is provided as is by Unicode, Inc. No claims are
    8	 * made as to fitness for any particular purpose. No warranties of any
    9	 * kind are expressed or implied. The recipient agrees to determine
   10	 * applicability of information provided. If this file has been
   11	 * purchased on magnetic or optical media from Unicode, Inc., the
   12	 * sole remedy for any claim will be exchange of defective media
   13	 * within 90 days of receipt.
   14	 * 
   15	 * Limitations on Rights to Redistribute This Code
   16	 * 
   17	 * Unicode, Inc. hereby grants the right to freely use the information
   18	 * supplied in this file in the creation of products supporting the
   19	 * Unicode Standard, and to make copies of this file in any form
   20	 * for internal or external distribution as long as this notice
   21	 * remains attached.
   22	 */
   23	
   24	/* ---------------------------------------------------------------------
   25	
   26	    Conversions between UTF32, UTF-16, and UTF-8. Source code file.
   27	    Author: Mark E. Davis, 1994.
   28	    Rev History: Rick McGowan, fixes & updates May 2001.
   29	    Sept 2001: fixed const & error conditions per
   30	    mods suggested by S. Parent & A. Lillich.
   31	    June 2002: Tim Dodd added detection and handling of incomplete
   32	    source sequences, enhanced error detection, added casts
   33	    to eliminate compiler warnings.
   34	    July 2003: slight mods to back out aggressive FFFE detection.
   35	    Jan 2004: updated switches in from-UTF8 conversions.
   36	    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
   37	
   38	    See the header file "ConvertUTF.h" for complete documentation.
   39	
   40	------------------------------------------------------------------------ */
   41	
   42	
   43	#include "convertutf.h"
   44	#ifdef CVTUTF_DEBUG
   45	#include <stdio.h>
   46	#endif
   47	
/*
 * Constants used when splitting a supplementary code point into a UTF-16
 * surrogate pair and when recombining a pair into one code point.
 */
static const int halfShift  = 10; /* used for shifting by 10 bits */

/* Subtracted before splitting into surrogates, added back when combining a pair. */
static const UTF32 halfBase = 0x0010000UL;
/* Selects the low 10 bits, i.e. the part stored in the low surrogate. */
static const UTF32 halfMask = 0x3FFUL;

/* Inclusive bounds of the high (leading) and low (trailing) surrogate ranges. */
#define UNI_SUR_HIGH_START  (UTF32)0xD800
#define UNI_SUR_HIGH_END    (UTF32)0xDBFF
#define UNI_SUR_LOW_START   (UTF32)0xDC00
#define UNI_SUR_LOW_END     (UTF32)0xDFFF
/* Boolean values returned by the isLegalUTF8* routines in this file. */
#define false    0
#define true     1
   59	
   60	/* --------------------------------------------------------------------- */
   61	
   62	ConversionResult ConvertUTF32toUTF16 (
   63	    const UTF32** sourceStart, const UTF32* sourceEnd, 
   64	    UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
   65	    ConversionResult result = conversionOK;
   66	    const UTF32* source = *sourceStart;
   67	    UTF16* target = *targetStart;
   68	    while (source < sourceEnd) {
   69	    UTF32 ch;
   70	    if (target >= targetEnd) {
   71	        result = targetExhausted; break;
   72	    }
   73	    ch = *source++;
   74	    if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
   75	        /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
   76	        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
   77	        if (flags == strictConversion) {
   78	            --source; /* return to the illegal value itself */
   79	            result = sourceIllegal;
   80	            break;
   81	        } else {
   82	            *target++ = UNI_REPLACEMENT_CHAR;
   83	        }
   84	        } else {
   85	        *target++ = (UTF16)ch; /* normal case */
   86	        }
   87	    } else if (ch > UNI_MAX_LEGAL_UTF32) {
   88	        if (flags == strictConversion) {
   89	        result = sourceIllegal;
   90	        } else {
   91	        *target++ = UNI_REPLACEMENT_CHAR;
   92	        }
   93	    } else {
   94	        /* target is a character in range 0xFFFF - 0x10FFFF. */
   95	        if (target + 1 >= targetEnd) {
   96	        --source; /* Back up source pointer! */
   97	        result = targetExhausted; break;
   98	        }
   99	        ch -= halfBase;
  100	        *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
  101	        *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
  102	    }
  103	    }
  104	    *sourceStart = source;
  105	    *targetStart = target;
  106	    return result;
  107	}
  108	
  109	/* --------------------------------------------------------------------- */
  110	
  111	ConversionResult ConvertUTF16toUTF32 (
  112	    const UTF16** sourceStart, const UTF16* sourceEnd, 
  113	    UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
  114	    ConversionResult result = conversionOK;
  115	    const UTF16* source = *sourceStart;
  116	    UTF32* target = *targetStart;
  117	    UTF32 ch, ch2;
  118	    while (source < sourceEnd) {
  119	    const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
  120	    ch = *source++;
  121	    /* If we have a surrogate pair, convert to UTF32 first. */
  122	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
  123	        /* If the 16 bits following the high surrogate are in the source buffer... */
  124	        if (source < sourceEnd) {
  125	        ch2 = *source;
  126	        /* If it's a low surrogate, convert to UTF32. */
  127	        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
  128	            ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
  129	            + (ch2 - UNI_SUR_LOW_START) + halfBase;
  130	            ++source;
  131	        } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
  132	            --source; /* return to the illegal value itself */
  133	            result = sourceIllegal;
  134	            break;
  135	        }
  136	        } else { /* We don't have the 16 bits following the high surrogate. */
  137	        --source; /* return to the high surrogate */
  138	        result = sourceExhausted;
  139	        break;
  140	        }
  141	    } else if (flags == strictConversion) {
  142	        /* UTF-16 surrogate values are illegal in UTF-32 */
  143	        if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
  144	        --source; /* return to the illegal value itself */
  145	        result = sourceIllegal;
  146	        break;
  147	        }
  148	    }
  149	    if (target >= targetEnd) {
  150	        source = oldSource; /* Back up source pointer! */
  151	        result = targetExhausted; break;
  152	    }
  153	    *target++ = ch;
  154	    }
  155	    *sourceStart = source;
  156	    *targetStart = target;
  157	#ifdef CVTUTF_DEBUG
  158	if (result == sourceIllegal) {
  159	    fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", ch, ch2);
  160	    fflush(stderr);
  161	}
  162	#endif
  163	    return result;
  164	}
  165	
  166	/* --------------------------------------------------------------------- */
  167	
/*
 * Index into the table below with the first byte of a UTF-8 sequence to
 * get the number of trailing bytes that are supposed to follow it.
 * Note that *legal* UTF-8 sequences never have 4 or 5 trailing bytes.
 * Those entries are left as-is for anyone who may want to do such
 * conversion, which was allowed in earlier algorithms.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00-0x1F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20-0x3F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40-0x5F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60-0x7F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80-0x9F */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0-0xBF */
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0-0xDF: one trailing byte */
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0-0xEF: two, 0xF0-0xF7: three, 0xF8-0xFB: four, 0xFC-0xFF: five */
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
  185	
/*
 * Magic values subtracted from a buffer value during UTF8 conversion.
 * This table contains as many values as there might be trailing bytes
 * in a UTF-8 sequence; index it with the number of trailing bytes.
 *
 * The decoders add the raw bytes together, shifting left by 6 between
 * bytes, so the marker bits of every byte end up in the sum. Each entry
 * is that accumulated marker value, e.g. for one trailing byte
 * (0xC0 << 6) + 0x80 == 0x3080, and for two trailing bytes
 * (0xE0 << 12) + (0x80 << 6) + 0x80 == 0xE2080.
 */
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
             0x03C82080UL, 0xFA082080UL, 0x82082080UL };
  193	
/*
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 * into the first byte, depending on how many bytes follow.  There are
 * as many entries in this table as there are UTF-8 sequence types.
 * (I.e., one byte sequence, two byte... etc.). Remember that sequences
 * for *legal* UTF-8 will be 4 or fewer bytes total.
 *
 * The encoders in this file index the table with the total number of bytes
 * in the sequence (1 to 4); entry 0 is a placeholder.
 */
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
  202	
  203	/* --------------------------------------------------------------------- */
  204	
  205	/* The interface converts a whole buffer to avoid function-call overhead.
  206	 * Constants have been gathered. Loops & conditionals have been removed as
  207	 * much as possible for efficiency, in favor of drop-through switches.
  208	 * (See "Note A" at the bottom of the file for equivalent code.)
  209	 * If your compiler supports it, the "isLegalUTF8" call can be turned
  210	 * into an inline function.
  211	 */
  212	
  213	/* --------------------------------------------------------------------- */
  214	
  215	ConversionResult ConvertUTF16toUTF8 (
  216	    const UTF16** sourceStart, const UTF16* sourceEnd, 
  217	    UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
  218	    ConversionResult result = conversionOK;
  219	    const UTF16* source = *sourceStart;
  220	    UTF8* target = *targetStart;
  221	    while (source < sourceEnd) {
  222	    UTF32 ch;
  223	    unsigned short bytesToWrite = 0;
  224	    const UTF32 byteMask = 0xBF;
  225	    const UTF32 byteMark = 0x80; 
  226	    const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
  227	    ch = *source++;
  228	    /* If we have a surrogate pair, convert to UTF32 first. */
  229	    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
  230	        /* If the 16 bits following the high surrogate are in the source buffer... */
  231	        if (source < sourceEnd) {
  232	        UTF32 ch2 = *source;
  233	        /* If it's a low surrogate, convert to UTF32. */
  234	        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
  235	            ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
  236	            + (ch2 - UNI_SUR_LOW_START) + halfBase;
  237	            ++source;
  238	        } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
  239	            --source; /* return to the illegal value itself */
  240	            result = sourceIllegal;
  241	            break;
  242	        }
  243	        } else { /* We don't have the 16 bits following the high surrogate. */
  244	        --source; /* return to the high surrogate */
  245	        result = sourceExhausted;
  246	        break;
  247	        }
  248	    } else if (flags == strictConversion) {
  249	        /* UTF-16 surrogate values are illegal in UTF-32 */
  250	        if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
  251	        --source; /* return to the illegal value itself */
  252	        result = sourceIllegal;
  253	        break;
  254	        }
  255	    }
  256	    /* Figure out how many bytes the result will require */
  257	    if (ch < (UTF32)0x80) {      bytesToWrite = 1;
  258	    } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
  259	    } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
  260	    } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
  261	    } else {       bytesToWrite = 3;
  262	                        ch = UNI_REPLACEMENT_CHAR;
  263	    }
  264	
  265	    target += bytesToWrite;
  266	    if (target > targetEnd) {
  267	        source = oldSource; /* Back up source pointer! */
  268	        target -= bytesToWrite; result = targetExhausted; break;
  269	    }
  270	    switch (bytesToWrite) { /* note: everything falls through. */
  271	        case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  272	        case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  273	        case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  274	        case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
  275	    }
  276	    target += bytesToWrite;
  277	    }
  278	    *sourceStart = source;
  279	    *targetStart = target;
  280	    return result;
  281	}
  282	
  283	/* --------------------------------------------------------------------- */
  284	
  285	/*
  286	 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
  287	 * This must be called with the length pre-determined by the first byte.
  288	 * If not calling this from ConvertUTF8to*, then the length can be set by:
  289	 *  length = trailingBytesForUTF8[*source]+1;
  290	 * and the sequence is illegal right away if there aren't that many bytes
  291	 * available.
  292	 * If presented with a length > 4, this returns false.  The Unicode
  293	 * definition of UTF-8 goes up to 4-byte sequences.
  294	 */
  295	
  296	static Boolean isLegalUTF8(const UTF8 *source, int length) {
  297	    UTF8 a;
  298	    const UTF8 *srcptr = source+length;
  299	    switch (length) {
  300	    default: return false;
  301	    /* Everything else falls through when "true"... */
  302	    case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
  303	    case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
  304	    case 2: if ((a = (*--srcptr)) > 0xBF) return false;
  305	
  306	    switch (*source) {
  307	        /* no fall-through in this inner switch */
  308	        case 0xE0: if (a < 0xA0) return false; break;
  309	        case 0xED: if (a > 0x9F) return false; break;
  310	        case 0xF0: if (a < 0x90) return false; break;
  311	        case 0xF4: if (a > 0x8F) return false; break;
  312	        default:   if (a < 0x80) return false;
  313	    }
  314	
  315	    case 1: if (*source >= 0x80 && *source < 0xC2) return false;
  316	    }
  317	    if (*source > 0xF4) return false;
  318	    return true;
  319	}
  320	
  321	/* --------------------------------------------------------------------- */
  322	
  323	/*
  324	 * Exported function to return whether a UTF-8 sequence is legal or not.
  325	 * This is not used here; it's just exported.
  326	 */
  327	Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
  328	    int length = trailingBytesForUTF8[*source]+1;
  329	    if (source+length > sourceEnd) {
  330	    return false;
  331	    }
  332	    return isLegalUTF8(source, length);
  333	}
  334	
  335	/* --------------------------------------------------------------------- */
  336	
  337	ConversionResult ConvertUTF8toUTF16 (
  338	    const UTF8** sourceStart, const UTF8* sourceEnd, 
  339	    UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
  340	    ConversionResult result = conversionOK;
  341	    const UTF8* source = *sourceStart;
  342	    UTF16* target = *targetStart;
  343	    while (source < sourceEnd) {
  344	    UTF32 ch = 0;
  345	    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
  346	    if (source + extraBytesToRead >= sourceEnd) {
  347	        result = sourceExhausted; break;
  348	    }
  349	    /* Do this check whether lenient or strict */
  350	    if (! isLegalUTF8(source, extraBytesToRead+1)) {
  351	        result = sourceIllegal;
  352	        break;
  353	    }
  354	    /*
  355	     * The cases all fall through. See "Note A" below.
  356	     */
  357	    switch (extraBytesToRead) {
  358	        case 5: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
  359	        case 4: ch += *source++; ch <<= 6; /* remember, illegal UTF-8 */
  360	        case 3: ch += *source++; ch <<= 6;
  361	        case 2: ch += *source++; ch <<= 6;
  362	        case 1: ch += *source++; ch <<= 6;
  363	        case 0: ch += *source++;
  364	    }
  365	    ch -= offsetsFromUTF8[extraBytesToRead];
  366	
  367	    if (target >= targetEnd) {
  368	        source -= (extraBytesToRead+1); /* Back up source pointer! */
  369	        result = targetExhausted; break;
  370	    }
  371	    if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
  372	        /* UTF-16 surrogate values are illegal in UTF-32 */
  373	        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
  374	        if (flags == strictConversion) {
  375	            source -= (extraBytesToRead+1); /* return to the illegal value itself */
  376	            result = sourceIllegal;
  377	            break;
  378	        } else {
  379	            *target++ = UNI_REPLACEMENT_CHAR;
  380	        }
  381	        } else {
  382	        *target++ = (UTF16)ch; /* normal case */
  383	        }
  384	    } else if (ch > UNI_MAX_UTF16) {
  385	        if (flags == strictConversion) {
  386	        result = sourceIllegal;
  387	        source -= (extraBytesToRead+1); /* return to the start */
  388	        break; /* Bail out; shouldn't continue */
  389	        } else {
  390	        *target++ = UNI_REPLACEMENT_CHAR;
  391	        }
  392	    } else {
  393	        /* target is a character in range 0xFFFF - 0x10FFFF. */
  394	        if (target + 1 >= targetEnd) {
  395	        source -= (extraBytesToRead+1); /* Back up source pointer! */
  396	        result = targetExhausted; break;
  397	        }
  398	        ch -= halfBase;
  399	        *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
  400	        *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
  401	    }
  402	    }
  403	    *sourceStart = source;
  404	    *targetStart = target;
  405	    return result;
  406	}
  407	
  408	/* --------------------------------------------------------------------- */
  409	
  410	ConversionResult ConvertUTF32toUTF8 (
  411	    const UTF32** sourceStart, const UTF32* sourceEnd, 
  412	    UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
  413	    ConversionResult result = conversionOK;
  414	    const UTF32* source = *sourceStart;
  415	    UTF8* target = *targetStart;
  416	    while (source < sourceEnd) {
  417	    UTF32 ch;
  418	    unsigned short bytesToWrite = 0;
  419	    const UTF32 byteMask = 0xBF;
  420	    const UTF32 byteMark = 0x80; 
  421	    ch = *source++;
  422	    if (flags == strictConversion ) {
  423	        /* UTF-16 surrogate values are illegal in UTF-32 */
  424	        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
  425	        --source; /* return to the illegal value itself */
  426	        result = sourceIllegal;
  427	        break;
  428	        }
  429	    }
  430	    /*
  431	     * Figure out how many bytes the result will require. Turn any
  432	     * illegally large UTF32 things (> Plane 17) into replacement chars.
  433	     */
  434	    if (ch < (UTF32)0x80) {      bytesToWrite = 1;
  435	    } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
  436	    } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
  437	    } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
  438	    } else {       bytesToWrite = 3;
  439	                        ch = UNI_REPLACEMENT_CHAR;
  440	                        result = sourceIllegal;
  441	    }
  442	
  443	    target += bytesToWrite;
  444	    if (target > targetEnd) {
  445	        --source; /* Back up source pointer! */
  446	        target -= bytesToWrite; result = targetExhausted; break;
  447	    }
  448	    switch (bytesToWrite) { /* note: everything falls through. */
  449	        case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  450	        case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  451	        case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
  452	        case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
  453	    }
  454	    target += bytesToWrite;
  455	    }
  456	    *sourceStart = source;
  457	    *targetStart = target;
  458	    return result;
  459	}
  460	
  461	/* --------------------------------------------------------------------- */
  462	
  463	ConversionResult ConvertUTF8toUTF32 (
  464	    const UTF8** sourceStart, const UTF8* sourceEnd, 
  465	    UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
  466	    ConversionResult result = conversionOK;
  467	    const UTF8* source = *sourceStart;
  468	    UTF32* target = *targetStart;
  469	    while (source < sourceEnd) {
  470	    UTF32 ch = 0;
  471	    unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
  472	    if (source + extraBytesToRead >= sourceEnd) {
  473	        result = sourceExhausted; break;
  474	    }
  475	    /* Do this check whether lenient or strict */
  476	    if (! isLegalUTF8(source, extraBytesToRead+1)) {
  477	        result = sourceIllegal;
  478	        break;
  479	    }
  480	    /*
  481	     * The cases all fall through. See "Note A" below.
  482	     */
  483	    switch (extraBytesToRead) {
  484	        case 5: ch += *source++; ch <<= 6;
  485	        case 4: ch += *source++; ch <<= 6;
  486	        case 3: ch += *source++; ch <<= 6;
  487	        case 2: ch += *source++; ch <<= 6;
  488	        case 1: ch += *source++; ch <<= 6;
  489	        case 0: ch += *source++;
  490	    }
  491	    ch -= offsetsFromUTF8[extraBytesToRead];
  492	
  493	    if (target >= targetEnd) {
  494	        source -= (extraBytesToRead+1); /* Back up the source pointer! */
  495	        result = targetExhausted; break;
  496	    }
  497	    if (ch <= UNI_MAX_LEGAL_UTF32) {
  498	        /*
  499	         * UTF-16 surrogate values are illegal in UTF-32, and anything
  500	         * over Plane 17 (> 0x10FFFF) is illegal.
  501	         */
  502	        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
  503	        if (flags == strictConversion) {
  504	            source -= (extraBytesToRead+1); /* return to the illegal value itself */
  505	            result = sourceIllegal;
  506	            break;
  507	        } else {
  508	            *target++ = UNI_REPLACEMENT_CHAR;
  509	        }
  510	        } else {
  511	        *target++ = ch;
  512	        }
  513	    } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
  514	        result = sourceIllegal;
  515	        *target++ = UNI_REPLACEMENT_CHAR;
  516	    }
  517	    }
  518	    *sourceStart = source;
  519	    *targetStart = target;
  520	    return result;
  521	}
  522