utf8-encode.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       utf8-encode.c (2238B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stddef.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <string.h>
            6 
            7 #include "../grapheme.h"
            8 #include "util.h"
            9 
           10 static const struct {
           11         uint_least32_t cp; /* input codepoint */
           12         char *exp_arr;     /* expected UTF-8 byte sequence */
           13         size_t exp_len;    /* expected length of UTF-8 sequence */
           14 } enc_test[] = {
           15         {
           16                 /* invalid codepoint (UTF-16 surrogate half) */
           17                 .cp = UINT32_C(0xD800),
           18                 .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
           19                 .exp_len = 3,
           20         },
           21         {
           22                 /* invalid codepoint (UTF-16-unrepresentable) */
           23                 .cp = UINT32_C(0x110000),
           24                 .exp_arr = (char *)(unsigned char[]) { 0xEF, 0xBF, 0xBD },
           25                 .exp_len = 3,
           26         },
           27         {
           28                 /* codepoint encoded to a 1-byte sequence */
           29                 .cp = 0x01,
           30                 .exp_arr = (char *)(unsigned char[]) { 0x01 },
           31                 .exp_len = 1,
           32         },
           33         {
           34                 /* codepoint encoded to a 2-byte sequence */
           35                 .cp = 0xFF,
           36                 .exp_arr = (char *)(unsigned char[]) { 0xC3, 0xBF },
           37                 .exp_len = 2,
           38         },
           39         {
           40                 /* codepoint encoded to a 3-byte sequence */
           41                 .cp = 0xFFF,
           42                 .exp_arr = (char *)(unsigned char[]) { 0xE0, 0xBF, 0xBF },
           43                 .exp_len = 3,
           44         },
           45         {
           46                 /* codepoint encoded to a 4-byte sequence */
           47                 .cp = UINT32_C(0xFFFFF),
           48                 .exp_arr = (char *)(unsigned char[]) { 0xF3, 0xBF, 0xBF, 0xBF },
           49                 .exp_len = 4,
           50         },
           51 };
           52 
           53 int
           54 main(int argc, char *argv[])
           55 {
           56         size_t i, j, failed;
           57 
           58         (void)argc;
           59 
           60         /* UTF-8 encoder test */
           61         for (i = 0, failed = 0; i < LEN(enc_test); i++) {
           62                 char arr[4];
           63                 size_t len;
           64 
           65                 len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
           66 
           67                 if (len != enc_test[i].exp_len ||
           68                     memcmp(arr, enc_test[i].exp_arr, len)) {
           69                         fprintf(stderr,
           70                                 "%s, Failed test %zu: "
           71                                 "Expected (",
           72                                 argv[0], i);
           73                         for (j = 0; j < enc_test[i].exp_len; j++) {
           74                                 fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]);
           75                                 if (j + 1 < enc_test[i].exp_len) {
           76                                         fprintf(stderr, " ");
           77                                 }
           78                         }
           79                         fprintf(stderr, "), but got (");
           80                         for (j = 0; j < len; j++) {
           81                                 fprintf(stderr, "0x%x", arr[j]);
           82                                 if (j + 1 < len) {
           83                                         fprintf(stderr, " ");
           84                                 }
           85                         }
           86                         fprintf(stderr, ").\n");
           87                         failed++;
           88                 }
           89         }
           90         printf("%s: %zu/%zu unit tests passed.\n", argv[0],
           91                LEN(enc_test) - failed, LEN(enc_test));
           92 
           93         return (failed > 0) ? 1 : 0;
           94 }