utf8-encode.c - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
utf8-encode.c (2238B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <stddef.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <string.h>
6
7 #include "../grapheme.h"
8 #include "util.h"
9
/*
 * Test vectors for the UTF-8 encoder: every entry pairs one input
 * codepoint with the exact byte sequence, and its length, that the
 * encoder is expected to produce for it.
 */
static const struct {
	uint_least32_t cp; /* codepoint handed to the encoder */
	char *exp_arr;     /* bytes the encoder is expected to emit */
	size_t exp_len;    /* number of bytes in exp_arr */
} enc_test[] = {
	/* invalid input: UTF-16 surrogate half, expected bytes EF BF BD */
	{
		.cp      = UINT32_C(0xD800),
		.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
		.exp_len = 3,
	},
	/* invalid input: beyond what UTF-16 can represent, expected EF BF BD */
	{
		.cp      = UINT32_C(0x110000),
		.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
		.exp_len = 3,
	},
	/* 1-byte sequence */
	{
		.cp      = UINT32_C(0x01),
		.exp_arr = (char *)(unsigned char[]){ 0x01 },
		.exp_len = 1,
	},
	/* 2-byte sequence */
	{
		.cp      = UINT32_C(0xFF),
		.exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
		.exp_len = 2,
	},
	/* 3-byte sequence */
	{
		.cp      = UINT32_C(0xFFF),
		.exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
		.exp_len = 3,
	},
	/* 4-byte sequence */
	{
		.cp      = UINT32_C(0xFFFFF),
		.exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
		.exp_len = 4,
	},
};
52
53 int
54 main(int argc, char *argv[])
55 {
56 size_t i, j, failed;
57
58 (void)argc;
59
60 /* UTF-8 encoder test */
61 for (i = 0, failed = 0; i < LEN(enc_test); i++) {
62 char arr[4];
63 size_t len;
64
65 len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
66
67 if (len != enc_test[i].exp_len ||
68 memcmp(arr, enc_test[i].exp_arr, len)) {
69 fprintf(stderr,
70 "%s, Failed test %zu: "
71 "Expected (",
72 argv[0], i);
73 for (j = 0; j < enc_test[i].exp_len; j++) {
74 fprintf(stderr, "0x%x", enc_test[i].exp_arr[j]);
75 if (j + 1 < enc_test[i].exp_len) {
76 fprintf(stderr, " ");
77 }
78 }
79 fprintf(stderr, "), but got (");
80 for (j = 0; j < len; j++) {
81 fprintf(stderr, "0x%x", arr[j]);
82 if (j + 1 < len) {
83 fprintf(stderr, " ");
84 }
85 }
86 fprintf(stderr, ").\n");
87 failed++;
88 }
89 }
90 printf("%s: %zu/%zu unit tests passed.\n", argv[0],
91 LEN(enc_test) - failed, LEN(enc_test));
92
93 return (failed > 0) ? 1 : 0;
94 }