case.c - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
case.c (16477B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <stdbool.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <string.h>
6
7 #include "../grapheme.h"
8 #include "util.h"
9
/*
 * One table entry for the grapheme_is_*case_utf8() unit tests: a label,
 * the arguments handed to the function under test, and the results it is
 * expected to produce. Member order matters, because the test tables
 * below fill the nested structs positionally.
 */
struct unit_test_is_case_utf8 {
	/* label printed in the diagnostic when the entry fails */
	const char *description;

	/* arguments passed to the function under test */
	struct {
		/* input string (UTF-8 bytes) */
		const char *src;
		/*
		 * length argument passed along with src; entries labelled
		 * "NUL-terminated" pass SIZE_MAX here
		 */
		size_t srclen;
	} input;

	/* expected results */
	struct {
		/* expected boolean return value */
		bool ret;
		/* expected value stored through the length out-pointer */
		size_t caselen;
	} output;
};
23
/*
 * One table entry for the grapheme_to_*case_utf8() unit tests: a label,
 * the arguments handed to the function under test, and the expected
 * destination contents and return value. Member order matters, because
 * the test tables below fill the nested structs positionally.
 */
struct unit_test_to_case_utf8 {
	/* label printed in the diagnostic when the entry fails */
	const char *description;

	/* arguments passed to the function under test */
	struct {
		/* input string (UTF-8 bytes) */
		const char *src;
		/*
		 * length argument passed along with src; entries labelled
		 * "NUL-terminated" pass SIZE_MAX here
		 */
		size_t srclen;
		/* size of the destination buffer offered to the function */
		size_t destlen;
	} input;

	/* expected results */
	struct {
		/* expected bytes at the start of the destination buffer */
		const char *dest;
		/* expected return value */
		size_t ret;
	} output;
};
38
39 static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
40 {
41 .description = "empty input",
42 .input = { "", 0 },
43 .output = { true, 0 },
44 },
45 {
46 .description = "one character, violation",
47 .input = { "A", 1 },
48 .output = { false, 0 },
49 },
50 {
51 .description = "one character, confirmation",
52 .input = { "\xC3\x9F", 2 },
53 .output = { true, 2 },
54 },
55 {
56 .description = "one character, violation, NUL-terminated",
57 .input = { "A", SIZE_MAX },
58 .output = { false, 0 },
59 },
60 {
61 .description = "one character, confirmation, NUL-terminated",
62 .input = { "\xC3\x9F", SIZE_MAX },
63 .output = { true, 2 },
64 },
65 {
66 .description = "one word, violation",
67 .input = { "Hello", 5 },
68 .output = { false, 0 },
69 },
70 {
71 .description = "one word, partial confirmation",
72 .input = { "gru"
73 "\xC3\x9F"
74 "fOrmel",
75 11 },
76 .output = { false, 6 },
77 },
78 {
79 .description = "one word, full confirmation",
80 .input = { "gru"
81 "\xC3\x9F"
82 "formel",
83 11 },
84 .output = { true, 11 },
85 },
86 {
87 .description = "one word, violation, NUL-terminated",
88 .input = { "Hello", SIZE_MAX },
89 .output = { false, 0 },
90 },
91 {
92 .description = "one word, partial confirmation, NUL-terminated",
93 .input = { "gru"
94 "\xC3\x9F"
95 "fOrmel",
96 SIZE_MAX },
97 .output = { false, 6 },
98 },
99 {
100 .description = "one word, full confirmation, NUL-terminated",
101 .input = { "gru"
102 "\xC3\x9F"
103 "formel",
104 SIZE_MAX },
105 .output = { true, 11 },
106 },
107 };
108
109 static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
110 {
111 .description = "empty input",
112 .input = { "", 0 },
113 .output = { true, 0 },
114 },
115 {
116 .description = "one character, violation",
117 .input = { "\xC3\x9F", 2 },
118 .output = { false, 0 },
119 },
120 {
121 .description = "one character, confirmation",
122 .input = { "A", 1 },
123 .output = { true, 1 },
124 },
125 {
126 .description = "one character, violation, NUL-terminated",
127 .input = { "\xC3\x9F", SIZE_MAX },
128 .output = { false, 0 },
129 },
130 {
131 .description = "one character, confirmation, NUL-terminated",
132 .input = { "A", SIZE_MAX },
133 .output = { true, 1 },
134 },
135 {
136 .description = "one word, violation",
137 .input = { "hello", 5 },
138 .output = { false, 0 },
139 },
140 {
141 .description = "one word, partial confirmation",
142 .input = { "GRU"
143 "\xC3\x9F"
144 "formel",
145 11 },
146 .output = { false, 3 },
147 },
148 {
149 .description = "one word, full confirmation",
150 .input = { "HELLO", 5 },
151 .output = { true, 5 },
152 },
153 {
154 .description = "one word, violation, NUL-terminated",
155 .input = { "hello", SIZE_MAX },
156 .output = { false, 0 },
157 },
158 {
159 .description = "one word, partial confirmation, NUL-terminated",
160 .input = { "GRU"
161 "\xC3\x9F"
162 "formel",
163 SIZE_MAX },
164 .output = { false, 3 },
165 },
166 {
167 .description = "one word, full confirmation, NUL-terminated",
168 .input = { "HELLO", SIZE_MAX },
169 .output = { true, 5 },
170 },
171 };
172
173 static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
174 {
175 .description = "empty input",
176 .input = { "", 0 },
177 .output = { true, 0 },
178 },
179 {
180 .description = "one character, violation",
181 .input = { "\xC3\x9F", 2 },
182 .output = { false, 0 },
183 },
184 {
185 .description = "one character, confirmation",
186 .input = { "A", 1 },
187 .output = { true, 1 },
188 },
189 {
190 .description = "one character, violation, NUL-terminated",
191 .input = { "\xC3\x9F", SIZE_MAX },
192 .output = { false, 0 },
193 },
194 {
195 .description = "one character, confirmation, NUL-terminated",
196 .input = { "A", SIZE_MAX },
197 .output = { true, 1 },
198 },
199 {
200 .description = "one word, violation",
201 .input = { "hello", 5 },
202 .output = { false, 0 },
203 },
204 {
205 .description = "one word, partial confirmation",
206 .input = { "Gru"
207 "\xC3\x9F"
208 "fOrmel",
209 11 },
210 .output = { false, 6 },
211 },
212 {
213 .description = "one word, full confirmation",
214 .input = { "Gru"
215 "\xC3\x9F"
216 "formel",
217 11 },
218 .output = { true, 11 },
219 },
220 {
221 .description = "one word, violation, NUL-terminated",
222 .input = { "hello", SIZE_MAX },
223 .output = { false, 0 },
224 },
225 {
226 .description = "one word, partial confirmation, NUL-terminated",
227 .input = { "Gru"
228 "\xC3\x9F"
229 "fOrmel",
230 SIZE_MAX },
231 .output = { false, 6 },
232 },
233 {
234 .description = "one word, full confirmation, NUL-terminated",
235 .input = { "Gru"
236 "\xC3\x9F"
237 "formel",
238 SIZE_MAX },
239 .output = { true, 11 },
240 },
241 {
242 .description = "multiple words, partial confirmation",
243 .input = { "Hello Gru"
244 "\xC3\x9F"
245 "fOrmel!",
246 18 },
247 .output = { false, 12 },
248 },
249 {
250 .description = "multiple words, full confirmation",
251 .input = { "Hello Gru"
252 "\xC3\x9F"
253 "formel!",
254 18 },
255 .output = { true, 18 },
256 },
257 {
258 .description =
259 "multiple words, partial confirmation, NUL-terminated",
260 .input = { "Hello Gru"
261 "\xC3\x9F"
262 "fOrmel!",
263 SIZE_MAX },
264 .output = { false, 12 },
265 },
266 {
267 .description =
268 "multiple words, full confirmation, NUL-terminated",
269 .input = { "Hello Gru"
270 "\xC3\x9F"
271 "formel!",
272 SIZE_MAX },
273 .output = { true, 18 },
274 },
275 };
276
277 static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
278 {
279 .description = "empty input",
280 .input = { "", 0, 10 },
281 .output = { "", 0 },
282 },
283 {
284 .description = "empty output",
285 .input = { "hello", 5, 0 },
286 .output = { "", 5 },
287 },
288 {
289 .description = "one character, conversion",
290 .input = { "A", 1, 10 },
291 .output = { "a", 1 },
292 },
293 {
294 .description = "one character, no conversion",
295 .input = { "\xC3\x9F", 2, 10 },
296 .output = { "\xC3\x9F", 2 },
297 },
298 {
299 .description = "one character, conversion, truncation",
300 .input = { "A", 1, 0 },
301 .output = { "", 1 },
302 },
303 {
304 .description = "one character, conversion, NUL-terminated",
305 .input = { "A", SIZE_MAX, 10 },
306 .output = { "a", 1 },
307 },
308 {
309 .description = "one character, no conversion, NUL-terminated",
310 .input = { "\xC3\x9F", SIZE_MAX, 10 },
311 .output = { "\xC3\x9F", 2 },
312 },
313 {
314 .description =
315 "one character, conversion, NUL-terminated, truncation",
316 .input = { "A", SIZE_MAX, 0 },
317 .output = { "", 1 },
318 },
319 {
320 .description = "one word, conversion",
321 .input = { "wOrD", 4, 10 },
322 .output = { "word", 4 },
323 },
324 {
325 .description = "one word, no conversion",
326 .input = { "word", 4, 10 },
327 .output = { "word", 4 },
328 },
329 {
330 .description = "one word, conversion, truncation",
331 .input = { "wOrD", 4, 3 },
332 .output = { "wo", 4 },
333 },
334 {
335 .description = "one word, conversion, NUL-terminated",
336 .input = { "wOrD", SIZE_MAX, 10 },
337 .output = { "word", 4 },
338 },
339 {
340 .description = "one word, no conversion, NUL-terminated",
341 .input = { "word", SIZE_MAX, 10 },
342 .output = { "word", 4 },
343 },
344 {
345 .description =
346 "one word, conversion, NUL-terminated, truncation",
347 .input = { "wOrD", SIZE_MAX, 3 },
348 .output = { "wo", 4 },
349 },
350 };
351
352 static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
353 {
354 .description = "empty input",
355 .input = { "", 0, 10 },
356 .output = { "", 0 },
357 },
358 {
359 .description = "empty output",
360 .input = { "hello", 5, 0 },
361 .output = { "", 5 },
362 },
363 {
364 .description = "one character, conversion",
365 .input = { "\xC3\x9F", 2, 10 },
366 .output = { "SS", 2 },
367 },
368 {
369 .description = "one character, no conversion",
370 .input = { "A", 1, 10 },
371 .output = { "A", 1 },
372 },
373 {
374 .description = "one character, conversion, truncation",
375 .input = { "\xC3\x9F", 2, 0 },
376 .output = { "", 2 },
377 },
378 {
379 .description = "one character, conversion, NUL-terminated",
380 .input = { "\xC3\x9F", SIZE_MAX, 10 },
381 .output = { "SS", 2 },
382 },
383 {
384 .description = "one character, no conversion, NUL-terminated",
385 .input = { "A", SIZE_MAX, 10 },
386 .output = { "A", 1 },
387 },
388 {
389 .description =
390 "one character, conversion, NUL-terminated, truncation",
391 .input = { "\xC3\x9F", SIZE_MAX, 0 },
392 .output = { "", 2 },
393 },
394 {
395 .description = "one word, conversion",
396 .input = { "gRu"
397 "\xC3\x9F"
398 "fOrMel",
399 11, 15 },
400 .output = { "GRUSSFORMEL", 11 },
401 },
402 {
403 .description = "one word, no conversion",
404 .input = { "WORD", 4, 10 },
405 .output = { "WORD", 4 },
406 },
407 {
408 .description = "one word, conversion, truncation",
409 .input = { "gRu"
410 "\xC3\x9F"
411 "formel",
412 11, 5 },
413 .output = { "GRUS", 11 },
414 },
415 {
416 .description = "one word, conversion, NUL-terminated",
417 .input = { "gRu"
418 "\xC3\x9F"
419 "formel",
420 SIZE_MAX, 15 },
421 .output = { "GRUSSFORMEL", 11 },
422 },
423 {
424 .description = "one word, no conversion, NUL-terminated",
425 .input = { "WORD", SIZE_MAX, 10 },
426 .output = { "WORD", 4 },
427 },
428 {
429 .description =
430 "one word, conversion, NUL-terminated, truncation",
431 .input = { "gRu"
432 "\xC3\x9F"
433 "formel",
434 SIZE_MAX, 5 },
435 .output = { "GRUS", 11 },
436 },
437 };
438
439 static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
440 {
441 .description = "empty input",
442 .input = { "", 0, 10 },
443 .output = { "", 0 },
444 },
445 {
446 .description = "empty output",
447 .input = { "hello", 5, 0 },
448 .output = { "", 5 },
449 },
450 {
451 .description = "one character, conversion",
452 .input = { "a", 1, 10 },
453 .output = { "A", 1 },
454 },
455 {
456 .description = "one character, no conversion",
457 .input = { "A", 1, 10 },
458 .output = { "A", 1 },
459 },
460 {
461 .description = "one character, conversion, truncation",
462 .input = { "a", 1, 0 },
463 .output = { "", 1 },
464 },
465 {
466 .description = "one character, conversion, NUL-terminated",
467 .input = { "a", SIZE_MAX, 10 },
468 .output = { "A", 1 },
469 },
470 {
471 .description = "one character, no conversion, NUL-terminated",
472 .input = { "A", SIZE_MAX, 10 },
473 .output = { "A", 1 },
474 },
475 {
476 .description =
477 "one character, conversion, NUL-terminated, truncation",
478 .input = { "a", SIZE_MAX, 0 },
479 .output = { "", 1 },
480 },
481 {
482 .description = "one word, conversion",
483 .input = { "heLlo", 5, 10 },
484 .output = { "Hello", 5 },
485 },
486 {
487 .description = "one word, no conversion",
488 .input = { "Hello", 5, 10 },
489 .output = { "Hello", 5 },
490 },
491 {
492 .description = "one word, conversion, truncation",
493 .input = { "heLlo", 5, 2 },
494 .output = { "H", 5 },
495 },
496 {
497 .description = "one word, conversion, NUL-terminated",
498 .input = { "heLlo", SIZE_MAX, 10 },
499 .output = { "Hello", 5 },
500 },
501 {
502 .description = "one word, no conversion, NUL-terminated",
503 .input = { "Hello", SIZE_MAX, 10 },
504 .output = { "Hello", 5 },
505 },
506 {
507 .description =
508 "one word, conversion, NUL-terminated, truncation",
509 .input = { "heLlo", SIZE_MAX, 3 },
510 .output = { "He", 5 },
511 },
512 {
513 .description = "two words, conversion",
514 .input = { "heLlo wORLd!", 12, 20 },
515 .output = { "Hello World!", 12 },
516 },
517 {
518 .description = "two words, no conversion",
519 .input = { "Hello World!", 12, 20 },
520 .output = { "Hello World!", 12 },
521 },
522 {
523 .description = "two words, conversion, truncation",
524 .input = { "heLlo wORLd!", 12, 8 },
525 .output = { "Hello W", 12 },
526 },
527 {
528 .description = "two words, conversion, NUL-terminated",
529 .input = { "heLlo wORLd!", SIZE_MAX, 20 },
530 .output = { "Hello World!", 12 },
531 },
532 {
533 .description = "two words, no conversion, NUL-terminated",
534 .input = { "Hello World!", SIZE_MAX, 20 },
535 .output = { "Hello World!", 12 },
536 },
537 {
538 .description =
539 "two words, conversion, NUL-terminated, truncation",
540 .input = { "heLlo wORLd!", SIZE_MAX, 4 },
541 .output = { "Hel", 12 },
542 },
543 };
544
545 static int
546 unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
547 const char *argv0)
548 {
549 const struct unit_test_is_case_utf8 *test =
550 (const struct unit_test_is_case_utf8 *)t + off;
551 bool ret = false;
552 size_t caselen = 0x7f;
553
554 if (t == is_lowercase_utf8) {
555 ret = grapheme_is_lowercase_utf8(test->input.src,
556 test->input.srclen, &caselen);
557 } else if (t == is_uppercase_utf8) {
558 ret = grapheme_is_uppercase_utf8(test->input.src,
559 test->input.srclen, &caselen);
560 } else if (t == is_titlecase_utf8) {
561 ret = grapheme_is_titlecase_utf8(test->input.src,
562 test->input.srclen, &caselen);
563
564 } else {
565 goto err;
566 }
567
568 /* check results */
569 if (ret != test->output.ret || caselen != test->output.caselen) {
570 goto err;
571 }
572
573 return 0;
574 err:
575 fprintf(stderr,
576 "%s: %s: Failed unit test %zu \"%s\" "
577 "(returned (%s, %zu) instead of (%s, %zu)).\n",
578 argv0, name, off, test->description, ret ? "true" : "false",
579 caselen, test->output.ret ? "true" : "false",
580 test->output.caselen);
581 return 1;
582 }
583
584 static int
585 unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
586 const char *argv0)
587 {
588 const struct unit_test_to_case_utf8 *test =
589 (const struct unit_test_to_case_utf8 *)t + off;
590 size_t ret = 0, i;
591 char buf[512];
592
593 /* fill the array with canary values */
594 memset(buf, 0x7f, LEN(buf));
595
596 if (t == to_lowercase_utf8) {
597 ret = grapheme_to_lowercase_utf8(test->input.src,
598 test->input.srclen, buf,
599 test->input.destlen);
600 } else if (t == to_uppercase_utf8) {
601 ret = grapheme_to_uppercase_utf8(test->input.src,
602 test->input.srclen, buf,
603 test->input.destlen);
604 } else if (t == to_titlecase_utf8) {
605 ret = grapheme_to_titlecase_utf8(test->input.src,
606 test->input.srclen, buf,
607 test->input.destlen);
608 } else {
609 goto err;
610 }
611
612 /* check results */
613 if (ret != test->output.ret ||
614 memcmp(buf, test->output.dest,
615 MIN(test->input.destlen, test->output.ret))) {
616 goto err;
617 }
618
619 /* check that none of the canary values have been overwritten */
620 for (i = test->input.destlen; i < LEN(buf); i++) {
621 if (buf[i] != 0x7f) {
622 goto err;
623 }
624 }
625
626 return 0;
627 err:
628 fprintf(stderr,
629 "%s: %s: Failed unit test %zu \"%s\" "
630 "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
631 argv0, name, off, test->description, (int)ret, buf, ret,
632 (int)test->output.ret, test->output.dest, test->output.ret);
633 return 1;
634 }
635
636 int
637 main(int argc, char *argv[])
638 {
639 (void)argc;
640
641 return run_unit_tests(unit_test_callback_is_case_utf8,
642 is_lowercase_utf8, LEN(is_lowercase_utf8),
643 "grapheme_is_lowercase_utf8", argv[0]) +
644 run_unit_tests(unit_test_callback_is_case_utf8,
645 is_uppercase_utf8, LEN(is_uppercase_utf8),
646 "grapheme_is_uppercase_utf8", argv[0]) +
647 run_unit_tests(unit_test_callback_is_case_utf8,
648 is_titlecase_utf8, LEN(is_titlecase_utf8),
649 "grapheme_is_titlecase_utf8", argv[0]) +
650 run_unit_tests(unit_test_callback_to_case_utf8,
651 to_lowercase_utf8, LEN(to_lowercase_utf8),
652 "grapheme_to_lowercase_utf8", argv[0]) +
653 run_unit_tests(unit_test_callback_to_case_utf8,
654 to_uppercase_utf8, LEN(to_uppercase_utf8),
655 "grapheme_to_uppercase_utf8", argv[0]) +
656 run_unit_tests(unit_test_callback_to_case_utf8,
657 to_titlecase_utf8, LEN(to_titlecase_utf8),
658 "grapheme_to_titlecase_utf8", argv[0]);
659 }