bidirectional-test.c - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
bidirectional-test.c (15968B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <inttypes.h>
4 #include <stddef.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
9 #include "../grapheme.h"
10 #include "util.h"
11
12 struct bidirectional_test {
13 uint_least32_t *cp;
14 size_t cplen;
15 enum grapheme_bidirectional_direction mode[3];
16 size_t modelen;
17 enum grapheme_bidirectional_direction resolved;
18 int_least8_t *level;
19 int_least16_t *reorder;
20 size_t reorderlen;
21 };
22
/*
 * Maps each bidirectional class name used in the test data to one
 * representative codepoint of that class.
 */
static const struct {
	const char *class;
	const uint_least32_t cp;
} classcpmap[] = {
	{ .class = "L", .cp = UINT32_C(0x0041) },
	{ .class = "AL", .cp = UINT32_C(0x0608) },
	{ .class = "AN", .cp = UINT32_C(0x0600) },
	{ .class = "B", .cp = UINT32_C(0x000A) },
	{ .class = "BN", .cp = UINT32_C(0x0000) },
	{ .class = "CS", .cp = UINT32_C(0x002C) },
	{ .class = "EN", .cp = UINT32_C(0x0030) },
	{ .class = "ES", .cp = UINT32_C(0x002B) },
	{ .class = "ET", .cp = UINT32_C(0x0023) },
	{ .class = "FSI", .cp = UINT32_C(0x2068) },
	{ .class = "LRE", .cp = UINT32_C(0x202A) },
	{ .class = "LRI", .cp = UINT32_C(0x2066) },
	{ .class = "LRO", .cp = UINT32_C(0x202D) },
	{ .class = "NSM", .cp = UINT32_C(0x0300) },
	{ .class = "ON", .cp = UINT32_C(0x0021) },
	{ .class = "PDF", .cp = UINT32_C(0x202C) },
	{ .class = "PDI", .cp = UINT32_C(0x2069) },
	{ .class = "R", .cp = UINT32_C(0x05BE) },
	{ .class = "RLE", .cp = UINT32_C(0x202B) },
	{ .class = "RLI", .cp = UINT32_C(0x2067) },
	{ .class = "RLO", .cp = UINT32_C(0x202E) },
	{ .class = "S", .cp = UINT32_C(0x0009) },
	{ .class = "WS", .cp = UINT32_C(0x000C) },
};

/*
 * Look up the class name given by the first len bytes of str (which
 * need not be NUL-terminated at that point) and store its representative
 * codepoint in *cp.
 *
 * Returns 0 on success. Returns 1 and prints a diagnostic to stderr if
 * the name is not in classcpmap; *cp is left untouched in that case.
 */
static int
classtocp(const char *str, size_t len, uint_least32_t *cp)
{
	size_t i;

	for (i = 0; i < sizeof(classcpmap) / sizeof(*classcpmap); i++) {
		/*
		 * Require an exact match. Comparing only the first len
		 * bytes would let a prefix such as "E" match "EN" and an
		 * empty token match the first table entry.
		 */
		if (strlen(classcpmap[i].class) == len &&
		    !memcmp(str, classcpmap[i].class, len)) {
			*cp = classcpmap[i].cp;
			return 0;
		}
	}
	fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
	        str);

	return 1;
}
68
/*
 * Convert a space-separated list of bidirectional class names into a
 * freshly allocated array of representative codepoints.
 *
 * On return *cp points to the array (NULL for an empty string) and
 * *cplen holds the number of list entries. Returns 0 on success and 1
 * if a class name could not be resolved. Terminates the program if the
 * allocation fails.
 */
static int
parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
{
	const char *token, *space;
	size_t ntokens, toklen, idx;

	if (str[0] == '\0') {
		*cp = NULL;
		*cplen = 0;
		return 0;
	}

	/* every separating space adds one more entry to the list */
	ntokens = 1;
	for (space = strchr(str, ' '); space != NULL;
	     space = strchr(space + 1, ' ')) {
		ntokens++;
	}

	*cplen = ntokens;
	*cp = calloc(ntokens, sizeof(**cp));
	if (*cp == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: translate each token in turn */
	for (idx = 0, token = str;; idx++, token = space + 1) {
		space = strchr(token, ' ');
		toklen = (space != NULL) ? (size_t)(space - token) :
		                           strlen(token);

		if (classtocp(token, toklen, &(*cp)[idx])) {
			return 1;
		}
		if (space == NULL) {
			/* that was the last token */
			break;
		}
	}

	return 0;
}
107
/*
 * Parse the first len bytes of str as an embedding level.
 *
 * A lone 'x' denotes an ignored character and yields -1. An empty
 * token yields 0. Otherwise the token must consist solely of decimal
 * digits and denote a value in 0..127.
 *
 * Returns 0 on success. Returns 1 and prints a diagnostic to stderr
 * if the token is not numeric or exceeds the range; *level is left
 * untouched in that case.
 */
static int
strtolevel(const char *str, size_t len, int_least8_t *level)
{
	enum { LEVEL_MAX = 127 };
	size_t i;
	int value = 0;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a level of -1
		 */
		*level = -1;
		return 0;
	}

	for (i = 0; i < len; i++) {
		/* a character is a non-digit if it lies on EITHER side */
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr,
			        "strtolevel: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}

		/* bail out early so the accumulator can never overflow */
		value = value * 10 + (str[i] - '0');
		if (value > LEVEL_MAX) {
			fprintf(stderr, "strtolevel: '%.*s' is too large.\n",
			        (int)len, str);
			return 1;
		}
	}

	*level = (int_least8_t)value;

	return 0;
}
160
/*
 * Parse the first len bytes of str as a reorder index.
 *
 * A lone 'x' denotes an ignored character and yields -1. An empty
 * token yields 0. Otherwise the token must consist solely of decimal
 * digits and denote a value in 0..999.
 *
 * Returns 0 on success. Returns 1 and prints a diagnostic to stderr
 * if the token is not numeric or exceeds the range; *reorder is left
 * untouched in that case.
 */
static int
strtoreorder(const char *str, size_t len, int_least16_t *reorder)
{
	enum { REORDER_MAX = 999 };
	size_t i;
	int value = 0;

	if (len == 1 && str[0] == 'x') {
		/*
		 * 'x' indicates those characters that are ignored.
		 * We indicate this with a reorder of -1
		 */
		*reorder = -1;
		return 0;
	}

	for (i = 0; i < len; i++) {
		/* a character is a non-digit if it lies on EITHER side */
		if (str[i] < '0' || str[i] > '9') {
			fprintf(stderr,
			        "strtoreorder: '%.*s' is not an integer.\n",
			        (int)len, str);
			return 1;
		}

		/* bail out early so the accumulator can never overflow */
		value = value * 10 + (str[i] - '0');
		if (value > REORDER_MAX) {
			fprintf(stderr,
			        "strtoreorder: '%.*s' is too large.\n",
			        (int)len, str);
			return 1;
		}
	}

	*reorder = (int_least16_t)value;

	return 0;
}
209
/*
 * Convert a space-separated list of level tokens into a freshly
 * allocated array of levels.
 *
 * On return *level points to the array (NULL for an empty string) and
 * *levellen holds the number of list entries. Returns 0 on success and
 * 1 if a token was rejected by strtolevel(). Terminates the program if
 * the allocation fails.
 */
static int
parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
{
	const char *token, *space;
	size_t ntokens, toklen, idx;

	if (str[0] == '\0') {
		*level = NULL;
		*levellen = 0;
		return 0;
	}

	/* every separating space adds one more entry to the list */
	ntokens = 1;
	for (space = strchr(str, ' '); space != NULL;
	     space = strchr(space + 1, ' ')) {
		ntokens++;
	}

	*levellen = ntokens;
	*level = calloc(ntokens, sizeof(**level));
	if (*level == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert each token in turn */
	for (idx = 0, token = str;; idx++, token = space + 1) {
		space = strchr(token, ' ');
		toklen = (space != NULL) ? (size_t)(space - token) :
		                           strlen(token);

		if (strtolevel(token, toklen, &(*level)[idx])) {
			return 1;
		}
		if (space == NULL) {
			/* that was the last token */
			break;
		}
	}

	return 0;
}
249
/*
 * Convert a space-separated list of reorder tokens into a freshly
 * allocated array of reorder indices.
 *
 * On return *reorder points to the array (NULL for an empty string)
 * and *reorderlen holds the number of list entries. Returns 0 on
 * success and 1 if a token was rejected by strtoreorder(). Terminates
 * the program if the allocation fails.
 */
static int
parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderlen)
{
	const char *token, *space;
	size_t ntokens, toklen, idx;

	if (str[0] == '\0') {
		*reorder = NULL;
		*reorderlen = 0;
		return 0;
	}

	/* every separating space adds one more entry to the list */
	ntokens = 1;
	for (space = strchr(str, ' '); space != NULL;
	     space = strchr(space + 1, ' ')) {
		ntokens++;
	}

	*reorderlen = ntokens;
	*reorder = calloc(ntokens, sizeof(**reorder));
	if (*reorder == NULL) {
		fprintf(stderr, "calloc: %s\n", strerror(errno));
		exit(1);
	}

	/* second pass: convert each token in turn */
	for (idx = 0, token = str;; idx++, token = space + 1) {
		space = strchr(token, ' ');
		toklen = (space != NULL) ? (size_t)(space - token) :
		                           strlen(token);

		if (strtoreorder(token, toklen, &(*reorder)[idx])) {
			return 1;
		}
		if (space == NULL) {
			/* that was the last token */
			break;
		}
	}

	return 0;
}
289
290 static void
291 bidirectional_test_list_print(const struct bidirectional_test *test,
292 size_t testlen, const char *identifier,
293 const char *progname)
294 {
295 size_t i, j;
296
297 printf("/* Automatically generated by %s */\n"
298 "#include <stdint.h>\n#include <stddef.h>\n\n"
299 "#include \"../grapheme.h\"\n\n",
300 progname);
301
302 printf("static const struct {\n"
303 "\tuint_least32_t *cp;\n"
304 "\tsize_t cplen;\n"
305 "\tenum grapheme_bidirectional_direction *mode;\n"
306 "\tsize_t modelen;\n"
307 "\tenum grapheme_bidirectional_direction resolved;\n"
308 "\tint_least8_t *level;\n"
309 "\tint_least16_t *reorder;\n"
310 "\tsize_t reorderlen;\n} %s[] = {\n",
311 identifier);
312 for (i = 0; i < testlen; i++) {
313 printf("\t{\n");
314
315 printf("\t\t.cp = (uint_least32_t[]){");
316 for (j = 0; j < test[i].cplen; j++) {
317 printf(" UINT32_C(0x%06X)", test[i].cp[j]);
318 if (j + 1 < test[i].cplen) {
319 putchar(',');
320 }
321 }
322 printf(" },\n");
323 printf("\t\t.cplen = %zu,\n", test[i].cplen);
324
325 printf("\t\t.mode = (enum "
326 "grapheme_bidirectional_direction[]){");
327 for (j = 0; j < test[i].modelen; j++) {
328 if (test[i].mode[j] ==
329 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
330 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_"
331 "NEUTRAL");
332 } else if (test[i].mode[j] ==
333 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
334 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
335 } else if (test[i].mode[j] ==
336 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
337 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
338 }
339 if (j + 1 < test[i].modelen) {
340 putchar(',');
341 }
342 }
343 printf(" },\n");
344 printf("\t\t.modelen = %zu,\n", test[i].modelen);
345
346 printf("\t\t.resolved = ");
347 if (test[i].resolved ==
348 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
349 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_"
350 "NEUTRAL");
351 } else if (test[i].resolved ==
352 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
353 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
354 } else if (test[i].resolved ==
355 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
356 printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
357 }
358 printf(",\n");
359
360 printf("\t\t.level = (int_least8_t[]){");
361 for (j = 0; j < test[i].cplen; j++) {
362 printf(" %" PRIdLEAST8, test[i].level[j]);
363 if (j + 1 < test[i].cplen) {
364 putchar(',');
365 }
366 }
367 printf(" },\n");
368
369 printf("\t\t.reorder = ");
370 if (test[i].reorderlen > 0) {
371 printf("(int_least16_t[]){");
372 for (j = 0; j < test[i].reorderlen; j++) {
373 printf(" %" PRIdLEAST16, test[i].reorder[j]);
374 if (j + 1 < test[i].reorderlen) {
375 putchar(',');
376 }
377 }
378 printf(" },\n");
379 } else {
380 printf("NULL,\n");
381 }
382 printf("\t\t.reorderlen = %zu,\n", test[i].reorderlen);
383
384 printf("\t},\n");
385 }
386 printf("};\n");
387 }
388
/* growing array of all test cases collected by the parser callbacks */
static struct bidirectional_test *test = NULL;
static size_t testlen = 0;

/* level list of the most recently seen "@Levels:" line */
static int_least8_t *current_level = NULL;
static size_t current_level_len = 0;

/* reorder list of the most recently seen "@Reorder:" line */
static int_least16_t *current_reorder = NULL;
static size_t current_reorder_len = 0;
396
397 static int
398 test_callback(const char *file, char **field, size_t nfields, char *comment,
399 void *payload)
400 {
401 char *tmp;
402
403 (void)file;
404 (void)comment;
405 (void)payload;
406
407 /* we either get a line beginning with an '@', or an input line */
408 if (nfields > 0 && field[0][0] == '@') {
409 if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
410 tmp = field[0] + sizeof("@Levels:") - 1;
411 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
412 tmp++) {
413 ;
414 }
415 free(current_level);
416 parse_level_list(tmp, ¤t_level,
417 ¤t_level_len);
418 } else if (!strncmp(field[0],
419 "@Reorder:", sizeof("@Reorder:") - 1)) {
420 tmp = field[0] + sizeof("@Reorder:") - 1;
421 for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
422 tmp++) {
423 ;
424 }
425 free(current_reorder);
426 parse_reorder_list(tmp, ¤t_reorder,
427 ¤t_reorder_len);
428 } else {
429 fprintf(stderr, "Unknown @-input-line.\n");
430 exit(1);
431 }
432 } else {
433 if (nfields < 2) {
434 /* discard any line that does not have at least 2 fields
435 */
436 return 0;
437 }
438
439 /* extend test array */
440 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
441 fprintf(stderr, "realloc: %s\n", strerror(errno));
442 exit(1);
443 }
444
445 /* parse field data */
446 parse_class_list(field[0], &(test[testlen - 1].cp),
447 &(test[testlen - 1].cplen));
448
449 /* copy current level- and reorder-arrays */
450 if (!(test[testlen - 1].level =
451 calloc(current_level_len,
452 sizeof(*(test[testlen - 1].level))))) {
453 fprintf(stderr, "calloc: %s\n", strerror(errno));
454 exit(1);
455 }
456 memcpy(test[testlen - 1].level, current_level,
457 current_level_len * sizeof(*(test[testlen - 1].level)));
458
459 if (!(test[testlen - 1].reorder =
460 calloc(current_reorder_len,
461 sizeof(*(test[testlen - 1].reorder))))) {
462 fprintf(stderr, "calloc: %s\n", strerror(errno));
463 exit(1);
464 }
465 if (current_reorder != NULL) {
466 memcpy(test[testlen - 1].reorder, current_reorder,
467 current_reorder_len *
468 sizeof(*(test[testlen - 1].reorder)));
469 }
470 test[testlen - 1].reorderlen = current_reorder_len;
471
472 if (current_level_len != test[testlen - 1].cplen) {
473 fprintf(stderr,
474 "mismatch between string and level lengths.\n");
475 exit(1);
476 }
477
478 /* parse paragraph-level-bitset */
479 if (strlen(field[1]) != 1) {
480 fprintf(stderr, "malformed paragraph-level-bitset.\n");
481 exit(1);
482 } else if (field[1][0] == '2') {
483 test[testlen - 1].mode[0] =
484 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
485 test[testlen - 1].modelen = 1;
486 } else if (field[1][0] == '3') {
487 /* auto=0 and LTR=1 */
488 test[testlen - 1].mode[0] =
489 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
490 test[testlen - 1].mode[1] =
491 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
492 test[testlen - 1].modelen = 2;
493 } else if (field[1][0] == '4') {
494 test[testlen - 1].mode[0] =
495 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
496 test[testlen - 1].modelen = 1;
497 } else if (field[1][0] == '5') {
498 test[testlen - 1].mode[0] =
499 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
500 test[testlen - 1].mode[1] =
501 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
502 test[testlen - 1].modelen = 2;
503 } else if (field[1][0] == '7') {
504 test[testlen - 1].mode[0] =
505 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
506 test[testlen - 1].mode[1] =
507 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
508 test[testlen - 1].mode[2] =
509 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
510 test[testlen - 1].modelen = 3;
511 } else {
512 fprintf(stderr,
513 "unhandled paragraph-level-bitset %s.\n",
514 field[1]);
515 exit(1);
516 }
517
518 /* the resolved paragraph level is always neutral as the test
519 * file does not specify it */
520 test[testlen - 1].resolved =
521 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
522 }
523
524 return 0;
525 }
526
527 static int
528 character_test_callback(const char *file, char **field, size_t nfields,
529 char *comment, void *payload)
530 {
531 size_t tmp;
532
533 (void)file;
534 (void)comment;
535 (void)payload;
536
537 if (nfields < 5) {
538 /* discard any line that does not have at least 5 fields */
539 return 0;
540 }
541
542 /* extend test array */
543 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
544 fprintf(stderr, "realloc: %s\n", strerror(errno));
545 exit(1);
546 }
547
548 /* parse field data */
549 parse_cp_list(field[0], &(test[testlen - 1].cp),
550 &(test[testlen - 1].cplen));
551 parse_level_list(field[3], &(test[testlen - 1].level), &tmp);
552 parse_reorder_list(field[4], &(test[testlen - 1].reorder),
553 &(test[testlen - 1].reorderlen));
554
555 /* parse paragraph-level-mode */
556 if (strlen(field[1]) != 1) {
557 fprintf(stderr, "malformed paragraph-level-setting.\n");
558 exit(1);
559 } else if (field[1][0] == '0') {
560 test[testlen - 1].mode[0] =
561 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
562 } else if (field[1][0] == '1') {
563 test[testlen - 1].mode[0] =
564 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
565 } else if (field[1][0] == '2') {
566 test[testlen - 1].mode[0] =
567 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
568 } else {
569 fprintf(stderr, "unhandled paragraph-level-setting.\n");
570 exit(1);
571 }
572 test[testlen - 1].modelen = 1;
573
574 /* parse resolved paragraph level */
575 if (strlen(field[2]) != 1) {
576 fprintf(stderr, "malformed resolved paragraph level.\n");
577 exit(1);
578 } else if (field[2][0] == '0') {
579 test[testlen - 1].resolved =
580 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
581 } else if (field[2][0] == '1') {
582 test[testlen - 1].resolved =
583 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
584 } else {
585 fprintf(stderr, "unhandled resolved paragraph level.\n");
586 exit(1);
587 }
588
589 if (tmp != test[testlen - 1].cplen) {
590 fprintf(stderr, "mismatch between string and level lengths.\n");
591 exit(1);
592 }
593
594 return 0;
595 }
596
597 int
598 main(int argc, char *argv[])
599 {
600 (void)argc;
601
602 parse_file_with_callback("data/BidiTest.txt", test_callback, NULL);
603 parse_file_with_callback("data/BidiCharacterTest.txt",
604 character_test_callback, NULL);
605 bidirectional_test_list_print(test, testlen, "bidirectional_test",
606 argv[0]);
607
608 return 0;
609 }