utf8-decode.c - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
utf8-decode.c (2129B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <math.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
9 #include "../gen/character-test.h"
10 #include "../grapheme.h"
11 #include "util.h"
12
13 #include <utf8proc.h>
14
/* iteration count handed to run_benchmark() for every benchmarked decoder */
#define NUM_ITERATIONS 100000
16
17 struct utf8_benchmark_payload {
18 char *buf;
19 utf8proc_uint8_t *buf_utf8proc;
20 size_t buflen;
21 };
22
23 void
24 libgrapheme(const void *payload)
25 {
26 const struct utf8_benchmark_payload *p = payload;
27 uint_least32_t cp;
28 size_t ret, off;
29
30 for (off = 0; off < p->buflen; off += ret) {
31 if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
32 &cp)) > (p->buflen - off)) {
33 break;
34 }
35 (void)cp;
36 }
37 }
38
39 void
40 libutf8proc(const void *payload)
41 {
42 const struct utf8_benchmark_payload *p = payload;
43 utf8proc_int32_t cp;
44 utf8proc_ssize_t ret;
45 size_t off;
46
47 for (off = 0; off < p->buflen; off += (size_t)ret) {
48 if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
49 (utf8proc_ssize_t)(p->buflen - off),
50 &cp)) < 0) {
51 break;
52 }
53 (void)cp;
54 }
55 }
56
57 int
58 main(int argc, char *argv[])
59 {
60 struct utf8_benchmark_payload p;
61 size_t i;
62 double baseline = (double)NAN;
63
64 (void)argc;
65
66 p.buf = generate_utf8_test_buffer(
67 character_break_test, LEN(character_break_test), &(p.buflen));
68
69 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */
70 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
71 fprintf(stderr, "malloc: %s\n", strerror(errno));
72 exit(1);
73 }
74 for (i = 0; i < p.buflen; i++) {
75 /*
76 * even if char is larger than 8 bit, it will only have
77 * any of the first 8 bits set (by construction).
78 */
79 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
80 }
81
82 printf("%s\n", argv[0]);
83 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
84 NUM_ITERATIONS, p.buflen);
85 run_benchmark(libutf8proc, &p, "libutf8proc ",
86 "but unsafe (does not detect overlong encodings)", "byte",
87 &baseline, NUM_ITERATIONS, p.buflen);
88
89 free(p.buf);
90 free(p.buf_utf8proc);
91
92 return 0;
93 }