utf8-decode.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       utf8-decode.c (2129B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <errno.h>
            3 #include <math.h>
            4 #include <stdint.h>
            5 #include <stdio.h>
            6 #include <stdlib.h>
            7 #include <string.h>
            8 
            9 #include "../gen/character-test.h"
           10 #include "../grapheme.h"
           11 #include "util.h"
           12 
           13 #include <utf8proc.h>
           14 
           15 #define NUM_ITERATIONS 100000
           16 
           17 struct utf8_benchmark_payload {
           18         char *buf;
           19         utf8proc_uint8_t *buf_utf8proc;
           20         size_t buflen;
           21 };
           22 
           23 void
           24 libgrapheme(const void *payload)
           25 {
           26         const struct utf8_benchmark_payload *p = payload;
           27         uint_least32_t cp;
           28         size_t ret, off;
           29 
           30         for (off = 0; off < p->buflen; off += ret) {
           31                 if ((ret = grapheme_decode_utf8(p->buf + off, p->buflen - off,
           32                                                 &cp)) > (p->buflen - off)) {
           33                         break;
           34                 }
           35                 (void)cp;
           36         }
           37 }
           38 
           39 void
           40 libutf8proc(const void *payload)
           41 {
           42         const struct utf8_benchmark_payload *p = payload;
           43         utf8proc_int32_t cp;
           44         utf8proc_ssize_t ret;
           45         size_t off;
           46 
           47         for (off = 0; off < p->buflen; off += (size_t)ret) {
           48                 if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
           49                                             (utf8proc_ssize_t)(p->buflen - off),
           50                                             &cp)) < 0) {
           51                         break;
           52                 }
           53                 (void)cp;
           54         }
           55 }
           56 
           57 int
           58 main(int argc, char *argv[])
           59 {
           60         struct utf8_benchmark_payload p;
           61         size_t i;
           62         double baseline = (double)NAN;
           63 
           64         (void)argc;
           65 
           66         p.buf = generate_utf8_test_buffer(
           67                 character_break_test, LEN(character_break_test), &(p.buflen));
           68 
           69         /* convert cp-buffer to stupid custom libutf8proc-uint8-type */
           70         if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
           71                 fprintf(stderr, "malloc: %s\n", strerror(errno));
           72                 exit(1);
           73         }
           74         for (i = 0; i < p.buflen; i++) {
           75                 /*
           76                  * even if char is larger than 8 bit, it will only have
           77                  * any of the first 8 bits set (by construction).
           78                  */
           79                 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
           80         }
           81 
           82         printf("%s\n", argv[0]);
           83         run_benchmark(libgrapheme, &p, "libgrapheme ", NULL, "byte", &baseline,
           84                       NUM_ITERATIONS, p.buflen);
           85         run_benchmark(libutf8proc, &p, "libutf8proc ",
           86                       "but unsafe (does not detect overlong encodings)", "byte",
           87                       &baseline, NUM_ITERATIONS, p.buflen);
           88 
           89         free(p.buf);
           90         free(p.buf_utf8proc);
           91 
           92         return 0;
           93 }