sentence.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       sentence.c (3181B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stdbool.h>
            3 #include <stdint.h>
            4 
            5 #include "../gen/sentence-test.h"
            6 #include "../grapheme.h"
            7 #include "util.h"
            8 
            9 static const struct unit_test_next_break next_sentence_break[] = {
                      /*
                       * Hand-written cases for the code-point variant; main() runs them
                       * through unit_test_callback_next_sentence_break(). Each entry
                       * pairs an input array (src) and its length (srclen) with the
                       * value expected in output. Entries described as
                       * "null-terminated" pass SIZE_MAX as srclen and end src with a
                       * 0x0 element.
                       * NOTE(review): output is presumably the expected return value of
                       * grapheme_next_sentence_break(), counted in code points; the
                       * struct is not declared in this file -- confirm.
                       */
           10         {
           11                 .description = "NULL input",
           12                 .input = {
           13                         .src    = NULL,
           14                         .srclen = 0,
           15                 },
           16                 .output = { 0 },
           17         },
           18         {
           19                 .description = "empty input",
           20                 .input = {
           21                         .src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
           22                         .srclen = 0,
           23                 },
           24                 .output = { 0 },
           25         },
           26         {
           27                 .description = "empty input, null-terminated",
           28                 .input = {
           29                         .src    = (uint_least32_t *)(uint_least32_t[]){ 0x0 },
           30                         .srclen = SIZE_MAX,
           31                 },
           32                 .output = { 0 },
           33         },
                      /*
                       * U+1F1E9 U+1F1EA (a regional-indicator pair), '.', ' ', '*':
                       * the expected value 4 is the index of the final '*'.
                       */
           34         {
           35                 .description = "one sentence",
           36                 .input = {
           37                         .src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A },
           38                         .srclen = 5,
           39                 },
           40                 .output = { 4 },
           41         },
                      /* Same code points as above, followed by a terminating 0x0. */
           42         {
           43                 .description = "one sentence, null-terminated",
           44                 .input = {
           45                         .src    = (uint_least32_t *)(uint_least32_t[]){ 0x1F1E9, 0x1F1EA, 0x2E, 0x20, 0x2A, 0x0 },
           46                         .srclen = SIZE_MAX,
           47                 },
           48                 .output = { 4 },
           49         },
           50 };
           51 
           52 static const struct unit_test_next_break_utf8 next_sentence_break_utf8[] = {
                      /*
                       * Hand-written cases for the UTF-8 variant; main() runs them
                       * through unit_test_callback_next_sentence_break_utf8(). Entries
                       * described as "NUL-terminated" pass SIZE_MAX as the length.
                       * Most entries initialise input positionally as
                       * { string, length }.
                       * NOTE(review): that is presumably (src, srclen) in member order,
                       * as named in the first entry, and output is presumably the
                       * expected return value in bytes; the struct is not declared in
                       * this file -- confirm.
                       */
           53         {
           54                 .description = "NULL input",
           55                 .input = {
           56                         .src    = NULL,
           57                         .srclen = 0,
           58                 },
           59                 .output = { 0 },
           60         },
           61         {
           62                 .description = "empty input",
           63                 .input = { "", 0 },
           64                 .output = { 0 },
           65         },
           66         {
           67                 .description = "empty input, NUL-terminated",
           68                 .input = { "", SIZE_MAX },
           69                 .output = { 0 },
           70         },
                      /*
                       * The first eight bytes encode U+1F1E9 U+1F1EA. The string is 36
                       * bytes long and the expected value 34 is the offset of the 'I'
                       * that follows the two spaces after the full stop.
                       */
           71         {
           72                 .description = "one sentence",
           73                 .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany.  It", 36 },
           74                 .output = { 34 },
           75         },
                      /* src stops after the lead byte (0xF0) of a second 4-byte sequence. */
           76         {
           77                 .description = "one sentence, fragment",
           78                 .input = { "\xF0\x9F\x87\xA9\xF0", 5 },
           79                 .output = { 4 },
           80         },
           81         {
           82                 .description = "one sentence, NUL-terminated",
           83                 .input = { "\xF0\x9F\x87\xA9\xF0\x9F\x87\xAA is the flag of Germany.  It", SIZE_MAX },
           84                 .output = { 34 },
           85         },
                      /* Here two bytes (0xF0 0x9F) of the second sequence are present. */
           86         {
           87                 .description = "one sentence, fragment, NUL-terminated",
           88                 .input = { "\xF0\x9F\x87\xA9\xF0\x9F", SIZE_MAX },
           89                 .output = { 6 },
           90         },
           91 };
           92 
           93 static int
           94 unit_test_callback_next_sentence_break(const void *t, size_t off,
           95                                        const char *name, const char *argv0)
           96 {
                      /*
                       * Adapter handed to run_unit_tests() in main(): forwards t, off,
                       * name and argv0 unchanged to unit_test_callback_next_break(),
                       * inserting grapheme_next_sentence_break as the third argument,
                       * and returns that call's result.
                       */
           97         const int status = unit_test_callback_next_break(
           98                 t, off, grapheme_next_sentence_break, name, argv0);
                      return status;
           99 }
          100 
          101 static int
          102 unit_test_callback_next_sentence_break_utf8(const void *t, size_t off,
          103                                             const char *name, const char *argv0)
          104 {
                      /*
                       * Adapter handed to run_unit_tests() in main(): forwards t, off,
                       * name and argv0 unchanged to
                       * unit_test_callback_next_break_utf8(), inserting
                       * grapheme_next_sentence_break_utf8 as the third argument, and
                       * returns that call's result.
                       */
          105         const int status = unit_test_callback_next_break_utf8(
          106                 t, off, grapheme_next_sentence_break_utf8, name, argv0);
                      return status;
          107 }
          108 
          109 int
          110 main(int argc, char *argv[])
          111 {
          112         (void)argc;
          113 
                      /*
                       * Run three suites and return the sum of their results:
                       * run_break_tests() over sentence_break_test (not defined in this
                       * file; presumably supplied by ../gen/sentence-test.h), then
                       * run_unit_tests() over the two hand-written tables above.
                       * Each run_unit_tests() call is passed the name of the function
                       * its callback invokes, so the UTF-8 run is labelled
                       * grapheme_next_sentence_break_utf8 (it previously carried the
                       * character-break name by mistake). argv[0] is passed along to
                       * every suite.
                       */
          114         return run_break_tests(grapheme_next_sentence_break,
          115                                sentence_break_test, LEN(sentence_break_test),
          116                                argv[0]) +
          117                run_unit_tests(unit_test_callback_next_sentence_break,
          118                               next_sentence_break, LEN(next_sentence_break),
          119                               "grapheme_next_sentence_break", argv[0]) +
          120                run_unit_tests(unit_test_callback_next_sentence_break_utf8,
          121                               next_sentence_break_utf8,
          122                               LEN(next_sentence_break_utf8),
          123                               "grapheme_next_sentence_break_utf8", argv[0]);
          124 }