word.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       word.c (3038B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stdio.h>
            3 #include <stdlib.h>
            4 #include <string.h>
            5 
            6 #include "util.h"
            7 
            8 #define FILE_EMOJI "data/emoji-data.txt"
            9 #define FILE_WORD  "data/WordBreakProperty.txt"
           10 
           11 static const struct property_spec word_break_property[] = {
           12         {
           13                 .enumname = "OTHER",
           14                 .file = NULL,
           15                 .ucdname = NULL,
           16         },
           17         {
           18                 .enumname = "ALETTER",
           19                 .file = FILE_WORD,
           20                 .ucdname = "ALetter",
           21         },
           22         {
           23                 .enumname = "BOTH_ALETTER_EXTPICT",
           24                 .file = NULL,
           25                 .ucdname = NULL,
           26         },
           27         {
           28                 .enumname = "CR",
           29                 .file = FILE_WORD,
           30                 .ucdname = "CR",
           31         },
           32         {
           33                 .enumname = "DOUBLE_QUOTE",
           34                 .file = FILE_WORD,
           35                 .ucdname = "Double_Quote",
           36         },
           37         {
           38                 .enumname = "EXTEND",
           39                 .file = FILE_WORD,
           40                 .ucdname = "Extend",
           41         },
           42         {
           43                 .enumname = "EXTENDED_PICTOGRAPHIC",
           44                 .file = FILE_EMOJI,
           45                 .ucdname = "Extended_Pictographic",
           46         },
           47         {
           48                 .enumname = "EXTENDNUMLET",
           49                 .file = FILE_WORD,
           50                 .ucdname = "ExtendNumLet",
           51         },
           52         {
           53                 .enumname = "FORMAT",
           54                 .file = FILE_WORD,
           55                 .ucdname = "Format",
           56         },
           57         {
           58                 .enumname = "HEBREW_LETTER",
           59                 .file = FILE_WORD,
           60                 .ucdname = "Hebrew_Letter",
           61         },
           62         {
           63                 .enumname = "KATAKANA",
           64                 .file = FILE_WORD,
           65                 .ucdname = "Katakana",
           66         },
           67         {
           68                 .enumname = "LF",
           69                 .file = FILE_WORD,
           70                 .ucdname = "LF",
           71         },
           72         {
           73                 .enumname = "MIDLETTER",
           74                 .file = FILE_WORD,
           75                 .ucdname = "MidLetter",
           76         },
           77         {
           78                 .enumname = "MIDNUM",
           79                 .file = FILE_WORD,
           80                 .ucdname = "MidNum",
           81         },
           82         {
           83                 .enumname = "MIDNUMLET",
           84                 .file = FILE_WORD,
           85                 .ucdname = "MidNumLet",
           86         },
           87         {
           88                 .enumname = "NEWLINE",
           89                 .file = FILE_WORD,
           90                 .ucdname = "Newline",
           91         },
           92         {
           93                 .enumname = "NUMERIC",
           94                 .file = FILE_WORD,
           95                 .ucdname = "Numeric",
           96         },
           97         {
           98                 .enumname = "REGIONAL_INDICATOR",
           99                 .file = FILE_WORD,
          100                 .ucdname = "Regional_Indicator",
          101         },
          102         {
          103                 .enumname = "SINGLE_QUOTE",
          104                 .file = FILE_WORD,
          105                 .ucdname = "Single_Quote",
          106         },
          107         {
          108                 .enumname = "WSEGSPACE",
          109                 .file = FILE_WORD,
          110                 .ucdname = "WSegSpace",
          111         },
          112         {
          113                 .enumname = "ZWJ",
          114                 .file = FILE_WORD,
          115                 .ucdname = "ZWJ",
          116         },
          117 };
          118 
          119 static uint_least8_t
          120 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
          121 {
          122         uint_least8_t result;
          123 
          124         (void)cp;
          125 
          126         if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
          127              !strcmp(word_break_property[prop2].enumname,
          128                      "EXTENDED_PICTOGRAPHIC")) ||
          129             (!strcmp(word_break_property[prop1].enumname,
          130                      "EXTENDED_PICTOGRAPHIC") &&
          131              !strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
          132                 for (result = 0; result < LEN(word_break_property); result++) {
          133                         if (!strcmp(word_break_property[result].enumname,
          134                                     "BOTH_ALETTER_EXTPICT")) {
          135                                 break;
          136                         }
          137                 }
          138                 if (result == LEN(word_break_property)) {
          139                         fprintf(stderr, "handle_conflict: Internal error.\n");
          140                         exit(1);
          141                 }
          142         } else {
          143                 fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
          144                 exit(1);
          145         }
          146 
          147         return result;
          148 }
          149 
          150 int
          151 main(int argc, char *argv[])
          152 {
          153         (void)argc;
          154 
          155         properties_generate_break_property(
          156                 word_break_property, LEN(word_break_property), NULL,
          157                 handle_conflict, NULL, "word_break", argv[0]);
          158 
          159         return 0;
          160 }