character.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       character.c (3173B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <errno.h>
            3 #include <stddef.h>
            4 #include <stdint.h>
            5 #include <stdio.h>
            6 #include <stdlib.h>
            7 #include <string.h>
            8 
            9 #include "character.h"
           10 #include "util.h"
           11 
           12 int
           13 main(int argc, char *argv[])
           14 {
           15         const struct codepoint_property *match;
           16         struct codepoint_property_set *properties_dcp, *properties_emoji,
           17                                       *properties_grapheme;
           18         uint_least64_t *properties;
           19         uint_least32_t cp;
           20 
           21         (void)argc;
           22         
           23         /* parse properties from the Unicode data files */
           24         properties_dcp = parse_property_file("data/DerivedCoreProperties.txt");
           25         properties_emoji = parse_property_file("data/emoji-data.txt");
           26         properties_grapheme = parse_property_file("data/GraphemeBreakProperty.txt");
           27 
           28         /* allocate property array and initialise to zero */
           29         if (!(properties = calloc(UINT32_C(0x110000), sizeof(*properties)))) {        
           30                 fprintf(stderr, "%s: malloc: %s\n", argv[0], strerror(errno));
           31                 exit(1);
           32         }
           33 
           34         for (cp = 0; cp <= UINT32_C(0x10FFFF); cp++) {
           35                 if (match_in_codepoint_property_set(
           36                         &(properties_grapheme[cp]), "Control", 0)) {
           37                         properties[cp] |= CHAR_PROP_CONTROL;
           38                 }
           39 
           40                 if (match_in_codepoint_property_set(
           41                         &(properties_grapheme[cp]), "Extend", 0)) {
           42                         properties[cp] |= CHAR_PROP_EXTEND;
           43                 }
           44 
           45                 if (match_in_codepoint_property_set(
           46                         &(properties_emoji[cp]), "Extended_Pictographic", 0)) {
           47                         properties[cp] |= CHAR_PROP_EXTENDED_PICTOGRAPHIC;
           48                 }
           49 
           50                 if (match_in_codepoint_property_set(
           51                         &(properties_grapheme[cp]), "L", 0)) {
           52                         properties[cp] |= CHAR_PROP_HANGUL_L;
           53                 }
           54 
           55                 if (match_in_codepoint_property_set(
           56                         &(properties_grapheme[cp]), "V", 0)) {
           57                         properties[cp] |= CHAR_PROP_HANGUL_V;
           58                 }
           59 
           60                 if (match_in_codepoint_property_set(
           61                         &(properties_grapheme[cp]), "T", 0)) {
           62                         properties[cp] |= CHAR_PROP_HANGUL_T;
           63                 }
           64 
           65                 if (match_in_codepoint_property_set(
           66                         &(properties_grapheme[cp]), "LV", 0)) {
           67                         properties[cp] |= CHAR_PROP_HANGUL_LV;
           68                 }
           69 
           70                 if (match_in_codepoint_property_set(
           71                         &(properties_grapheme[cp]), "LVT", 0)) {
           72                         properties[cp] |= CHAR_PROP_HANGUL_LVT;
           73                 }
           74 
           75                 if ((match = match_in_codepoint_property_set(
           76                         &(properties_dcp[cp]), "InCB", 0))) {
           77                         if (strcmp(match->fields[1], "Consonant") == 0) {
           78                                 properties[cp] |= CHAR_PROP_ICB_CONSONANT;
           79                         } else if (strcmp(match->fields[1], "Extend") == 0) {
           80                                 properties[cp] |= CHAR_PROP_ICB_EXTEND;
           81                         } else if (strcmp(match->fields[1], "Linker") == 0) {
           82                                 properties[cp] |= CHAR_PROP_ICB_LINKER;
           83                         }
           84                 }
           85 
           86                 if (match_in_codepoint_property_set(
           87                         &(properties_grapheme[cp]), "Prepend", 0)) {
           88                         properties[cp] |= CHAR_PROP_PREPEND;
           89                 }
           90 
           91                 if (match_in_codepoint_property_set(
           92                         &(properties_grapheme[cp]), "Regional_Indicator", 0)) {
           93                         properties[cp] |= CHAR_PROP_REGIONAL_INDICATOR;
           94                 }
           95 
           96                 if (match_in_codepoint_property_set(
           97                         &(properties_grapheme[cp]), "SpacingMark", 0)) {
           98                         properties[cp] |= CHAR_PROP_SPACINGMARK;
           99                 }
          100         }
          101 
          102         /* generate code */
          103         compress_and_output(properties, "character");
          104 
          105         /* cleanup */
          106         free_codepoint_property_set_array(properties_dcp);
          107         free_codepoint_property_set_array(properties_emoji);
          108         free_codepoint_property_set_array(properties_grapheme);
          109         free(properties);
          110 
          111         return 0;
          112 }