character.c - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
character.c (3173B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <stddef.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
8
9 #include "character.h"
10 #include "util.h"
11
12 int
13 main(int argc, char *argv[])
14 {
15 const struct codepoint_property *match;
16 struct codepoint_property_set *properties_dcp, *properties_emoji,
17 *properties_grapheme;
18 uint_least64_t *properties;
19 uint_least32_t cp;
20
21 (void)argc;
22
23 /* parse properties from the Unicode data files */
24 properties_dcp = parse_property_file("data/DerivedCoreProperties.txt");
25 properties_emoji = parse_property_file("data/emoji-data.txt");
26 properties_grapheme = parse_property_file("data/GraphemeBreakProperty.txt");
27
28 /* allocate property array and initialise to zero */
29 if (!(properties = calloc(UINT32_C(0x110000), sizeof(*properties)))) {
30 fprintf(stderr, "%s: malloc: %s\n", argv[0], strerror(errno));
31 exit(1);
32 }
33
34 for (cp = 0; cp <= UINT32_C(0x10FFFF); cp++) {
35 if (match_in_codepoint_property_set(
36 &(properties_grapheme[cp]), "Control", 0)) {
37 properties[cp] |= CHAR_PROP_CONTROL;
38 }
39
40 if (match_in_codepoint_property_set(
41 &(properties_grapheme[cp]), "Extend", 0)) {
42 properties[cp] |= CHAR_PROP_EXTEND;
43 }
44
45 if (match_in_codepoint_property_set(
46 &(properties_emoji[cp]), "Extended_Pictographic", 0)) {
47 properties[cp] |= CHAR_PROP_EXTENDED_PICTOGRAPHIC;
48 }
49
50 if (match_in_codepoint_property_set(
51 &(properties_grapheme[cp]), "L", 0)) {
52 properties[cp] |= CHAR_PROP_HANGUL_L;
53 }
54
55 if (match_in_codepoint_property_set(
56 &(properties_grapheme[cp]), "V", 0)) {
57 properties[cp] |= CHAR_PROP_HANGUL_V;
58 }
59
60 if (match_in_codepoint_property_set(
61 &(properties_grapheme[cp]), "T", 0)) {
62 properties[cp] |= CHAR_PROP_HANGUL_T;
63 }
64
65 if (match_in_codepoint_property_set(
66 &(properties_grapheme[cp]), "LV", 0)) {
67 properties[cp] |= CHAR_PROP_HANGUL_LV;
68 }
69
70 if (match_in_codepoint_property_set(
71 &(properties_grapheme[cp]), "LVT", 0)) {
72 properties[cp] |= CHAR_PROP_HANGUL_LVT;
73 }
74
75 if ((match = match_in_codepoint_property_set(
76 &(properties_dcp[cp]), "InCB", 0))) {
77 if (strcmp(match->fields[1], "Consonant") == 0) {
78 properties[cp] |= CHAR_PROP_ICB_CONSONANT;
79 } else if (strcmp(match->fields[1], "Extend") == 0) {
80 properties[cp] |= CHAR_PROP_ICB_EXTEND;
81 } else if (strcmp(match->fields[1], "Linker") == 0) {
82 properties[cp] |= CHAR_PROP_ICB_LINKER;
83 }
84 }
85
86 if (match_in_codepoint_property_set(
87 &(properties_grapheme[cp]), "Prepend", 0)) {
88 properties[cp] |= CHAR_PROP_PREPEND;
89 }
90
91 if (match_in_codepoint_property_set(
92 &(properties_grapheme[cp]), "Regional_Indicator", 0)) {
93 properties[cp] |= CHAR_PROP_REGIONAL_INDICATOR;
94 }
95
96 if (match_in_codepoint_property_set(
97 &(properties_grapheme[cp]), "SpacingMark", 0)) {
98 properties[cp] |= CHAR_PROP_SPACINGMARK;
99 }
100 }
101
102 /* generate code */
103 compress_and_output(properties, "character");
104
105 /* cleanup */
106 free_codepoint_property_set_array(properties_dcp);
107 free_codepoint_property_set_array(properties_emoji);
108 free_codepoint_property_set_array(properties_grapheme);
109 free(properties);
110
111 return 0;
112 }