mkrunetype.awk - sbase - suckless unix tools
(HTM) git clone git://git.suckless.org/sbase
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
mkrunetype.awk (7613B)
---
1 # See LICENSE file for copyright and license details.
2
BEGIN {
	# input records are split into fields on semicolons
	FS = ";"

	# preamble written at the top of the generated C files:
	# HEADER is used by mkis() (its output calls bsearch, hence <stdlib.h>),
	# HEADER_OTHER by otheris()
	HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
	HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"

	# set up hexadecimal lookup table: hex["0"] = 0 ... hex["F"] = 15
	i = 0
	while (i < 16) {
		hex[sprintf("%X", i)] = i
		i++
	}
}
11
# Sort every input record into the per-class arrays consumed by the END block.
# $1 is stored verbatim as the rune; it is later parsed as hexadecimal by code().
# NOTE(review): the field layout looks like Unicode's UnicodeData.txt
# ($3 general category, $5 bidi class, $13/$14 simple upper/lower case
# mappings) -- confirm against the build rule that feeds this script.

# letters: any category starting with "L"
substr($3, 1, 1) == "L" {
	alphav[alphac++] = $1
}

# whitespace: category starting with "Z", or field 5 equal to WS, S or B
(substr($3, 1, 1) == "Z") || ($5 ~ /^(WS|S|B)$/) {
	spacev[spacec++] = $1
}

# control characters
$3 == "Cc" {
	cntrlv[cntrlc++] = $1
}

# uppercase letters; a rune without an entry in field 14 maps to itself
$3 == "Lu" {
	upperv[upperc++] = $1
	if ($14 == "")
		tolowerv[uppercc++] = $1
	else
		tolowerv[uppercc++] = $14
}

# lowercase letters; a rune without an entry in field 13 maps to itself
$3 == "Ll" {
	lowerv[lowerc++] = $1
	if ($13 == "")
		toupperv[lowercc++] = $1
	else
		toupperv[lowercc++] = $13
}

# titlecase letters
$3 == "Lt" {
	titlev[titlec++] = $1
}

# decimal digits
$3 == "Nd" {
	digitv[digitc++] = $1
}
19
END {
	# remove stale output so each table file is written from scratch
	system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");

	# 'nocase' is never assigned: it is passed as the empty case-mapping
	# array for classes that get no to<case>rune() conversion function
	mkis("alpha", alphav, alphac, "isalpharune.c", nocase, "");
	mkis("space", spacev, spacec, "isspacerune.c", nocase, "");
	mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", nocase, "");

	# upper and lower also carry their case mappings
	mkis("upper", upperv, upperc, "upperrune.c", tolowerv, "lower");
	mkis("lower", lowerv, lowerc, "lowerrune.c", toupperv, "upper");

	mkis("title", titlev, titlec, "istitlerune.c", nocase, "");
	mkis("digit", digitv, digitc, "isdigitrune.c", nocase, "");

	# same clean-up for the predicates that need no generated tables
	system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");

	otheris();
}
35
# parse hexadecimal rune index to int
#
# s is read one character at a time; each character is looked up in the
# global hex table (filled in the BEGIN block) and folded in as one base-16
# digit. A character with no entry in hex counts as 0. An empty string
# yields 0.
#
# x, i, c and n are local variables (extra parameters), so calling this
# function does not disturb same-named variables elsewhere in the script.
function code(s,    x, i, c, n) {
	x = 0;
	n = length(s);
	for (i = 1; i <= n; i++) {
		c = substr(s, i, 1);
		x = (x * 16) + hex[c];
	}
	return x;
}
45
# generate 'is<name>rune' unicode lookup function
#
# Writes one C source file holding the lookup tables for a character class,
# the predicate is<name>rune() and, when case mappings are supplied, the
# conversion function to<casename>rune().
#
#   name      class name spliced into the generated identifiers
#   runev     runes of the class as hexadecimal strings, indexed 0..runec-1
#   runec     number of entries in runev
#   file      path of the C file to write
#   casev     casev[j] is the case mapping of runev[j] (hexadecimal string);
#             an empty array means no conversion function is generated
#   casename  name spliced into to<casename>rune
#
# Neighbouring entries are folded into four tables:
#   <name>1   singletons
#   <name>2   ranges:  runes step by 1 (and so do their mappings)
#   <name>3   laces 1: runes step by 2, each mapping to itself + 1
#   <name>4   laces 2: runes step by 2, each mapping to itself - 1
#
# NOTE(review): the generated code searches the tables with bsearch, so
# runev is presumably expected in ascending order -- confirm for the input.
#
# Everything after casename is a local variable, not an argument.
function mkis(name, runev, runec, file, casev, casename, \
	hascase, j, mode, kind, cur, nxt, ccur, cnxt, \
	rune1c, rune2c, rune3c, rune4c, \
	rune1v, case1v, rune2v0, rune2v1, case2v, \
	rune3v0, rune3v1, rune4v0, rune4v1) {
	rune1c = 0;
	rune2c = 0;
	rune3c = 0;
	rune4c = 0;
	hascase = (length(casev) > 0);

	# mode is the kind of run currently open:
	# 1 none, 2 range, 3 lace 1, 4 lace 2
	mode = 1;

	#sort rune groups into singletons, ranges and laces
	for (j = 0; j < runec; j++) {
		# kind describes the step from entry j to entry j+1, using the
		# same numbering as mode; 1 means no run continues past entry j.
		# The bounds check comes first so that no element past the end
		# of runev or casev is ever touched.
		kind = 1;
		if (j + 1 < runec) {
			cur = code(runev[j]);
			nxt = code(runev[j+1]);
			if (hascase) {
				ccur = code(casev[j]);
				cnxt = code(casev[j+1]);
			}
			if (nxt == cur + 1 && (!hascase || cnxt == ccur + 1))
				kind = 2;
			else if (nxt == cur + 2 && (!hascase || (cnxt == nxt + 1 && ccur == cur + 1)))
				kind = 3;
			else if (nxt == cur + 2 && hascase && cnxt == nxt - 1 && ccur == cur - 1)
				kind = 4;
		}

		# still inside the same run: nothing to record yet
		if (kind != 1 && kind == mode)
			continue;

		# entry j ends whatever run was open; each run is closed in
		# its own table, at its own counter
		if (mode == 2) {
			rune2v1[rune2c] = runev[j];
			rune2c++;
		} else if (mode == 3) {
			rune3v1[rune3c] = runev[j];
			rune3c++;
		} else if (mode == 4) {
			rune4v1[rune4c] = runev[j];
			rune4c++;
		} else if (kind == 1) {
			# no run open and none starting: a singleton
			rune1v[rune1c] = runev[j];
			if (hascase)
				case1v[rune1c] = casev[j];
			rune1c++;
		}

		# entry j may also be the first element of the next run
		if (kind == 2) {
			rune2v0[rune2c] = runev[j];
			if (hascase)
				case2v[rune2c] = casev[j];
		} else if (kind == 3) {
			rune3v0[rune3c] = runev[j];
		} else if (kind == 4) {
			rune4v0[rune4c] = runev[j];
		}
		mode = kind;
	}

	print HEADER > file;

	#generate list of laces 1
	if (rune3c > 0) {
		print "static const Rune "name"3[][2] = {" > file;
		for (j = 0; j < rune3c; j++)
			print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
		print "};\n" > file;
	}

	#generate list of laces 2
	if (rune4c > 0) {
		print "static const Rune "name"4[][2] = {" > file;
		for (j = 0; j < rune4c; j++)
			print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
		print "};\n" > file;
	}

	# generate list of ranges; with case mappings each row also carries
	# the mapping of the first rune of the range
	if (rune2c > 0) {
		if (hascase) {
			print "static const Rune "name"2[][3] = {" > file;
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
		} else {
			print "static const Rune "name"2[][2] = {" > file;
			for (j = 0; j < rune2c; j++)
				print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
		}
		print "};\n" > file;
	}

	# generate list of singletons
	if (rune1c > 0) {
		if (hascase) {
			print "static const Rune "name"1[][2] = {" > file;
			for (j = 0; j < rune1c; j++)
				print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
		} else {
			print "static const Rune "name"1[] = {" > file;
			for (j = 0; j < rune1c; j++)
				print "\t0x"rune1v[j]"," > file;
		}
		print "};\n" > file;
	}

	# generate lookup function
	print "int\nis"name"rune(Rune r)\n{" > file;
	if (rune4c > 0 || rune3c > 0)
		print "\tconst Rune *match;\n" > file;
	# within a lace only every second rune, counted from its start, is a member
	if (rune4c > 0) {
		print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune3c > 0) {
		print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
		print "\t\treturn !((r - match[0]) % 2);" > file;
	}
	if (rune2c > 0)
		print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
	if (rune1c > 0)
		print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
	print "\treturn 0;\n}" > file;

	# generate case conversion function
	if (hascase) {
		print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
		if (rune4c > 0) {
			print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
		}
		if (rune3c > 0) {
			print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
		}
		if (rune2c > 0) {
			print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[2] + (r - match[0]);" > file;
		}
		if (rune1c > 0) {
			print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
			print "\tif (match)" > file;
			print "\t\treturn match[1];" > file;
		}
		print "\treturn r;\n}" > file;
	}

	# flush the file and release it; nothing writes to it afterwards
	close(file);
}
225
# generate the rune predicates that need no lookup tables
#
# Each output file receives HEADER_OTHER followed by one C function that is
# either a fixed comparison or a combination of other is*rune() predicates.
# Takes no arguments and returns nothing; its only effect is writing the
# six files named below into the current directory.
function otheris() {
	print HEADER_OTHER > "isalnumrune.c";
	print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";

	print HEADER_OTHER > "isblankrune.c";
	print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";

	# printable: not a control rune, not 0x2028/0x2029, not in 0xFFF9..0xFFFB
	print HEADER_OTHER > "isprintrune.c";
	print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
	print "\t ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";

	print HEADER_OTHER > "isgraphrune.c";
	print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";

	print HEADER_OTHER > "ispunctrune.c";
	print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";

	# hexadecimal digits are 0-9, a-f and A-F; the uppercase letters must
	# be accepted as well, matching isxdigit() from <ctype.h>
	print HEADER_OTHER > "isxdigitrune.c";
	print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6) || (r >= 'A' && (r - 'A') < 6);\n}" > "isxdigitrune.c";
}