mkrunetype.awk - sbase - suckless unix tools
 (HTM) git clone git://git.suckless.org/sbase
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       mkrunetype.awk (7613B)
       ---
            1 # See LICENSE file for copyright and license details.
            2 
            3 BEGIN {
            4         FS = ";"
            5         # set up hexadecimal lookup table
            6         for(i = 0; i < 16; i++)
            7                 hex[sprintf("%X",i)] = i;
            8         HEADER = "/* Automatically generated by mkrunetype.awk */\n#include <stdlib.h>\n\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
            9         HEADER_OTHER = "/* Automatically generated by mkrunetype.awk */\n#include \"../utf.h\"\n#include \"runetype.h\"\n"
           10 }
           11 
           12 $3  ~ /^L/ { alphav[alphac++] = $1; }
           13 ($3  ~ /^Z/) || ($5 == "WS") || ($5 == "S") || ($5 == "B") { spacev[spacec++] = $1; }
           14 $3 == "Cc" { cntrlv[cntrlc++] = $1; }
           15 $3 == "Lu" { upperv[upperc++] = $1; tolowerv[uppercc++] = ($14 == "") ? $1 : $14; }
           16 $3 == "Ll" { lowerv[lowerc++] = $1; toupperv[lowercc++] = ($13 == "") ? $1 : $13; }
           17 $3 == "Lt" { titlev[titlec++] = $1; }
           18 $3 == "Nd" { digitv[digitc++] = $1; }
           19 
# After all input has been read, emit the generated C source files.
END {
        # remove any previous versions of the table files before regenerating them
        system("rm -f isalpharune.c isspacerune.c iscntrlrune.c upperrune.c lowerrune.c istitlerune.c isdigitrune.c");

        # q is never assigned, so it is passed as an empty case-mapping array:
        # mkis() then emits only the is<name>rune() lookup.  The upper/lower
        # calls pass a real mapping array and also get a to<casename>rune().
        mkis("alpha", alphav, alphac, "isalpharune.c", q, "");
        mkis("space", spacev, spacec, "isspacerune.c", q, "");
        mkis("cntrl", cntrlv, cntrlc, "iscntrlrune.c", q, "");
        mkis("upper", upperv, upperc,   "upperrune.c", tolowerv, "lower");
        mkis("lower", lowerv, lowerc,   "lowerrune.c", toupperv, "upper");
        mkis("title", titlev, titlec, "istitlerune.c", q, "");
        mkis("digit", digitv, digitc, "isdigitrune.c", q, "");

        # remove any previous versions of the files written by otheris()
        system("rm -f isalnumrune.c isblankrune.c isprintrune.c isgraphrune.c ispunctrune.c isxdigitrune.c");

        otheris();
}
           35 
           36 # parse hexadecimal rune index to int
           37 function code(s) {
           38         x = 0;
           39         for(i = 1; i <= length(s); i++) {
           40                 c = substr(s, i, 1);
           41                 x = (x*16) + hex[c];
           42         }
           43         return x;
           44 }
           45 
           46 # generate 'is<name>rune' unicode lookup function
           47 function mkis(name, runev, runec, file, casev, casename) {
           48         rune1c = 0;
           49         rune2c = 0;
           50         rune3c = 0;
           51         rune4c = 0;
           52         mode = 1;
           53 
           54         #sort rune groups into singletons, ranges and laces
           55         for(j = 0; j < runec; j++) {
           56                 # range
           57                 if(code(runev[j+1]) == code(runev[j])+1 && ((length(casev) == 0) ||
           58                    code(casev[j+1]) == code(casev[j])+1) && j+1 < runec) {
           59                         if (mode == 2) {
           60                                 continue;
           61                         } else if (mode == 3) {
           62                                 rune3v1[rune3c] = runev[j];
           63                                 rune3c++;
           64                         } else if (mode == 4) {
           65                                 rune4v1[rune4c] = runev[j];
           66                                 rune4c++;
           67                         }
           68                         mode = 2;
           69                         rune2v0[rune2c] = runev[j];
           70                         if(length(casev) > 0) {
           71                                 case2v[rune2c] = casev[j];
           72                         }
           73                         continue;
           74                 }
           75                 # lace 1
           76                 if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
           77                    (code(casev[j+1]) == code(runev[j+1])+1 && code(casev[j]) == code(runev[j])+1)) &&
           78                    j+1 < runec) {
           79                         if (mode == 3) {
           80                                 continue;
           81                         } else if (mode == 2) {
           82                                 rune2v1[rune2c] = runev[j];
           83                                 rune2c++;
           84                         } else if (mode == 4) {
           85                                 rune4v1[rune2c] = runev[j];
           86                                 rune4c++;
           87                         }
           88                         mode = 3;
           89                         rune3v0[rune3c] = runev[j];
           90                         continue;
           91                 }
           92                 # lace 2
           93                 if(code(runev[j+1]) == code(runev[j])+2 && ((length(casev) == 0) ||
           94                    (code(casev[j+1]) == code(runev[j+1])-1 && code(casev[j]) == code(runev[j])-1)) &&
           95                    j+1 < runec) {
           96                         if (mode == 4) {
           97                                 continue;
           98                         } else if (mode == 2) {
           99                                 rune2v1[rune2c] = runev[j];
          100                                 rune2c++;
          101                         } else if (mode == 3) {
          102                                 rune3v1[rune2c] = runev[j];
          103                                 rune3c++;
          104                         }
          105                         mode = 4;
          106                         rune4v0[rune4c] = runev[j];
          107                         continue;
          108                 }
          109                 # terminating case
          110                 if (mode == 1) {
          111                         rune1v[rune1c] = runev[j];
          112                         if (length(casev) > 0) {
          113                                 case1v[rune1c] = casev[j];
          114                         }
          115                         rune1c++;
          116                 } else if (mode == 2) {
          117                         rune2v1[rune2c] = runev[j];
          118                         rune2c++;
          119                 } else if (mode == 3) {
          120                         rune3v1[rune3c] = runev[j];
          121                         rune3c++;
          122                 } else { #lace 2
          123                         rune4v1[rune4c] = runev[j];
          124                         rune4c++;
          125                 }
          126                 mode = 1;
          127         }
          128         print HEADER > file;
          129 
          130         #generate list of laces 1
          131         if(rune3c > 0) {
          132                 print "static const Rune "name"3[][2] = {" > file;
          133                 for(j = 0; j < rune3c; j++) {
          134                         print "\t{ 0x"rune3v0[j]", 0x"rune3v1[j]" }," > file;
          135                 }
          136                 print "};\n" > file;
          137         }
          138 
          139         #generate list of laces 2
          140         if(rune4c > 0) {
          141                 print "static const Rune "name"4[][2] = {" > file;
          142                 for(j = 0; j < rune4c; j++) {
          143                         print "\t{ 0x"rune4v0[j]", 0x"rune4v1[j]" }," > file;
          144                 }
          145                 print "};\n" > file;
          146         }
          147 
          148         # generate list of ranges
          149         if(rune2c > 0) {
          150                 if(length(casev) > 0) {
          151                         print "static const Rune "name"2[][3] = {" > file;
          152                         for(j = 0; j < rune2c; j++) {
          153                                 print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]", 0x"case2v[j]" }," > file;
          154                         }
          155                 } else {
          156                         print "static const Rune "name"2[][2] = {" > file
          157                         for(j = 0; j < rune2c; j++) {
          158                                 print "\t{ 0x"rune2v0[j]", 0x"rune2v1[j]" }," > file;
          159                         }
          160                 }
          161                 print "};\n" > file;
          162         }
          163 
          164         # generate list of singletons
          165         if(rune1c > 0) {
          166                 if(length(casev) > 0) {
          167                         print "static const Rune "name"1[][2] = {" > file;
          168                         for(j = 0; j < rune1c; j++) {
          169                                 print "\t{ 0x"rune1v[j]", 0x"case1v[j]" }," > file;
          170                         }
          171                 } else {
          172                         print "static const Rune "name"1[] = {" > file;
          173                         for(j = 0; j < rune1c; j++) {
          174                                 print "\t0x"rune1v[j]"," > file;
          175                         }
          176                 }
          177                 print "};\n" > file;
          178         }
          179         # generate lookup function
          180         print "int\nis"name"rune(Rune r)\n{" > file;
          181         if(rune4c > 0 || rune3c > 0)
          182                 print "\tconst Rune *match;\n" > file;
          183         if(rune4c > 0) {
          184                 print "\tif((match = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp)))" > file;
          185                 print "\t\treturn !((r - match[0]) % 2);" > file;
          186         }
          187         if(rune3c > 0) {
          188                 print "\tif((match = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp)))" > file;
          189                 print "\t\treturn !((r - match[0]) % 2);" > file;
          190         }
          191         if(rune2c > 0) {
          192                 print "\tif(bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp))\n\t\treturn 1;" > file;
          193         }
          194         if(rune1c > 0) {
          195                 print "\tif(bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp))\n\t\treturn 1;" > file;
          196         }
          197         print "\treturn 0;\n}" > file;
          198 
          199         # generate case conversion function
          200         if(length(casev) > 0) {
          201                 print "\nint\nto"casename"rune(Rune r)\n{\n\tRune *match;\n" > file;
          202                 if(rune4c > 0) {
          203                         print "\tmatch = bsearch(&r, "name"4, nelem("name"4), sizeof *"name"4, &rune2cmp);" > file;
          204                         print "\tif (match)" > file;
          205                         print "\t\treturn ((r - match[0]) % 2) ? r : r - 1;" > file;
          206                 }
          207                 if(rune3c > 0) {
          208                         print "\tmatch = bsearch(&r, "name"3, nelem("name"3), sizeof *"name"3, &rune2cmp);" > file;
          209                         print "\tif (match)" > file;
          210                         print "\t\treturn ((r - match[0]) % 2) ? r : r + 1;" > file;
          211                 }
          212                 if(rune2c > 0) {
          213                         print "\tmatch = bsearch(&r, "name"2, nelem("name"2), sizeof *"name"2, &rune2cmp);" > file;
          214                         print "\tif (match)" > file;
          215                         print "\t\treturn match[2] + (r - match[0]);" > file;
          216                 }
          217                 if(rune1c > 0) {
          218                         print "\tmatch = bsearch(&r, "name"1, nelem("name"1), sizeof *"name"1, &rune1cmp);" > file;
          219                         print "\tif (match)" > file;
          220                         print "\t\treturn match[1];" > file;
          221                 }
          222                 print "\treturn r;\n}" > file;
          223         }
          224 }
          225 
          226 function otheris() {
          227         print HEADER_OTHER > "isalnumrune.c";
          228         print "int\nisalnumrune(Rune r)\n{\n\treturn isalpharune(r) || isdigitrune(r);\n}" > "isalnumrune.c";
          229         print HEADER_OTHER > "isblankrune.c";
          230         print "int\nisblankrune(Rune r)\n{\n\treturn r == ' ' || r == '\\t';\n}" > "isblankrune.c";
          231         print HEADER_OTHER > "isprintrune.c";
          232         print "int\nisprintrune(Rune r)\n{\n\treturn !iscntrlrune(r) && (r != 0x2028) && (r != 0x2029) &&" > "isprintrune.c";
          233         print "\t       ((r < 0xFFF9) || (r > 0xFFFB));\n}" > "isprintrune.c";
          234         print HEADER_OTHER > "isgraphrune.c";
          235         print "int\nisgraphrune(Rune r)\n{\n\treturn !isspacerune(r) && isprintrune(r);\n}" > "isgraphrune.c";
          236         print HEADER_OTHER > "ispunctrune.c";
          237         print "int\nispunctrune(Rune r)\n{\n\treturn isgraphrune(r) && !isalnumrune(r);\n}" > "ispunctrune.c";
          238         print HEADER_OTHER > "isxdigitrune.c";
          239         print "int\nisxdigitrune(Rune r)\n{\n\treturn (r >= '0' && (r - '0') < 10) || (r >= 'a' && (r - 'a') < 6);\n}" > "isxdigitrune.c";
          240 }