utf.c - vx32 - Local 9vx git repository for patches.
(HTM) git clone git://r-36.net/vx32
(DIR) Log
(DIR) Files
(DIR) Refs
---
utf.c (54810B)
---
1 /*
2 * The authors of this software are Rob Pike and Ken Thompson.
3 * Copyright (c) 1998-2002 by Lucent Technologies.
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose without fee is hereby granted, provided that this entire notice
6 * is included in all copies of any software which is or includes a copy
7 * or modification of this software and in all copies of the supporting
8 * documentation for such software.
9 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
11 * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13 */
14 #define _BSD_SOURCE 1 /* for memccpy */
15 #include <stdarg.h>
16 #include <string.h>
17 #include <stdlib.h>
18 #include "u.h"
19 #include "utf.h"
/*
 * From here on every use of the identifier bsearch in this file is
 * rewritten to utfbsearch.
 * NOTE(review): presumably this keeps a file-local search routine from
 * clashing with the bsearch() declared by <stdlib.h> (included above);
 * confirm against the definition further down the file.
 */
20 #define bsearch utfbsearch
21 /*
22 * compiler directive on Plan 9
23 *
   * When the build has not already supplied USED, it is emulated here:
   * USED(x) expands to `if(x);else`, so x is evaluated and ignored and
   * the statement that follows the macro (normally just the caller's
   * `;`) becomes the else branch.
   */
24 #ifndef USED
25 #define USED(x) if(x);else
26 #endif
27
28 /*
29 * nil cannot be ((void*)0) on ANSI C,
30 * because it is used for function pointers
31 */
32 #undef nil
33 #define nil 0
34
/*
 * nelem(x): element count of x, computed as total size divided by the
 * size of element 0.  Any earlier definition is discarded first.
 */
35 #undef nelem
36 #define nelem(x) (sizeof (x)/sizeof (x)[0])
37
38
39 /* ---------- end preamble -------- */
40
41 /* -------------- rune.c --------------- */
42 /*
43 * The authors of this software are Rob Pike and Ken Thompson.
44 * Copyright (c) 2002 by Lucent Technologies.
45 * Permission to use, copy, modify, and distribute this software for any
46 * purpose without fee is hereby granted, provided that this entire notice
47 * is included in all copies of any software which is or includes a copy
48 * or modification of this software and in all copies of the supporting
49 * documentation for such software.
50 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
51 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
52 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
53 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
54 */
55 // #include <stdarg.h>
56 // #include <string.h>
57 // #include "plan9.h"
58 // #include "utf.h"
59
60 enum
61 {
62 Bit1 = 7,
63 Bitx = 6,
64 Bit2 = 5,
65 Bit3 = 4,
66 Bit4 = 3,
67
68 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
69 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
70 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
71 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
72 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
73
74 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
75 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
76 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
77
78 Maskx = (1<<Bitx)-1, /* 0011 1111 */
79 Testx = Maskx ^ 0xFF, /* 1100 0000 */
80
81 Bad = Runeerror
82 };
83
84 int
85 chartorune(Rune *rune, char *str)
86 {
87 int c, c1, c2;
88 long l;
89
90 /*
91 * one character sequence
92 * 00000-0007F => T1
93 */
94 c = *(uchar*)str;
95 if(c < Tx) {
96 *rune = c;
97 return 1;
98 }
99
100 /*
101 * two character sequence
102 * 0080-07FF => T2 Tx
103 */
104 c1 = *(uchar*)(str+1) ^ Tx;
105 if(c1 & Testx)
106 goto bad;
107 if(c < T3) {
108 if(c < T2)
109 goto bad;
110 l = ((c << Bitx) | c1) & Rune2;
111 if(l <= Rune1)
112 goto bad;
113 *rune = l;
114 return 2;
115 }
116
117 /*
118 * three character sequence
119 * 0800-FFFF => T3 Tx Tx
120 */
121 c2 = *(uchar*)(str+2) ^ Tx;
122 if(c2 & Testx)
123 goto bad;
124 if(c < T4) {
125 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
126 if(l <= Rune2)
127 goto bad;
128 *rune = l;
129 return 3;
130 }
131
132 /*
133 * bad decoding
134 */
135 bad:
136 *rune = Bad;
137 return 1;
138 }
139
140 int
141 runetochar(char *str, Rune *rune)
142 {
143 long c;
144
145 /*
146 * one character sequence
147 * 00000-0007F => 00-7F
148 */
149 c = *rune;
150 if(c <= Rune1) {
151 str[0] = c;
152 return 1;
153 }
154
155 /*
156 * two character sequence
157 * 0080-07FF => T2 Tx
158 */
159 if(c <= Rune2) {
160 str[0] = T2 | (c >> 1*Bitx);
161 str[1] = Tx | (c & Maskx);
162 return 2;
163 }
164
165 /*
166 * three character sequence
167 * 0800-FFFF => T3 Tx Tx
168 */
169 str[0] = T3 | (c >> 2*Bitx);
170 str[1] = Tx | ((c >> 1*Bitx) & Maskx);
171 str[2] = Tx | (c & Maskx);
172 return 3;
173 }
174
175 int
176 runelen(long c)
177 {
178 Rune rune;
179 char str[10];
180
181 rune = c;
182 return runetochar(str, &rune);
183 }
184
185 int
186 runenlen(Rune *r, int nrune)
187 {
188 int nb, c;
189
190 nb = 0;
191 while(nrune--) {
192 c = *r++;
193 if(c <= Rune1)
194 nb++;
195 else
196 if(c <= Rune2)
197 nb += 2;
198 else
199 nb += 3;
200 }
201 return nb;
202 }
203
204 int
205 fullrune(char *str, int n)
206 {
207 int c;
208
209 if(n > 0) {
210 c = *(uchar*)str;
211 if(c < Tx)
212 return 1;
213 if(n > 1)
214 if(c < T3 || n > 2)
215 return 1;
216 }
217 return 0;
218 }
219 /* -------------- runestrcat.c --------------- */
220 /*
221 * The authors of this software are Rob Pike and Ken Thompson.
222 * Copyright (c) 2002 by Lucent Technologies.
223 * Permission to use, copy, modify, and distribute this software for any
224 * purpose without fee is hereby granted, provided that this entire notice
225 * is included in all copies of any software which is or includes a copy
226 * or modification of this software and in all copies of the supporting
227 * documentation for such software.
228 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
229 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
230 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
231 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
232 */
233 // #include <stdarg.h>
234 // #include <string.h>
235 // #include "plan9.h"
236 // #include "utf.h"
237
238 Rune*
239 runestrcat(Rune *s1, Rune *s2)
240 {
241
242 runestrcpy(runestrchr(s1, 0), s2);
243 return s1;
244 }
245 /* -------------- runestrchr.c --------------- */
246 /*
247 * The authors of this software are Rob Pike and Ken Thompson.
248 * Copyright (c) 2002 by Lucent Technologies.
249 * Permission to use, copy, modify, and distribute this software for any
250 * purpose without fee is hereby granted, provided that this entire notice
251 * is included in all copies of any software which is or includes a copy
252 * or modification of this software and in all copies of the supporting
253 * documentation for such software.
254 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
255 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
256 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
257 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
258 */
259 // #include <stdarg.h>
260 // #include <string.h>
261 // #include "plan9.h"
262 // #include "utf.h"
263
264 Rune*
265 runestrchr(Rune *s, Rune c)
266 {
267 Rune c0 = c;
268 Rune c1;
269
270 if(c == 0) {
271 while(*s++)
272 ;
273 return s-1;
274 }
275
276 while((c1 = *s++))
277 if(c1 == c0)
278 return s-1;
279 return 0;
280 }
281 /* -------------- runestrcmp.c --------------- */
282 /*
283 * The authors of this software are Rob Pike and Ken Thompson.
284 * Copyright (c) 2002 by Lucent Technologies.
285 * Permission to use, copy, modify, and distribute this software for any
286 * purpose without fee is hereby granted, provided that this entire notice
287 * is included in all copies of any software which is or includes a copy
288 * or modification of this software and in all copies of the supporting
289 * documentation for such software.
290 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
291 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
292 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
293 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
294 */
295 // #include <stdarg.h>
296 // #include <string.h>
297 // #include "plan9.h"
298 // #include "utf.h"
299
300 int
301 runestrcmp(Rune *s1, Rune *s2)
302 {
303 Rune c1, c2;
304
305 for(;;) {
306 c1 = *s1++;
307 c2 = *s2++;
308 if(c1 != c2) {
309 if(c1 > c2)
310 return 1;
311 return -1;
312 }
313 if(c1 == 0)
314 return 0;
315 }
316 }
317 /* -------------- runestrcpy.c --------------- */
318 /*
319 * The authors of this software are Rob Pike and Ken Thompson.
320 * Copyright (c) 2002 by Lucent Technologies.
321 * Permission to use, copy, modify, and distribute this software for any
322 * purpose without fee is hereby granted, provided that this entire notice
323 * is included in all copies of any software which is or includes a copy
324 * or modification of this software and in all copies of the supporting
325 * documentation for such software.
326 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
327 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
328 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
329 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
330 */
331 // #include <stdarg.h>
332 // #include <string.h>
333 // #include "plan9.h"
334 // #include "utf.h"
335
336 Rune*
337 runestrcpy(Rune *s1, Rune *s2)
338 {
339 Rune *os1;
340
341 os1 = s1;
342 while((*s1++ = *s2++))
343 ;
344 return os1;
345 }
346 /* -------------- runestrdup.c --------------- */
347 /*
348 * The authors of this software are Rob Pike and Ken Thompson.
349 * Copyright (c) 2002 by Lucent Technologies.
350 * Permission to use, copy, modify, and distribute this software for any
351 * purpose without fee is hereby granted, provided that this entire notice
352 * is included in all copies of any software which is or includes a copy
353 * or modification of this software and in all copies of the supporting
354 * documentation for such software.
355 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
356 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
357 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
358 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
359 */
360 // #include <stdarg.h>
361 // #include <string.h>
362 // #include <stdlib.h>
363 // #include "plan9.h"
364 // #include "utf.h"
365
366 Rune*
367 runestrdup(Rune *s)
368 {
369 Rune *ns;
370
371 ns = malloc(sizeof(Rune)*(runestrlen(s) + 1));
372 if(ns == 0)
373 return 0;
374
375 return runestrcpy(ns, s);
376 }
377 /* -------------- runestrlen.c --------------- */
378 /*
379 * The authors of this software are Rob Pike and Ken Thompson.
380 * Copyright (c) 2002 by Lucent Technologies.
381 * Permission to use, copy, modify, and distribute this software for any
382 * purpose without fee is hereby granted, provided that this entire notice
383 * is included in all copies of any software which is or includes a copy
384 * or modification of this software and in all copies of the supporting
385 * documentation for such software.
386 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
387 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
388 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
389 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
390 */
391 // #include <stdarg.h>
392 // #include <string.h>
393 // #include "plan9.h"
394 // #include "utf.h"
395
396 long
397 runestrlen(Rune *s)
398 {
399
400 return runestrchr(s, 0) - s;
401 }
402 /* -------------- runestrecpy.c --------------- */
403 /*
404 * The authors of this software are Rob Pike and Ken Thompson.
405 * Copyright (c) 2002 by Lucent Technologies.
406 * Permission to use, copy, modify, and distribute this software for any
407 * purpose without fee is hereby granted, provided that this entire notice
408 * is included in all copies of any software which is or includes a copy
409 * or modification of this software and in all copies of the supporting
410 * documentation for such software.
411 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
412 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
413 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
414 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
415 */
416 // #include <stdarg.h>
417 // #include <string.h>
418 // #include "plan9.h"
419 // #include "utf.h"
420
421 Rune*
422 runestrecpy(Rune *s1, Rune *es1, Rune *s2)
423 {
424 if(s1 >= es1)
425 return s1;
426
427 while((*s1++ = *s2++)){
428 if(s1 == es1){
429 *--s1 = '\0';
430 break;
431 }
432 }
433 return s1;
434 }
435 /* -------------- runestrncat.c --------------- */
436 /*
437 * The authors of this software are Rob Pike and Ken Thompson.
438 * Copyright (c) 2002 by Lucent Technologies.
439 * Permission to use, copy, modify, and distribute this software for any
440 * purpose without fee is hereby granted, provided that this entire notice
441 * is included in all copies of any software which is or includes a copy
442 * or modification of this software and in all copies of the supporting
443 * documentation for such software.
444 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
445 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
446 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
447 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
448 */
449 // #include <stdarg.h>
450 // #include <string.h>
451 // #include "plan9.h"
452 // #include "utf.h"
453
454 Rune*
455 runestrncat(Rune *s1, Rune *s2, long n)
456 {
457 Rune *os1;
458
459 os1 = s1;
460 s1 = runestrchr(s1, 0);
461 while((*s1++ = *s2++))
462 if(--n < 0) {
463 s1[-1] = 0;
464 break;
465 }
466 return os1;
467 }
468 /* -------------- runestrncmp.c --------------- */
469 /*
470 * The authors of this software are Rob Pike and Ken Thompson.
471 * Copyright (c) 2002 by Lucent Technologies.
472 * Permission to use, copy, modify, and distribute this software for any
473 * purpose without fee is hereby granted, provided that this entire notice
474 * is included in all copies of any software which is or includes a copy
475 * or modification of this software and in all copies of the supporting
476 * documentation for such software.
477 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
478 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
479 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
480 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
481 */
482 // #include <stdarg.h>
483 // #include <string.h>
484 // #include "plan9.h"
485 // #include "utf.h"
486
487 int
488 runestrncmp(Rune *s1, Rune *s2, long n)
489 {
490 Rune c1, c2;
491
492 while(n > 0) {
493 c1 = *s1++;
494 c2 = *s2++;
495 n--;
496 if(c1 != c2) {
497 if(c1 > c2)
498 return 1;
499 return -1;
500 }
501 if(c1 == 0)
502 break;
503 }
504 return 0;
505 }
506 /* -------------- runestrncpy.c --------------- */
507 /*
508 * The authors of this software are Rob Pike and Ken Thompson.
509 * Copyright (c) 2002 by Lucent Technologies.
510 * Permission to use, copy, modify, and distribute this software for any
511 * purpose without fee is hereby granted, provided that this entire notice
512 * is included in all copies of any software which is or includes a copy
513 * or modification of this software and in all copies of the supporting
514 * documentation for such software.
515 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
516 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
517 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
518 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
519 */
520 // #include <stdarg.h>
521 // #include <string.h>
522 // #include "plan9.h"
523 // #include "utf.h"
524
525 Rune*
526 runestrncpy(Rune *s1, Rune *s2, long n)
527 {
528 int i;
529 Rune *os1;
530
531 os1 = s1;
532 for(i = 0; i < n; i++)
533 if((*s1++ = *s2++) == 0) {
534 while(++i < n)
535 *s1++ = 0;
536 return os1;
537 }
538 return os1;
539 }
540 /* -------------- runestrrchr.c --------------- */
541 /*
542 * The authors of this software are Rob Pike and Ken Thompson.
543 * Copyright (c) 2002 by Lucent Technologies.
544 * Permission to use, copy, modify, and distribute this software for any
545 * purpose without fee is hereby granted, provided that this entire notice
546 * is included in all copies of any software which is or includes a copy
547 * or modification of this software and in all copies of the supporting
548 * documentation for such software.
549 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
550 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
551 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
552 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
553 */
554 // #include <stdarg.h>
555 // #include <string.h>
556 // #include "plan9.h"
557 // #include "utf.h"
558
559 Rune*
560 runestrrchr(Rune *s, Rune c)
561 {
562 Rune *r;
563
564 if(c == 0)
565 return runestrchr(s, 0);
566 r = 0;
567 while((s = runestrchr(s, c)))
568 r = s++;
569 return r;
570 }
571 /* -------------- runestrstr.c --------------- */
572 /*
573 * The authors of this software are Rob Pike and Ken Thompson.
574 * Copyright (c) 2002 by Lucent Technologies.
575 * Permission to use, copy, modify, and distribute this software for any
576 * purpose without fee is hereby granted, provided that this entire notice
577 * is included in all copies of any software which is or includes a copy
578 * or modification of this software and in all copies of the supporting
579 * documentation for such software.
580 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
581 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
582 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
583 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
584 */
585 // #include <stdarg.h>
586 // #include <string.h>
587 // #include "plan9.h"
588 // #include "utf.h"
589
590 /*
591 * Return pointer to first occurrence of s2 in s1,
592 * 0 if none
593 */
594 Rune*
595 runestrstr(Rune *s1, Rune *s2)
596 {
597 Rune *p, *pa, *pb;
598 int c0, c;
599
600 c0 = *s2;
601 if(c0 == 0)
602 return s1;
603 s2++;
604 for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) {
605 pa = p;
606 for(pb=s2;; pb++) {
607 c = *pb;
608 if(c == 0)
609 return p;
610 if(c != *++pa)
611 break;
612 }
613 }
614 return 0;
615 }
616 /* -------------- runetype.c --------------- */
617 /*
618 * The authors of this software are Rob Pike and Ken Thompson.
619 * Copyright (c) 2002 by Lucent Technologies.
620 * Permission to use, copy, modify, and distribute this software for any
621 * purpose without fee is hereby granted, provided that this entire notice
622 * is included in all copies of any software which is or includes a copy
623 * or modification of this software and in all copies of the supporting
624 * documentation for such software.
625 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
626 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
627 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
628 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
629 */
630 // #include <stdarg.h>
631 // #include <string.h>
632 // #include "plan9.h"
633 // #include "utf.h"
634
635 /*
636 * alpha ranges -
637 * only covers ranges not in lower||upper
638 */
/*
 * Table of alphabetic code-point ranges, stored flat as consecutive
 * (first, last) pairs; the trailing comment on each row shows the
 * characters at the two endpoints.  Per the note above, ranges that
 * already appear in the lower/upper case tables are not repeated.
 * Rows are listed in ascending order.
 * NOTE(review): the lookup routine is outside this excerpt; it is
 * presumably a binary search, so keep the rows sorted -- confirm.
 */
639 static
640 Rune __alpha2[] =
641 {
642 0x00d8, 0x00f6, /* Ø - ö */
643 0x00f8, 0x01f5, /* ø - ǵ */
644 0x0250, 0x02a8, /* ɐ - ʨ */
645 0x038e, 0x03a1, /* Ύ - Ρ */
646 0x03a3, 0x03ce, /* Σ - ώ */
647 0x03d0, 0x03d6, /* ϐ - ϖ */
648 0x03e2, 0x03f3, /* Ϣ - ϳ */
649 0x0490, 0x04c4, /* Ґ - ӄ */
650 0x0561, 0x0587, /* ա - և */
651 0x05d0, 0x05ea, /* א - ת */
652 0x05f0, 0x05f2, /* װ - ײ */
653 0x0621, 0x063a, /* ء - غ */
654 0x0640, 0x064a, /* ـ - ي */
655 0x0671, 0x06b7, /* ٱ - ڷ */
656 0x06ba, 0x06be, /* ں - ھ */
657 0x06c0, 0x06ce, /* ۀ - ێ */
658 0x06d0, 0x06d3, /* ې - ۓ */
659 0x0905, 0x0939, /* अ - ह */
660 0x0958, 0x0961, /* क़ - ॡ */
661 0x0985, 0x098c, /* অ - ঌ */
662 0x098f, 0x0990, /* এ - ঐ */
663 0x0993, 0x09a8, /* ও - ন */
664 0x09aa, 0x09b0, /* প - র */
665 0x09b6, 0x09b9, /* শ - হ */
666 0x09dc, 0x09dd, /* ড় - ঢ় */
667 0x09df, 0x09e1, /* য় - ৡ */
668 0x09f0, 0x09f1, /* ৰ - ৱ */
669 0x0a05, 0x0a0a, /* ਅ - ਊ */
670 0x0a0f, 0x0a10, /* ਏ - ਐ */
671 0x0a13, 0x0a28, /* ਓ - ਨ */
672 0x0a2a, 0x0a30, /* ਪ - ਰ */
673 0x0a32, 0x0a33, /* ਲ - ਲ਼ */
674 0x0a35, 0x0a36, /* ਵ - ਸ਼ */
675 0x0a38, 0x0a39, /* ਸ - ਹ */
676 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */
677 0x0a85, 0x0a8b, /* અ - ઋ */
678 0x0a8f, 0x0a91, /* એ - ઑ */
679 0x0a93, 0x0aa8, /* ઓ - ન */
680 0x0aaa, 0x0ab0, /* પ - ર */
681 0x0ab2, 0x0ab3, /* લ - ળ */
682 0x0ab5, 0x0ab9, /* વ - હ */
683 0x0b05, 0x0b0c, /* ଅ - ଌ */
684 0x0b0f, 0x0b10, /* ଏ - ଐ */
685 0x0b13, 0x0b28, /* ଓ - ନ */
686 0x0b2a, 0x0b30, /* ପ - ର */
687 0x0b32, 0x0b33, /* ଲ - ଳ */
688 0x0b36, 0x0b39, /* ଶ - ହ */
689 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */
690 0x0b5f, 0x0b61, /* ୟ - ୡ */
691 0x0b85, 0x0b8a, /* அ - ஊ */
692 0x0b8e, 0x0b90, /* எ - ஐ */
693 0x0b92, 0x0b95, /* ஒ - க */
694 0x0b99, 0x0b9a, /* ங - ச */
695 0x0b9e, 0x0b9f, /* ஞ - ட */
696 0x0ba3, 0x0ba4, /* ண - த */
697 0x0ba8, 0x0baa, /* ந - ப */
698 0x0bae, 0x0bb5, /* ம - வ */
699 0x0bb7, 0x0bb9, /* ஷ - ஹ */
700 0x0c05, 0x0c0c, /* అ - ఌ */
701 0x0c0e, 0x0c10, /* ఎ - ఐ */
702 0x0c12, 0x0c28, /* ఒ - న */
703 0x0c2a, 0x0c33, /* ప - ళ */
704 0x0c35, 0x0c39, /* వ - హ */
705 0x0c60, 0x0c61, /* ౠ - ౡ */
706 0x0c85, 0x0c8c, /* ಅ - ಌ */
707 0x0c8e, 0x0c90, /* ಎ - ಐ */
708 0x0c92, 0x0ca8, /* ಒ - ನ */
709 0x0caa, 0x0cb3, /* ಪ - ಳ */
710 0x0cb5, 0x0cb9, /* ವ - ಹ */
711 0x0ce0, 0x0ce1, /* ೠ - ೡ */
712 0x0d05, 0x0d0c, /* അ - ഌ */
713 0x0d0e, 0x0d10, /* എ - ഐ */
714 0x0d12, 0x0d28, /* ഒ - ന */
715 0x0d2a, 0x0d39, /* പ - ഹ */
716 0x0d60, 0x0d61, /* ൠ - ൡ */
717 0x0e01, 0x0e30, /* ก - ะ */
718 0x0e32, 0x0e33, /* า - ำ */
719 0x0e40, 0x0e46, /* เ - ๆ */
720 0x0e5a, 0x0e5b, /* ๚ - ๛ */
721 0x0e81, 0x0e82, /* ກ - ຂ */
722 0x0e87, 0x0e88, /* ງ - ຈ */
723 0x0e94, 0x0e97, /* ດ - ທ */
724 0x0e99, 0x0e9f, /* ນ - ຟ */
725 0x0ea1, 0x0ea3, /* ມ - ຣ */
726 0x0eaa, 0x0eab, /* ສ - ຫ */
727 0x0ead, 0x0eae, /* ອ - ຮ */
728 0x0eb2, 0x0eb3, /* າ - ຳ */
729 0x0ec0, 0x0ec4, /* ເ - ໄ */
730 0x0edc, 0x0edd, /* ໜ - ໝ */
731 0x0f18, 0x0f19, /* ༘ - ༙ */
732 0x0f40, 0x0f47, /* ཀ - ཇ */
733 0x0f49, 0x0f69, /* ཉ - ཀྵ */
734 0x10d0, 0x10f6, /* ა - ჶ */
735 0x1100, 0x1159, /* ᄀ - ᅙ */
736 0x115f, 0x11a2, /* ᅟ - ᆢ */
737 0x11a8, 0x11f9, /* ᆨ - ᇹ */
738 0x1e00, 0x1e9b, /* Ḁ - ẛ */
739 0x1f50, 0x1f57, /* ὐ - ὗ */
740 0x1f80, 0x1fb4, /* ᾀ - ᾴ */
741 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */
742 0x1fc2, 0x1fc4, /* ῂ - ῄ */
743 0x1fc6, 0x1fcc, /* ῆ - ῌ */
744 0x1fd0, 0x1fd3, /* ῐ - ΐ */
745 0x1fd6, 0x1fdb, /* ῖ - Ί */
746 0x1fe0, 0x1fec, /* ῠ - Ῥ */
747 0x1ff2, 0x1ff4, /* ῲ - ῴ */
748 0x1ff6, 0x1ffc, /* ῶ - ῼ */
749 0x210a, 0x2113, /* ℊ - ℓ */
750 0x2115, 0x211d, /* ℕ - ℝ */
751 0x2120, 0x2122, /* ℠ - ™ */
752 0x212a, 0x2131, /* K (U+212A, Kelvin sign) - ℱ */
753 0x2133, 0x2138, /* ℳ - ℸ */
754 0x3041, 0x3094, /* ぁ - ゔ */
755 0x30a1, 0x30fa, /* ァ - ヺ */
756 0x3105, 0x312c, /* ㄅ - ㄬ */
757 0x3131, 0x318e, /* ㄱ - ㆎ */
758 0x3192, 0x319f, /* ㆒ - ㆟ */
759 0x3260, 0x327b, /* ㉠ - ㉻ */
760 0x328a, 0x32b0, /* ㊊ - ㊰ */
761 0x32d0, 0x32fe, /* ㋐ - ㋾ */
762 0x3300, 0x3357, /* ㌀ - ㍗ */
763 0x3371, 0x3376, /* ㍱ - ㍶ */
764 0x337b, 0x3394, /* ㍻ - ㎔ */
765 0x3399, 0x339e, /* ㎙ - ㎞ */
766 0x33a9, 0x33ad, /* ㎩ - ㎭ */
767 0x33b0, 0x33c1, /* ㎰ - ㏁ */
768 0x33c3, 0x33c5, /* ㏃ - ㏅ */
769 0x33c7, 0x33d7, /* ㏇ - ㏗ */
770 0x33d9, 0x33dd, /* ㏙ - ㏝ */
771 0x4e00, 0x9fff, /* 一 - 鿿 */
772 0xac00, 0xd7a3, /* 가 - 힣 */
773 0xf900, 0xfb06, /* 豈 - ﬆ (U+FB06, st ligature) */
774 0xfb13, 0xfb17, /* ﬓ - ﬗ */
775 0xfb1f, 0xfb28, /* ײַ - ﬨ */
776 0xfb2a, 0xfb36, /* שׁ - זּ */
777 0xfb38, 0xfb3c, /* טּ - לּ */
778 0xfb40, 0xfb41, /* נּ - סּ */
779 0xfb43, 0xfb44, /* ףּ - פּ */
780 0xfb46, 0xfbb1, /* צּ - ﮱ */
781 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */
782 0xfd50, 0xfd8f, /* ﵐ - ﶏ */
783 0xfd92, 0xfdc7, /* ﶒ - ﷇ */
784 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */
785 0xfe70, 0xfe72, /* ﹰ - ﹲ */
786 0xfe76, 0xfefc, /* ﹶ - ﻼ */
787 0xff66, 0xff6f, /* ヲ - ッ */
788 0xff71, 0xff9d, /* ア - ン */
789 0xffa0, 0xffbe, /* ᅠ - ᄒ */
790 0xffc2, 0xffc7, /* ᅡ - ᅦ */
791 0xffca, 0xffcf, /* ᅧ - ᅬ */
792 0xffd2, 0xffd7, /* ᅭ - ᅲ */
793 0xffda, 0xffdc, /* ᅳ - ᅵ */
794 };
795
796 /*
797 * alpha singlets -
798 * only covers ranges not in lower||upper
799 */
/*
 * Table of individual alphabetic code points, one entry per row; the
 * trailing comment shows the character.  Per the note above, code
 * points already covered by the lower/upper case tables are left out.
 * Rows are listed in ascending order.
 * NOTE(review): the lookup routine is outside this excerpt; it is
 * presumably a binary search, so keep the rows sorted -- confirm.
 */
800 static
801 Rune __alpha1[] =
802 {
803 0x00aa, /* ª */
804 0x00b5, /* µ */
805 0x00ba, /* º */
806 0x03da, /* Ϛ */
807 0x03dc, /* Ϝ */
808 0x03de, /* Ϟ */
809 0x03e0, /* Ϡ */
810 0x06d5, /* ە */
811 0x09b2, /* ল */
812 0x0a5e, /* ਫ਼ */
813 0x0a8d, /* ઍ */
814 0x0ae0, /* ૠ */
815 0x0b9c, /* ஜ */
816 0x0cde, /* ೞ */
817 0x0e4f, /* ๏ */
818 0x0e84, /* ຄ */
819 0x0e8a, /* ຊ */
820 0x0e8d, /* ຍ */
821 0x0ea5, /* ລ */
822 0x0ea7, /* ວ */
823 0x0eb0, /* ະ */
824 0x0ebd, /* ຽ */
825 0x1fbe, /* ι */
826 0x207f, /* ⁿ */
827 0x20a8, /* ₨ */
828 0x2102, /* ℂ */
829 0x2107, /* ℇ */
830 0x2124, /* ℤ */
831 0x2126, /* Ω */
832 0x2128, /* ℨ */
833 0xfb3e, /* מּ */
834 0xfe74, /* ﹴ */
835 };
836
837 /*
838 * space ranges
839 */
/*
 * Table of white-space code-point ranges, stored flat as consecutive
 * (first, last) pairs in ascending order; a single code point is
 * written as a range whose two ends are equal.
 * The row comments name the code points in words because the
 * characters themselves are invisible (and U+2028/U+2029 break the
 * source line when written literally).
 */
840 static
841 Rune __space2[] =
842 {
843 0x0009, 0x000a, /* tab and newline */
844 0x0020, 0x0020, /* space */
845 0x00a0, 0x00a0, /* no-break space */
846 0x2000, 0x200b, /* en quad - zero width space */
847 0x2028, 0x2029, /* line separator - paragraph separator */
848 0x3000, 0x3000, /* ideographic space */
849 0xfeff, 0xfeff, /* zero width no-break space (byte order mark) */
850 };
851
852 /*
853 * lower case ranges
854 * 3rd col is conversion excess 500
855 */
/*
 * Table of lower-case ranges, stored flat as consecutive triples:
 * first code point, last code point, and the distance to the
 * corresponding upper-case letters plus 500 ("excess 500").
 * Example: the a-z row holds 468, i.e. 468 - 500 = -32, and
 * 'a' - 32 is 'A'; the 0x1f70 row holds 574, i.e. +74, giving 0x1fba.
 * The row comment shows the lower-case range followed by the
 * upper-case range it maps to.  Rows are in ascending order.
 * NOTE(review): the bias presumably exists so that negative distances
 * fit in a Rune, and the code applying it is outside this excerpt --
 * confirm both against the conversion routine.
 */
856 static
857 Rune __toupper2[] =
858 {
859 0x0061, 0x007a, 468, /* a-z A-Z */
860 0x00e0, 0x00f6, 468, /* à-ö À-Ö */
861 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */
862 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */
863 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */
864 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */
865 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */
866 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */
867 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */
868 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */
869 0x0430, 0x044f, 468, /* а-я А-Я */
870 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */
871 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */
872 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */
873 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */
874 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */
875 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */
876 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */
877 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */
878 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */
879 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */
880 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */
881 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */
882 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */
883 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */
884 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */
885 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */
886 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */
887 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */
888 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */
889 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */
890 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */
891 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */
892 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */
893 0xff41, 0xff5a, 468, /* ａ-ｚ Ａ-Ｚ (fullwidth forms) */
894 };
895
896 /*
897 * lower case singlets
898 * 2nd col is conversion excess 500
899 */
900 static
901 Rune __toupper1[] =
902 {
903 0x00ff, 621, /* ÿ Ÿ */
904 0x0101, 499, /* ā Ā */
905 0x0103, 499, /* ă Ă */
906 0x0105, 499, /* ą Ą */
907 0x0107, 499, /* ć Ć */
908 0x0109, 499, /* ĉ Ĉ */
909 0x010b, 499, /* ċ Ċ */
910 0x010d, 499, /* č Č */
911 0x010f, 499, /* ď Ď */
912 0x0111, 499, /* đ Đ */
913 0x0113, 499, /* ē Ē */
914 0x0115, 499, /* ĕ Ĕ */
915 0x0117, 499, /* ė Ė */
916 0x0119, 499, /* ę Ę */
917 0x011b, 499, /* ě Ě */
918 0x011d, 499, /* ĝ Ĝ */
919 0x011f, 499, /* ğ Ğ */
920 0x0121, 499, /* ġ Ġ */
921 0x0123, 499, /* ģ Ģ */
922 0x0125, 499, /* ĥ Ĥ */
923 0x0127, 499, /* ħ Ħ */
924 0x0129, 499, /* ĩ Ĩ */
925 0x012b, 499, /* ī Ī */
926 0x012d, 499, /* ĭ Ĭ */
927 0x012f, 499, /* į Į */
928 0x0131, 268, /* ı I */
929 0x0133, 499, /* ij IJ */
930 0x0135, 499, /* ĵ Ĵ */
931 0x0137, 499, /* ķ Ķ */
932 0x013a, 499, /* ĺ Ĺ */
933 0x013c, 499, /* ļ Ļ */
934 0x013e, 499, /* ľ Ľ */
935 0x0140, 499, /* ŀ Ŀ */
936 0x0142, 499, /* ł Ł */
937 0x0144, 499, /* ń Ń */
938 0x0146, 499, /* ņ Ņ */
939 0x0148, 499, /* ň Ň */
940 0x014b, 499, /* ŋ Ŋ */
941 0x014d, 499, /* ō Ō */
942 0x014f, 499, /* ŏ Ŏ */
943 0x0151, 499, /* ő Ő */
944 0x0153, 499, /* œ Œ */
945 0x0155, 499, /* ŕ Ŕ */
946 0x0157, 499, /* ŗ Ŗ */
947 0x0159, 499, /* ř Ř */
948 0x015b, 499, /* ś Ś */
949 0x015d, 499, /* ŝ Ŝ */
950 0x015f, 499, /* ş Ş */
951 0x0161, 499, /* š Š */
952 0x0163, 499, /* ţ Ţ */
953 0x0165, 499, /* ť Ť */
954 0x0167, 499, /* ŧ Ŧ */
955 0x0169, 499, /* ũ Ũ */
956 0x016b, 499, /* ū Ū */
957 0x016d, 499, /* ŭ Ŭ */
958 0x016f, 499, /* ů Ů */
959 0x0171, 499, /* ű Ű */
960 0x0173, 499, /* ų Ų */
961 0x0175, 499, /* ŵ Ŵ */
962 0x0177, 499, /* ŷ Ŷ */
963 0x017a, 499, /* ź Ź */
964 0x017c, 499, /* ż Ż */
965 0x017e, 499, /* ž Ž */
966 0x017f, 200, /* ſ S */
967 0x0183, 499, /* ƃ Ƃ */
968 0x0185, 499, /* ƅ Ƅ */
969 0x0188, 499, /* ƈ Ƈ */
970 0x018c, 499, /* ƌ Ƌ */
971 0x0192, 499, /* ƒ Ƒ */
972 0x0199, 499, /* ƙ Ƙ */
973 0x01a1, 499, /* ơ Ơ */
974 0x01a3, 499, /* ƣ Ƣ */
975 0x01a5, 499, /* ƥ Ƥ */
976 0x01a8, 499, /* ƨ Ƨ */
977 0x01ad, 499, /* ƭ Ƭ */
978 0x01b0, 499, /* ư Ư */
979 0x01b4, 499, /* ƴ Ƴ */
980 0x01b6, 499, /* ƶ Ƶ */
981 0x01b9, 499, /* ƹ Ƹ */
982 0x01bd, 499, /* ƽ Ƽ */
983 0x01c5, 499, /* Dž DŽ */
984 0x01c6, 498, /* dž DŽ */
985 0x01c8, 499, /* Lj LJ */
986 0x01c9, 498, /* lj LJ */
987 0x01cb, 499, /* Nj NJ */
988 0x01cc, 498, /* nj NJ */
989 0x01ce, 499, /* ǎ Ǎ */
990 0x01d0, 499, /* ǐ Ǐ */
991 0x01d2, 499, /* ǒ Ǒ */
992 0x01d4, 499, /* ǔ Ǔ */
993 0x01d6, 499, /* ǖ Ǖ */
994 0x01d8, 499, /* ǘ Ǘ */
995 0x01da, 499, /* ǚ Ǚ */
996 0x01dc, 499, /* ǜ Ǜ */
997 0x01df, 499, /* ǟ Ǟ */
998 0x01e1, 499, /* ǡ Ǡ */
999 0x01e3, 499, /* ǣ Ǣ */
1000 0x01e5, 499, /* ǥ Ǥ */
1001 0x01e7, 499, /* ǧ Ǧ */
1002 0x01e9, 499, /* ǩ Ǩ */
1003 0x01eb, 499, /* ǫ Ǫ */
1004 0x01ed, 499, /* ǭ Ǭ */
1005 0x01ef, 499, /* ǯ Ǯ */
1006 0x01f2, 499, /* Dz DZ */
1007 0x01f3, 498, /* dz DZ */
1008 0x01f5, 499, /* ǵ Ǵ */
1009 0x01fb, 499, /* ǻ Ǻ */
1010 0x01fd, 499, /* ǽ Ǽ */
1011 0x01ff, 499, /* ǿ Ǿ */
1012 0x0201, 499, /* ȁ Ȁ */
1013 0x0203, 499, /* ȃ Ȃ */
1014 0x0205, 499, /* ȅ Ȅ */
1015 0x0207, 499, /* ȇ Ȇ */
1016 0x0209, 499, /* ȉ Ȉ */
1017 0x020b, 499, /* ȋ Ȋ */
1018 0x020d, 499, /* ȍ Ȍ */
1019 0x020f, 499, /* ȏ Ȏ */
1020 0x0211, 499, /* ȑ Ȑ */
1021 0x0213, 499, /* ȓ Ȓ */
1022 0x0215, 499, /* ȕ Ȕ */
1023 0x0217, 499, /* ȗ Ȗ */
1024 0x0253, 290, /* ɓ Ɓ */
1025 0x0254, 294, /* ɔ Ɔ */
1026 0x025b, 297, /* ɛ Ɛ */
1027 0x0260, 295, /* ɠ Ɠ */
1028 0x0263, 293, /* ɣ Ɣ */
1029 0x0268, 291, /* ɨ Ɨ */
1030 0x0269, 289, /* ɩ Ɩ */
1031 0x026f, 289, /* ɯ Ɯ */
1032 0x0272, 287, /* ɲ Ɲ */
1033 0x0283, 282, /* ʃ Ʃ */
1034 0x0288, 282, /* ʈ Ʈ */
1035 0x0292, 281, /* ʒ Ʒ */
1036 0x03ac, 462, /* ά Ά */
1037 0x03cc, 436, /* ό Ό */
1038 0x03d0, 438, /* ϐ Β */
1039 0x03d1, 443, /* ϑ Θ */
1040 0x03d5, 453, /* ϕ Φ */
1041 0x03d6, 446, /* ϖ Π */
1042 0x03e3, 499, /* ϣ Ϣ */
1043 0x03e5, 499, /* ϥ Ϥ */
1044 0x03e7, 499, /* ϧ Ϧ */
1045 0x03e9, 499, /* ϩ Ϩ */
1046 0x03eb, 499, /* ϫ Ϫ */
1047 0x03ed, 499, /* ϭ Ϭ */
1048 0x03ef, 499, /* ϯ Ϯ */
1049 0x03f0, 414, /* ϰ Κ */
1050 0x03f1, 420, /* ϱ Ρ */
1051 0x0461, 499, /* ѡ Ѡ */
1052 0x0463, 499, /* ѣ Ѣ */
1053 0x0465, 499, /* ѥ Ѥ */
1054 0x0467, 499, /* ѧ Ѧ */
1055 0x0469, 499, /* ѩ Ѩ */
1056 0x046b, 499, /* ѫ Ѫ */
1057 0x046d, 499, /* ѭ Ѭ */
1058 0x046f, 499, /* ѯ Ѯ */
1059 0x0471, 499, /* ѱ Ѱ */
1060 0x0473, 499, /* ѳ Ѳ */
1061 0x0475, 499, /* ѵ Ѵ */
1062 0x0477, 499, /* ѷ Ѷ */
1063 0x0479, 499, /* ѹ Ѹ */
1064 0x047b, 499, /* ѻ Ѻ */
1065 0x047d, 499, /* ѽ Ѽ */
1066 0x047f, 499, /* ѿ Ѿ */
1067 0x0481, 499, /* ҁ Ҁ */
1068 0x0491, 499, /* ґ Ґ */
1069 0x0493, 499, /* ғ Ғ */
1070 0x0495, 499, /* ҕ Ҕ */
1071 0x0497, 499, /* җ Җ */
1072 0x0499, 499, /* ҙ Ҙ */
1073 0x049b, 499, /* қ Қ */
1074 0x049d, 499, /* ҝ Ҝ */
1075 0x049f, 499, /* ҟ Ҟ */
1076 0x04a1, 499, /* ҡ Ҡ */
1077 0x04a3, 499, /* ң Ң */
1078 0x04a5, 499, /* ҥ Ҥ */
1079 0x04a7, 499, /* ҧ Ҧ */
1080 0x04a9, 499, /* ҩ Ҩ */
1081 0x04ab, 499, /* ҫ Ҫ */
1082 0x04ad, 499, /* ҭ Ҭ */
1083 0x04af, 499, /* ү Ү */
1084 0x04b1, 499, /* ұ Ұ */
1085 0x04b3, 499, /* ҳ Ҳ */
1086 0x04b5, 499, /* ҵ Ҵ */
1087 0x04b7, 499, /* ҷ Ҷ */
1088 0x04b9, 499, /* ҹ Ҹ */
1089 0x04bb, 499, /* һ Һ */
1090 0x04bd, 499, /* ҽ Ҽ */
1091 0x04bf, 499, /* ҿ Ҿ */
1092 0x04c2, 499, /* ӂ Ӂ */
1093 0x04c4, 499, /* ӄ Ӄ */
1094 0x04c8, 499, /* ӈ Ӈ */
1095 0x04cc, 499, /* ӌ Ӌ */
1096 0x04d1, 499, /* ӑ Ӑ */
1097 0x04d3, 499, /* ӓ Ӓ */
1098 0x04d5, 499, /* ӕ Ӕ */
1099 0x04d7, 499, /* ӗ Ӗ */
1100 0x04d9, 499, /* ә Ә */
1101 0x04db, 499, /* ӛ Ӛ */
1102 0x04dd, 499, /* ӝ Ӝ */
1103 0x04df, 499, /* ӟ Ӟ */
1104 0x04e1, 499, /* ӡ Ӡ */
1105 0x04e3, 499, /* ӣ Ӣ */
1106 0x04e5, 499, /* ӥ Ӥ */
1107 0x04e7, 499, /* ӧ Ӧ */
1108 0x04e9, 499, /* ө Ө */
1109 0x04eb, 499, /* ӫ Ӫ */
1110 0x04ef, 499, /* ӯ Ӯ */
1111 0x04f1, 499, /* ӱ Ӱ */
1112 0x04f3, 499, /* ӳ Ӳ */
1113 0x04f5, 499, /* ӵ Ӵ */
1114 0x04f9, 499, /* ӹ Ӹ */
1115 0x1e01, 499, /* ḁ Ḁ */
1116 0x1e03, 499, /* ḃ Ḃ */
1117 0x1e05, 499, /* ḅ Ḅ */
1118 0x1e07, 499, /* ḇ Ḇ */
1119 0x1e09, 499, /* ḉ Ḉ */
1120 0x1e0b, 499, /* ḋ Ḋ */
1121 0x1e0d, 499, /* ḍ Ḍ */
1122 0x1e0f, 499, /* ḏ Ḏ */
1123 0x1e11, 499, /* ḑ Ḑ */
1124 0x1e13, 499, /* ḓ Ḓ */
1125 0x1e15, 499, /* ḕ Ḕ */
1126 0x1e17, 499, /* ḗ Ḗ */
1127 0x1e19, 499, /* ḙ Ḙ */
1128 0x1e1b, 499, /* ḛ Ḛ */
1129 0x1e1d, 499, /* ḝ Ḝ */
1130 0x1e1f, 499, /* ḟ Ḟ */
1131 0x1e21, 499, /* ḡ Ḡ */
1132 0x1e23, 499, /* ḣ Ḣ */
1133 0x1e25, 499, /* ḥ Ḥ */
1134 0x1e27, 499, /* ḧ Ḧ */
1135 0x1e29, 499, /* ḩ Ḩ */
1136 0x1e2b, 499, /* ḫ Ḫ */
1137 0x1e2d, 499, /* ḭ Ḭ */
1138 0x1e2f, 499, /* ḯ Ḯ */
1139 0x1e31, 499, /* ḱ Ḱ */
1140 0x1e33, 499, /* ḳ Ḳ */
1141 0x1e35, 499, /* ḵ Ḵ */
1142 0x1e37, 499, /* ḷ Ḷ */
1143 0x1e39, 499, /* ḹ Ḹ */
1144 0x1e3b, 499, /* ḻ Ḻ */
1145 0x1e3d, 499, /* ḽ Ḽ */
1146 0x1e3f, 499, /* ḿ Ḿ */
1147 0x1e41, 499, /* ṁ Ṁ */
1148 0x1e43, 499, /* ṃ Ṃ */
1149 0x1e45, 499, /* ṅ Ṅ */
1150 0x1e47, 499, /* ṇ Ṇ */
1151 0x1e49, 499, /* ṉ Ṉ */
1152 0x1e4b, 499, /* ṋ Ṋ */
1153 0x1e4d, 499, /* ṍ Ṍ */
1154 0x1e4f, 499, /* ṏ Ṏ */
1155 0x1e51, 499, /* ṑ Ṑ */
1156 0x1e53, 499, /* ṓ Ṓ */
1157 0x1e55, 499, /* ṕ Ṕ */
1158 0x1e57, 499, /* ṗ Ṗ */
1159 0x1e59, 499, /* ṙ Ṙ */
1160 0x1e5b, 499, /* ṛ Ṛ */
1161 0x1e5d, 499, /* ṝ Ṝ */
1162 0x1e5f, 499, /* ṟ Ṟ */
1163 0x1e61, 499, /* ṡ Ṡ */
1164 0x1e63, 499, /* ṣ Ṣ */
1165 0x1e65, 499, /* ṥ Ṥ */
1166 0x1e67, 499, /* ṧ Ṧ */
1167 0x1e69, 499, /* ṩ Ṩ */
1168 0x1e6b, 499, /* ṫ Ṫ */
1169 0x1e6d, 499, /* ṭ Ṭ */
1170 0x1e6f, 499, /* ṯ Ṯ */
1171 0x1e71, 499, /* ṱ Ṱ */
1172 0x1e73, 499, /* ṳ Ṳ */
1173 0x1e75, 499, /* ṵ Ṵ */
1174 0x1e77, 499, /* ṷ Ṷ */
1175 0x1e79, 499, /* ṹ Ṹ */
1176 0x1e7b, 499, /* ṻ Ṻ */
1177 0x1e7d, 499, /* ṽ Ṽ */
1178 0x1e7f, 499, /* ṿ Ṿ */
1179 0x1e81, 499, /* ẁ Ẁ */
1180 0x1e83, 499, /* ẃ Ẃ */
1181 0x1e85, 499, /* ẅ Ẅ */
1182 0x1e87, 499, /* ẇ Ẇ */
1183 0x1e89, 499, /* ẉ Ẉ */
1184 0x1e8b, 499, /* ẋ Ẋ */
1185 0x1e8d, 499, /* ẍ Ẍ */
1186 0x1e8f, 499, /* ẏ Ẏ */
1187 0x1e91, 499, /* ẑ Ẑ */
1188 0x1e93, 499, /* ẓ Ẓ */
1189 0x1e95, 499, /* ẕ Ẕ */
1190 0x1ea1, 499, /* ạ Ạ */
1191 0x1ea3, 499, /* ả Ả */
1192 0x1ea5, 499, /* ấ Ấ */
1193 0x1ea7, 499, /* ầ Ầ */
1194 0x1ea9, 499, /* ẩ Ẩ */
1195 0x1eab, 499, /* ẫ Ẫ */
1196 0x1ead, 499, /* ậ Ậ */
1197 0x1eaf, 499, /* ắ Ắ */
1198 0x1eb1, 499, /* ằ Ằ */
1199 0x1eb3, 499, /* ẳ Ẳ */
1200 0x1eb5, 499, /* ẵ Ẵ */
1201 0x1eb7, 499, /* ặ Ặ */
1202 0x1eb9, 499, /* ẹ Ẹ */
1203 0x1ebb, 499, /* ẻ Ẻ */
1204 0x1ebd, 499, /* ẽ Ẽ */
1205 0x1ebf, 499, /* ế Ế */
1206 0x1ec1, 499, /* ề Ề */
1207 0x1ec3, 499, /* ể Ể */
1208 0x1ec5, 499, /* ễ Ễ */
1209 0x1ec7, 499, /* ệ Ệ */
1210 0x1ec9, 499, /* ỉ Ỉ */
1211 0x1ecb, 499, /* ị Ị */
1212 0x1ecd, 499, /* ọ Ọ */
1213 0x1ecf, 499, /* ỏ Ỏ */
1214 0x1ed1, 499, /* ố Ố */
1215 0x1ed3, 499, /* ồ Ồ */
1216 0x1ed5, 499, /* ổ Ổ */
1217 0x1ed7, 499, /* ỗ Ỗ */
1218 0x1ed9, 499, /* ộ Ộ */
1219 0x1edb, 499, /* ớ Ớ */
1220 0x1edd, 499, /* ờ Ờ */
1221 0x1edf, 499, /* ở Ở */
1222 0x1ee1, 499, /* ỡ Ỡ */
1223 0x1ee3, 499, /* ợ Ợ */
1224 0x1ee5, 499, /* ụ Ụ */
1225 0x1ee7, 499, /* ủ Ủ */
1226 0x1ee9, 499, /* ứ Ứ */
1227 0x1eeb, 499, /* ừ Ừ */
1228 0x1eed, 499, /* ử Ử */
1229 0x1eef, 499, /* ữ Ữ */
1230 0x1ef1, 499, /* ự Ự */
1231 0x1ef3, 499, /* ỳ Ỳ */
1232 0x1ef5, 499, /* ỵ Ỵ */
1233 0x1ef7, 499, /* ỷ Ỷ */
1234 0x1ef9, 499, /* ỹ Ỹ */
1235 0x1f51, 508, /* ὑ Ὑ */
1236 0x1f53, 508, /* ὓ Ὓ */
1237 0x1f55, 508, /* ὕ Ὕ */
1238 0x1f57, 508, /* ὗ Ὗ */
1239 0x1fb3, 509, /* ᾳ ᾼ */
1240 0x1fc3, 509, /* ῃ ῌ */
1241 0x1fe5, 507, /* ῥ Ῥ */
1242 0x1ff3, 509, /* ῳ ῼ */
1243 };
1244
1245 /*
1246 * upper case ranges: each entry is three Runes (first rune, last rune, biased offset), in ascending order of first rune
1247 * 3rd col is conversion excess 500, i.e. tolowerrune() returns c + col3 - 500 for first <= c <= last
1248 */
1249 static
1250 Rune __tolower2[] =
1251 {
1252 0x0041, 0x005a, 532, /* A-Z a-z */
1253 0x00c0, 0x00d6, 532, /* À-Ö à-ö */
1254 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */
1255 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */
1256 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */
1257 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */
1258 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */
1259 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */
1260 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */
1261 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */
1262 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */
1263 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */
1264 0x0410, 0x042f, 532, /* А-Я а-я */
1265 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */
1266 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */
1267 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */
1268 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */
1269 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */
1270 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */
1271 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */
1272 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */
1273 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */
1274 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */
1275 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */
1276 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */
1277 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */
1278 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */
1279 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */
1280 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */
1281 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */
1282 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */
1283 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */
1284 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */
1285 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */
1286 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */
1287 0xff21, 0xff3a, 532, /* A-Z a-z */
1288 };
1289
1290 /*
1291 * upper case singlets: each entry is two Runes (rune, biased offset), in ascending order of rune
1292 * 2nd col is conversion excess 500, i.e. tolowerrune() returns c + col2 - 500 when c equals the 1st col
1293 */
1294 static
1295 Rune __tolower1[] =
1296 {
1297 0x0100, 501, /* Ā ā */
1298 0x0102, 501, /* Ă ă */
1299 0x0104, 501, /* Ą ą */
1300 0x0106, 501, /* Ć ć */
1301 0x0108, 501, /* Ĉ ĉ */
1302 0x010a, 501, /* Ċ ċ */
1303 0x010c, 501, /* Č č */
1304 0x010e, 501, /* Ď ď */
1305 0x0110, 501, /* Đ đ */
1306 0x0112, 501, /* Ē ē */
1307 0x0114, 501, /* Ĕ ĕ */
1308 0x0116, 501, /* Ė ė */
1309 0x0118, 501, /* Ę ę */
1310 0x011a, 501, /* Ě ě */
1311 0x011c, 501, /* Ĝ ĝ */
1312 0x011e, 501, /* Ğ ğ */
1313 0x0120, 501, /* Ġ ġ */
1314 0x0122, 501, /* Ģ ģ */
1315 0x0124, 501, /* Ĥ ĥ */
1316 0x0126, 501, /* Ħ ħ */
1317 0x0128, 501, /* Ĩ ĩ */
1318 0x012a, 501, /* Ī ī */
1319 0x012c, 501, /* Ĭ ĭ */
1320 0x012e, 501, /* Į į */
1321 0x0130, 301, /* İ i */
1322 0x0132, 501, /* IJ ij */
1323 0x0134, 501, /* Ĵ ĵ */
1324 0x0136, 501, /* Ķ ķ */
1325 0x0139, 501, /* Ĺ ĺ */
1326 0x013b, 501, /* Ļ ļ */
1327 0x013d, 501, /* Ľ ľ */
1328 0x013f, 501, /* Ŀ ŀ */
1329 0x0141, 501, /* Ł ł */
1330 0x0143, 501, /* Ń ń */
1331 0x0145, 501, /* Ņ ņ */
1332 0x0147, 501, /* Ň ň */
1333 0x014a, 501, /* Ŋ ŋ */
1334 0x014c, 501, /* Ō ō */
1335 0x014e, 501, /* Ŏ ŏ */
1336 0x0150, 501, /* Ő ő */
1337 0x0152, 501, /* Œ œ */
1338 0x0154, 501, /* Ŕ ŕ */
1339 0x0156, 501, /* Ŗ ŗ */
1340 0x0158, 501, /* Ř ř */
1341 0x015a, 501, /* Ś ś */
1342 0x015c, 501, /* Ŝ ŝ */
1343 0x015e, 501, /* Ş ş */
1344 0x0160, 501, /* Š š */
1345 0x0162, 501, /* Ţ ţ */
1346 0x0164, 501, /* Ť ť */
1347 0x0166, 501, /* Ŧ ŧ */
1348 0x0168, 501, /* Ũ ũ */
1349 0x016a, 501, /* Ū ū */
1350 0x016c, 501, /* Ŭ ŭ */
1351 0x016e, 501, /* Ů ů */
1352 0x0170, 501, /* Ű ű */
1353 0x0172, 501, /* Ų ų */
1354 0x0174, 501, /* Ŵ ŵ */
1355 0x0176, 501, /* Ŷ ŷ */
1356 0x0178, 379, /* Ÿ ÿ */
1357 0x0179, 501, /* Ź ź */
1358 0x017b, 501, /* Ż ż */
1359 0x017d, 501, /* Ž ž */
1360 0x0181, 710, /* Ɓ ɓ */
1361 0x0182, 501, /* Ƃ ƃ */
1362 0x0184, 501, /* Ƅ ƅ */
1363 0x0186, 706, /* Ɔ ɔ */
1364 0x0187, 501, /* Ƈ ƈ */
1365 0x018b, 501, /* Ƌ ƌ */
1366 0x0190, 703, /* Ɛ ɛ */
1367 0x0191, 501, /* Ƒ ƒ */
1368 0x0193, 705, /* Ɠ ɠ */
1369 0x0194, 707, /* Ɣ ɣ */
1370 0x0196, 711, /* Ɩ ɩ */
1371 0x0197, 709, /* Ɨ ɨ */
1372 0x0198, 501, /* Ƙ ƙ */
1373 0x019c, 711, /* Ɯ ɯ */
1374 0x019d, 713, /* Ɲ ɲ */
1375 0x01a0, 501, /* Ơ ơ */
1376 0x01a2, 501, /* Ƣ ƣ */
1377 0x01a4, 501, /* Ƥ ƥ */
1378 0x01a7, 501, /* Ƨ ƨ */
1379 0x01a9, 718, /* Ʃ ʃ */
1380 0x01ac, 501, /* Ƭ ƭ */
1381 0x01ae, 718, /* Ʈ ʈ */
1382 0x01af, 501, /* Ư ư */
1383 0x01b3, 501, /* Ƴ ƴ */
1384 0x01b5, 501, /* Ƶ ƶ */
1385 0x01b7, 719, /* Ʒ ʒ */
1386 0x01b8, 501, /* Ƹ ƹ */
1387 0x01bc, 501, /* Ƽ ƽ */
1388 0x01c4, 502, /* DŽ dž */
1389 0x01c5, 501, /* Dž dž */
1390 0x01c7, 502, /* LJ lj */
1391 0x01c8, 501, /* Lj lj */
1392 0x01ca, 502, /* NJ nj */
1393 0x01cb, 501, /* Nj nj */
1394 0x01cd, 501, /* Ǎ ǎ */
1395 0x01cf, 501, /* Ǐ ǐ */
1396 0x01d1, 501, /* Ǒ ǒ */
1397 0x01d3, 501, /* Ǔ ǔ */
1398 0x01d5, 501, /* Ǖ ǖ */
1399 0x01d7, 501, /* Ǘ ǘ */
1400 0x01d9, 501, /* Ǚ ǚ */
1401 0x01db, 501, /* Ǜ ǜ */
1402 0x01de, 501, /* Ǟ ǟ */
1403 0x01e0, 501, /* Ǡ ǡ */
1404 0x01e2, 501, /* Ǣ ǣ */
1405 0x01e4, 501, /* Ǥ ǥ */
1406 0x01e6, 501, /* Ǧ ǧ */
1407 0x01e8, 501, /* Ǩ ǩ */
1408 0x01ea, 501, /* Ǫ ǫ */
1409 0x01ec, 501, /* Ǭ ǭ */
1410 0x01ee, 501, /* Ǯ ǯ */
1411 0x01f1, 502, /* DZ dz */
1412 0x01f2, 501, /* Dz dz */
1413 0x01f4, 501, /* Ǵ ǵ */
1414 0x01fa, 501, /* Ǻ ǻ */
1415 0x01fc, 501, /* Ǽ ǽ */
1416 0x01fe, 501, /* Ǿ ǿ */
1417 0x0200, 501, /* Ȁ ȁ */
1418 0x0202, 501, /* Ȃ ȃ */
1419 0x0204, 501, /* Ȅ ȅ */
1420 0x0206, 501, /* Ȇ ȇ */
1421 0x0208, 501, /* Ȉ ȉ */
1422 0x020a, 501, /* Ȋ ȋ */
1423 0x020c, 501, /* Ȍ ȍ */
1424 0x020e, 501, /* Ȏ ȏ */
1425 0x0210, 501, /* Ȑ ȑ */
1426 0x0212, 501, /* Ȓ ȓ */
1427 0x0214, 501, /* Ȕ ȕ */
1428 0x0216, 501, /* Ȗ ȗ */
1429 0x0386, 538, /* Ά ά */
1430 0x038c, 564, /* Ό ό */
1431 0x03e2, 501, /* Ϣ ϣ */
1432 0x03e4, 501, /* Ϥ ϥ */
1433 0x03e6, 501, /* Ϧ ϧ */
1434 0x03e8, 501, /* Ϩ ϩ */
1435 0x03ea, 501, /* Ϫ ϫ */
1436 0x03ec, 501, /* Ϭ ϭ */
1437 0x03ee, 501, /* Ϯ ϯ */
1438 0x0460, 501, /* Ѡ ѡ */
1439 0x0462, 501, /* Ѣ ѣ */
1440 0x0464, 501, /* Ѥ ѥ */
1441 0x0466, 501, /* Ѧ ѧ */
1442 0x0468, 501, /* Ѩ ѩ */
1443 0x046a, 501, /* Ѫ ѫ */
1444 0x046c, 501, /* Ѭ ѭ */
1445 0x046e, 501, /* Ѯ ѯ */
1446 0x0470, 501, /* Ѱ ѱ */
1447 0x0472, 501, /* Ѳ ѳ */
1448 0x0474, 501, /* Ѵ ѵ */
1449 0x0476, 501, /* Ѷ ѷ */
1450 0x0478, 501, /* Ѹ ѹ */
1451 0x047a, 501, /* Ѻ ѻ */
1452 0x047c, 501, /* Ѽ ѽ */
1453 0x047e, 501, /* Ѿ ѿ */
1454 0x0480, 501, /* Ҁ ҁ */
1455 0x0490, 501, /* Ґ ґ */
1456 0x0492, 501, /* Ғ ғ */
1457 0x0494, 501, /* Ҕ ҕ */
1458 0x0496, 501, /* Җ җ */
1459 0x0498, 501, /* Ҙ ҙ */
1460 0x049a, 501, /* Қ қ */
1461 0x049c, 501, /* Ҝ ҝ */
1462 0x049e, 501, /* Ҟ ҟ */
1463 0x04a0, 501, /* Ҡ ҡ */
1464 0x04a2, 501, /* Ң ң */
1465 0x04a4, 501, /* Ҥ ҥ */
1466 0x04a6, 501, /* Ҧ ҧ */
1467 0x04a8, 501, /* Ҩ ҩ */
1468 0x04aa, 501, /* Ҫ ҫ */
1469 0x04ac, 501, /* Ҭ ҭ */
1470 0x04ae, 501, /* Ү ү */
1471 0x04b0, 501, /* Ұ ұ */
1472 0x04b2, 501, /* Ҳ ҳ */
1473 0x04b4, 501, /* Ҵ ҵ */
1474 0x04b6, 501, /* Ҷ ҷ */
1475 0x04b8, 501, /* Ҹ ҹ */
1476 0x04ba, 501, /* Һ һ */
1477 0x04bc, 501, /* Ҽ ҽ */
1478 0x04be, 501, /* Ҿ ҿ */
1479 0x04c1, 501, /* Ӂ ӂ */
1480 0x04c3, 501, /* Ӄ ӄ */
1481 0x04c7, 501, /* Ӈ ӈ */
1482 0x04cb, 501, /* Ӌ ӌ */
1483 0x04d0, 501, /* Ӑ ӑ */
1484 0x04d2, 501, /* Ӓ ӓ */
1485 0x04d4, 501, /* Ӕ ӕ */
1486 0x04d6, 501, /* Ӗ ӗ */
1487 0x04d8, 501, /* Ә ә */
1488 0x04da, 501, /* Ӛ ӛ */
1489 0x04dc, 501, /* Ӝ ӝ */
1490 0x04de, 501, /* Ӟ ӟ */
1491 0x04e0, 501, /* Ӡ ӡ */
1492 0x04e2, 501, /* Ӣ ӣ */
1493 0x04e4, 501, /* Ӥ ӥ */
1494 0x04e6, 501, /* Ӧ ӧ */
1495 0x04e8, 501, /* Ө ө */
1496 0x04ea, 501, /* Ӫ ӫ */
1497 0x04ee, 501, /* Ӯ ӯ */
1498 0x04f0, 501, /* Ӱ ӱ */
1499 0x04f2, 501, /* Ӳ ӳ */
1500 0x04f4, 501, /* Ӵ ӵ */
1501 0x04f8, 501, /* Ӹ ӹ */
1502 0x1e00, 501, /* Ḁ ḁ */
1503 0x1e02, 501, /* Ḃ ḃ */
1504 0x1e04, 501, /* Ḅ ḅ */
1505 0x1e06, 501, /* Ḇ ḇ */
1506 0x1e08, 501, /* Ḉ ḉ */
1507 0x1e0a, 501, /* Ḋ ḋ */
1508 0x1e0c, 501, /* Ḍ ḍ */
1509 0x1e0e, 501, /* Ḏ ḏ */
1510 0x1e10, 501, /* Ḑ ḑ */
1511 0x1e12, 501, /* Ḓ ḓ */
1512 0x1e14, 501, /* Ḕ ḕ */
1513 0x1e16, 501, /* Ḗ ḗ */
1514 0x1e18, 501, /* Ḙ ḙ */
1515 0x1e1a, 501, /* Ḛ ḛ */
1516 0x1e1c, 501, /* Ḝ ḝ */
1517 0x1e1e, 501, /* Ḟ ḟ */
1518 0x1e20, 501, /* Ḡ ḡ */
1519 0x1e22, 501, /* Ḣ ḣ */
1520 0x1e24, 501, /* Ḥ ḥ */
1521 0x1e26, 501, /* Ḧ ḧ */
1522 0x1e28, 501, /* Ḩ ḩ */
1523 0x1e2a, 501, /* Ḫ ḫ */
1524 0x1e2c, 501, /* Ḭ ḭ */
1525 0x1e2e, 501, /* Ḯ ḯ */
1526 0x1e30, 501, /* Ḱ ḱ */
1527 0x1e32, 501, /* Ḳ ḳ */
1528 0x1e34, 501, /* Ḵ ḵ */
1529 0x1e36, 501, /* Ḷ ḷ */
1530 0x1e38, 501, /* Ḹ ḹ */
1531 0x1e3a, 501, /* Ḻ ḻ */
1532 0x1e3c, 501, /* Ḽ ḽ */
1533 0x1e3e, 501, /* Ḿ ḿ */
1534 0x1e40, 501, /* Ṁ ṁ */
1535 0x1e42, 501, /* Ṃ ṃ */
1536 0x1e44, 501, /* Ṅ ṅ */
1537 0x1e46, 501, /* Ṇ ṇ */
1538 0x1e48, 501, /* Ṉ ṉ */
1539 0x1e4a, 501, /* Ṋ ṋ */
1540 0x1e4c, 501, /* Ṍ ṍ */
1541 0x1e4e, 501, /* Ṏ ṏ */
1542 0x1e50, 501, /* Ṑ ṑ */
1543 0x1e52, 501, /* Ṓ ṓ */
1544 0x1e54, 501, /* Ṕ ṕ */
1545 0x1e56, 501, /* Ṗ ṗ */
1546 0x1e58, 501, /* Ṙ ṙ */
1547 0x1e5a, 501, /* Ṛ ṛ */
1548 0x1e5c, 501, /* Ṝ ṝ */
1549 0x1e5e, 501, /* Ṟ ṟ */
1550 0x1e60, 501, /* Ṡ ṡ */
1551 0x1e62, 501, /* Ṣ ṣ */
1552 0x1e64, 501, /* Ṥ ṥ */
1553 0x1e66, 501, /* Ṧ ṧ */
1554 0x1e68, 501, /* Ṩ ṩ */
1555 0x1e6a, 501, /* Ṫ ṫ */
1556 0x1e6c, 501, /* Ṭ ṭ */
1557 0x1e6e, 501, /* Ṯ ṯ */
1558 0x1e70, 501, /* Ṱ ṱ */
1559 0x1e72, 501, /* Ṳ ṳ */
1560 0x1e74, 501, /* Ṵ ṵ */
1561 0x1e76, 501, /* Ṷ ṷ */
1562 0x1e78, 501, /* Ṹ ṹ */
1563 0x1e7a, 501, /* Ṻ ṻ */
1564 0x1e7c, 501, /* Ṽ ṽ */
1565 0x1e7e, 501, /* Ṿ ṿ */
1566 0x1e80, 501, /* Ẁ ẁ */
1567 0x1e82, 501, /* Ẃ ẃ */
1568 0x1e84, 501, /* Ẅ ẅ */
1569 0x1e86, 501, /* Ẇ ẇ */
1570 0x1e88, 501, /* Ẉ ẉ */
1571 0x1e8a, 501, /* Ẋ ẋ */
1572 0x1e8c, 501, /* Ẍ ẍ */
1573 0x1e8e, 501, /* Ẏ ẏ */
1574 0x1e90, 501, /* Ẑ ẑ */
1575 0x1e92, 501, /* Ẓ ẓ */
1576 0x1e94, 501, /* Ẕ ẕ */
1577 0x1ea0, 501, /* Ạ ạ */
1578 0x1ea2, 501, /* Ả ả */
1579 0x1ea4, 501, /* Ấ ấ */
1580 0x1ea6, 501, /* Ầ ầ */
1581 0x1ea8, 501, /* Ẩ ẩ */
1582 0x1eaa, 501, /* Ẫ ẫ */
1583 0x1eac, 501, /* Ậ ậ */
1584 0x1eae, 501, /* Ắ ắ */
1585 0x1eb0, 501, /* Ằ ằ */
1586 0x1eb2, 501, /* Ẳ ẳ */
1587 0x1eb4, 501, /* Ẵ ẵ */
1588 0x1eb6, 501, /* Ặ ặ */
1589 0x1eb8, 501, /* Ẹ ẹ */
1590 0x1eba, 501, /* Ẻ ẻ */
1591 0x1ebc, 501, /* Ẽ ẽ */
1592 0x1ebe, 501, /* Ế ế */
1593 0x1ec0, 501, /* Ề ề */
1594 0x1ec2, 501, /* Ể ể */
1595 0x1ec4, 501, /* Ễ ễ */
1596 0x1ec6, 501, /* Ệ ệ */
1597 0x1ec8, 501, /* Ỉ ỉ */
1598 0x1eca, 501, /* Ị ị */
1599 0x1ecc, 501, /* Ọ ọ */
1600 0x1ece, 501, /* Ỏ ỏ */
1601 0x1ed0, 501, /* Ố ố */
1602 0x1ed2, 501, /* Ồ ồ */
1603 0x1ed4, 501, /* Ổ ổ */
1604 0x1ed6, 501, /* Ỗ ỗ */
1605 0x1ed8, 501, /* Ộ ộ */
1606 0x1eda, 501, /* Ớ ớ */
1607 0x1edc, 501, /* Ờ ờ */
1608 0x1ede, 501, /* Ở ở */
1609 0x1ee0, 501, /* Ỡ ỡ */
1610 0x1ee2, 501, /* Ợ ợ */
1611 0x1ee4, 501, /* Ụ ụ */
1612 0x1ee6, 501, /* Ủ ủ */
1613 0x1ee8, 501, /* Ứ ứ */
1614 0x1eea, 501, /* Ừ ừ */
1615 0x1eec, 501, /* Ử ử */
1616 0x1eee, 501, /* Ữ ữ */
1617 0x1ef0, 501, /* Ự ự */
1618 0x1ef2, 501, /* Ỳ ỳ */
1619 0x1ef4, 501, /* Ỵ ỵ */
1620 0x1ef6, 501, /* Ỷ ỷ */
1621 0x1ef8, 501, /* Ỹ ỹ */
1622 0x1f59, 492, /* Ὑ ὑ */
1623 0x1f5b, 492, /* Ὓ ὓ */
1624 0x1f5d, 492, /* Ὕ ὕ */
1625 0x1f5f, 492, /* Ὗ ὗ */
1626 0x1fbc, 491, /* ᾼ ᾳ */
1627 0x1fcc, 491, /* ῌ ῃ */
1628 0x1fec, 493, /* Ῥ ῥ */
1629 0x1ffc, 491, /* ῼ ῳ */
1630 };
1631
1632 /*
1633 * title characters are those between
1634 * upper and lower case, i.e. DZ Dz dz. Each entry is two Runes (rune, offset + 500); totitlerune() returns c + col2 - 500 when c equals the 1st col.
1635 */
1636 static
1637 Rune __totitle1[] =
1638 {
1639 0x01c4, 501, /* DŽ Dž */
1640 0x01c6, 499, /* dž Dž */
1641 0x01c7, 501, /* LJ Lj */
1642 0x01c9, 499, /* lj Lj */
1643 0x01ca, 501, /* NJ Nj */
1644 0x01cc, 499, /* nj Nj */
1645 0x01f1, 501, /* DZ Dz */
1646 0x01f3, 499, /* dz Dz */
1647 };
1648
1649 static Rune*
1650 bsearch(Rune c, Rune *t, int n, int ne)
1651 {
1652 Rune *p;
1653 int m;
1654
1655 while(n > 1) {
1656 m = n/2;
1657 p = t + m*ne;
1658 if(c >= p[0]) {
1659 t = p;
1660 n = n-m;
1661 } else
1662 n = m;
1663 }
1664 if(n && c >= t[0])
1665 return t;
1666 return 0;
1667 }
1668
1669 Rune
1670 tolowerrune(Rune c)
1671 {
1672 Rune *p;
1673
1674 p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
1675 if(p && c >= p[0] && c <= p[1])
1676 return c + p[2] - 500;
1677 p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
1678 if(p && c == p[0])
1679 return c + p[1] - 500;
1680 return c;
1681 }
1682
1683 Rune
1684 toupperrune(Rune c)
1685 {
1686 Rune *p;
1687
1688 p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
1689 if(p && c >= p[0] && c <= p[1])
1690 return c + p[2] - 500;
1691 p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
1692 if(p && c == p[0])
1693 return c + p[1] - 500;
1694 return c;
1695 }
1696
1697 Rune
1698 totitlerune(Rune c)
1699 {
1700 Rune *p;
1701
1702 p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2);
1703 if(p && c == p[0])
1704 return c + p[1] - 500;
1705 return c;
1706 }
1707
1708 int
1709 islowerrune(Rune c)
1710 {
1711 Rune *p;
1712
1713 p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
1714 if(p && c >= p[0] && c <= p[1])
1715 return 1;
1716 p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
1717 if(p && c == p[0])
1718 return 1;
1719 return 0;
1720 }
1721
1722 int
1723 isupperrune(Rune c)
1724 {
1725 Rune *p;
1726
1727 p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
1728 if(p && c >= p[0] && c <= p[1])
1729 return 1;
1730 p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
1731 if(p && c == p[0])
1732 return 1;
1733 return 0;
1734 }
1735
1736 int
1737 isalpharune(Rune c)
1738 {
1739 Rune *p;
1740
1741 if(isupperrune(c) || islowerrune(c))
1742 return 1;
1743 p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2);
1744 if(p && c >= p[0] && c <= p[1])
1745 return 1;
1746 p = bsearch(c, __alpha1, nelem(__alpha1), 1);
1747 if(p && c == p[0])
1748 return 1;
1749 return 0;
1750 }
1751
1752 int
1753 istitlerune(Rune c)
1754 {
1755 return isupperrune(c) && islowerrune(c);
1756 }
1757
1758 int
1759 isspacerune(Rune c)
1760 {
1761 Rune *p;
1762
1763 p = bsearch(c, __space2, nelem(__space2)/2, 2);
1764 if(p && c >= p[0] && c <= p[1])
1765 return 1;
1766 return 0;
1767 }
1768 /* -------------- utfecpy.c --------------- */
1769 /*
1770 * The authors of this software are Rob Pike and Ken Thompson.
1771 * Copyright (c) 2002 by Lucent Technologies.
1772 * Permission to use, copy, modify, and distribute this software for any
1773 * purpose without fee is hereby granted, provided that this entire notice
1774 * is included in all copies of any software which is or includes a copy
1775 * or modification of this software and in all copies of the supporting
1776 * documentation for such software.
1777 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1778 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1779 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1780 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1781 */
1782 #define _BSD_SOURCE 1 /* memccpy */
1783 // #include <stdarg.h>
1784 // #include <string.h>
1785 // #include "plan9.h"
1786 // #include "utf.h"
1787
/*
 * Copy the NUL-terminated UTF-8 string from into the buffer [to, e),
 * truncating if it does not fit, without leaving a partial
 * multi-byte sequence at the end of the result.
 *
 *	to	start of the destination buffer
 *	e	one past the last usable byte of the destination
 *	from	NUL-terminated source string
 *
 * Returns a pointer to the terminating NUL stored in the destination.
 * If the buffer is empty (to >= e) nothing is written and to is returned.
 */
char*
utfecpy(char *to, char *e, char *from)
{
	char *end;

	if(to >= e)
		return to;

	/* plain byte loop (ISO C only); stop right after the NUL is copied */
	for(end = to; end < e; end++){
		*end = *from++;
		if(*end == '\0')
			return end;
	}

	/*
	 * The source did not fit.  The terminator belongs in the last
	 * byte, e-1, unless that byte is a continuation byte (10xxxxxx);
	 * in that case back up to the lead byte of the sequence being cut
	 * so that no partial rune is left behind.  Bytes before that
	 * point are complete sequences and are kept.
	 */
	end = e-1;
	while(end > to && (*end&0xC0) == 0x80)
		end--;
	*end = '\0';
	return end;
}
1806 /* -------------- utflen.c --------------- */
1807 /*
1808 * The authors of this software are Rob Pike and Ken Thompson.
1809 * Copyright (c) 2002 by Lucent Technologies.
1810 * Permission to use, copy, modify, and distribute this software for any
1811 * purpose without fee is hereby granted, provided that this entire notice
1812 * is included in all copies of any software which is or includes a copy
1813 * or modification of this software and in all copies of the supporting
1814 * documentation for such software.
1815 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1816 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1817 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1818 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1819 */
1820 // #include <stdarg.h>
1821 // #include <string.h>
1822 // #include "plan9.h"
1823 // #include "utf.h"
1824
1825 int
1826 utflen(char *s)
1827 {
1828 int c;
1829 long n;
1830 Rune rune;
1831
1832 n = 0;
1833 for(;;) {
1834 c = *(uchar*)s;
1835 if(c < Runeself) {
1836 if(c == 0)
1837 return n;
1838 s++;
1839 } else
1840 s += chartorune(&rune, s);
1841 n++;
1842 }
1843 }
1844 /* -------------- utfnlen.c --------------- */
1845 /*
1846 * The authors of this software are Rob Pike and Ken Thompson.
1847 * Copyright (c) 2002 by Lucent Technologies.
1848 * Permission to use, copy, modify, and distribute this software for any
1849 * purpose without fee is hereby granted, provided that this entire notice
1850 * is included in all copies of any software which is or includes a copy
1851 * or modification of this software and in all copies of the supporting
1852 * documentation for such software.
1853 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1854 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1855 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1856 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1857 */
1858 // #include <stdarg.h>
1859 // #include <string.h>
1860 // #include "plan9.h"
1861 // #include "utf.h"
1862
1863 int
1864 utfnlen(char *s, long m)
1865 {
1866 int c;
1867 long n;
1868 Rune rune;
1869 char *es;
1870
1871 es = s + m;
1872 for(n = 0; s < es; n++) {
1873 c = *(uchar*)s;
1874 if(c < Runeself){
1875 if(c == '\0')
1876 break;
1877 s++;
1878 continue;
1879 }
1880 if(!fullrune(s, es-s))
1881 break;
1882 s += chartorune(&rune, s);
1883 }
1884 return n;
1885 }
1886 /* -------------- utfrrune.c --------------- */
1887 /*
1888 * The authors of this software are Rob Pike and Ken Thompson.
1889 * Copyright (c) 2002 by Lucent Technologies.
1890 * Permission to use, copy, modify, and distribute this software for any
1891 * purpose without fee is hereby granted, provided that this entire notice
1892 * is included in all copies of any software which is or includes a copy
1893 * or modification of this software and in all copies of the supporting
1894 * documentation for such software.
1895 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1896 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1897 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1898 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1899 */
1900 // #include <stdarg.h>
1901 // #include <string.h>
1902 // #include "plan9.h"
1903 // #include "utf.h"
1904
1905 char*
1906 utfrrune(char *s, long c)
1907 {
1908 long c1;
1909 Rune r;
1910 char *s1;
1911
1912 if(c < Runesync) /* not part of utf sequence */
1913 return strrchr(s, c);
1914
1915 s1 = 0;
1916 for(;;) {
1917 c1 = *(uchar*)s;
1918 if(c1 < Runeself) { /* one byte rune */
1919 if(c1 == 0)
1920 return s1;
1921 if(c1 == c)
1922 s1 = s;
1923 s++;
1924 continue;
1925 }
1926 c1 = chartorune(&r, s);
1927 if(r == c)
1928 s1 = s;
1929 s += c1;
1930 }
1931 }
1932 /* -------------- utfrune.c --------------- */
1933 /*
1934 * The authors of this software are Rob Pike and Ken Thompson.
1935 * Copyright (c) 2002 by Lucent Technologies.
1936 * Permission to use, copy, modify, and distribute this software for any
1937 * purpose without fee is hereby granted, provided that this entire notice
1938 * is included in all copies of any software which is or includes a copy
1939 * or modification of this software and in all copies of the supporting
1940 * documentation for such software.
1941 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1942 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1943 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1944 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1945 */
1946 // #include <stdarg.h>
1947 // #include <string.h>
1948 // #include "plan9.h"
1949 // #include "utf.h"
1950
1951 char*
1952 utfrune(char *s, long c)
1953 {
1954 long c1;
1955 Rune r;
1956 int n;
1957
1958 if(c < Runesync) /* not part of utf sequence */
1959 return strchr(s, c);
1960
1961 for(;;) {
1962 c1 = *(uchar*)s;
1963 if(c1 < Runeself) { /* one byte rune */
1964 if(c1 == 0)
1965 return 0;
1966 if(c1 == c)
1967 return s;
1968 s++;
1969 continue;
1970 }
1971 n = chartorune(&r, s);
1972 if(r == c)
1973 return s;
1974 s += n;
1975 }
1976 }
1977 /* -------------- utfutf.c --------------- */
1978 /*
1979 * The authors of this software are Rob Pike and Ken Thompson.
1980 * Copyright (c) 2002 by Lucent Technologies.
1981 * Permission to use, copy, modify, and distribute this software for any
1982 * purpose without fee is hereby granted, provided that this entire notice
1983 * is included in all copies of any software which is or includes a copy
1984 * or modification of this software and in all copies of the supporting
1985 * documentation for such software.
1986 * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
1987 * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
1988 * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
1989 * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
1990 */
1991 // #include <stdarg.h>
1992 // #include <string.h>
1993 // #include "plan9.h"
1994 // #include "utf.h"
1995
1996
1997 /*
1998 * Return pointer to first occurrence of s2 in s1,
1999 * 0 if none
2000 */
2001 char*
2002 utfutf(char *s1, char *s2)
2003 {
2004 char *p;
2005 long f, n1, n2;
2006 Rune r;
2007
2008 n1 = chartorune(&r, s2);
2009 f = r;
2010 if(f <= Runesync) /* represents self */
2011 return strstr(s1, s2);
2012
2013 n2 = strlen(s2);
2014 for(p=s1; (p=utfrune(p, f)); p+=n1)
2015 if(strncmp(p, s2, n2) == 0)
2016 return p;
2017 return 0;
2018 }