asciitoutf8.c - randomcrap - random crap programs of varying quality
(HTM) git clone git://git.codemadness.org/randomcrap
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
asciitoutf8.c (1636B)
---
1 /* convert text encoding ascii (every input byte is taken as one code point, i.e. ISO-8859-1) to utf-8 */
2
3 #include <stdio.h>
4 #include <stdint.h>
5
6 #ifdef WIN32
7 /* fcntl.h required for Windows binary mode */
8 #include <fcntl.h>
9 #endif
10
11 #ifdef __OpenBSD__
12 #include <err.h>
13 #include <unistd.h>
14 #endif
15
/*
 * Encode the Unicode code point r as UTF-8 into s.
 *
 * s must have room for at least 4 bytes; no NUL terminator is written.
 *
 * Returns the number of bytes stored in s (1 to 4), or 0 when nothing was
 * written. Nothing is written for:
 *  - r == 0 (NUL is deliberately not emitted),
 *  - r < 0 or r > 0x10FFFF (outside the Unicode code space),
 *  - 0xD800 <= r <= 0xDFFF (UTF-16 surrogates, not encodable in UTF-8).
 */
int
codepointtoutf8(long r, char *s)
{
	/* values without a valid UTF-8 encoding: previously a negative r
	 * produced a single garbage byte and r > 0x10FFFF was truncated by
	 * the 4-byte masks into a different code point. */
	if (r < 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
		return 0;

	if (r == 0) {
		return 0; /* NUL byte */
	} else if (r <= 0x7F) {
		/* 1 byte: 0aaaaaaa */
		s[0] = (char)r;
		return 1;
	} else if (r <= 0x07FF) {
		/* 2 bytes: 00000aaa aabbbbbb */
		s[0] = (char)(0xC0 | ((r & 0x0007C0) >> 6)); /* 110aaaaa */
		s[1] = (char)(0x80 |  (r & 0x00003F));       /* 10bbbbbb */
		return 2;
	} else if (r <= 0xFFFF) {
		/* 3 bytes: aaaabbbb bbcccccc */
		s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
		s[1] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10bbbbbb */
		s[2] = (char)(0x80 |  (r & 0x00003F));        /* 10cccccc */
		return 3;
	} else {
		/* 4 bytes: 000aaabb bbbbcccc ccdddddd */
		s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
		s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
		s[2] = (char)(0x80 | ((r & 0x000FC0) >> 6));  /* 10cccccc */
		s[3] = (char)(0x80 |  (r & 0x00003F));        /* 10dddddd */
		return 4;
	}
}
45
/*
 * Read bytes from stdin, treat each byte value as a code point and write
 * its UTF-8 encoding to stdout. Bytes for which codepointtoutf8() returns 0
 * (the NUL byte) produce no output.
 *
 * Returns 0 on success, 1 when reading stdin or writing stdout failed.
 */
int
main(void)
{
	char buf[5];
	int c, i, n;

#ifdef __OpenBSD__
	if (pledge("stdio", NULL) == -1)
		err(1, "pledge");
#endif

	/* Windows: switch the standard streams to binary mode. Uses #ifdef to
	 * match the guard around the <fcntl.h> include, so a WIN32 macro that
	 * is defined without a value also works. */
#ifdef WIN32
	_setmode(_fileno(stdin), _O_BINARY);
	_setmode(_fileno(stdout), _O_BINARY);
	_setmode(_fileno(stderr), _O_BINARY);
#endif

	while ((c = getchar()) != EOF) {
		n = codepointtoutf8(c, buf);
		for (i = 0; i < n; i++) {
			/* stop at the first write error instead of silently
			 * discarding the rest of the output */
			if (putchar(buf[i]) == EOF) {
				perror(NULL);
				return 1;
			}
		}
	}

	/* getchar() returns EOF for both end-of-file and read errors */
	if (ferror(stdin)) {
		perror(NULL);
		return 1;
	}

	/* buffered output may only fail once it is actually flushed */
	if (fflush(stdout) == EOF) {
		perror(NULL);
		return 1;
	}
	return 0;
}