asciitoutf8.c - randomcrap - random crap programs of varying quality
 (HTM) git clone git://git.codemadness.org/randomcrap
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       asciitoutf8.c (1636B)
       ---
            1 /* convert text encoding ascii to utf-8 */
            2 
            3 #include <stdio.h>
            4 #include <stdint.h>
            5 
            6 #ifdef WIN32
            7 /* fcntl.h required for Windows binary mode */
            8 #include <fcntl.h>
            9 #endif
           10 
           11 #ifdef __OpenBSD__
           12 #include <err.h>
           13 #include <unistd.h>
           14 #endif
           15 
           /*
            * Encode the Unicode code point r as UTF-8 into s.
            *
            * s must have room for at least 4 bytes; no NUL terminator is written.
            *
            * Returns the number of bytes stored (1-4). Returns 0 and stores nothing
            * when r is 0 (NUL) or when r is not a Unicode scalar value, i.e. it is
            * negative, a UTF-16 surrogate (0xD800-0xDFFF) or above 0x10FFFF; such
            * values have no valid UTF-8 encoding.
            */
           int
           codepointtoutf8(long r, char *s)
           {
                   /* NUL, negative, out of range or surrogate: emit nothing */
                   if (r <= 0 || r > 0x10FFFF || (r >= 0xD800 && r <= 0xDFFF))
                           return 0;

                   if (r <= 0x7F) {
                           /* 1 byte: 0aaaaaaa */
                           s[0] = (char)r;
                           return 1;
                   }
                   if (r <= 0x07FF) {
                           /* 2 bytes: 00000aaa aabbbbbb */
                           s[0] = (char)(0xC0 | ((r & 0x0007C0) >>  6)); /* 110aaaaa */
                           s[1] = (char)(0x80 |  (r & 0x00003F));        /* 10bbbbbb */
                           return 2;
                   }
                   if (r <= 0xFFFF) {
                           /* 3 bytes: aaaabbbb bbcccccc */
                           s[0] = (char)(0xE0 | ((r & 0x00F000) >> 12)); /* 1110aaaa */
                           s[1] = (char)(0x80 | ((r & 0x000FC0) >>  6)); /* 10bbbbbb */
                           s[2] = (char)(0x80 |  (r & 0x00003F));        /* 10cccccc */
                           return 3;
                   }

                   /* 4 bytes: 000aaabb bbbbcccc ccdddddd */
                   s[0] = (char)(0xF0 | ((r & 0x1C0000) >> 18)); /* 11110aaa */
                   s[1] = (char)(0x80 | ((r & 0x03F000) >> 12)); /* 10bbbbbb */
                   s[2] = (char)(0x80 | ((r & 0x000FC0) >>  6)); /* 10cccccc */
                   s[3] = (char)(0x80 |  (r & 0x00003F));        /* 10dddddd */
                   return 4;
           }
           45 
           /*
            * Read bytes from stdin, treat each byte value as a code point and write
            * its UTF-8 encoding to stdout. Bytes for which codepointtoutf8() returns
            * 0 (such as NUL) produce no output.
            *
            * Returns 0 on success, 1 if reading stdin or writing stdout failed.
            */
           int
           main(void)
           {
                   char buf[4]; /* codepointtoutf8() stores at most 4 bytes */
                   int c, n;

           #ifdef __OpenBSD__
                   if (pledge("stdio", NULL) == -1)
                           err(1, "pledge");
           #endif

                   /* switch the standard streams to binary mode on Windows */
           #ifdef WIN32
                   _setmode(_fileno(stdin), _O_BINARY);
                   _setmode(_fileno(stdout), _O_BINARY);
                   _setmode(_fileno(stderr), _O_BINARY);
           #endif

                   while ((c = getchar()) != EOF) {
                           n = codepointtoutf8(c, buf);
                           if (n > 0 && fwrite(buf, 1, (size_t)n, stdout) != (size_t)n) {
                                   perror(NULL);
                                   return 1;
                           }
                   }

                   if (ferror(stdin)) {
                           perror(NULL);
                           return 1;
                   }

                   /* buffered output may only fail once it is flushed */
                   if (fflush(stdout) == EOF) {
                           perror(NULL);
                           return 1;
                   }
                   return 0;
           }