tphroxy.c - phroxy - Gopher to HTTP proxy
 (HTM) git clone git://git.z3bra.org/phroxy.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
       tphroxy.c (9901B)
       ---
            1 #include <err.h>
            2 #include <errno.h>
            3 #include <limits.h>
            4 #include <netdb.h>
            5 #include <signal.h>
            6 #include <stdio.h>
            7 #include <stdlib.h>
            8 #include <string.h>
            9 #include <time.h>
           10 #include <unistd.h>
           11 
           12 #include <sys/types.h>
           13 #include <sys/socket.h>
           14 #include <sys/socket.h>
           15 
           16 /* supported items */
           17 enum {
           18         ITEM_0,
           19         ITEM_1,
           20         ITEM_2,
           21         ITEM_3,
           22         ITEM_4,
           23         ITEM_5,
           24         ITEM_6,
           25         ITEM_7,
           26         ITEM_8,
           27         ITEM_9,
           28         ITEM_I,
           29         ITEM_g,
           30         ITEM_h,
           31         ITEM_s,
           32         ITEM_P,
           33         ITEM_d,
           34 };
           35 
           36 #include "config.h"
           37 
           38 void *
           39 xreallocarray(void *m, const size_t n, const size_t s)
           40 {
           41         void *nm;
           42 
           43         if (n == 0 || s == 0) {
           44                 free(m);
           45                 return NULL;
           46         }
           47         if (s && n > (size_t)-1/s)
           48                 errx(1, "realloc: overflow");
           49         if (!(nm = realloc(m, n * s)))
           50                 errx(1, "realloc: %s", strerror(errno));
           51 
           52         return nm;
           53 }
           54 
           55 
           56 static int
           57 connectto(const char *host, const char *port)
           58 {
           59         sigset_t set, oset;
           60         static const struct addrinfo hints = {
           61             .ai_family = AF_UNSPEC,
           62             .ai_socktype = SOCK_STREAM,
           63             .ai_protocol = IPPROTO_TCP,
           64         };
           65         struct addrinfo *addrs, *addr;
           66         int r, sock = -1;
           67 
           68         sigemptyset(&set);
           69         sigaddset(&set, SIGWINCH);
           70         sigprocmask(SIG_BLOCK, &set, &oset);
           71 
           72         if ((r = getaddrinfo(host, port, &hints, &addrs))) {
           73                 fprintf(stderr, "Can't resolve hostname \"%s\": %s\n", host, gai_strerror(r));
           74                 goto err;
           75         }
           76 
           77         for (addr = addrs; addr; addr = addr->ai_next) {
           78                 if ((sock = socket(addr->ai_family, addr->ai_socktype,
           79                                    addr->ai_protocol)) < 0)
           80                         continue;
           81                 if ((r = connect(sock, addr->ai_addr, addr->ai_addrlen)) < 0) {
           82                         close(sock);
           83                         continue;
           84                 }
           85                 break;
           86         }
           87 
           88         freeaddrinfo(addrs);
           89 
           90         if (sock < 0) {
           91                 fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
           92                 goto err;
           93         }
           94         if (r < 0) {
           95                 fprintf(stderr, "Can't connect to: %s:%s: %s\n", host, port, strerror(errno));
           96                 goto err;
           97         }
           98 
           99         sigprocmask(SIG_SETMASK, &oset, NULL);
          100         return sock;
          101 
          102 err:
          103         sigprocmask(SIG_SETMASK, &oset, NULL);
          104         return -1;
          105 }
          106 
          107 int
          108 sendselector(int sock, const char *selector, const char *search)
          109 {
          110         char *msg, *p;
          111         char *fmt = "%s\r\n";
          112         size_t ln;
          113         ssize_t n;
          114 
          115         ln = strlen(selector) + 3;
          116         if (search) {
          117                 fmt = "%s\t%s\r\n";
          118                 ln += strlen(search) + 1;
          119         }
          120 
          121         msg = p = malloc(ln);
          122         snprintf(msg, ln--, fmt, selector, search);
          123 
          124         while ((n = write(sock, p, ln)) > 0) {
          125                 ln -= n;
          126                 p += n;
          127         }
          128 
          129         free(msg);
          130         if (n == -1)
          131                 fprintf(stderr, "Can't send message: %s\n", strerror(errno));
          132 
          133         return n;
          134 }
          135 
          136 static char
          137 hex2bin(const unsigned char *in)
          138 {
          139   int out;
          140 
          141   if (*in == '%')
          142     in++;
          143 
          144   if ('A' <= in[0] && in[0] <= 'F') out  = 16 * (in[0] - 'A' + 10);
          145   if ('0' <= in[0] && in[0] <= '9') out  = 16 * (in[0] - '0');
          146 
          147   if ('A' <= in[1] && in[1] <= 'F') out += (in[1] - 'A' + 10);
          148   if ('0' <= in[1] && in[1] <= '9') out += (in[1] - '0');
          149 
          150   return out;
          151 }
          152 
          153 char *
          154 urldec(char *search)
          155 {
          156         char *msg, *p;
          157 
          158         if (!search)
          159                 return NULL;
          160 
          161         msg = p = search;
          162         for (p = msg; *p != '\0'; msg++, p++) {
          163                 switch(*p) {
          164                 case '+':
          165                         *msg = ' ';
          166                         break;
          167                 case '%':
          168                         *msg = hex2bin((unsigned char *)p);
          169                         p += 2;
          170                         break;
          171                 default:
          172                         *msg = *p;
          173                 }
          174         }
          175         *msg = '\0';
          176 
          177         return search;
          178 }
          179 
          180 
          181 char *
          182 getrawitem(int sock, size_t *sz)
          183 {
          184         char *raw, *buf;
          185         size_t bn, bs;
          186         ssize_t n;
          187 
          188         raw = buf = NULL;
          189         bn = bs = n = 0;
          190 
          191         do {
          192                 bs -= n;
          193                 buf += n;
          194                 if (bs < 1) {
          195                         raw = xreallocarray(raw, ++bn, BUFSIZ);
          196                         buf = raw + (bn-1) * BUFSIZ;
          197                         bs = BUFSIZ;
          198                 }
          199         } while ((n = read(sock, buf, bs)) > 0);
          200 
          201         *buf = '\0';
          202 
          203         if (sz)
          204                 *sz = buf - raw;
          205 
          206         if (n < 0) {
          207                 fprintf(stderr, "Can't read socket: %s\n", strerror(errno));
          208                 free(raw);
          209         }
          210 
          211         return raw;
          212 }
          213 
          214 void
          215 printhttp(int code)
          216 {
          217         switch (code) {
          218         case 400: printf("HTTP/1.1 400 That's Illegal\r\n"); break;
          219         case 404: printf("HTTP/1.1 404 Google Broke The Web\r\n"); break;
          220         case 405: printf("HTTP/1.1 405 Don't Do That\r\n"); break;
          221         case 415: printf("HTTP/1.1 415 Gopher Type Not Handled\r\n"); break;
          222         case 500: printf("HTTP/1.1 500 You Broke The Web\r\n"); break;
          223         }
          224         printf("\r\n");
          225 }
          226 
          227 char *
          228 contenttype(char i, char *path)
          229 {
          230         static char *ext, type[32];
          231 
          232         /* isolate file extention, if any */
          233         ext = strrchr(path, '.');
          234         ext = ext ? ext + 1 : "*";
          235 
          236         switch(i) {
          237         case '0':
          238         case '1':
          239         case '7':
          240         case 'h':
          241                 snprintf(type, sizeof(type)-1, "text/html; charset=utf-8");
          242                 break;
          243         case '6':
          244                 snprintf(type, sizeof(type)-1, "text/x-uuencode");
          245                 break;
          246         case '4':
          247         case '5':
          248         case '9':
          249                 snprintf(type, sizeof(type)-1, "application/octet-stream");
          250                 break;
          251         case 'I':
          252                 /* assume 4 chars max for extension */
          253                 snprintf(type, sizeof(type)-1, "image/%s", ext);
          254                 break;
          255         case 's':
          256                 snprintf(type, sizeof(type)-1, "audio/%s", ext);
          257                 break;
          258         case 'g':
          259                 snprintf(type, sizeof(type)-1, "image/gif");
          260                 break;
          261         case 'P':
          262         case 'd':
          263                 snprintf(type, sizeof(type)-1, "application/%s", ext);
          264                 break;
          265         default:
          266                 return NULL;
          267                 break; /* NOTREACHED */
          268         }
          269 
          270         return type;
          271 }
          272 
          273 const char *
          274 itemname(char i)
          275 {
          276         switch(i) {
          277         case '0': return items[ITEM_0];
          278         case '1': return items[ITEM_1];
          279         case '2': return items[ITEM_2];
          280         case '3': return items[ITEM_3];
          281         case '4': return items[ITEM_4];
          282         case '5': return items[ITEM_5];
          283         case '6': return items[ITEM_6];
          284         case '7': return items[ITEM_7];
          285         case '8': return items[ITEM_8];
          286         case '9': return items[ITEM_9];
          287         case 'I': return items[ITEM_I];
          288         case 'g': return items[ITEM_g];
          289         case 'h': return items[ITEM_h];
          290         case 's': return items[ITEM_s];
          291         case 'd': return items[ITEM_d];
          292         case 'P': return items[ITEM_P];
          293         }
          294 
          295         return NULL;
          296 }
          297 
          298 
          299 void
          300 printheaders(char *ctype)
          301 {
          302         time_t t;
          303 
          304         t = time(NULL);
          305         if (t > 0)
          306                 printf("Date: %s", asctime(gmtime(&t)));
          307         if (ctype)
          308                 printf("Content-Type: %s\r\n", ctype);
          309         printf("Server: phroxy\r\n");
          310         printf("Host: %s\r\n", http_host);
          311         printf("Connection: close\r\n");
          312 }
          313 
          314 int
          315 printmenu(int fd, char *data)
          316 {
          317         char i, *p, a[LINE_MAX], *f[4];
          318         char *ifmt = "<div class='item'><span> </span><code>%s</code></div>\n";
          319         char *afmt = "<div class='item'><span>%s</span><a href='/%s:%s/%c%s'>%s</a></div>\n";
          320         char *sfmt = "<div class='item'><span>%s</span><details><summary>%s</summary><form method='get' action='/%s:%s/%c%s'><input type='text' name='q'></form></details></div>\n";
          321 
          322         p = data;
          323 
          324         while((p = strsep(&data, "\n"))) {
          325                 i = *p++;
          326                 if (i == '.')
          327                         break;
          328 
          329                 f[0] = strsep(&p, "\t");
          330                 f[1] = strsep(&p, "\t");
          331                 f[2] = strsep(&p, "\t");
          332                 f[3] = strsep(&p, "\r");
          333                 if (!f[1])
          334                         continue;
          335 
          336                 switch(i) {
          337                 case 'i':
          338                         snprintf(a, sizeof(a), ifmt, f[0]);
          339                         break;
          340                 case '7':
          341                         snprintf(a, sizeof(a), sfmt, itemname(i), f[0], f[2], f[3], i, f[1]);
          342                         break;
          343                 default:
          344                         snprintf(a, sizeof(a), afmt, itemname(i), f[2], f[3], i, f[1], f[0]);
          345                 }
          346 
          347                 write(fd, a, strlen(a));
          348         }
          349 
          350         return 0;
          351 }
          352 
          353 int
          354 printhtml(int fd, const char *data, size_t len)
          355 {
          356         size_t r, n;
          357         const char *s, *e, *x;
          358 
          359         write(fd, "<pre>", 5);
          360 
          361         for (n = 0; n < len; n++) {
          362 
          363                 s = data + n;
          364 
          365                 /* escape XML characters */
          366                 x = NULL;
          367                 switch (*s) {
          368                 case '&': x = x ? x : "&amp;"; /* FALLTHROUGH */
          369                 case '<': x = x ? x : "&lt;";  /* FALLTHROUGH */
          370                 case '>': x = x ? x : "&gt;";  /* FALLTHROUGH */
          371                         write(fd, x, strlen(x));
          372                         break;
          373                 default:
          374                         e = strpbrk(s, "&<>");
          375                         r = e ? (size_t)(e - s) : len - n;
          376                         if (r) {
          377                                 write(fd, s, r);
          378                                 n += r - 1;
          379                         }
          380                 }
          381         }
          382         write(fd, "</pre>\n", 7);
          383         return 0;
          384 }
          385 
          386 int
          387 servebots()
          388 {
          389         printf("HTTP/1.1 200 OK\r\n");
          390         printheaders("text/plain");
          391         printf("Content-Length: %ld\r\n", strlen(robotstxt));
          392         printf("\r\n");
          393         fflush(stdout);
          394         write(1, robotstxt, strlen(robotstxt));
          395         fflush(stdout);
          396 
          397         return 0;
          398 }
          399 
          400 int
          401 serveitem(char item, char *path, char *data, size_t len)
          402 {
          403         char *send;
          404         int sent;
          405 
          406 
          407         if (!contenttype(item, path)) {
          408                 printhttp(415);
          409                 return 1;
          410         }
          411 
          412         printf("HTTP/1.1 200 OK\r\n");
          413         printheaders(contenttype(item, path));
          414 
          415         switch(item) {
          416         case '7': // search
          417         case '1': // menu
          418         case '0': // text
          419                 printf("\r\n");
          420                 fflush(stdout);
          421                 write(1, head, strlen(head));
          422                 if (item == '1' || item == '7') printmenu(1, data);
          423                 if (item == '0') printhtml(1, data, len);
          424                 write(1, foot, strlen(foot));
          425                 break;
          426 
          427         case '4': // BinHexed Macintosh file
          428         case '5': // DOS binary archive of some sort
          429         case '6': // uuencoded
          430         case '9': // binary
          431         case 'g': // gif
          432         case 'I': // image
          433         case 's': // sound
          434         case 'd': // document
          435         case 'P': // pdf (~document)
          436         case 'h': // http redirect
          437                 printf("Content-Length: %ld\r\n", len);
          438                 printf("\r\n");
          439                 fflush(stdout);
          440                 send = data;
          441                 while (len > 0) {
          442                         if ((sent = write(1, send, len)) < 0)
          443                                 return 1;
          444                         len -= sent;
          445                         send += sent;
          446                 }
          447                 break;
          448 
          449         case '2': // CSO phone-book server
          450         case '3': // Error
          451         case '8': // telnet session.
          452         case 'T': // tn3270 session.
          453         case '+': // mirror link
          454         default:
          455                 /* IGNORE */
          456                 break;
          457         }
          458 
          459         free(data);
          460         fflush(stdout);
          461 
          462         return 0;
          463 }
          464 
          465 int
          466 phroxy(char *url)
          467 {
          468         int sock;
          469         size_t len;
          470         char item = 0;
          471         char *hole, *path, *host, *port;
          472         char *data = NULL, *srch = NULL;
          473 
          474         if (!strncmp(url, "/robots.txt", 11))
          475                 return servebots();
          476 
          477         url++;
          478         hole = strsep(&url, "/");
          479         if (!hole || !strnlen(hole, 1))
          480                 hole = default_hole;
          481 
          482         host = strsep(&hole, ":");
          483         port = strsep(&hole, "\0");
          484         if (!port)
          485                 port = "70";
          486 
          487         if (url)
          488                 item = *url++;
          489 
          490         if (!item)
          491                 item = '1';
          492 
          493         path = strsep(&url, "\0");
          494         if (!path || *path == '\0')
          495                 path = "/";
          496 
          497         if((srch = strchr(path, '?'))) {
          498                 *srch = '\0';
          499                 srch += 3; /* go past "?q=" in URL, to fetch actual query */
          500         }
          501 
          502         if ((sock = connectto(host, port)) < 0) {
          503                 printhttp(500);
          504                 return 1;
          505         }
          506 
          507         if (!sendselector(sock, path, urldec(srch)))
          508                 data = getrawitem(sock, &len);
          509 
          510         close(sock);
          511 
          512         if (!data) {
          513                 printhttp(444);
          514                 return 1;
          515         }
          516 
          517         serveitem(item, path, data, len);
          518 
          519         return 0;
          520 }
          521 
          522 int
          523 main(void)
          524 {
          525         ssize_t rlen;
          526         char request[512], *url;
          527 
          528         rlen = read(0, request, sizeof(request) - 1);
          529         if (rlen < 0)
          530                 return 1;
          531 
          532         request[rlen] = '\0';
          533 
          534         if (strncmp(request, "GET ", 4)) {
          535                 printhttp(405);
          536                 return 1;
          537         }
          538 
          539         url = strtok(request + 4, " ");
          540 
          541         return phroxy(url);
          542 }