tphroxy.c - phroxy - Gopher to HTTP proxy
(HTM) git clone git://git.z3bra.org/phroxy.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
tphroxy.c (9901B)
---
1 #include <err.h>
2 #include <errno.h>
3 #include <limits.h>
4 #include <netdb.h>
5 #include <signal.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <time.h>
10 #include <unistd.h>
11
12 #include <sys/types.h>
13 #include <sys/socket.h>
14 #include <sys/socket.h>
15
/*
 * Supported gopher item types.
 * Each constant is the index itemname() uses to pick the entry for the
 * matching type character out of the items[] table.
 */
enum {
	ITEM_0 = 0,	/* '0' */
	ITEM_1 = 1,	/* '1' */
	ITEM_2 = 2,	/* '2' */
	ITEM_3 = 3,	/* '3' */
	ITEM_4 = 4,	/* '4' */
	ITEM_5 = 5,	/* '5' */
	ITEM_6 = 6,	/* '6' */
	ITEM_7 = 7,	/* '7' */
	ITEM_8 = 8,	/* '8' */
	ITEM_9 = 9,	/* '9' */
	ITEM_I = 10,	/* 'I' */
	ITEM_g = 11,	/* 'g' */
	ITEM_h = 12,	/* 'h' */
	ITEM_s = 13,	/* 's' */
	ITEM_P = 14,	/* 'P' */
	ITEM_d = 15	/* 'd' */
};
35
36 #include "config.h"
37
/*
 * Resize the allocation `m` so it can hold `n` elements of `s` bytes.
 *
 * A request for zero elements or zero-sized elements frees `m` and
 * yields NULL.  Otherwise the returned pointer is never NULL: the
 * process exits with status 1 if n * s would overflow size_t or if
 * realloc() fails.
 */
void *
xreallocarray(void *m, const size_t n, const size_t s)
{
	void *grown;

	/* nothing to keep: release the block */
	if (!n || !s) {
		free(m);
		return NULL;
	}

	/* s is known to be non-zero here, so the division is safe */
	if (n > (size_t)-1 / s)
		errx(1, "realloc: overflow");

	grown = realloc(m, n * s);
	if (grown == NULL)
		errx(1, "realloc: %s", strerror(errno));

	return grown;
}
54
55
/*
 * Open a TCP connection to host:port.
 *
 * Every address returned by getaddrinfo() is tried in order until one
 * connects.  SIGWINCH is blocked for the duration of the call and the
 * previous signal mask is restored before returning.
 *
 * Returns the connected socket descriptor, or -1 after printing a
 * diagnostic on stderr.
 */
static int
connectto(const char *host, const char *port)
{
	static const struct addrinfo hints = {
		.ai_family = AF_UNSPEC,
		.ai_socktype = SOCK_STREAM,
		.ai_protocol = IPPROTO_TCP,
	};
	struct addrinfo *addrs, *addr;
	sigset_t set, oset;
	int r, sock = -1;
	int saved = 0;		/* errno of the most recent failure */
	int connfailed = 0;	/* last failure came from connect() */

	sigemptyset(&set);
	sigaddset(&set, SIGWINCH);
	sigprocmask(SIG_BLOCK, &set, &oset);

	if ((r = getaddrinfo(host, port, &hints, &addrs))) {
		fprintf(stderr, "Can't resolve hostname \"%s\": %s\n", host, gai_strerror(r));
		goto out;
	}

	for (addr = addrs; addr; addr = addr->ai_next) {
		sock = socket(addr->ai_family, addr->ai_socktype,
		              addr->ai_protocol);
		if (sock < 0) {
			saved = errno;
			connfailed = 0;
			continue;
		}
		if (connect(sock, addr->ai_addr, addr->ai_addrlen) == 0)
			break;

		/* remember why before close() gets a chance to change errno */
		saved = errno;
		connfailed = 1;
		close(sock);
		sock = -1;	/* never hand a closed descriptor back */
	}

	freeaddrinfo(addrs);

	if (sock < 0) {
		if (connfailed)
			fprintf(stderr, "Can't connect to: %s:%s: %s\n", host, port, strerror(saved));
		else
			fprintf(stderr, "Can't open socket: %s\n", strerror(saved));
	}

out:
	sigprocmask(SIG_SETMASK, &oset, NULL);
	return sock;
}
106
/*
 * Send a gopher request on `sock`.
 *
 * The request is "<selector>\r\n", or "<selector>\t<search>\r\n" when
 * `search` is not NULL.
 *
 * Returns 0 once the whole request has been written, -1 on allocation
 * or write failure (a diagnostic is printed on stderr).
 */
int
sendselector(int sock, const char *selector, const char *search)
{
	char *msg, *p;
	size_t ln;
	ssize_t n;
	int saved;

	/* selector + "\r\n" + terminating NUL */
	ln = strlen(selector) + 3;
	if (search)
		ln += strlen(search) + 1;	/* tab separator + search */

	if (!(msg = malloc(ln))) {
		fprintf(stderr, "Can't send message: %s\n", strerror(errno));
		return -1;
	}

	if (search)
		snprintf(msg, ln, "%s\t%s\r\n", selector, search);
	else
		snprintf(msg, ln, "%s\r\n", selector);
	ln--;	/* the NUL is not part of the request */

	for (p = msg; ln > 0; p += n, ln -= (size_t)n) {
		n = write(sock, p, ln);
		if (n <= 0)
			break;
	}

	/* free() may alter errno; keep the value from write() */
	saved = errno;
	free(msg);

	if (ln > 0) {
		fprintf(stderr, "Can't send message: %s\n", strerror(saved));
		return -1;
	}

	return 0;
}
135
/* Value of the hexadecimal digit `c` (either case), or -1 if it is not one. */
static int
hexdigit(unsigned char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Decode the two hexadecimal digits at `in` (an optional leading '%'
 * is skipped) into the byte they represent.
 *
 * Returns '\0' when either digit is missing or invalid; the second
 * digit is not read when the first one is invalid, so a string that
 * ends early is never read past its terminator.
 */
static char
hex2bin(const unsigned char *in)
{
	int hi, lo;

	if (*in == '%')
		in++;

	if ((hi = hexdigit(in[0])) < 0)
		return '\0';
	if ((lo = hexdigit(in[1])) < 0)
		return '\0';

	return (char)(hi * 16 + lo);
}

/*
 * Decode a URL-encoded query string in place.
 *
 * '+' becomes a space and "%XX" becomes the byte with hexadecimal
 * value XX.  A '%' that is not followed by two hexadecimal digits is
 * copied through unchanged.  The result is never longer than the input.
 *
 * Returns `search` (NULL when `search` is NULL).
 */
char *
urldec(char *search)
{
	const unsigned char *in;
	char *out;

	if (!search)
		return NULL;

	in = (const unsigned char *)search;
	out = search;

	while (*in != '\0') {
		if (*in == '+') {
			*out++ = ' ';
			in++;
		} else if (*in == '%' && hexdigit(in[1]) >= 0 && hexdigit(in[2]) >= 0) {
			/* in[2] is only inspected once in[1] is known not to be the NUL */
			*out++ = hex2bin(in);
			in += 3;
		} else {
			*out++ = (char)*in++;
		}
	}
	*out = '\0';

	return search;
}
179
180
/*
 * Read from `sock` until end of file and return everything as one
 * heap buffer.
 *
 * The buffer grows in BUFSIZ steps and is NUL-terminated.  When `sz`
 * is not NULL it receives the number of bytes read, terminator
 * excluded (0 on failure).
 *
 * Returns the buffer, which the caller must free(), or NULL if read()
 * failed (a diagnostic is printed on stderr).
 */
char *
getrawitem(int sock, size_t *sz)
{
	char *raw = NULL;
	size_t chunks = 0, cap = 0, used = 0;
	ssize_t n;
	int saved;

	for (;;) {
		/* grow when full: at least one free byte remains for the NUL */
		if (used == cap) {
			raw = xreallocarray(raw, ++chunks, BUFSIZ);
			cap = chunks * BUFSIZ;
		}
		n = read(sock, raw + used, cap - used);
		if (n <= 0)
			break;
		used += (size_t)n;
	}

	if (n < 0) {
		saved = errno;
		free(raw);
		fprintf(stderr, "Can't read socket: %s\n", strerror(saved));
		if (sz)
			*sz = 0;
		/* never hand the freed buffer back to the caller */
		return NULL;
	}

	raw[used] = '\0';
	if (sz)
		*sz = used;

	return raw;
}
213
/*
 * Print an HTTP/1.1 status line for `code` on stdout, followed by the
 * empty line that ends the (header-less) response.
 *
 * Codes without a dedicated reason phrase still get a well-formed
 * status line with a generic phrase.
 */
void
printhttp(int code)
{
	const char *reason;

	switch (code) {
	case 400: reason = "That's Illegal"; break;
	case 404: reason = "Google Broke The Web"; break;
	case 405: reason = "Don't Do That"; break;
	case 415: reason = "Gopher Type Not Handled"; break;
	case 500: reason = "You Broke The Web"; break;
	default:  reason = "Something Went Wrong"; break;
	}

	printf("HTTP/1.1 %d %s\r\n", code, reason);
	printf("\r\n");
}
226
/*
 * Map gopher item type `i` (and, for some types, the file extension
 * of `path`) to an HTTP Content-Type value.
 *
 * The extension is whatever follows the last '.' in `path`.  It is
 * replaced by "*" when there is no dot, when it is empty, or when it
 * contains a '/', a space or a control character: such text is either
 * a dot inside a directory name or something that must not be copied
 * into a response header.
 *
 * Returns a pointer to a static buffer that is overwritten by the
 * next call, or NULL when the item type is not handled.
 */
char *
contenttype(char i, char *path)
{
	static char type[32];
	const char *ext, *c;

	/* isolate file extension, if any */
	ext = strrchr(path, '.');
	ext = ext ? ext + 1 : "*";

	if (*ext == '\0')
		ext = "*";
	for (c = ext; *c != '\0'; c++) {
		if (*c == '/' || (unsigned char)*c <= ' ' || *c == 0x7f) {
			ext = "*";
			break;
		}
	}

	switch (i) {
	case '0':
	case '1':
	case '7':
	case 'h':
		snprintf(type, sizeof(type), "text/html; charset=utf-8");
		break;
	case '6':
		snprintf(type, sizeof(type), "text/x-uuencode");
		break;
	case '4':
	case '5':
	case '9':
		snprintf(type, sizeof(type), "application/octet-stream");
		break;
	case 'I':
		/* long extensions are truncated to fit the buffer */
		snprintf(type, sizeof(type), "image/%s", ext);
		break;
	case 's':
		snprintf(type, sizeof(type), "audio/%s", ext);
		break;
	case 'g':
		snprintf(type, sizeof(type), "image/gif");
		break;
	case 'P':
	case 'd':
		snprintf(type, sizeof(type), "application/%s", ext);
		break;
	default:
		return NULL;
	}

	return type;
}
272
273 const char *
274 itemname(char i)
275 {
276 switch(i) {
277 case '0': return items[ITEM_0];
278 case '1': return items[ITEM_1];
279 case '2': return items[ITEM_2];
280 case '3': return items[ITEM_3];
281 case '4': return items[ITEM_4];
282 case '5': return items[ITEM_5];
283 case '6': return items[ITEM_6];
284 case '7': return items[ITEM_7];
285 case '8': return items[ITEM_8];
286 case '9': return items[ITEM_9];
287 case 'I': return items[ITEM_I];
288 case 'g': return items[ITEM_g];
289 case 'h': return items[ITEM_h];
290 case 's': return items[ITEM_s];
291 case 'd': return items[ITEM_d];
292 case 'P': return items[ITEM_P];
293 }
294
295 return NULL;
296 }
297
298
299 void
300 printheaders(char *ctype)
301 {
302 time_t t;
303
304 t = time(NULL);
305 if (t > 0)
306 printf("Date: %s", asctime(gmtime(&t)));
307 if (ctype)
308 printf("Content-Type: %s\r\n", ctype);
309 printf("Server: phroxy\r\n");
310 printf("Host: %s\r\n", http_host);
311 printf("Connection: close\r\n");
312 }
313
/* Write all `len` bytes of `buf` to `fd`; 0 on success, -1 on failure. */
static int
menu_write(int fd, const char *buf, size_t len)
{
	ssize_t n;

	while (len > 0) {
		if ((n = write(fd, buf, len)) <= 0)
			return -1;
		buf += n;
		len -= (size_t)n;
	}

	return 0;
}

/*
 * Copy `src` into `dst` (capacity `dstsz`), replacing the characters
 * that are special in HTML text and quoted attributes with entities.
 * The copy stops early rather than overflow or split an entity, and is
 * always NUL-terminated when dstsz > 0.
 */
static void
menu_escape(char *dst, size_t dstsz, const char *src)
{
	const char *rep;
	char one[2] = { 0, 0 };
	size_t used = 0, rl;

	if (dstsz == 0)
		return;

	for (; *src != '\0'; src++) {
		switch (*src) {
		case '&':  rep = "&amp;";  break;
		case '<':  rep = "&lt;";   break;
		case '>':  rep = "&gt;";   break;
		case '"':  rep = "&quot;"; break;
		case '\'': rep = "&#39;";  break;
		default:
			one[0] = *src;
			rep = one;
			break;
		}
		rl = strlen(rep);
		if (used + rl >= dstsz)
			break;
		memcpy(dst + used, rep, rl);
		used += rl;
	}
	dst[used] = '\0';
}

/*
 * Render a gopher menu as HTML on `fd`.
 *
 * `data` is split in place into lines of the form
 *   <type><text> TAB <selector> TAB <host> TAB <port> CR
 * and processing stops at a line starting with '.'.  Type 'i' lines
 * become plain text, type '7' lines become a form with an input named
 * "q", everything else becomes a link to /<host>:<port>/<type><selector>.
 * Empty lines, lines without a selector field, and link lines without
 * host and port are skipped.  All fields are HTML-escaped because they
 * come from the remote server.
 *
 * Returns 0 on success, -1 if writing to `fd` failed.
 */
int
printmenu(int fd, char *data)
{
	static const char ifmt[] = "<div class='item'><span> </span><code>%s</code></div>\n";
	static const char afmt[] = "<div class='item'><span>%s</span><a href='/%s:%s/%c%s'>%s</a></div>\n";
	static const char sfmt[] = "<div class='item'><span>%s</span><details><summary>%s</summary><form method='get' action='/%s:%s/%c%s'><input type='text' name='q'></form></details></div>\n";
	char line[LINE_MAX], esc[4][LINE_MAX];
	char *row, *f[4];
	const char *name;
	char type;
	int k;

	while ((row = strsep(&data, "\n"))) {
		/* an empty line has no type character to consume */
		if (*row == '\0')
			continue;

		type = *row++;
		if (type == '.')
			break;

		f[0] = strsep(&row, "\t");
		f[1] = strsep(&row, "\t");
		f[2] = strsep(&row, "\t");
		f[3] = strsep(&row, "\r");
		if (!f[1])
			continue;
		/* a link needs both host and port */
		if (type != 'i' && (!f[2] || !f[3]))
			continue;

		for (k = 0; k < 4; k++)
			menu_escape(esc[k], sizeof(esc[k]), f[k] ? f[k] : "");

		/* types unknown to itemname() get an empty label */
		if (!(name = itemname(type)))
			name = "";

		switch (type) {
		case 'i':
			snprintf(line, sizeof(line), ifmt, esc[0]);
			break;
		case '7':
			snprintf(line, sizeof(line), sfmt, name, esc[0], esc[2], esc[3], type, esc[1]);
			break;
		default:
			snprintf(line, sizeof(line), afmt, name, esc[2], esc[3], type, esc[1], esc[0]);
			break;
		}

		if (menu_write(fd, line, strlen(line)) < 0)
			return -1;
	}

	return 0;
}
352
/* Write all `len` bytes of `buf` to `fd`; 0 on success, -1 on failure. */
static int
html_write(int fd, const char *buf, size_t len)
{
	ssize_t n;

	while (len > 0) {
		if ((n = write(fd, buf, len)) <= 0)
			return -1;
		buf += n;
		len -= (size_t)n;
	}

	return 0;
}

/*
 * Write the `len` bytes at `data` to `fd` wrapped in <pre>...</pre>,
 * replacing '&', '<' and '>' with their HTML entities.
 *
 * Exactly `len` bytes are processed: the scan does not depend on a
 * terminating NUL and embedded NUL bytes are passed through.
 *
 * Returns 0 on success, -1 if writing to `fd` failed.
 */
int
printhtml(int fd, const char *data, size_t len)
{
	const char *ent;
	size_t start = 0;	/* first byte not yet written */
	size_t n;

	if (html_write(fd, "<pre>", 5) < 0)
		return -1;

	for (n = 0; n < len; n++) {
		switch (data[n]) {
		case '&': ent = "&amp;"; break;
		case '<': ent = "&lt;";  break;
		case '>': ent = "&gt;";  break;
		default:
			continue;	/* ordinary byte: extend the pending run */
		}

		/* flush the run of ordinary bytes, then the entity */
		if (html_write(fd, data + start, n - start) < 0 ||
		    html_write(fd, ent, strlen(ent)) < 0)
			return -1;
		start = n + 1;
	}

	if (html_write(fd, data + start, len - start) < 0)
		return -1;

	return html_write(fd, "</pre>\n", 7);
}
385
386 int
387 servebots()
388 {
389 printf("HTTP/1.1 200 OK\r\n");
390 printheaders("text/plain");
391 printf("Content-Length: %ld\r\n", strlen(robotstxt));
392 printf("\r\n");
393 fflush(stdout);
394 write(1, robotstxt, strlen(robotstxt));
395 fflush(stdout);
396
397 return 0;
398 }
399
400 int
401 serveitem(char item, char *path, char *data, size_t len)
402 {
403 char *send;
404 int sent;
405
406
407 if (!contenttype(item, path)) {
408 printhttp(415);
409 return 1;
410 }
411
412 printf("HTTP/1.1 200 OK\r\n");
413 printheaders(contenttype(item, path));
414
415 switch(item) {
416 case '7': // search
417 case '1': // menu
418 case '0': // text
419 printf("\r\n");
420 fflush(stdout);
421 write(1, head, strlen(head));
422 if (item == '1' || item == '7') printmenu(1, data);
423 if (item == '0') printhtml(1, data, len);
424 write(1, foot, strlen(foot));
425 break;
426
427 case '4': // BinHexed Macintosh file
428 case '5': // DOS binary archive of some sort
429 case '6': // uuencoded
430 case '9': // binary
431 case 'g': // gif
432 case 'I': // image
433 case 's': // sound
434 case 'd': // document
435 case 'P': // pdf (~document)
436 case 'h': // http redirect
437 printf("Content-Length: %ld\r\n", len);
438 printf("\r\n");
439 fflush(stdout);
440 send = data;
441 while (len > 0) {
442 if ((sent = write(1, send, len)) < 0)
443 return 1;
444 len -= sent;
445 send += sent;
446 }
447 break;
448
449 case '2': // CSO phone-book server
450 case '3': // Error
451 case '8': // telnet session.
452 case 'T': // tn3270 session.
453 case '+': // mirror link
454 default:
455 /* IGNORE */
456 break;
457 }
458
459 free(data);
460 fflush(stdout);
461
462 return 0;
463 }
464
465 int
466 phroxy(char *url)
467 {
468 int sock;
469 size_t len;
470 char item = 0;
471 char *hole, *path, *host, *port;
472 char *data = NULL, *srch = NULL;
473
474 if (!strncmp(url, "/robots.txt", 11))
475 return servebots();
476
477 url++;
478 hole = strsep(&url, "/");
479 if (!hole || !strnlen(hole, 1))
480 hole = default_hole;
481
482 host = strsep(&hole, ":");
483 port = strsep(&hole, "\0");
484 if (!port)
485 port = "70";
486
487 if (url)
488 item = *url++;
489
490 if (!item)
491 item = '1';
492
493 path = strsep(&url, "\0");
494 if (!path || *path == '\0')
495 path = "/";
496
497 if((srch = strchr(path, '?'))) {
498 *srch = '\0';
499 srch += 3; /* go past "?q=" in URL, to fetch actual query */
500 }
501
502 if ((sock = connectto(host, port)) < 0) {
503 printhttp(500);
504 return 1;
505 }
506
507 if (!sendselector(sock, path, urldec(srch)))
508 data = getrawitem(sock, &len);
509
510 close(sock);
511
512 if (!data) {
513 printhttp(444);
514 return 1;
515 }
516
517 serveitem(item, path, data, len);
518
519 return 0;
520 }
521
/*
 * Read a single HTTP request from stdin and answer it on stdout.
 *
 * Only the request line is interpreted: anything but "GET " gets a
 * 405, a missing or non-absolute URL gets a 400, and everything else
 * is handed to phroxy().  At most sizeof(request) - 1 bytes are read.
 */
int
main(void)
{
	ssize_t rlen;
	char request[512], *url;

	rlen = read(0, request, sizeof(request) - 1);
	if (rlen < 0)
		return 1;

	request[rlen] = '\0';

	if (strncmp(request, "GET ", 4)) {
		printhttp(405);
		return 1;
	}

	/* the URL ends at the first blank or at the end of the line */
	url = strtok(request + 4, " \t\r\n");
	if (!url || *url != '/') {
		printhttp(400);
		return 1;
	}

	return phroxy(url);
}