gopherproxy.c - gopherproxy-c - Gopher HTTP proxy in C (CGI)
(HTM) git clone git://git.codemadness.org/gopherproxy-c
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
gopherproxy.c (23849B)
---
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>

#include <ctype.h>
#include <errno.h>
#include <netdb.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include <tls.h>
16
/* CA bundle made visible for TLS; can be overridden at build time. */
#if !defined(TLS_CA_CERT_FILE)
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
#endif

#if defined(USE_TLS)
/* non-zero once a "gophers://" URI was requested */
static int usetls;
/* libtls client context */
static struct tls *t;
/* libtls configuration */
static struct tls_config *tls_config;
#endif

/* max size of a DirEntity in bytes */
#define MAX_LINE_SIZ 2048
/* read buffer size in bytes */
#define READ_BUF_SIZ 16384

/* timeout in seconds */
#define MAX_RESPONSETIMEOUT 10
/* max download size in bytes */
#define MAX_RESPONSESIZ 4000000

/* pledge(2) and unveil(2) become no-ops that report success elsewhere */
#if !defined(__OpenBSD__)
#define pledge(p1,p2) 0
#define unveil(p1,p2) 0
#endif
39
/* Components of a URI as filled in by uri_parse(). */
struct uri {
	char proto[48];      /* scheme, including its ":" or "://" */
	char userinfo[256];  /* username, optionally followed by ":password" */
	char host[256];
	char port[6];        /* port number as decimal text */
	char path[1024];
	char query[1024];
	char fragment[1024];
};
50
/* One parsed line (DirEntity) of a Gopher directory listing. */
struct visited {
	int _type;            /* item type: first byte of the line */
	char username[1024];  /* display text */
	char path[1024];      /* selector */
	char server[256];
	char port[8];
};
58
59 struct linebuf {
60 /* line buffer */
61 char *line;
62 char linebuf[MAX_LINE_SIZ];
63 size_t linelen;
64 size_t lineoff;
65 /* read buffer */
66 char buf[READ_BUF_SIZ];
67 char *bufoff, *bufend;
68 int err;
69 int eof;
70 };
71
72 /* parsed URI */
73 static struct uri u;
74 /* socket fd */
75 static int sock = -1;
76
77 int headerset = 0, isdir = 0;
78 ssize_t (*readbuf)(char *, size_t);
79 ssize_t (*writebuf)(const char *, size_t);
80
/* Signal handler: terminate with status 2 when the response timeout
 * (SIGALRM) fires; every other signal is ignored. */
void
sighandler(int signo)
{
	switch (signo) {
	case SIGALRM:
		_exit(2);
	default:
		break;
	}
}
87
/* Write the optional formatted message and the text for the current errno
 * to stderr, then exit() with `exitstatus`.
 * Unlike BSD err() it does not prefix __progname */
void
err(int exitstatus, const char *fmt, ...)
{
	/* take errno first: the stdio calls below may change it */
	const int saved_errno = errno;
	va_list args;

	if (fmt != NULL) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
		fputs(": ", stderr);
	}
	fprintf(stderr, "%s\n", strerror(saved_errno));

	exit(exitstatus);
}
108
/* Write the optional formatted message and a newline to stderr, then exit()
 * with `exitstatus`.
 * Unlike BSD errx() it does not prefix __progname */
void
errx(int exitstatus, const char *fmt, ...)
{
	va_list args;

	if (fmt != NULL) {
		va_start(args, fmt);
		vfprintf(stderr, fmt, args);
		va_end(args);
	}
	fputc('\n', stderr);

	exit(exitstatus);
}
125
126 void
127 die(int code, const char *fmt, ...)
128 {
129 va_list ap;
130
131 if (!headerset) {
132 switch (code) {
133 case 400:
134 fputs("Status: 400 Bad Request\r\n", stdout);
135 break;
136 case 403:
137 fputs("Status: 403 Permission Denied\r\n", stdout);
138 break;
139 default:
140 fputs("Status: 500 Internal Server Error\r\n", stdout);
141 break;
142 }
143 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
144 }
145
146 /* write error to stderr and stdout */
147
148 va_start(ap, fmt);
149 vfprintf(stderr, fmt, ap);
150 va_end(ap);
151
152 va_start(ap, fmt);
153 vfprintf(stdout, fmt, ap);
154 va_end(ap);
155
156 if (isdir)
157 fputs("</pre>\n</body>\n</html>\n", stdout);
158
159 exit(1);
160 }
161
/* Write `s` to stdout with the characters < > ' & " replaced by their
 * HTML 2.0 / XML 1.0 entities, so that the text is safe both as element
 * content and inside a quoted attribute value. */
void
xmlencode(const char *s)
{
	for (; *s; s++) {
		switch (*s) {
		case '<':  fputs("&lt;",   stdout); break;
		case '>':  fputs("&gt;",   stdout); break;
		case '\'': fputs("&#39;",  stdout); break; /* &apos; is not HTML 2.0 */
		case '&':  fputs("&amp;",  stdout); break;
		case '"':  fputs("&quot;", stdout); break;
		default:   putchar(*s);
		}
	}
}
177
/* Percent-encode `s` to stdout so that it can be used as a value in a
   query string.
   '%' and '+' are encoded too: decodeparam() gives both a special meaning,
   so leaving them as-is would alter the value on the way back in.
   Additional characters get encoded so that no xmlencode() is needed */
void
encodeparam(const char *s)
{
	for (; *s; s++) {
		switch (*s) {
		case '<':
		case '>':
		case '\'':
		case '&':
		case '"':
		case '#':
		case '=':
		case '%':
		case '+':
			printf("%%%02X", (unsigned char)*s);
			break;
		default:
			putchar(*s);
			break;
		}
	}
}
201
/* Resolve `host` and the numeric `port` and connect a stream socket to the
 * first address that accepts the connection.
 * Returns the connected socket; on any failure the process ends via die(). */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	int error, save_errno, s;
	const char *cause = NULL;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((error = getaddrinfo(host, port, &hints, &res0)))
		die(500, "%s: %s: %s:%s\n", __func__, gai_strerror(error), host, port);

	s = -1;
	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			continue;
		}

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			/* keep the errno of connect(), close() may change it */
			save_errno = errno;
			close(s);
			errno = save_errno;
			s = -1;
			continue;
		}
		break;
	}
	if (s == -1) {
		/* errno belongs to the last failed call, report it as well */
		die(500, "%s: %s: %s:%s: %s\n", __func__,
		    cause ? cause : "connect", host, port, strerror(errno));
	}
	freeaddrinfo(res0);

	return s;
}
240
241 void
242 setup_plain(void)
243 {
244 if (pledge("stdio dns inet", NULL) == -1)
245 err(1, "pledge");
246
247 sock = edial(u.host, u.port);
248 }
249
250 #ifdef USE_TLS
251 void
252 setup_tls(void)
253 {
254 if (tls_init())
255 errx(1, "tls_init failed");
256 if (!(tls_config = tls_config_new()))
257 errx(1, "tls config failed");
258 if (unveil(TLS_CA_CERT_FILE, "r") == -1)
259 err(1, "unveil: %s", TLS_CA_CERT_FILE);
260 #if 0
261 if (tls_config_set_ca_file(tls_config, TLS_CA_CERT_FILE) == -1)
262 errx(1, "tls_config_set_ca_file: %s: %s", TLS_CA_CERT_FILE,
263 tls_config_error(tls_config));
264 #endif
265
266 if (pledge("stdio dns inet rpath", NULL) == -1)
267 err(1, "pledge");
268
269 if (!(t = tls_client()))
270 errx(1, "tls_client: %s", tls_error(t));
271 if (tls_configure(t, tls_config))
272 errx(1, "tls_configure: %s", tls_error(t));
273
274 sock = edial(u.host, u.port);
275 if (tls_connect_socket(t, sock, u.host) == -1)
276 die(500, "tls_connect: %s\n", tls_error(t));
277 }
278
279 ssize_t
280 tls_writebuf(const char *buf, size_t buflen)
281 {
282 const char *errstr;
283 const char *p;
284 size_t len;
285 ssize_t r, written = 0;
286
287 for (len = buflen, p = buf; len > 0; ) {
288 r = tls_write(t, p, len);
289 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
290 continue;
291 } else if (r == -1) {
292 errstr = tls_error(t);
293 fprintf(stderr, "tls_write: %s\n", errstr ? errstr : "");
294 return -1;
295 }
296 p += r;
297 len -= r;
298 written += r;
299 }
300 return written;
301 }
302
303 ssize_t
304 tls_readbuf(char *buf, size_t bufsiz)
305 {
306 const char *errstr;
307 ssize_t r, len;
308
309 for (len = 0; bufsiz > 0;) {
310 r = tls_read(t, buf + len, bufsiz);
311 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
312 continue;
313 } else if (r == 0) {
314 break;
315 } else if (r == -1) {
316 errstr = tls_error(t);
317 fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
318 return -1;
319 }
320 len += r;
321 bufsiz -= r;
322 }
323 return len;
324 }
325 #endif
326
327 ssize_t
328 plain_writebuf(const char *buf, size_t buflen)
329 {
330 ssize_t r;
331
332 if ((r = write(sock, buf, buflen)) == -1)
333 fprintf(stderr, "write: %s\n", strerror(errno));
334
335 return r;
336 }
337
338 ssize_t
339 plain_readbuf(char *buf, size_t bufsiz)
340 {
341 ssize_t r, len;
342
343 for (len = 0; bufsiz > 0;) {
344 r = read(sock, buf + len, bufsiz);
345 if (r == 0) {
346 break;
347 } else if (r == -1) {
348 fprintf(stderr, "read: %s\n", strerror(errno));
349 return -1;
350 }
351 len += r;
352 bufsiz -= r;
353 }
354 return len;
355 }
356
357 void
358 linebuf_init(struct linebuf *b)
359 {
360 memset(b, 0, sizeof(struct linebuf));
361 b->line = b->linebuf;
362 }
363
364 ssize_t
365 linebuf_get(struct linebuf *b)
366 {
367 size_t len;
368 ssize_t n;
369 char *p;
370
371 b->line = b->linebuf;
372 while (!(b->err) && !(b->eof)) {
373 /* need to read more */
374 if (b->bufoff >= b->bufend) {
375 b->bufoff = b->buf;
376 b->bufend = b->buf;
377 n = readbuf(b->buf, sizeof(b->buf));
378 if (n == -1)
379 b->err = EIO;
380
381 /* use remaining data even if not terminated by a newline */
382 if (n == 0 && b->lineoff > 0) {
383 b->eof = 1;
384 return b->lineoff;
385 }
386
387 if (n > 0)
388 b->bufend = b->buf + n;
389 else
390 return n;
391 }
392
393 /* search first newline */
394 if ((p = memchr(b->bufoff, '\n', b->bufend - b->bufoff))) {
395 len = (p - b->bufoff);
396 /* full line in buffer, no need to copy to line buffer */
397 if (b->lineoff == 0)
398 b->line = b->bufoff; /* just point to buffer, no copy */
399 } else {
400 /* use remaining data into line buffer and read more */
401 len = (b->bufend - b->bufoff);
402 }
403
404 if (b->line == b->linebuf) {
405 if (b->lineoff + len + 1 >= sizeof(b->linebuf)) {
406 b->err = ENOMEM;
407 return -1;
408 }
409 memcpy(b->linebuf + b->lineoff, b->bufoff, len);
410 }
411
412 b->lineoff += len;
413 b->linelen = b->lineoff;
414 b->line[b->linelen] = '\0';
415
416 if (p) {
417 b->bufoff = p + 1; /* after newline */
418 b->lineoff = 0; /* reset line: start at beginning */
419 return b->linelen;
420 } else {
421 b->bufoff = b->bufend; /* read more */
422 }
423 }
424 return -1; /* UNREACHED */
425 }
426
/* Decide whether the proxy refuses to connect to a destination.
 *
 * Only the ports "70" and "7070" are allowed, and no host below the
 * ".onion" top-level domain. The domain check looks at the end of the name,
 * ignores case and one trailing root dot, so "a.onion.b.onion", "X.ONION"
 * and "x.onion." are all refused. `path` is currently not inspected.
 * Returns 1 if blacklisted, 0 otherwise. */
int
isblacklisted(const char *host, const char *port, const char *path)
{
	static const char tld[] = ".onion";
	const size_t tldlen = sizeof(tld) - 1;
	size_t hostlen;

	(void)path;

	if (strcmp(port, "70") && strcmp(port, "7070"))
		return 1;

	hostlen = strlen(host);
	if (hostlen > 0 && host[hostlen - 1] == '.')
		hostlen--; /* fully qualified form of the same name */
	if (hostlen >= tldlen &&
	    !strncasecmp(host + hostlen - tldlen, tld, tldlen))
		return 1;

	return 0;
}
438
/* Map a Gopher item type character to its label for the listing.
 * Every label is exactly 6 columns wide (right-aligned), so that the text
 * following it lines up inside the <pre> block. */
char *
typestr(int c)
{
	switch (c) {
	case '0': return "  TEXT";
	case '1': return "   DIR";
	case '2': return "   CSO";
	case '3': return "   ERR";
	case '4': return "   MAC";
	case '5': return "   DOS";
	case '6': return " UUENC";
	case '7': return "SEARCH";
	case '8': return "TELNET";
	case '9': return "   BIN";
	case 'g': return "   GIF";
	case 'h': return "  HTML"; /* non-standard */
	case 's': return "   SND"; /* non-standard */
	case '+': return "MIRROR";
	case 'I': return "   IMG";
	case 'T': return "TN3270";
	default:
		/* "Characters '0' through 'Z' are reserved." (ASCII) */
		if (c >= '0' && c <= 'Z')
			return "RESERV";
		else
			return "      ";
	}
}
467
468 void
469 servefile(const char *server, const char *port, const char *path, const char *query)
470 {
471 char buf[READ_BUF_SIZ];
472 int r, w;
473 size_t totalsiz = 0;
474
475 w = snprintf(buf, sizeof(buf), "%s%s%s\r\n", path, query[0] ? "?" : "", query);
476 if (w < 0 || (size_t)w >= sizeof(buf))
477 die(500, "servefile: path too long\n");
478 if (writebuf(buf, w) == -1)
479 die(500, "servefile: writebuf failed\n");
480
481 while ((r = readbuf(buf, sizeof(buf))) > 0) {
482 /* too big total response */
483 totalsiz += r;
484 if (totalsiz > MAX_RESPONSESIZ) {
485 dprintf(1, "--- transfer too big, truncated ---\n");
486 break;
487 }
488
489 if ((w = write(1, buf, r)) == -1)
490 die(500, "write: %s\n", strerror(errno));
491 }
492 if (r == -1)
493 die(500, "read: %s\n", strerror(errno));
494 }
495
496 void
497 servedir(const char *server, const char *port, const char *path, const char *query, const char *param)
498 {
499 struct visited v;
500 struct linebuf lb;
501 const char *prefix = "";
502 char buf[2048], *uri;
503 char *line;
504 size_t totalsiz, linenr;
505 ssize_t n;
506 char primarytype = '\0';
507 int i, len, w;
508
509 #ifdef USE_TLS
510 if (usetls)
511 prefix = "gophers://";
512 #endif
513
514 if (param[0])
515 w = snprintf(buf, sizeof(buf), "%s%s%s\t%s\r\n", path, query[0] ? "?" : "", query, param);
516 else
517 w = snprintf(buf, sizeof(buf), "%s%s%s\r\n", path, query[0] ? "?" : "", query);
518
519 if (w < 0 || (size_t)w >= sizeof(buf))
520 die(500, "servedir: path too long\n");
521 if (writebuf(buf, w) == -1)
522 die(500, "servedir: writebuf failed\n");
523
524 linebuf_init(&lb);
525
526 totalsiz = 0;
527 for (linenr = 1; (n = linebuf_get(&lb)) > 0; linenr++) {
528 line = lb.line;
529
530 /* too big total response */
531 if (n > 0)
532 totalsiz += n;
533 if (totalsiz > MAX_RESPONSESIZ) {
534 dprintf(1, "--- transfer too big, truncated ---\n");
535 break;
536 }
537
538 if (n > 0 && line[n - 1] == '\n')
539 line[--n] = '\0';
540 if (n > 0 && line[n - 1] == '\r')
541 line[--n] = '\0';
542 if (n == 1 && line[0] == '.')
543 break;
544
545 memset(&v, 0, sizeof(v));
546
547 v._type = line[0];
548 if (v._type != '+')
549 primarytype = v._type;
550 else if (!primarytype)
551 die(500, "%s:%s %s:%d: undefined primary server\n",
552 server, port, path, linenr);
553
554 /* "username" */
555 i = 1;
556 len = strcspn(line + i, "\t");
557 if (len + 1 < sizeof(v.username)) {
558 memcpy(v.username, line + i, len);
559 v.username[len] = '\0';
560 } else {
561 die(500, "%s:%s %s:%d: username field too long\n",
562 server, port, path, linenr);
563 }
564 if (line[i + len] == '\t')
565 i += len + 1;
566 else
567 die(500, "%s:%s %s:%d: invalid line / field count\n",
568 server, port, path, linenr);
569
570 /* selector / path */
571 len = strcspn(line + i, "\t");
572 if (len + 1 < sizeof(v.path)) {
573 memcpy(v.path, line + i, len);
574 v.path[len] = '\0';
575 } else {
576 die(500, "%s:%s %s:%d: path field too long\n",
577 server, port, path, linenr);
578 }
579 if (line[i + len] == '\t')
580 i += len + 1;
581 else
582 die(500, "%s:%s %s:%d: invalid line / field count\n",
583 server, port, path, linenr);
584
585 /* server */
586 len = strcspn(line + i, "\t");
587 if (len + 1 < sizeof(v.server)) {
588 memcpy(v.server, line + i, len);
589 v.server[len] = '\0';
590 } else {
591 die(500, "%s:%s %s:%d: server field too long\n",
592 server, port, path, linenr);
593 }
594 if (line[i + len] == '\t')
595 i += len + 1;
596 else
597 die(500, "%s:%s %s:%d: invalid line / field count\n",
598 server, port, path, linenr);
599
600 /* port */
601 len = strcspn(line + i, "\t");
602 if (len + 1 < sizeof(v.port)) {
603 memcpy(v.port, line + i, len);
604 v.port[len] = '\0';
605 } else {
606 die(500, "%s:%s %s:%d: port field too long\n",
607 server, port, path, linenr);
608 }
609
610 if (!strcmp(v.port, "70"))
611 snprintf(buf, sizeof(buf), "%s%s/%c%s",
612 prefix, v.server, primarytype, v.path);
613 else
614 snprintf(buf, sizeof(buf), "%s%s:%s/%c%s",
615 prefix, v.server, v.port, primarytype, v.path);
616 uri = buf;
617
618 switch (primarytype) {
619 case 'i': /* info */
620 case '3': /* error */
621 fputs(typestr(v._type), stdout);
622 fputs(" ", stdout);
623 xmlencode(v.username);
624 break;
625 case '7': /* search */
626 fputs("</pre><form method=\"get\" action=\"\"><pre>", stdout);
627 fputs(typestr(v._type), stdout);
628 fputs(" <input type=\"hidden\" name=\"q\" value=\"", stdout);
629 xmlencode(uri);
630 fputs("\" /><input type=\"search\" placeholder=\"", stdout);
631 xmlencode(v.username);
632 fputs(
633 "\" name=\"p\" value=\"\" size=\"72\" />"
634 "<input type=\"submit\" value=\"Search\" /></pre></form><pre>", stdout);
635 break;
636 case '8': /* telnet */
637 case 'T': /* tn3270 */
638 fputs(typestr(v._type), stdout);
639 printf(" <a href=\"%s://", primarytype == '8' ? "telnet" : "tn3270");
640 if (v.path[0]) {
641 xmlencode(v.path);
642 fputs("@", stdout);
643 }
644 xmlencode(v.server);
645 fputs(":", stdout);
646 xmlencode(v.port);
647 fputs("\">", stdout);
648 xmlencode(v.username);
649 fputs("</a>", stdout);
650 break;
651 case 'I': /* image: show inline */
652 fputs(typestr(v._type), stdout);
653 fputs(" <a href=\"?q=", stdout);
654 encodeparam(uri);
655 fputs("\">", stdout);
656
657 fputs("<img src=\"?q=", stdout);
658 encodeparam(uri);
659 fputs("\" />", stdout);
660
661 fputs("</a>", stdout);
662 break;
663 default: /* other */
664 fputs(typestr(v._type), stdout);
665 fputs(" <a href=\"", stdout);
666 if (primarytype == 'h' && !strncmp(v.path, "URL:", sizeof("URL:") - 1)) {
667 xmlencode(v.path + sizeof("URL:") - 1);
668 } else {
669 fputs("?q=", stdout);
670 encodeparam(uri);
671 }
672 fputs("\">", stdout);
673 xmlencode(v.username);
674 fputs("</a>", stdout);
675
676 }
677 putchar('\n');
678 }
679 if (lb.err)
680 die(500, "%s:%s after line %d: error reading line\n", server, port, linenr);
681 }
682
/* Value of the hexadecimal digit `c`, 0 for any other character. */
int
hexdigit(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	else if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	return 0;
}

/* Decode the query string value `s` into `buf` (capacity `bufsiz`):
 * "%XX" becomes the byte XX and '+' a space. Decoding stops at the end of
 * `s` or at the '&' that starts the next parameter.
 * Returns the decoded length, or -1 when the value does not fit, an escape
 * is malformed or it encodes a NUL byte (which would cut the string short
 * behind the back of later checks). */
int
decodeparam(char *buf, size_t bufsiz, const char *s)
{
	size_t i;
	int c;

	if (!bufsiz)
		return -1;

	for (i = 0; *s && *s != '&'; s++) {
		if (i + 3 >= bufsiz)
			return -1;
		switch (*s) {
		case '%':
			/* <ctype.h> functions need an unsigned char value */
			if (!isxdigit((unsigned char)s[1]) ||
			    !isxdigit((unsigned char)s[2]))
				return -1;
			c = hexdigit(s[1]) * 16 + hexdigit(s[2]);
			if (c == 0)
				return -1;
			buf[i++] = c;
			s += 2;
			break;
		case '+':
			buf[i++] = ' ';
			break;
		default:
			buf[i++] = *s;
			break;
		}
	}
	buf[i] = '\0';

	return i;
}
727
/* Find the parameter named `s` in the query string `query`.
 * A match must be followed by '=' and must start either at the beginning of
 * `query` or right after a '&'.
 * Returns a pointer to the (still encoded) value, or NULL if not found. */
char *
getparam(const char *query, const char *s)
{
	const size_t keylen = strlen(s);
	const char *hit = query;

	while ((hit = strstr(hit, s)) != NULL) {
		if (hit[keylen] == '=' && (hit == query || hit[-1] == '&'))
			return (char *)(hit + keylen + 1);
		hit += keylen;
	}

	return NULL;
}
742
/* Check that `s` contains no control characters.
 * Returns 1 if the string is acceptable, 0 otherwise. */
int
checkparam(const char *s)
{
	for (; *s; s++) {
		/* <ctype.h> functions need an unsigned char value: a plain
		 * char may be negative for bytes >= 0x80 (e.g. UTF-8) */
		if (iscntrl((unsigned char)*s))
			return 0;
	}
	return 1;
}
751
/* Check if string has a non-empty scheme / protocol part: one or more
   letters, digits, '+', '-' or '.' directly followed by ':'.
   Returns 1 if so, 0 otherwise. */
int
uri_hasscheme(const char *s)
{
	size_t n = 0;

	while (isalnum((unsigned char)s[n]) ||
	       s[n] == '+' || s[n] == '-' || s[n] == '.')
		n++;

	/* a leading ":" is no scheme: the string is a path then */
	return (n > 0 && s[n] == ':');
}
764
765 /* Parse URI string `s` into an uri structure `u`.
766 Returns 0 on success or -1 on failure */
767 int
768 uri_parse(const char *s, struct uri *u)
769 {
770 const char *p = s;
771 char *endptr;
772 size_t i;
773 long l;
774
775 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
776 u->path[0] = u->query[0] = u->fragment[0] = '\0';
777
778 /* protocol-relative */
779 if (*p == '/' && *(p + 1) == '/') {
780 p += 2; /* skip "//" */
781 goto parseauth;
782 }
783
784 /* scheme / protocol part */
785 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
786 *p == '+' || *p == '-' || *p == '.'; p++)
787 ;
788 /* scheme, except if empty and starts with ":" then it is a path */
789 if (*p == ':' && p != s) {
790 if (*(p + 1) == '/' && *(p + 2) == '/')
791 p += 3; /* skip "://" */
792 else
793 p++; /* skip ":" */
794
795 if ((size_t)(p - s) >= sizeof(u->proto))
796 return -1; /* protocol too long */
797 memcpy(u->proto, s, p - s);
798 u->proto[p - s] = '\0';
799
800 if (*(p - 1) != '/')
801 goto parsepath;
802 } else {
803 p = s; /* no scheme format, reset to start */
804 goto parsepath;
805 }
806
807 parseauth:
808 /* userinfo (username:password) */
809 i = strcspn(p, "@/?#");
810 if (p[i] == '@') {
811 if (i >= sizeof(u->userinfo))
812 return -1; /* userinfo too long */
813 memcpy(u->userinfo, p, i);
814 u->userinfo[i] = '\0';
815 p += i + 1;
816 }
817
818 /* IPv6 address */
819 if (*p == '[') {
820 /* bracket not found, host too short or too long */
821 i = strcspn(p, "]");
822 if (p[i] != ']' || i < 3)
823 return -1;
824 i++; /* including "]" */
825 } else {
826 /* domain / host part, skip until port, path or end. */
827 i = strcspn(p, ":/?#");
828 }
829 if (i >= sizeof(u->host))
830 return -1; /* host too long */
831 memcpy(u->host, p, i);
832 u->host[i] = '\0';
833 p += i;
834
835 /* port */
836 if (*p == ':') {
837 p++;
838 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
839 return -1; /* port too long */
840 memcpy(u->port, p, i);
841 u->port[i] = '\0';
842 /* check for valid port: range 1 - 65535, may be empty */
843 errno = 0;
844 l = strtol(u->port, &endptr, 10);
845 if (i && (errno || *endptr || l <= 0 || l > 65535))
846 return -1;
847 p += i;
848 }
849
850 parsepath:
851 /* path */
852 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
853 return -1; /* path too long */
854 memcpy(u->path, p, i);
855 u->path[i] = '\0';
856 p += i;
857
858 /* query */
859 if (*p == '?') {
860 p++;
861 if ((i = strcspn(p, "#")) >= sizeof(u->query))
862 return -1; /* query too long */
863 memcpy(u->query, p, i);
864 u->query[i] = '\0';
865 p += i;
866 }
867
868 /* fragment */
869 if (*p == '#') {
870 p++;
871 if ((i = strlen(p)) >= sizeof(u->fragment))
872 return -1; /* fragment too long */
873 memcpy(u->fragment, p, i);
874 u->fragment[i] = '\0';
875 }
876
877 return 0;
878 }
879
880 int
881 main(void)
882 {
883 const char *p, *qs, *path, *showuri = "";
884 char query[1024] = "", param[1024] = "", fulluri[4096];
885 int r, _type = '1';
886
887 if (pledge("stdio inet dns rpath unveil", NULL) == -1)
888 die(500, "pledge: %s\n", strerror(errno));
889
890 #ifdef MAX_RESPONSETIMEOUT
891 signal(SIGALRM, sighandler);
892 alarm(MAX_RESPONSETIMEOUT);
893 #endif
894
895 if (!(qs = getenv("QUERY_STRING")))
896 qs = "";
897 if ((p = getparam(qs, "q"))) {
898 if (decodeparam(query, sizeof(query), p) == -1 ||
899 !checkparam(query))
900 die(400, "Invalid parameter: q\n");
901 }
902 if ((p = getparam(qs, "p"))) {
903 if (decodeparam(param, sizeof(param), p) == -1 ||
904 !checkparam(param))
905 die(400, "Invalid parameter: p\n");
906 }
907
908 path = "/";
909 if (query[0]) {
910 if (!strncmp(query, "gopher://", sizeof("gopher://") - 1)) {
911 showuri = query + sizeof("gopher://") - 1;
912 r = snprintf(fulluri, sizeof(fulluri), "%s", query);
913 } else if (!strncmp(query, "gophers://", sizeof("gophers://") - 1)) {
914 /* if "gophers://" is used then keep it so TLS is kept being used */
915 showuri = query;
916 r = snprintf(fulluri, sizeof(fulluri), "%s", query);
917 #ifdef USE_TLS
918 usetls = 1;
919 #endif
920 } else {
921 showuri = query;
922 if (uri_hasscheme(query))
923 die(400, "Invalid protocol: only gopher is supported\n");
924 r = snprintf(fulluri, sizeof(fulluri), "gopher://%s", query);
925 }
926 if (r < 0 || (size_t)r >= sizeof(fulluri))
927 die(400, "invalid URI: too long\n");
928
929 if (!uri_hasscheme(fulluri) ||
930 uri_parse(fulluri, &u) == -1)
931 die(400, "Invalid or unsupported URI: %s\n", showuri);
932
933 if (strcmp(u.proto, "gopher://") && strcmp(u.proto, "gophers://"))
934 die(400, "Invalid protocol: only gopher is supported\n");
935 if (u.host[0] == '\0')
936 die(400, "Invalid hostname\n");
937
938 if (u.path[0] == '\0')
939 memcpy(u.path, "/", 2);
940 if (u.port[0] == '\0')
941 memcpy(u.port, "70", 3);
942
943 path = u.path;
944 if (path[0] == '/') {
945 path++;
946 if (*path) {
947 _type = *path;
948 path++;
949 }
950 } else {
951 path = "";
952 }
953
954 if (isblacklisted(u.host, u.port, path))
955 die(403, "%s:%s %s: blacklisted\n", u.host, u.port, path);
956
957 #ifdef USE_TLS
958 /* setup TLS or plain connection */
959 if (usetls) {
960 setup_tls();
961 readbuf = tls_readbuf;
962 writebuf = tls_writebuf;
963 } else
964 #endif
965 {
966 setup_plain();
967 readbuf = plain_readbuf;
968 writebuf = plain_writebuf;
969 }
970
971 if (pledge("stdio", NULL) == -1)
972 err(1, "pledge");
973
974 headerset = 1;
975 switch (_type) {
976 case '1':
977 case '7':
978 break; /* handled below */
979 case '0':
980 dprintf(1, "Content-Type: text/plain; charset=utf-8\r\n\r\n");
981 servefile(u.host, u.port, path, u.query);
982 goto cleanup;
983 case 'g':
984 dprintf(1, "Content-Type: image/gif\r\n\r\n");
985 servefile(u.host, u.port, path, u.query);
986 goto cleanup;
987 case 'I':
988 /* try to set Content-Type based on extension */
989 if ((p = strrchr(path, '.'))) {
990 p++;
991 if (!strcasecmp("png", p))
992 dprintf(1, "Content-Type: image/png\r\n");
993 else if (!strcasecmp("jpg", p) || !strcasecmp("jpeg", p))
994 dprintf(1, "Content-Type: image/jpeg\r\n");
995 else if (!strcasecmp("gif", p))
996 dprintf(1, "Content-Type: image/gif\r\n");
997 }
998 write(1, "\r\n", 2);
999 servefile(u.host, u.port, path, u.query);
1000 goto cleanup;
1001 case '9':
1002 /* try to detect filename */
1003 if ((p = strrchr(path, '/')))
1004 dprintf(1, "Content-Disposition: attachment; filename=\"%s\"\r\n", p + 1);
1005 dprintf(1, "Content-Type: application/octet-stream\r\n\r\n");
1006 servefile(u.host, u.port, path, u.query);
1007 goto cleanup;
1008 default:
1009 write(1, "\r\n", 2);
1010 servefile(u.host, u.port, path, u.query);
1011 goto cleanup;
1012 }
1013 }
1014
1015 headerset = isdir = 1;
1016 fputs(
1017 "Content-Type: text/html; charset=utf-8\r\n"
1018 "\r\n"
1019 "<!DOCTYPE html>\n"
1020 "<html dir=\"ltr\">\n"
1021 "<head>\n"
1022 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
1023 "<title>", stdout);
1024 xmlencode(query);
1025 if (query[0])
1026 fputs(" - ", stdout);
1027 fputs(
1028 "Gopher HTTP proxy</title>\n"
1029 "<style type=\"text/css\">\n"
1030 "a { text-decoration: none; } a:hover { text-decoration: underline; } img { vertical-align: middle; max-width: 400px; }\n"
1031 "@media (prefers-color-scheme: dark) { body { background-color: #000; color: #bdbdbd; color-scheme: dark; } a { color: #56c8ff; } }\n"
1032 "</style>\n"
1033 "<meta name=\"robots\" content=\"noindex, nofollow\" />\n"
1034 "<meta name=\"robots\" content=\"none\" />\n"
1035 "<meta content=\"width=device-width\" name=\"viewport\" />\n"
1036 "</head>\n"
1037 "<body>\n"
1038 "<form method=\"get\" action=\"\"><pre>"
1039 " URI: <input type=\"search\" name=\"q\" value=\"", stdout);
1040 xmlencode(showuri);
1041 fputs(
1042 "\" placeholder=\"URI...\" size=\"72\" autofocus=\"autofocus\" class=\"search\" />"
1043 "<input type=\"submit\" value=\"Go for it!\" /></pre>"
1044 "</form><pre>\n", stdout);
1045
1046 if (query[0]) {
1047 if (_type != '7')
1048 param[0] = '\0';
1049 servedir(u.host, u.port, path, u.query, param);
1050 }
1051
1052 fputs("</pre>\n</body>\n</html>\n", stdout);
1053
1054 cleanup:
1055 #ifdef USE_TLS
1056 /* cleanup TLS and plain connection */
1057 if (t) {
1058 tls_close(t);
1059 tls_free(t);
1060 }
1061 #endif
1062 if (sock != -1)
1063 close(sock);
1064
1065 return 0;
1066 }