gopherproxy.c - gopherproxy-c - Gopher HTTP proxy in C (CGI)
 (HTM) git clone git://git.codemadness.org/gopherproxy-c
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       gopherproxy.c (23849B)
       ---
            1 #include <sys/socket.h>
            2 #include <sys/time.h>
            3 #include <sys/types.h>
            4 
            5 #include <ctype.h>
            6 #include <errno.h>
            7 #include <netdb.h>
            8 #include <signal.h>
            9 #include <stdarg.h>
           10 #include <stdio.h>
           11 #include <stdlib.h>
           12 #include <string.h>
           13 #include <unistd.h>
           14 
           15 #include <tls.h>
           16 
           17 #ifndef TLS_CA_CERT_FILE
           18 #define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
           19 #endif
           20 
           21 #ifdef USE_TLS
           22 static int usetls = 0;
           23 /* TLS context */
           24 static struct tls *t;
           25 /* TLS config */
           26 static struct tls_config *tls_config;
           27 #endif
           28 
           29 #define MAX_LINE_SIZ        2048    /* max size of a DirEntity in bytes */
           30 #define READ_BUF_SIZ        16384   /* read buffer size in bytes */
           31 
           32 #define MAX_RESPONSETIMEOUT 10      /* timeout in seconds */
           33 #define MAX_RESPONSESIZ     4000000 /* max download size in bytes */
           34 
           35 #ifndef __OpenBSD__
           36 #define pledge(p1,p2) 0
           37 #define unveil(p1,p2) 0
           38 #endif
           39 
/* URI split into its components.
 * Every component lives in a fixed-size character array, so the array sizes
 * below are hard upper bounds on what can be stored (presumably as
 * NUL-terminated strings; host and port are handed to edial() as such). */
struct uri {
        char proto[48];     /* scheme including ":" or "://" */
        char userinfo[256]; /* username [:password] */
        char host[256];     /* host to connect to */
        char port[6];       /* numeric port */
        char path[1024];
        char query[1024];
        char fragment[1024];
};
           50 
/* One parsed line (item) of a Gopher directory listing, as filled in by
 * servedir(). The fields are separated by TAB characters in the input line. */
struct visited {
        int _type;           /* item type: first character of the line */
        char username[1024]; /* text shown to the user for this item */
        char path[1024];     /* selector / path */
        char server[256];    /* server of the item */
        char port[8];        /* port of the item */
};
           58 
/* State for reading a stream line by line through readbuf(), see
 * linebuf_init() and linebuf_get(). */
struct linebuf {
        /* line buffer */
        char *line;                /* current line: points to linebuf or directly into buf */
        char linebuf[MAX_LINE_SIZ]; /* storage for lines that span more than one read */
        size_t linelen;            /* length of the current line in bytes */
        size_t lineoff;            /* number of bytes of a partial line held in linebuf */
        /* read buffer */
        char buf[READ_BUF_SIZ];
        char *bufoff, *bufend;     /* unread data is [bufoff, bufend) */
        int err;                   /* errno-style code (EIO, ENOMEM) once reading failed */
        int eof;                   /* set when the stream ended with an unterminated line */
};
           71 
/* parsed URI */
static struct uri u;
/* socket fd */
static int sock = -1;

/* headerset: while 0, die() still writes the CGI "Status:" and Content-Type
 * header before its message.
 * isdir: when non-zero, die() closes the HTML page ("</pre></body></html>")
 * after its message. */
int headerset = 0, isdir = 0;
/* I/O callbacks used to talk to the Gopher server (see servefile(),
 * servedir() and linebuf_get()). Where they are assigned is not visible in
 * this part of the file; presumably the plain_* or tls_* variants. */
ssize_t (*readbuf)(char *, size_t);
ssize_t (*writebuf)(const char *, size_t);
           80 
           81 void
           82 sighandler(int signo)
           83 {
           84         if (signo == SIGALRM)
           85                  _exit(2);
           86 }
           87 
           88 /* print to stderr, print error message of errno and exit().
           89  * Unlike BSD err() it does not prefix __progname */
           90 void
           91 err(int exitstatus, const char *fmt, ...)
           92 {
           93         va_list ap;
           94         int saved_errno;
           95 
           96         saved_errno = errno;
           97 
           98         if (fmt) {
           99                 va_start(ap, fmt);
          100                 vfprintf(stderr, fmt, ap);
          101                 va_end(ap);
          102                 fputs(": ", stderr);
          103         }
          104         fprintf(stderr, "%s\n", strerror(saved_errno));
          105 
          106         exit(exitstatus);
          107 }
          108 
          109 /* print to stderr and exit().
          110  * Unlike BSD errx() it does not prefix __progname */
          111 void
          112 errx(int exitstatus, const char *fmt, ...)
          113 {
          114         va_list ap;
          115 
          116         if (fmt) {
          117                 va_start(ap, fmt);
          118                 vfprintf(stderr, fmt, ap);
          119                 va_end(ap);
          120         }
          121         fputs("\n", stderr);
          122 
          123         exit(exitstatus);
          124 }
          125 
          126 void
          127 die(int code, const char *fmt, ...)
          128 {
          129         va_list ap;
          130 
          131         if (!headerset) {
          132                 switch (code) {
          133                 case 400:
          134                         fputs("Status: 400 Bad Request\r\n", stdout);
          135                         break;
          136                 case 403:
          137                         fputs("Status: 403 Permission Denied\r\n", stdout);
          138                         break;
          139                 default:
          140                         fputs("Status: 500 Internal Server Error\r\n", stdout);
          141                         break;
          142                 }
          143                 fputs("Content-Type: text/plain; charset=utf-8\r\n\r\n", stdout);
          144         }
          145 
          146         /* write error to stderr and stdout */
          147 
          148         va_start(ap, fmt);
          149         vfprintf(stderr, fmt, ap);
          150         va_end(ap);
          151 
          152         va_start(ap, fmt);
          153         vfprintf(stdout, fmt, ap);
          154         va_end(ap);
          155 
          156         if (isdir)
          157                 fputs("</pre>\n</body>\n</html>\n", stdout);
          158 
          159         exit(1);
          160 }
          161 
          162 /* Escape characters below as HTML 2.0 / XML 1.0. */
          163 void
          164 xmlencode(const char *s)
          165 {
          166         for (; *s; s++) {
          167                 switch(*s) {
          168                 case '<':  fputs("&lt;", stdout);   break;
          169                 case '>':  fputs("&gt;", stdout);   break;
          170                 case '\'': fputs("&#39;", stdout);  break;
          171                 case '&':  fputs("&amp;", stdout);  break;
          172                 case '"':  fputs("&quot;", stdout); break;
          173                 default:   putchar(*s);
          174                 }
          175         }
          176 }
          177 
          178 /* Percent-encode characters so that the string can be used as a value for a
          179    query string.
          180    Additional characters get encoded so that no xmlencode() is needed */
          181 void
          182 encodeparam(const char *s)
          183 {
          184         for (; *s; s++) {
          185                 switch (*s) {
          186                 case '<':
          187                 case '>':
          188                 case '\'':
          189                 case '&':
          190                 case '"':
          191                 case '#':
          192                 case '=':
          193                         printf("%%%02X", (unsigned char)*s);
          194                         break;
          195                 default:
          196                         putchar(*s);
          197                         break;
          198                 }
          199         }
          200 }
          201 
          202 int
          203 edial(const char *host, const char *port)
          204 {
          205         struct addrinfo hints, *res, *res0;
          206         int error, save_errno, s;
          207         const char *cause = NULL;
          208 
          209         memset(&hints, 0, sizeof(hints));
          210         hints.ai_family = AF_UNSPEC;
          211         hints.ai_socktype = SOCK_STREAM;
          212         hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
          213         if ((error = getaddrinfo(host, port, &hints, &res0)))
          214                 die(500, "%s: %s: %s:%s\n", __func__, gai_strerror(error), host, port);
          215         s = -1;
          216         for (res = res0; res; res = res->ai_next) {
          217                 s = socket(res->ai_family, res->ai_socktype,
          218                            res->ai_protocol);
          219                 if (s == -1) {
          220                         cause = "socket";
          221                         continue;
          222                 }
          223 
          224                 if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
          225                         cause = "connect";
          226                         save_errno = errno;
          227                         close(s);
          228                         errno = save_errno;
          229                         s = -1;
          230                         continue;
          231                 }
          232                 break;
          233         }
          234         if (s == -1)
          235                 die(500, "%s: %s: %s:%s\n", __func__, cause, host, port);
          236         freeaddrinfo(res0);
          237 
          238         return s;
          239 }
          240 
          241 void
          242 setup_plain(void)
          243 {
          244         if (pledge("stdio dns inet", NULL) == -1)
          245                 err(1, "pledge");
          246 
          247         sock = edial(u.host, u.port);
          248 }
          249 
          250 #ifdef USE_TLS
          251 void
          252 setup_tls(void)
          253 {
          254         if (tls_init())
          255                 errx(1, "tls_init failed");
          256         if (!(tls_config = tls_config_new()))
          257                 errx(1, "tls config failed");
          258         if (unveil(TLS_CA_CERT_FILE, "r") == -1)
          259                 err(1, "unveil: %s", TLS_CA_CERT_FILE);
          260 #if 0
          261         if (tls_config_set_ca_file(tls_config, TLS_CA_CERT_FILE) == -1)
          262                 errx(1, "tls_config_set_ca_file: %s: %s", TLS_CA_CERT_FILE,
          263                      tls_config_error(tls_config));
          264 #endif
          265 
          266         if (pledge("stdio dns inet rpath", NULL) == -1)
          267                 err(1, "pledge");
          268 
          269         if (!(t = tls_client()))
          270                 errx(1, "tls_client: %s", tls_error(t));
          271         if (tls_configure(t, tls_config))
          272                 errx(1, "tls_configure: %s", tls_error(t));
          273 
          274         sock = edial(u.host, u.port);
          275         if (tls_connect_socket(t, sock, u.host) == -1)
          276                 die(500, "tls_connect: %s\n", tls_error(t));
          277 }
          278 
          279 ssize_t
          280 tls_writebuf(const char *buf, size_t buflen)
          281 {
          282         const char *errstr;
          283         const char *p;
          284         size_t len;
          285         ssize_t r, written = 0;
          286 
          287         for (len = buflen, p = buf; len > 0; ) {
          288                 r = tls_write(t, p, len);
          289                 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
          290                         continue;
          291                 } else if (r == -1) {
          292                         errstr = tls_error(t);
          293                         fprintf(stderr, "tls_write: %s\n", errstr ? errstr : "");
          294                         return -1;
          295                 }
          296                 p += r;
          297                 len -= r;
          298                 written += r;
          299         }
          300         return written;
          301 }
          302 
          303 ssize_t
          304 tls_readbuf(char *buf, size_t bufsiz)
          305 {
          306         const char *errstr;
          307         ssize_t r, len;
          308 
          309         for (len = 0; bufsiz > 0;) {
          310                 r = tls_read(t, buf + len, bufsiz);
          311                 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
          312                         continue;
          313                 } else if (r == 0) {
          314                         break;
          315                 } else if (r == -1) {
          316                         errstr = tls_error(t);
          317                         fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
          318                         return -1;
          319                 }
          320                 len += r;
          321                 bufsiz -= r;
          322         }
          323         return len;
          324 }
          325 #endif
          326 
          327 ssize_t
          328 plain_writebuf(const char *buf, size_t buflen)
          329 {
          330         ssize_t r;
          331 
          332         if ((r = write(sock, buf, buflen)) == -1)
          333                 fprintf(stderr, "write: %s\n", strerror(errno));
          334 
          335         return r;
          336 }
          337 
          338 ssize_t
          339 plain_readbuf(char *buf, size_t bufsiz)
          340 {
          341         ssize_t r, len;
          342 
          343         for (len = 0; bufsiz > 0;) {
          344                 r = read(sock, buf + len, bufsiz);
          345                 if (r == 0) {
          346                         break;
          347                 } else if (r == -1) {
          348                         fprintf(stderr, "read: %s\n", strerror(errno));
          349                         return -1;
          350                 }
          351                 len += r;
          352                 bufsiz -= r;
          353         }
          354         return len;
          355 }
          356 
          357 void
          358 linebuf_init(struct linebuf *b)
          359 {
          360         memset(b, 0, sizeof(struct linebuf));
          361         b->line = b->linebuf;
          362 }
          363 
/* Get the next line from the stream read through readbuf().
 *
 * On success b->line points to the NUL-terminated line without its '\n'
 * (a preceding '\r' is kept) and b->linelen holds its length. b->line either
 * points directly into the read buffer (the line was complete in one read) or
 * to b->linebuf (the line was assembled from several reads).
 *
 * Returns the length of the line. This is 0 for an empty line, and also 0 at
 * the end of the stream when no partial line is pending. Data at the end of
 * the stream that is not terminated by a newline is returned as a last line.
 * Returns -1 on a read error (b->err = EIO), when a line does not fit into
 * b->linebuf (b->err = ENOMEM), or when called again after an error or after
 * the unterminated last line was returned. */
ssize_t
linebuf_get(struct linebuf *b)
{
        size_t len;
        ssize_t n;
        char *p;

        b->line = b->linebuf;
        while (!(b->err) && !(b->eof)) {
                /* need to read more */
                if (b->bufoff >= b->bufend) {
                        b->bufoff = b->buf;
                        b->bufend = b->buf;
                        n = readbuf(b->buf, sizeof(b->buf));
                        if (n == -1)
                                b->err = EIO;

                        /* use remaining data even if not terminated by a newline */
                        if (n == 0 && b->lineoff > 0) {
                                b->eof = 1;
                                return b->lineoff;
                        }

                        /* n is 0 (end of stream) or -1 (error) in the else case */
                        if (n > 0)
                                b->bufend = b->buf + n;
                        else
                                return n;
                }

                /* search first newline */
                if ((p = memchr(b->bufoff, '\n', b->bufend - b->bufoff))) {
                        len = (p - b->bufoff);
                        /* full line in buffer, no need to copy to line buffer */
                        if (b->lineoff == 0)
                                b->line = b->bufoff; /* just point to buffer, no copy */
                } else {
                        /* use remaining data into line buffer and read more */
                        len = (b->bufend - b->bufoff);
                }

                /* append this piece to the line buffer, keeping room for the NUL */
                if (b->line == b->linebuf) {
                        if (b->lineoff + len + 1 >= sizeof(b->linebuf)) {
                                b->err = ENOMEM;
                                return -1;
                        }
                        memcpy(b->linebuf + b->lineoff, b->bufoff, len);
                }

                b->lineoff += len;
                b->linelen = b->lineoff;
                /* when the line points into the read buffer this overwrites the '\n' */
                b->line[b->linelen] = '\0';

                if (p) {
                        b->bufoff = p + 1; /* after newline */
                        b->lineoff = 0; /* reset line: start at beginning */
                        return b->linelen;
                } else {
                        b->bufoff = b->bufend; /* read more */
                }
        }
        return -1; /* only reached when b->err or b->eof was already set */
}
          426 
          427 int
          428 isblacklisted(const char *host, const char *port, const char *path)
          429 {
          430         char *p;
          431 
          432         if (strcmp(port, "70") && strcmp(port, "7070"))
          433                 return 1;
          434         if ((p = strstr(host, ".onion")) && strlen(p) == strlen(".onion"))
          435                 return 1;
          436         return 0;
          437 }
          438 
          439 char *
          440 typestr(int c)
          441 {
          442         switch (c) {
          443         case '0': return "  TEXT";
          444         case '1': return "   DIR";
          445         case '2': return "   CSO";
          446         case '3': return "   ERR";
          447         case '4': return "   MAC";
          448         case '5': return "   DOS";
          449         case '6': return " UUENC";
          450         case '7': return "SEARCH";
          451         case '8': return "TELNET";
          452         case '9': return "   BIN";
          453         case 'g': return "   GIF";
          454         case 'h': return "  HTML"; /* non-standard */
          455         case 's': return "   SND"; /* non-standard */
          456         case '+': return "MIRROR";
          457         case 'I': return "   IMG";
          458         case 'T': return "TN3270";
          459         default:
          460                 /* "Characters '0' through 'Z' are reserved." (ASCII) */
          461                 if (c >= '0' && c <= 'Z')
          462                         return "RESERV";
          463                 else
          464                         return "      ";
          465         }
          466 }
          467 
          468 void
          469 servefile(const char *server, const char *port, const char *path, const char *query)
          470 {
          471         char buf[READ_BUF_SIZ];
          472         int r, w;
          473         size_t totalsiz = 0;
          474 
          475         w = snprintf(buf, sizeof(buf), "%s%s%s\r\n", path, query[0] ? "?" : "", query);
          476         if (w < 0 || (size_t)w >= sizeof(buf))
          477                 die(500, "servefile: path too long\n");
          478         if (writebuf(buf, w) == -1)
          479                 die(500, "servefile: writebuf failed\n");
          480 
          481         while ((r = readbuf(buf, sizeof(buf))) > 0) {
          482                 /* too big total response */
          483                 totalsiz += r;
          484                 if (totalsiz > MAX_RESPONSESIZ) {
          485                         dprintf(1, "--- transfer too big, truncated ---\n");
          486                         break;
          487                 }
          488 
          489                 if ((w = write(1, buf, r)) == -1)
          490                         die(500, "write: %s\n", strerror(errno));
          491         }
          492         if (r == -1)
          493                 die(500, "read: %s\n", strerror(errno));
          494 }
          495 
          496 void
          497 servedir(const char *server, const char *port, const char *path, const char *query, const char *param)
          498 {
          499         struct visited v;
          500         struct linebuf lb;
          501         const char *prefix = "";
          502         char buf[2048], *uri;
          503         char *line;
          504         size_t totalsiz, linenr;
          505         ssize_t n;
          506         char primarytype = '\0';
          507         int i, len, w;
          508 
          509 #ifdef USE_TLS
          510         if (usetls)
          511                 prefix = "gophers://";
          512 #endif
          513 
          514         if (param[0])
          515                 w = snprintf(buf, sizeof(buf), "%s%s%s\t%s\r\n", path, query[0] ? "?" : "", query, param);
          516         else
          517                 w = snprintf(buf, sizeof(buf), "%s%s%s\r\n", path, query[0] ? "?" : "", query);
          518 
          519         if (w < 0 || (size_t)w >= sizeof(buf))
          520                 die(500, "servedir: path too long\n");
          521         if (writebuf(buf, w) == -1)
          522                 die(500, "servedir: writebuf failed\n");
          523 
          524         linebuf_init(&lb);
          525 
          526         totalsiz = 0;
          527         for (linenr = 1; (n = linebuf_get(&lb)) > 0; linenr++) {
          528                 line = lb.line;
          529 
          530                 /* too big total response */
          531                 if (n > 0)
          532                         totalsiz += n;
          533                 if (totalsiz > MAX_RESPONSESIZ) {
          534                         dprintf(1, "--- transfer too big, truncated ---\n");
          535                         break;
          536                 }
          537 
          538                 if (n > 0 && line[n - 1] == '\n')
          539                         line[--n] = '\0';
          540                 if (n > 0 && line[n - 1] == '\r')
          541                         line[--n] = '\0';
          542                 if (n == 1 && line[0] == '.')
          543                         break;
          544 
          545                 memset(&v, 0, sizeof(v));
          546 
          547                 v._type = line[0];
          548                 if (v._type != '+')
          549                         primarytype = v._type;
          550                 else if (!primarytype)
          551                         die(500, "%s:%s %s:%d: undefined primary server\n",
          552                                 server, port, path, linenr);
          553 
          554                 /* "username" */
          555                 i = 1;
          556                 len = strcspn(line + i, "\t");
          557                 if (len + 1 < sizeof(v.username)) {
          558                         memcpy(v.username, line + i, len);
          559                         v.username[len] = '\0';
          560                 } else  {
          561                         die(500, "%s:%s %s:%d: username field too long\n",
          562                                 server, port, path, linenr);
          563                 }
          564                 if (line[i + len] == '\t')
          565                         i += len + 1;
          566                 else
          567                         die(500, "%s:%s %s:%d: invalid line / field count\n",
          568                                 server, port, path, linenr);
          569 
          570                 /* selector / path */
          571                 len = strcspn(line + i, "\t");
          572                 if (len + 1 < sizeof(v.path)) {
          573                         memcpy(v.path, line + i, len);
          574                         v.path[len] = '\0';
          575                 } else {
          576                         die(500, "%s:%s %s:%d: path field too long\n",
          577                                 server, port, path, linenr);
          578                 }
          579                 if (line[i + len] == '\t')
          580                         i += len + 1;
          581                 else
          582                         die(500, "%s:%s %s:%d: invalid line / field count\n",
          583                                 server, port, path, linenr);
          584 
          585                 /* server */
          586                 len = strcspn(line + i, "\t");
          587                 if (len + 1 < sizeof(v.server)) {
          588                         memcpy(v.server, line + i, len);
          589                         v.server[len] = '\0';
          590                 } else {
          591                         die(500, "%s:%s %s:%d: server field too long\n",
          592                                 server, port, path, linenr);
          593                 }
          594                 if (line[i + len] == '\t')
          595                         i += len + 1;
          596                 else
          597                         die(500, "%s:%s %s:%d: invalid line / field count\n",
          598                                 server, port, path, linenr);
          599 
          600                 /* port */
          601                 len = strcspn(line + i, "\t");
          602                 if (len + 1 < sizeof(v.port)) {
          603                         memcpy(v.port, line + i, len);
          604                         v.port[len] = '\0';
          605                 } else {
          606                         die(500, "%s:%s %s:%d: port field too long\n",
          607                                 server, port, path, linenr);
          608                 }
          609 
          610                 if (!strcmp(v.port, "70"))
          611                         snprintf(buf, sizeof(buf), "%s%s/%c%s",
          612                                 prefix, v.server, primarytype, v.path);
          613                 else
          614                         snprintf(buf, sizeof(buf), "%s%s:%s/%c%s",
          615                                 prefix, v.server, v.port, primarytype, v.path);
          616                 uri = buf;
          617 
          618                 switch (primarytype) {
          619                 case 'i': /* info */
          620                 case '3': /* error */
          621                         fputs(typestr(v._type), stdout);
          622                         fputs(" ", stdout);
          623                         xmlencode(v.username);
          624                         break;
          625                 case '7': /* search */
          626                         fputs("</pre><form method=\"get\" action=\"\"><pre>", stdout);
          627                         fputs(typestr(v._type), stdout);
          628                         fputs(" <input type=\"hidden\" name=\"q\" value=\"", stdout);
          629                         xmlencode(uri);
          630                         fputs("\" /><input type=\"search\" placeholder=\"", stdout);
          631                         xmlencode(v.username);
          632                         fputs(
          633                                 "\" name=\"p\" value=\"\" size=\"72\" />"
          634                                 "<input type=\"submit\" value=\"Search\" /></pre></form><pre>", stdout);
          635                         break;
          636                 case '8': /* telnet */
          637                 case 'T': /* tn3270 */
          638                         fputs(typestr(v._type), stdout);
          639                         printf(" <a href=\"%s://", primarytype == '8' ? "telnet" : "tn3270");
          640                         if (v.path[0]) {
          641                                 xmlencode(v.path);
          642                                 fputs("@", stdout);
          643                         }
          644                         xmlencode(v.server);
          645                         fputs(":", stdout);
          646                         xmlencode(v.port);
          647                         fputs("\">", stdout);
          648                         xmlencode(v.username);
          649                         fputs("</a>", stdout);
          650                         break;
          651                 case 'I': /* image: show inline */
          652                         fputs(typestr(v._type), stdout);
          653                         fputs(" <a href=\"?q=", stdout);
          654                         encodeparam(uri);
          655                         fputs("\">", stdout);
          656 
          657                         fputs("<img src=\"?q=", stdout);
          658                         encodeparam(uri);
          659                         fputs("\" />", stdout);
          660 
          661                         fputs("</a>", stdout);
          662                         break;
          663                 default: /* other */
          664                         fputs(typestr(v._type), stdout);
          665                         fputs(" <a href=\"", stdout);
          666                         if (primarytype == 'h' && !strncmp(v.path, "URL:", sizeof("URL:") - 1)) {
          667                                 xmlencode(v.path + sizeof("URL:") - 1);
          668                         } else {
          669                                 fputs("?q=", stdout);
          670                                 encodeparam(uri);
          671                         }
          672                         fputs("\">", stdout);
          673                         xmlencode(v.username);
          674                         fputs("</a>", stdout);
          675 
          676                 }
          677                 putchar('\n');
          678         }
          679         if (lb.err)
          680                 die(500, "%s:%s after line %d: error reading line\n", server, port, linenr);
          681 }
          682 
          683 int
          684 hexdigit(int c)
          685 {
          686         if (c >= '0' && c <= '9')
          687                 return c - '0';
          688         else if (c >= 'A' && c <= 'F')
          689                 return c - 'A' + 10;
          690         else if (c >= 'a' && c <= 'f')
          691                 return c - 'a' + 10;
          692 
          693         return 0;
          694 }
          695 
          696 /* decode until NUL separator or end of "key". */
          697 int
          698 decodeparam(char *buf, size_t bufsiz, const char *s)
          699 {
          700         size_t i;
          701 
          702         if (!bufsiz)
          703                 return -1;
          704 
          705         for (i = 0; *s && *s != '&'; s++) {
          706                 if (i + 3 >= bufsiz)
          707                         return -1;
          708                 switch (*s) {
          709                 case '%':
          710                         if (!isxdigit(*(s+1)) || !isxdigit(*(s+2)))
          711                                 return -1;
          712                         buf[i++] = hexdigit(*(s+1)) * 16 + hexdigit(*(s+2));
          713                         s += 2;
          714                         break;
          715                 case '+':
          716                         buf[i++] = ' ';
          717                         break;
          718                 default:
          719                         buf[i++] = *s;
          720                         break;
          721                 }
          722         }
          723         buf[i] = '\0';
          724 
          725         return i;
          726 }
          727 
          728 char *
          729 getparam(const char *query, const char *s)
          730 {
          731         const char *p;
          732         size_t len;
          733 
          734         len = strlen(s);
          735         for (p = query; (p = strstr(p, s)); p += len) {
          736                 if (p[len] == '=' && (p == query || p[-1] == '&'))
          737                         return (char *)p + len + 1;
          738         }
          739 
          740         return NULL;
          741 }
          742 
          743 int
          744 checkparam(const char *s)
          745 {
          746         for (; *s; s++)
          747                 if (iscntrl(*s))
          748                         return 0;
          749         return 1;
          750 }
          751 
          752 /* Check if string has a non-empty scheme / protocol part. */
          753 int
          754 uri_hasscheme(const char *s)
          755 {
          756         const char *p = s;
          757 
          758         for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
          759                        *p == '+' || *p == '-' || *p == '.'; p++)
          760                 ;
          761         /* scheme, except if empty and starts with ":" then it is a path */
          762         return (*p == ':' && p != s);
          763 }
          764 
          765 /* Parse URI string `s` into an uri structure `u`.
          766    Returns 0 on success or -1 on failure */
          767 int
          768 uri_parse(const char *s, struct uri *u)
          769 {
          770         const char *p = s;
          771         char *endptr;
          772         size_t i;
          773         long l;
          774 
          775         u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
          776         u->path[0] = u->query[0] = u->fragment[0] = '\0';
          777 
          778         /* protocol-relative */
          779         if (*p == '/' && *(p + 1) == '/') {
          780                 p += 2; /* skip "//" */
          781                 goto parseauth;
          782         }
          783 
          784         /* scheme / protocol part */
          785         for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
          786                        *p == '+' || *p == '-' || *p == '.'; p++)
          787                 ;
          788         /* scheme, except if empty and starts with ":" then it is a path */
          789         if (*p == ':' && p != s) {
          790                 if (*(p + 1) == '/' && *(p + 2) == '/')
          791                         p += 3; /* skip "://" */
          792                 else
          793                         p++; /* skip ":" */
          794 
          795                 if ((size_t)(p - s) >= sizeof(u->proto))
          796                         return -1; /* protocol too long */
          797                 memcpy(u->proto, s, p - s);
          798                 u->proto[p - s] = '\0';
          799 
          800                 if (*(p - 1) != '/')
          801                         goto parsepath;
          802         } else {
          803                 p = s; /* no scheme format, reset to start */
          804                 goto parsepath;
          805         }
          806 
          807 parseauth:
          808         /* userinfo (username:password) */
          809         i = strcspn(p, "@/?#");
          810         if (p[i] == '@') {
          811                 if (i >= sizeof(u->userinfo))
          812                         return -1; /* userinfo too long */
          813                 memcpy(u->userinfo, p, i);
          814                 u->userinfo[i] = '\0';
          815                 p += i + 1;
          816         }
          817 
          818         /* IPv6 address */
          819         if (*p == '[') {
          820                 /* bracket not found, host too short or too long */
          821                 i = strcspn(p, "]");
          822                 if (p[i] != ']' || i < 3)
          823                         return -1;
          824                 i++; /* including "]" */
          825         } else {
          826                 /* domain / host part, skip until port, path or end. */
          827                 i = strcspn(p, ":/?#");
          828         }
          829         if (i >= sizeof(u->host))
          830                 return -1; /* host too long */
          831         memcpy(u->host, p, i);
          832         u->host[i] = '\0';
          833         p += i;
          834 
          835         /* port */
          836         if (*p == ':') {
          837                 p++;
          838                 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
          839                         return -1; /* port too long */
          840                 memcpy(u->port, p, i);
          841                 u->port[i] = '\0';
          842                 /* check for valid port: range 1 - 65535, may be empty */
          843                 errno = 0;
          844                 l = strtol(u->port, &endptr, 10);
          845                 if (i && (errno || *endptr || l <= 0 || l > 65535))
          846                         return -1;
          847                 p += i;
          848         }
          849 
          850 parsepath:
          851         /* path */
          852         if ((i = strcspn(p, "?#")) >= sizeof(u->path))
          853                 return -1; /* path too long */
          854         memcpy(u->path, p, i);
          855         u->path[i] = '\0';
          856         p += i;
          857 
          858         /* query */
          859         if (*p == '?') {
          860                 p++;
          861                 if ((i = strcspn(p, "#")) >= sizeof(u->query))
          862                         return -1; /* query too long */
          863                 memcpy(u->query, p, i);
          864                 u->query[i] = '\0';
          865                 p += i;
          866         }
          867 
          868         /* fragment */
          869         if (*p == '#') {
          870                 p++;
          871                 if ((i = strlen(p)) >= sizeof(u->fragment))
          872                         return -1; /* fragment too long */
          873                 memcpy(u->fragment, p, i);
          874                 u->fragment[i] = '\0';
          875         }
          876 
          877         return 0;
          878 }
          879 
          880 int
          881 main(void)
          882 {
          883         const char *p, *qs, *path, *showuri = "";
          884         char query[1024] = "", param[1024] = "", fulluri[4096];
          885         int r, _type = '1';
          886 
          887         if (pledge("stdio inet dns rpath unveil", NULL) == -1)
          888                 die(500, "pledge: %s\n", strerror(errno));
          889 
          890 #ifdef MAX_RESPONSETIMEOUT
          891         signal(SIGALRM, sighandler);
          892         alarm(MAX_RESPONSETIMEOUT);
          893 #endif
          894 
          895         if (!(qs = getenv("QUERY_STRING")))
          896                 qs = "";
          897         if ((p = getparam(qs, "q"))) {
          898                 if (decodeparam(query, sizeof(query), p) == -1 ||
          899                     !checkparam(query))
          900                         die(400, "Invalid parameter: q\n");
          901         }
          902         if ((p = getparam(qs, "p"))) {
          903                 if (decodeparam(param, sizeof(param), p) == -1 ||
          904                     !checkparam(param))
          905                         die(400, "Invalid parameter: p\n");
          906         }
          907 
          908         path = "/";
          909         if (query[0]) {
          910                 if (!strncmp(query, "gopher://", sizeof("gopher://") - 1)) {
          911                         showuri = query + sizeof("gopher://") - 1;
          912                         r = snprintf(fulluri, sizeof(fulluri), "%s", query);
          913                 } else if (!strncmp(query, "gophers://", sizeof("gophers://") - 1)) {
          914                         /* if "gophers://" is used then keep it so TLS is kept being used */
          915                         showuri = query;
          916                         r = snprintf(fulluri, sizeof(fulluri), "%s", query);
          917 #ifdef USE_TLS
          918                         usetls = 1;
          919 #endif
          920                 } else {
          921                         showuri = query;
          922                         if (uri_hasscheme(query))
          923                                 die(400, "Invalid protocol: only gopher is supported\n");
          924                         r = snprintf(fulluri, sizeof(fulluri), "gopher://%s", query);
          925                 }
          926                 if (r < 0 || (size_t)r >= sizeof(fulluri))
          927                         die(400, "invalid URI: too long\n");
          928 
          929                 if (!uri_hasscheme(fulluri) ||
          930                     uri_parse(fulluri, &u) == -1)
          931                         die(400, "Invalid or unsupported URI: %s\n", showuri);
          932 
          933                 if (strcmp(u.proto, "gopher://") && strcmp(u.proto, "gophers://"))
          934                         die(400, "Invalid protocol: only gopher is supported\n");
          935                 if (u.host[0] == '\0')
          936                         die(400, "Invalid hostname\n");
          937 
          938                 if (u.path[0] == '\0')
          939                         memcpy(u.path, "/", 2);
          940                 if (u.port[0] == '\0')
          941                         memcpy(u.port, "70", 3);
          942 
          943                 path = u.path;
          944                 if (path[0] == '/') {
          945                         path++;
          946                         if (*path) {
          947                                 _type = *path;
          948                                 path++;
          949                         }
          950                 } else {
          951                         path = "";
          952                 }
          953 
          954                 if (isblacklisted(u.host, u.port, path))
          955                         die(403, "%s:%s %s: blacklisted\n", u.host, u.port, path);
          956 
          957 #ifdef USE_TLS
          958                 /* setup TLS or plain connection */
          959                 if (usetls) {
          960                         setup_tls();
          961                         readbuf = tls_readbuf;
          962                         writebuf = tls_writebuf;
          963                 } else
          964 #endif
          965                 {
          966                         setup_plain();
          967                         readbuf = plain_readbuf;
          968                         writebuf = plain_writebuf;
          969                 }
          970 
          971                 if (pledge("stdio", NULL) == -1)
          972                         err(1, "pledge");
          973 
          974                 headerset = 1;
          975                 switch (_type) {
          976                 case '1':
          977                 case '7':
          978                         break; /* handled below */
          979                 case '0':
          980                         dprintf(1, "Content-Type: text/plain; charset=utf-8\r\n\r\n");
          981                         servefile(u.host, u.port, path, u.query);
          982                         goto cleanup;
          983                 case 'g':
          984                         dprintf(1, "Content-Type: image/gif\r\n\r\n");
          985                         servefile(u.host, u.port, path, u.query);
          986                         goto cleanup;
          987                 case 'I':
          988                         /* try to set Content-Type based on extension */
          989                         if ((p = strrchr(path, '.'))) {
          990                                 p++;
          991                                 if (!strcasecmp("png", p))
          992                                         dprintf(1, "Content-Type: image/png\r\n");
          993                                 else if (!strcasecmp("jpg", p) || !strcasecmp("jpeg", p))
          994                                         dprintf(1, "Content-Type: image/jpeg\r\n");
          995                                 else if (!strcasecmp("gif", p))
          996                                         dprintf(1, "Content-Type: image/gif\r\n");
          997                         }
          998                         write(1, "\r\n", 2);
          999                         servefile(u.host, u.port, path, u.query);
         1000                         goto cleanup;
         1001                 case '9':
         1002                         /* try to detect filename */
         1003                         if ((p = strrchr(path, '/')))
         1004                                 dprintf(1, "Content-Disposition: attachment; filename=\"%s\"\r\n", p + 1);
         1005                         dprintf(1, "Content-Type: application/octet-stream\r\n\r\n");
         1006                         servefile(u.host, u.port, path, u.query);
         1007                         goto cleanup;
         1008                 default:
         1009                         write(1, "\r\n", 2);
         1010                         servefile(u.host, u.port, path, u.query);
         1011                         goto cleanup;
         1012                 }
         1013         }
         1014 
         1015         headerset = isdir = 1;
         1016         fputs(
         1017                 "Content-Type: text/html; charset=utf-8\r\n"
         1018                 "\r\n"
         1019                 "<!DOCTYPE html>\n"
         1020                 "<html dir=\"ltr\">\n"
         1021                 "<head>\n"
         1022                 "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" />\n"
         1023                 "<title>", stdout);
         1024         xmlencode(query);
         1025         if (query[0])
         1026                 fputs(" - ", stdout);
         1027         fputs(
         1028                 "Gopher HTTP proxy</title>\n"
         1029                 "<style type=\"text/css\">\n"
         1030                 "a { text-decoration: none; } a:hover { text-decoration: underline; } img { vertical-align: middle; max-width: 400px; }\n"
         1031                 "@media (prefers-color-scheme: dark) { body { background-color: #000; color: #bdbdbd; color-scheme: dark; } a { color: #56c8ff; } }\n"
         1032                 "</style>\n"
         1033                 "<meta name=\"robots\" content=\"noindex, nofollow\" />\n"
         1034                 "<meta name=\"robots\" content=\"none\" />\n"
         1035                 "<meta content=\"width=device-width\" name=\"viewport\" />\n"
         1036                 "</head>\n"
         1037                 "<body>\n"
         1038                 "<form method=\"get\" action=\"\"><pre>"
         1039                 "  URI: <input type=\"search\" name=\"q\" value=\"", stdout);
         1040         xmlencode(showuri);
         1041         fputs(
         1042                 "\" placeholder=\"URI...\" size=\"72\" autofocus=\"autofocus\" class=\"search\" />"
         1043                 "<input type=\"submit\" value=\"Go for it!\" /></pre>"
         1044                 "</form><pre>\n", stdout);
         1045 
         1046         if (query[0]) {
         1047                 if (_type != '7')
         1048                         param[0] = '\0';
         1049                 servedir(u.host, u.port, path, u.query, param);
         1050         }
         1051 
         1052         fputs("</pre>\n</body>\n</html>\n", stdout);
         1053 
         1054 cleanup:
         1055 #ifdef USE_TLS
         1056         /* cleanup TLS and plain connection */
         1057         if (t) {
         1058                 tls_close(t);
         1059                 tls_free(t);
         1060         }
         1061 #endif
         1062         if (sock != -1)
         1063                 close(sock);
         1064 
         1065         return 0;
         1066 }