hurl.c - hurl - Gopher/HTTP/HTTPS file grabber
 (HTM) git clone git://git.codemadness.org/hurl
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       hurl.c (13950B)
       ---
            1 #include <sys/socket.h>
            2 #include <sys/time.h>
            3 
            4 #include <ctype.h>
            5 #include <err.h>
            6 #include <errno.h>
            7 #include <netdb.h>
            8 #include <locale.h>
            9 #include <signal.h>
           10 #include <stdio.h>
           11 #include <stdlib.h>
           12 #include <string.h>
           13 #include <time.h>
           14 #include <unistd.h>
           15 
           16 #include <tls.h>
           17 
           18 #include "arg.h"
           19 
           20 #define READ_BUF_SIZ        16384
           21 
           22 #ifndef __OpenBSD__
           23 #define pledge(p1,p2) 0
           24 #define unveil(p1,p2) 0
           25 #endif
           26 
           27 #ifndef TLS_CA_CERT_FILE
           28 #define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
           29 #endif
           30 
           31 /* URI */
           32 struct uri {
           33         char proto[48];     /* scheme including ":" or "://" */
           34         char userinfo[256]; /* username [:password] */
           35         char host[256];
           36         char port[6];       /* numeric port */
           37         char path[1024];
           38         char query[1024];
           39         char fragment[1024];
           40 };
           41 
           42 char *argv0;
           43 
           44 /* raw header(s) to add */
           45 static const char *config_headers = "";
           46 /* max response size in bytes, 0 is unlimited */
           47 static size_t config_maxresponsesiz = 0;
           48 /* time-out in seconds */
           49 static long long config_timeout = 0;
           50 /* legacy ciphers? */
           51 static int config_legacy = 0;
           52 /* TLS CA file */
           53 static char *config_ca_file;
           54 /* parsed URI */
           55 static struct uri u;
           56 /* socket fd */
           57 static int sock = -1;
           58 /* raw command-line argument */
           59 static char *url;
           60 /* TLS context */
           61 static struct tls *t;
           62 /* TLS config */
           63 static struct tls_config *tls_config;
           64 
           65 /* protocol handlers */
           66 int gopher_request(void);
           67 int http_request(void);
           68 
           69 struct handler {
           70         int (*handler)(void); /* function / handler / callback */
           71         const char *proto; /* protocol / scheme, "gopher://" */
           72         const char *port; /* default port */
           73         int usetls; /* setup TLS (=1) or plain connection (=0) */
           74 };
           75 
           76 static const struct handler handlers[] = {
           77         { .handler = gopher_request,  .proto = "gopher://",  .port = "70",   .usetls = 0 },
           78         { .handler = gopher_request,  .proto = "gophers://", .port = "70",   .usetls = 1 },
           79         { .handler = http_request,    .proto = "http://",    .port = "80",   .usetls = 0 },
           80         { .handler = http_request,    .proto = "https://",   .port = "443",  .usetls = 1 },
           81 };
           82 
           83 ssize_t (*readbuf)(char *, size_t);
           84 ssize_t (*writebuf)(const char *, size_t);
           85 
           86 void
           87 sighandler(int signo)
           88 {
           89         if (signo == SIGALRM)
           90                  _exit(2);
           91 }
           92 
           93 int
           94 parse_content_length(const char *s, size_t *length)
           95 {
           96         const char *p;
           97         char *end;
           98         long long l;
           99 
          100         if (!(p = strcasestr(s, "\r\nContent-Length:")))
          101                 return -1;
          102 
          103         p += sizeof("\r\nContent-Length:") - 1;
          104         p += strspn(p, " \t");
          105 
          106         if (!isdigit((unsigned char)*p))
          107                 return -1;
          108 
          109         errno = 0;
          110         l = strtoll(p, &end, 10);
          111         if (errno || p == end || (*end != '\0' && *end != '\r') || l < 0)
          112                 return -1;
          113 
          114         *length = l;
          115 
          116         return 0;
          117 }
          118 
          119 int
          120 uri_parse(const char *s, struct uri *u)
          121 {
          122         const char *p = s;
          123         char *endptr;
          124         size_t i;
          125         long l;
          126 
          127         u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
          128         u->path[0] = u->query[0] = u->fragment[0] = '\0';
          129 
          130         /* protocol-relative */
          131         if (*p == '/' && *(p + 1) == '/') {
          132                 p += 2; /* skip "//" */
          133                 goto parseauth;
          134         }
          135 
          136         /* scheme / protocol part */
          137         for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
          138                        *p == '+' || *p == '-' || *p == '.'; p++)
          139                 ;
          140         /* scheme, except if empty and starts with ":" then it is a path */
          141         if (*p == ':' && p != s) {
          142                 if (*(p + 1) == '/' && *(p + 2) == '/')
          143                         p += 3; /* skip "://" */
          144                 else
          145                         p++; /* skip ":" */
          146 
          147                 if ((size_t)(p - s) >= sizeof(u->proto))
          148                         return -1; /* protocol too long */
          149                 memcpy(u->proto, s, p - s);
          150                 u->proto[p - s] = '\0';
          151 
          152                 if (*(p - 1) != '/')
          153                         goto parsepath;
          154         } else {
          155                 p = s; /* no scheme format, reset to start */
          156                 goto parsepath;
          157         }
          158 
          159 parseauth:
          160         /* userinfo (username:password) */
          161         i = strcspn(p, "@/?#");
          162         if (p[i] == '@') {
          163                 if (i >= sizeof(u->userinfo))
          164                         return -1; /* userinfo too long */
          165                 memcpy(u->userinfo, p, i);
          166                 u->userinfo[i] = '\0';
          167                 p += i + 1;
          168         }
          169 
          170         /* IPv6 address */
          171         if (*p == '[') {
          172                 /* bracket not found, host too short or too long */
          173                 i = strcspn(p, "]");
          174                 if (p[i] != ']' || i < 3)
          175                         return -1;
          176                 i++; /* including "]" */
          177         } else {
          178                 /* domain / host part, skip until port, path or end. */
          179                 i = strcspn(p, ":/?#");
          180         }
          181         if (i >= sizeof(u->host))
          182                 return -1; /* host too long */
          183         memcpy(u->host, p, i);
          184         u->host[i] = '\0';
          185         p += i;
          186 
          187         /* port */
          188         if (*p == ':') {
          189                 p++;
          190                 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
          191                         return -1; /* port too long */
          192                 memcpy(u->port, p, i);
          193                 u->port[i] = '\0';
          194                 /* check for valid port: range 1 - 65535, may be empty */
          195                 errno = 0;
          196                 l = strtol(u->port, &endptr, 10);
          197                 if (i && (errno || *endptr || l <= 0 || l > 65535))
          198                         return -1;
          199                 p += i;
          200         }
          201 
          202 parsepath:
          203         /* path */
          204         if ((i = strcspn(p, "?#")) >= sizeof(u->path))
          205                 return -1; /* path too long */
          206         memcpy(u->path, p, i);
          207         u->path[i] = '\0';
          208         p += i;
          209 
          210         /* query */
          211         if (*p == '?') {
          212                 p++;
          213                 if ((i = strcspn(p, "#")) >= sizeof(u->query))
          214                         return -1; /* query too long */
          215                 memcpy(u->query, p, i);
          216                 u->query[i] = '\0';
          217                 p += i;
          218         }
          219 
          220         /* fragment */
          221         if (*p == '#') {
          222                 p++;
          223                 if ((i = strlen(p)) >= sizeof(u->fragment))
          224                         return -1; /* fragment too long */
          225                 memcpy(u->fragment, p, i);
          226                 u->fragment[i] = '\0';
          227         }
          228 
          229         return 0;
          230 }
          231 
          232 int
          233 edial(const char *host, const char *port)
          234 {
          235         struct addrinfo hints, *res, *res0;
          236         int error, save_errno, s;
          237         const char *cause = NULL;
          238 
          239         memset(&hints, 0, sizeof(hints));
          240         hints.ai_family = AF_UNSPEC;
          241         hints.ai_socktype = SOCK_STREAM;
          242         hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
          243         if ((error = getaddrinfo(host, port, &hints, &res0)))
          244                 errx(1, "%s: %s: %s:%s", __func__, gai_strerror(error), host, port);
          245         s = -1;
          246         for (res = res0; res; res = res->ai_next) {
          247                 s = socket(res->ai_family, res->ai_socktype,
          248                            res->ai_protocol);
          249                 if (s == -1) {
          250                         cause = "socket";
          251                         continue;
          252                 }
          253 
          254                 if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
          255                         cause = "connect";
          256                         save_errno = errno;
          257                         close(s);
          258                         errno = save_errno;
          259                         s = -1;
          260                         continue;
          261                 }
          262                 break;
          263         }
          264         if (s == -1)
          265                 errx(1, "%s: %s: %s:%s", __func__, cause, host, port);
          266         freeaddrinfo(res0);
          267 
          268         return s;
          269 }
          270 
          271 void
          272 setup_plain(void)
          273 {
          274         if (pledge("stdio dns inet", NULL) == -1)
          275                 err(1, "pledge");
          276 
          277         sock = edial(u.host, u.port);
          278 }
          279 
          280 void
          281 setup_tls(void)
          282 {
          283         if (tls_init())
          284                 errx(1, "tls_init failed");
          285         if (!(tls_config = tls_config_new()))
          286                 errx(1, "tls config failed");
          287         if (config_legacy) {
          288                 /* enable legacy cipher and negotiation. */
          289                 if (tls_config_set_ciphers(tls_config, "legacy"))
          290                         errx(1, "tls_config_set_ciphers: %s",
          291                              tls_config_error(tls_config));
          292         }
          293         if (config_ca_file) {
          294                 if (unveil(config_ca_file, "r") == -1)
          295                         err(1, "unveil: %s", config_ca_file);
          296                 if (tls_config_set_ca_file(tls_config, config_ca_file) == -1)
          297                         errx(1, "tls_config_set_ca_file: %s: %s", config_ca_file,
          298                              tls_config_error(tls_config));
          299         } else {
          300                 if (unveil(TLS_CA_CERT_FILE, "r") == -1)
          301                         err(1, "unveil: %s", TLS_CA_CERT_FILE);
          302         }
          303 
          304         if (pledge("stdio dns inet rpath", NULL) == -1)
          305                 err(1, "pledge");
          306 
          307         if (!(t = tls_client()))
          308                 errx(1, "tls_client: %s", tls_error(t));
          309         if (tls_configure(t, tls_config))
          310                 errx(1, "tls_configure: %s", tls_error(t));
          311 
          312         sock = edial(u.host, u.port);
          313         if (tls_connect_socket(t, sock, u.host) == -1)
          314                 errx(1, "tls_connect: %s", tls_error(t));
          315 }
          316 
          317 ssize_t
          318 tls_writebuf(const char *buf, size_t buflen)
          319 {
          320         const char *errstr;
          321         const char *p;
          322         size_t len;
          323         ssize_t r, written = 0;
          324 
          325         for (len = buflen, p = buf; len > 0; ) {
          326                 r = tls_write(t, p, len);
          327                 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
          328                         continue;
          329                 } else if (r == -1) {
          330                         errstr = tls_error(t);
          331                         fprintf(stderr, "tls_write: %s\n", errstr ? errstr : "");
          332                         return -1;
          333                 }
          334                 p += r;
          335                 len -= r;
          336                 written += r;
          337         }
          338         return written;
          339 }
          340 
          341 ssize_t
          342 tls_readbuf(char *buf, size_t bufsiz)
          343 {
          344         const char *errstr;
          345         ssize_t r, len;
          346 
          347         for (len = 0; bufsiz > 0;) {
          348                 r = tls_read(t, buf + len, bufsiz);
          349                 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
          350                         continue;
          351                 } else if (r == 0) {
          352                         break;
          353                 } else if (r == -1) {
          354                         errstr = tls_error(t);
          355                         fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
          356                         return -1;
          357                 }
          358                 len += r;
          359                 bufsiz -= r;
          360         }
          361         return len;
          362 }
          363 
          364 ssize_t
          365 plain_writebuf(const char *buf, size_t buflen)
          366 {
          367         ssize_t r;
          368 
          369         if ((r = write(sock, buf, buflen)) == -1)
          370                 fprintf(stderr, "write: %s\n", strerror(errno));
          371         return r;
          372 }
          373 
          374 ssize_t
          375 plain_readbuf(char *buf, size_t bufsiz)
          376 {
          377         ssize_t r, len;
          378 
          379         for (len = 0; bufsiz > 0;) {
          380                 r = read(sock, buf + len, bufsiz);
          381                 if (r == 0) {
          382                         break;
          383                 } else if (r == -1) {
          384                         fprintf(stderr, "read: %s\n", strerror(errno));
          385                         return -1;
          386                 }
          387                 len += r;
          388                 bufsiz -= r;
          389         }
          390         return len;
          391 }
          392 
          393 int
          394 http_request(void)
          395 {
          396         char buf[READ_BUF_SIZ], *p;
          397         size_t bodylen, expectedlen, n, len;
          398         ssize_t r;
          399         int cs, httpok = 0, ret = 1, stdport;
          400 
          401         stdport = u.port[0] == '\0' || strcmp(u.port, t ? "443" : "80") == 0;
          402 
          403         /* create and send HTTP header */
          404         r = snprintf(buf, sizeof(buf),
          405                 "GET %s%s%s HTTP/1.0\r\n"
          406                 "Host: %s%s%s\r\n"
          407                 "Connection: close\r\n"
          408                 "%s%s"
          409                 "\r\n",
          410                 u.path[0] ? u.path : "/",
          411                 u.query[0] ? "?" : "", u.query,
          412                 u.host,
          413                 stdport ? "" : ":",
          414                 stdport ? "" : u.port,
          415                 config_headers, config_headers[0] ? "\r\n" : "");
          416         if (r < 0 || (size_t)r >= sizeof(buf)) {
          417                 fprintf(stderr, "not writing header because it is truncated");
          418                 goto err;
          419         }
          420 
          421         if ((r = writebuf(buf, r)) == -1)
          422                 goto err;
          423 
          424         /* NOTE: HTTP header must fit in the buffer, buffer size is -1 to NUL
          425                  terminate the buffer for a string comparison. */
          426         if ((r = readbuf(buf, sizeof(buf) - 1)) == -1)
          427                 goto err;
          428         len = r;
          429         buf[len] = '\0'; /* NUL terminate buffer */
          430 
          431         if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
          432             !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
          433                 httpok = 1;
          434 
          435         if (!(p = strstr(buf, "\r\n\r\n"))) {
          436                 fprintf(stderr, "no HTTP header found or header too big\n");
          437                 goto err;
          438         }
          439         *p = '\0'; /* NUL terminate header part */
          440         cs = parse_content_length(buf, &expectedlen);
          441         p += strlen("\r\n\r\n");
          442         bodylen = len - (p - buf); /* (partial) body after header */
          443 
          444         if (httpok) {
          445                 n = len - (p - buf);
          446                 fwrite(p, 1, n, stdout);
          447                 if (ferror(stdout)) {
          448                         fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
          449                         goto err;
          450                 }
          451         } else {
          452                 /* if not 200 OK print header */
          453                 fputs(buf, stderr);
          454                 fputs("\r\n\r\n", stderr);
          455                 /* NOTE: we are nice and keep reading (not closing) until the server is done. */
          456         }
          457 
          458         while ((r = readbuf(buf, sizeof(buf))) > 0) {
          459                 len += r;
          460                 bodylen += r;
          461 
          462                 if (httpok) {
          463                         fwrite(buf, 1, r, stdout);
          464                         if (ferror(stdout)) {
          465                                 fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
          466                                 goto err;
          467                         }
          468                 }
          469 
          470                 if (config_maxresponsesiz && len >= config_maxresponsesiz)
          471                         break;
          472         }
          473         if (r == -1)
          474                 goto err;
          475         if (config_maxresponsesiz && len >= config_maxresponsesiz) {
          476                 fprintf(stderr, "response too big: %zu >= %zu\n",
          477                         len, config_maxresponsesiz);
          478                 goto err;
          479         }
          480         if (cs != -1 && expectedlen != bodylen) {
          481                 fprintf(stderr, "Content-Length mismatch: %zu expected != %zu received\n",
          482                         expectedlen, bodylen);
          483                 goto err;
          484         }
          485         ret = 0;
          486 
          487 err:
          488         return httpok ? ret : 2;
          489 }
          490 
          491 int
          492 gopher_request(void)
          493 {
          494         char buf[READ_BUF_SIZ];
          495         const char *path;
          496         size_t len = 0;
          497         ssize_t r;
          498         int ret = 1;
          499 
          500         /* create and send path, skip type part, empty path is allowed,
          501            see RFC 4266 The gopher URI Scheme - section 2.1 */
          502         path = u.path;
          503         if (*path == '/') {
          504                 path++;
          505                 if (*path)
          506                         path++; /* skip type */
          507         }
          508 
          509         r = snprintf(buf, sizeof(buf), "%s%s%s\r\n",
          510                 path, u.query[0] ? "?" : "", u.query);
          511         if (r < 0 || (size_t)r >= sizeof(buf)) {
          512                 fprintf(stderr, "not writing header because it is truncated");
          513                 goto err;
          514         }
          515 
          516         if ((r = writebuf(buf, r)) == -1)
          517                 goto err;
          518 
          519         while ((r = readbuf(buf, sizeof(buf))) > 0) {
          520                 len += r;
          521 
          522                 fwrite(buf, 1, r, stdout);
          523                 if (ferror(stdout)) {
          524                         fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
          525                         goto err;
          526                 }
          527 
          528                 if (config_maxresponsesiz && len >= config_maxresponsesiz)
          529                         break;
          530         }
          531         if (r == -1)
          532                 goto err;
          533 
          534         if (config_maxresponsesiz && len >= config_maxresponsesiz) {
          535                 fprintf(stderr, "response too big: %zu >= %zu\n",
          536                         len, config_maxresponsesiz);
          537                 goto err;
          538         }
          539         ret = 0;
          540 
          541 err:
          542         return ret;
          543 }
          544 
          545 void
          546 usage(void)
          547 {
          548         fprintf(stderr, "usage: %s [-c ca_file] [-H headers] [-l] "
          549                 "[-m maxresponse] [-t timeout] url\n", argv0);
          550         exit(1);
          551 }
          552 
          553 int
          554 main(int argc, char **argv)
          555 {
          556         char *end;
          557         size_t i;
          558         int statuscode;
          559         long long l;
          560 
          561         ARGBEGIN {
          562         case 'c':
          563                 config_ca_file = EARGF(usage());
          564                 break;
          565         case 'H': /* header(s) */
          566                 config_headers = EARGF(usage());
          567                 break;
          568         case 'l': /* legacy ciphers */
          569                 config_legacy = 1;
          570                 break;
          571         case 'm': /* max filesize */
          572                 errno = 0;
          573                 l = strtoll(EARGF(usage()), &end, 10);
          574                 if (errno || *end != '\0' || l < 0)
          575                         usage();
          576                 config_maxresponsesiz = l;
          577                 break;
          578         case 't': /* timeout */
          579                 errno = 0;
          580                 l = strtoll(EARGF(usage()), &end, 10);
          581                 if (errno || *end != '\0' || l < 0 || l >= 65535)
          582                         usage();
          583                 config_timeout = l;
          584                 break;
          585         default:
          586                 usage();
          587         } ARGEND
          588 
          589         if (argc != 1)
          590                 usage();
          591 
          592         url = argv[0];
          593         if (uri_parse(url, &u) == -1)
          594                 errx(1, "invalid URL: %s", url);
          595         if (u.userinfo[0])
          596                 errx(1, "userinfo field not supported in the URL: %s", url);
          597 
          598         if (config_timeout > 0) {
          599                 signal(SIGALRM, sighandler);
          600                 alarm(config_timeout);
          601         }
          602 
          603         /* match a protocol handler */
          604         for (i = 0; i < sizeof(handlers) / sizeof(*handlers); i++) {
          605                 if (strcmp(u.proto, handlers[i].proto))
          606                         continue;
          607                 if (!u.port[0])
          608                         strcpy(u.port, handlers[i].port); /* default port if unset */
          609 
          610                 /* setup TLS or plain connection */
          611                 if (handlers[i].usetls) {
          612                         setup_tls();
          613                         readbuf = tls_readbuf;
          614                         writebuf = tls_writebuf;
          615                 } else {
          616                         setup_plain();
          617                         readbuf = plain_readbuf;
          618                         writebuf = plain_writebuf;
          619                 }
          620 
          621                 if (pledge("stdio", NULL) == -1)
          622                         err(1, "pledge");
          623 
          624                 statuscode = handlers[i].handler();
          625 
          626                 /* cleanup TLS and plain connection */
          627                 if (t) {
          628                         tls_close(t);
          629                         tls_free(t);
          630                 }
          631                 if (sock != -1)
          632                         close(sock);
          633 
          634                 return statuscode;
          635         }
          636         if (u.proto[0])
          637                 errx(1, "unsupported protocol specified: %s", u.proto);
          638         else
          639                 errx(1, "no protocol specified");
          640 
          641         return 1;
          642 }