hurl.c - hurl - Gopher/HTTP/HTTPS file grabber
(HTM) git clone git://git.codemadness.org/hurl
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
hurl.c (13950B)
---
1 #include <sys/socket.h>
2 #include <sys/time.h>
3
4 #include <ctype.h>
5 #include <err.h>
6 #include <errno.h>
7 #include <netdb.h>
8 #include <locale.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <time.h>
14 #include <unistd.h>
15
16 #include <tls.h>
17
18 #include "arg.h"
19
/* size in bytes of the buffer used to build requests and to read responses */
#define READ_BUF_SIZ 16384

/*
 * Outside OpenBSD the pledge() and unveil() calls expand to the constant 0,
 * so the "== -1" failure checks around them can never trigger.
 */
#ifndef __OpenBSD__
#define pledge(p1, p2) 0
#define unveil(p1, p2) 0
#endif

/* CA certificate file unveiled when no -c option is given; can be predefined at build time */
#ifndef TLS_CA_CERT_FILE
#define TLS_CA_CERT_FILE "/etc/ssl/cert.pem"
#endif
30
/*
 * Components of a parsed URI as filled in by uri_parse(). Every member is a
 * NUL-terminated string; a component that is absent from the input is left
 * as the empty string.
 */
struct uri {
	char proto[48];      /* scheme including the trailing ":" or "://" */
	char userinfo[256];  /* username [:password], the text before "@" */
	char host[256];      /* host name or address, IPv6 keeps its "[" "]" */
	char port[6];        /* numeric port */
	char path[1024];     /* path, up to but excluding "?" or "#" */
	char query[1024];    /* text after "?", excluding the "?" itself */
	char fragment[1024]; /* text after "#", excluding the "#" itself */
};
41
42 char *argv0;
43
44 /* raw header(s) to add */
45 static const char *config_headers = "";
46 /* max response size in bytes, 0 is unlimited */
47 static size_t config_maxresponsesiz = 0;
48 /* time-out in seconds */
49 static long long config_timeout = 0;
50 /* legacy ciphers? */
51 static int config_legacy = 0;
52 /* TLS CA file */
53 static char *config_ca_file;
54 /* parsed URI */
55 static struct uri u;
56 /* socket fd */
57 static int sock = -1;
58 /* raw command-line argument */
59 static char *url;
60 /* TLS context */
61 static struct tls *t;
62 /* TLS config */
63 static struct tls_config *tls_config;
64
/* protocol handlers; each returns the exit status of the program */
int gopher_request(void);
int http_request(void);

/* describes one supported URL scheme and how to serve it */
struct handler {
	int (*handler)(void); /* function / handler / callback */
	const char *proto;    /* protocol / scheme, "gopher://" */
	const char *port;     /* default port */
	int usetls;           /* setup TLS (=1) or plain connection (=0) */
};

/* table of supported schemes, searched in order by main() */
static const struct handler handlers[] = {
	{
		.proto   = "gopher://",
		.port    = "70",
		.usetls  = 0,
		.handler = gopher_request,
	},
	{
		.proto   = "gophers://",
		.port    = "70",
		.usetls  = 1,
		.handler = gopher_request,
	},
	{
		.proto   = "http://",
		.port    = "80",
		.usetls  = 0,
		.handler = http_request,
	},
	{
		.proto   = "https://",
		.port    = "443",
		.usetls  = 1,
		.handler = http_request,
	},
};

/* transport I/O callbacks; main() points them at the tls_* or plain_* pair */
ssize_t (*readbuf)(char *, size_t);
ssize_t (*writebuf)(const char *, size_t);
85
/*
 * Signal handler: when the alarm fires terminate the process at once with
 * exit status 2. Any other signal number is ignored.
 */
void
sighandler(int signo)
{
	if (signo != SIGALRM)
		return;

	_exit(2);
}
92
/*
 * Find the value of the first "Content-Length:" header line in s. The header
 * name is matched case-insensitively and must directly follow a "\r\n".
 * Implemented by hand because strcasestr() is neither ISO C nor POSIX.
 * Returns a pointer just past the ":" or NULL when the header is absent.
 */
static const char *
content_length_value(const char *s)
{
	static const char name[] = "\r\nContent-Length:";
	size_t i;

	for (; *s; s++) {
		/* a NUL in s never equals a byte of name, so s is not overrun */
		for (i = 0; name[i]; i++) {
			if (tolower((unsigned char)s[i]) !=
			    tolower((unsigned char)name[i]))
				break;
		}
		if (!name[i])
			return s + i;
	}
	return NULL;
}

/*
 * Parse the Content-Length header from the HTTP response header block s
 * (status line followed by "\r\n"-separated header lines, NUL-terminated).
 *
 * The value must be a non-negative decimal number, optionally surrounded by
 * spaces or tabs, and must be followed by the end of the line or of the
 * string.
 *
 * Returns 0 and stores the value in *length on success. Returns -1 and
 * leaves *length untouched when the header is missing, malformed or does
 * not fit in a size_t.
 */
int
parse_content_length(const char *s, size_t *length)
{
	const char *p;
	char *end;
	long long l;

	if (!(p = content_length_value(s)))
		return -1;

	p += strspn(p, " \t");

	/* strtoll() would also accept a sign or other leading white-space */
	if (!isdigit((unsigned char)*p))
		return -1;

	errno = 0;
	l = strtoll(p, &end, 10);
	if (errno || p == end || l < 0)
		return -1;

	/* optional white-space is allowed after the value */
	end += strspn(end, " \t");
	if (*end != '\0' && *end != '\r')
		return -1;

	/* reject values that would be truncated by the conversion below */
	if ((unsigned long long)l > (size_t)-1)
		return -1;

	*length = (size_t)l;

	return 0;
}
118
119 int
120 uri_parse(const char *s, struct uri *u)
121 {
122 const char *p = s;
123 char *endptr;
124 size_t i;
125 long l;
126
127 u->proto[0] = u->userinfo[0] = u->host[0] = u->port[0] = '\0';
128 u->path[0] = u->query[0] = u->fragment[0] = '\0';
129
130 /* protocol-relative */
131 if (*p == '/' && *(p + 1) == '/') {
132 p += 2; /* skip "//" */
133 goto parseauth;
134 }
135
136 /* scheme / protocol part */
137 for (; isalpha((unsigned char)*p) || isdigit((unsigned char)*p) ||
138 *p == '+' || *p == '-' || *p == '.'; p++)
139 ;
140 /* scheme, except if empty and starts with ":" then it is a path */
141 if (*p == ':' && p != s) {
142 if (*(p + 1) == '/' && *(p + 2) == '/')
143 p += 3; /* skip "://" */
144 else
145 p++; /* skip ":" */
146
147 if ((size_t)(p - s) >= sizeof(u->proto))
148 return -1; /* protocol too long */
149 memcpy(u->proto, s, p - s);
150 u->proto[p - s] = '\0';
151
152 if (*(p - 1) != '/')
153 goto parsepath;
154 } else {
155 p = s; /* no scheme format, reset to start */
156 goto parsepath;
157 }
158
159 parseauth:
160 /* userinfo (username:password) */
161 i = strcspn(p, "@/?#");
162 if (p[i] == '@') {
163 if (i >= sizeof(u->userinfo))
164 return -1; /* userinfo too long */
165 memcpy(u->userinfo, p, i);
166 u->userinfo[i] = '\0';
167 p += i + 1;
168 }
169
170 /* IPv6 address */
171 if (*p == '[') {
172 /* bracket not found, host too short or too long */
173 i = strcspn(p, "]");
174 if (p[i] != ']' || i < 3)
175 return -1;
176 i++; /* including "]" */
177 } else {
178 /* domain / host part, skip until port, path or end. */
179 i = strcspn(p, ":/?#");
180 }
181 if (i >= sizeof(u->host))
182 return -1; /* host too long */
183 memcpy(u->host, p, i);
184 u->host[i] = '\0';
185 p += i;
186
187 /* port */
188 if (*p == ':') {
189 p++;
190 if ((i = strcspn(p, "/?#")) >= sizeof(u->port))
191 return -1; /* port too long */
192 memcpy(u->port, p, i);
193 u->port[i] = '\0';
194 /* check for valid port: range 1 - 65535, may be empty */
195 errno = 0;
196 l = strtol(u->port, &endptr, 10);
197 if (i && (errno || *endptr || l <= 0 || l > 65535))
198 return -1;
199 p += i;
200 }
201
202 parsepath:
203 /* path */
204 if ((i = strcspn(p, "?#")) >= sizeof(u->path))
205 return -1; /* path too long */
206 memcpy(u->path, p, i);
207 u->path[i] = '\0';
208 p += i;
209
210 /* query */
211 if (*p == '?') {
212 p++;
213 if ((i = strcspn(p, "#")) >= sizeof(u->query))
214 return -1; /* query too long */
215 memcpy(u->query, p, i);
216 u->query[i] = '\0';
217 p += i;
218 }
219
220 /* fragment */
221 if (*p == '#') {
222 p++;
223 if ((i = strlen(p)) >= sizeof(u->fragment))
224 return -1; /* fragment too long */
225 memcpy(u->fragment, p, i);
226 u->fragment[i] = '\0';
227 }
228
229 return 0;
230 }
231
/*
 * Resolve host and the numeric port and connect a stream socket to the
 * first address that accepts the connection.
 *
 * host may be an IPv6 literal in URI form ("[::1]"): the brackets are
 * removed before resolving because getaddrinfo() expects the bare address.
 *
 * Returns the connected socket. Never returns on failure: the process
 * exits with status 1 after printing the failing step and its reason.
 */
int
edial(const char *host, const char *port)
{
	struct addrinfo hints, *res, *res0;
	char addr[256];
	const char *cause = NULL;
	size_t hostlen;
	int error, save_errno, s;

	hostlen = strlen(host);
	if (hostlen >= 2 && host[0] == '[' && host[hostlen - 1] == ']') {
		if (hostlen - 2 >= sizeof(addr))
			errx(1, "%s: host too long: %s", __func__, host);
		memcpy(addr, host + 1, hostlen - 2);
		addr[hostlen - 2] = '\0';
		host = addr;
	}

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_flags = AI_NUMERICSERV; /* numeric port only */
	if ((error = getaddrinfo(host, port, &hints, &res0)))
		errx(1, "%s: %s: %s:%s", __func__, gai_strerror(error), host, port);

	s = -1;
	for (res = res0; res; res = res->ai_next) {
		s = socket(res->ai_family, res->ai_socktype,
		           res->ai_protocol);
		if (s == -1) {
			cause = "socket";
			continue;
		}

		if (connect(s, res->ai_addr, res->ai_addrlen) == -1) {
			cause = "connect";
			/* close() may clobber the errno of connect() */
			save_errno = errno;
			close(s);
			errno = save_errno;
			s = -1;
			continue;
		}
		break;
	}
	if (s == -1) {
		if (!cause)
			errx(1, "%s: no address found: %s:%s", __func__, host, port);
		/* err() instead of errx(): also report why the last attempt failed */
		err(1, "%s: %s: %s:%s", __func__, cause, host, port);
	}
	freeaddrinfo(res0);

	return s;
}
270
271 void
272 setup_plain(void)
273 {
274 if (pledge("stdio dns inet", NULL) == -1)
275 err(1, "pledge");
276
277 sock = edial(u.host, u.port);
278 }
279
280 void
281 setup_tls(void)
282 {
283 if (tls_init())
284 errx(1, "tls_init failed");
285 if (!(tls_config = tls_config_new()))
286 errx(1, "tls config failed");
287 if (config_legacy) {
288 /* enable legacy cipher and negotiation. */
289 if (tls_config_set_ciphers(tls_config, "legacy"))
290 errx(1, "tls_config_set_ciphers: %s",
291 tls_config_error(tls_config));
292 }
293 if (config_ca_file) {
294 if (unveil(config_ca_file, "r") == -1)
295 err(1, "unveil: %s", config_ca_file);
296 if (tls_config_set_ca_file(tls_config, config_ca_file) == -1)
297 errx(1, "tls_config_set_ca_file: %s: %s", config_ca_file,
298 tls_config_error(tls_config));
299 } else {
300 if (unveil(TLS_CA_CERT_FILE, "r") == -1)
301 err(1, "unveil: %s", TLS_CA_CERT_FILE);
302 }
303
304 if (pledge("stdio dns inet rpath", NULL) == -1)
305 err(1, "pledge");
306
307 if (!(t = tls_client()))
308 errx(1, "tls_client: %s", tls_error(t));
309 if (tls_configure(t, tls_config))
310 errx(1, "tls_configure: %s", tls_error(t));
311
312 sock = edial(u.host, u.port);
313 if (tls_connect_socket(t, sock, u.host) == -1)
314 errx(1, "tls_connect: %s", tls_error(t));
315 }
316
317 ssize_t
318 tls_writebuf(const char *buf, size_t buflen)
319 {
320 const char *errstr;
321 const char *p;
322 size_t len;
323 ssize_t r, written = 0;
324
325 for (len = buflen, p = buf; len > 0; ) {
326 r = tls_write(t, p, len);
327 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
328 continue;
329 } else if (r == -1) {
330 errstr = tls_error(t);
331 fprintf(stderr, "tls_write: %s\n", errstr ? errstr : "");
332 return -1;
333 }
334 p += r;
335 len -= r;
336 written += r;
337 }
338 return written;
339 }
340
341 ssize_t
342 tls_readbuf(char *buf, size_t bufsiz)
343 {
344 const char *errstr;
345 ssize_t r, len;
346
347 for (len = 0; bufsiz > 0;) {
348 r = tls_read(t, buf + len, bufsiz);
349 if (r == TLS_WANT_POLLIN || r == TLS_WANT_POLLOUT) {
350 continue;
351 } else if (r == 0) {
352 break;
353 } else if (r == -1) {
354 errstr = tls_error(t);
355 fprintf(stderr, "tls_read: %s\n", errstr ? errstr : "");
356 return -1;
357 }
358 len += r;
359 bufsiz -= r;
360 }
361 return len;
362 }
363
364 ssize_t
365 plain_writebuf(const char *buf, size_t buflen)
366 {
367 ssize_t r;
368
369 if ((r = write(sock, buf, buflen)) == -1)
370 fprintf(stderr, "write: %s\n", strerror(errno));
371 return r;
372 }
373
374 ssize_t
375 plain_readbuf(char *buf, size_t bufsiz)
376 {
377 ssize_t r, len;
378
379 for (len = 0; bufsiz > 0;) {
380 r = read(sock, buf + len, bufsiz);
381 if (r == 0) {
382 break;
383 } else if (r == -1) {
384 fprintf(stderr, "read: %s\n", strerror(errno));
385 return -1;
386 }
387 len += r;
388 bufsiz -= r;
389 }
390 return len;
391 }
392
393 int
394 http_request(void)
395 {
396 char buf[READ_BUF_SIZ], *p;
397 size_t bodylen, expectedlen, n, len;
398 ssize_t r;
399 int cs, httpok = 0, ret = 1, stdport;
400
401 stdport = u.port[0] == '\0' || strcmp(u.port, t ? "443" : "80") == 0;
402
403 /* create and send HTTP header */
404 r = snprintf(buf, sizeof(buf),
405 "GET %s%s%s HTTP/1.0\r\n"
406 "Host: %s%s%s\r\n"
407 "Connection: close\r\n"
408 "%s%s"
409 "\r\n",
410 u.path[0] ? u.path : "/",
411 u.query[0] ? "?" : "", u.query,
412 u.host,
413 stdport ? "" : ":",
414 stdport ? "" : u.port,
415 config_headers, config_headers[0] ? "\r\n" : "");
416 if (r < 0 || (size_t)r >= sizeof(buf)) {
417 fprintf(stderr, "not writing header because it is truncated");
418 goto err;
419 }
420
421 if ((r = writebuf(buf, r)) == -1)
422 goto err;
423
424 /* NOTE: HTTP header must fit in the buffer, buffer size is -1 to NUL
425 terminate the buffer for a string comparison. */
426 if ((r = readbuf(buf, sizeof(buf) - 1)) == -1)
427 goto err;
428 len = r;
429 buf[len] = '\0'; /* NUL terminate buffer */
430
431 if (!strncmp(buf, "HTTP/1.0 200 ", sizeof("HTTP/1.0 200 ") - 1) ||
432 !strncmp(buf, "HTTP/1.1 200 ", sizeof("HTTP/1.1 200 ") - 1))
433 httpok = 1;
434
435 if (!(p = strstr(buf, "\r\n\r\n"))) {
436 fprintf(stderr, "no HTTP header found or header too big\n");
437 goto err;
438 }
439 *p = '\0'; /* NUL terminate header part */
440 cs = parse_content_length(buf, &expectedlen);
441 p += strlen("\r\n\r\n");
442 bodylen = len - (p - buf); /* (partial) body after header */
443
444 if (httpok) {
445 n = len - (p - buf);
446 fwrite(p, 1, n, stdout);
447 if (ferror(stdout)) {
448 fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
449 goto err;
450 }
451 } else {
452 /* if not 200 OK print header */
453 fputs(buf, stderr);
454 fputs("\r\n\r\n", stderr);
455 /* NOTE: we are nice and keep reading (not closing) until the server is done. */
456 }
457
458 while ((r = readbuf(buf, sizeof(buf))) > 0) {
459 len += r;
460 bodylen += r;
461
462 if (httpok) {
463 fwrite(buf, 1, r, stdout);
464 if (ferror(stdout)) {
465 fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
466 goto err;
467 }
468 }
469
470 if (config_maxresponsesiz && len >= config_maxresponsesiz)
471 break;
472 }
473 if (r == -1)
474 goto err;
475 if (config_maxresponsesiz && len >= config_maxresponsesiz) {
476 fprintf(stderr, "response too big: %zu >= %zu\n",
477 len, config_maxresponsesiz);
478 goto err;
479 }
480 if (cs != -1 && expectedlen != bodylen) {
481 fprintf(stderr, "Content-Length mismatch: %zu expected != %zu received\n",
482 expectedlen, bodylen);
483 goto err;
484 }
485 ret = 0;
486
487 err:
488 return httpok ? ret : 2;
489 }
490
491 int
492 gopher_request(void)
493 {
494 char buf[READ_BUF_SIZ];
495 const char *path;
496 size_t len = 0;
497 ssize_t r;
498 int ret = 1;
499
500 /* create and send path, skip type part, empty path is allowed,
501 see RFC 4266 The gopher URI Scheme - section 2.1 */
502 path = u.path;
503 if (*path == '/') {
504 path++;
505 if (*path)
506 path++; /* skip type */
507 }
508
509 r = snprintf(buf, sizeof(buf), "%s%s%s\r\n",
510 path, u.query[0] ? "?" : "", u.query);
511 if (r < 0 || (size_t)r >= sizeof(buf)) {
512 fprintf(stderr, "not writing header because it is truncated");
513 goto err;
514 }
515
516 if ((r = writebuf(buf, r)) == -1)
517 goto err;
518
519 while ((r = readbuf(buf, sizeof(buf))) > 0) {
520 len += r;
521
522 fwrite(buf, 1, r, stdout);
523 if (ferror(stdout)) {
524 fprintf(stderr, "fwrite: stdout: %s\n", strerror(errno));
525 goto err;
526 }
527
528 if (config_maxresponsesiz && len >= config_maxresponsesiz)
529 break;
530 }
531 if (r == -1)
532 goto err;
533
534 if (config_maxresponsesiz && len >= config_maxresponsesiz) {
535 fprintf(stderr, "response too big: %zu >= %zu\n",
536 len, config_maxresponsesiz);
537 goto err;
538 }
539 ret = 0;
540
541 err:
542 return ret;
543 }
544
545 void
546 usage(void)
547 {
548 fprintf(stderr, "usage: %s [-c ca_file] [-H headers] [-l] "
549 "[-m maxresponse] [-t timeout] url\n", argv0);
550 exit(1);
551 }
552
553 int
554 main(int argc, char **argv)
555 {
556 char *end;
557 size_t i;
558 int statuscode;
559 long long l;
560
561 ARGBEGIN {
562 case 'c':
563 config_ca_file = EARGF(usage());
564 break;
565 case 'H': /* header(s) */
566 config_headers = EARGF(usage());
567 break;
568 case 'l': /* legacy ciphers */
569 config_legacy = 1;
570 break;
571 case 'm': /* max filesize */
572 errno = 0;
573 l = strtoll(EARGF(usage()), &end, 10);
574 if (errno || *end != '\0' || l < 0)
575 usage();
576 config_maxresponsesiz = l;
577 break;
578 case 't': /* timeout */
579 errno = 0;
580 l = strtoll(EARGF(usage()), &end, 10);
581 if (errno || *end != '\0' || l < 0 || l >= 65535)
582 usage();
583 config_timeout = l;
584 break;
585 default:
586 usage();
587 } ARGEND
588
589 if (argc != 1)
590 usage();
591
592 url = argv[0];
593 if (uri_parse(url, &u) == -1)
594 errx(1, "invalid URL: %s", url);
595 if (u.userinfo[0])
596 errx(1, "userinfo field not supported in the URL: %s", url);
597
598 if (config_timeout > 0) {
599 signal(SIGALRM, sighandler);
600 alarm(config_timeout);
601 }
602
603 /* match a protocol handler */
604 for (i = 0; i < sizeof(handlers) / sizeof(*handlers); i++) {
605 if (strcmp(u.proto, handlers[i].proto))
606 continue;
607 if (!u.port[0])
608 strcpy(u.port, handlers[i].port); /* default port if unset */
609
610 /* setup TLS or plain connection */
611 if (handlers[i].usetls) {
612 setup_tls();
613 readbuf = tls_readbuf;
614 writebuf = tls_writebuf;
615 } else {
616 setup_plain();
617 readbuf = plain_readbuf;
618 writebuf = plain_writebuf;
619 }
620
621 if (pledge("stdio", NULL) == -1)
622 err(1, "pledge");
623
624 statuscode = handlers[i].handler();
625
626 /* cleanup TLS and plain connection */
627 if (t) {
628 tls_close(t);
629 tls_free(t);
630 }
631 if (sock != -1)
632 close(sock);
633
634 return statuscode;
635 }
636 if (u.proto[0])
637 errx(1, "unsupported protocol specified: %s", u.proto);
638 else
639 errx(1, "no protocol specified");
640
641 return 1;
642 }