From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.5 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,NORMAL_HTTP_TO_IP, NUMERIC_HTTP_ADDR shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4B4551FAF0 for ; Sat, 10 Dec 2022 01:55:19 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=80x24.org; s=selector1; t=1670637319; bh=M1x9Hhqc3EYLwlX76QUZ82WMLn5ZuERGWc3AqgoNNiU=; h=From:To:Subject:Date:In-Reply-To:References:From; b=X8EdhU4SgaMv9ROBEqT4MIqBWM1a9l6+vASIqeyhuINw/MpGFX1HtZdXjeNrWiN+p OdYuw1NC4kVwTEYE3bezDqZE8+ZYthReqlR3KMEWLeoOSFU+oVkpX9pzB+gOGopcyi FVD99CrtPwRBcGmEIqAENDHVEm+4qjhUF22UIB9A= From: Eric Wong To: mwrap-perl@80x24.org Subject: [PATCH 3/3] C-only HTTP Unix socket server + PSGI TCP reverse proxy Date: Sat, 10 Dec 2022 01:55:18 +0000 Message-Id: <20221210015518.272576-4-e@80x24.org> In-Reply-To: <20221210015518.272576-1-e@80x24.org> References: <20221210015518.272576-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The C-only HTTP server (mwrap_httpd) should be able to run in non-Perl programs and anything which LD_PRELOADs Mwrap.so. This HTTP server emulates the existing Devel::Mwrap::PSGI app, but without any Perl or PSGI runtime dependencies. It only binds to a per-process UNIX stream socket at $socket_dir/$PID.sock where $socket_dir is set by the `socket_dir:' directive in the MWRAP environment variable. URLs for mwrap_httpd are similar to Devel::Mwrap::PSGI ones, but prefixed with the $PID to simplify use with the reverse proxy. 
Thus, the "/each/2000" endpoint becomes "/$PID/each/2000". The PSGI reverse proxy (Devel::Mwrap::Rproxy + mwrap-rproxy) is a tiny PSGI application designed to expose the mwrap_httpd UNIX sockets to web browsers such as `w3m'. The overall configuration allows mwrap-rproxy to provide access to every mwrapped process via the `/$PID/' prefixed endpoints. browser <-> mwrap-rproxy <----> mwrapped #1 (on /$socket_dir/$PID_1.sock) \---> mwrapped #2 (on /$socket_dir/$PID_2.sock) \---> mwrapped #3 (on /$socket_dir/$PID_3.sock) ... The process running mwrap-rproxy and Devel::Mwrap::Rproxy does not require Mwrap.so to be loaded in it, so it can use any malloc implementation if tracing rproxy isn't necessary. --- .gitignore | 1 + MANIFEST | 7 + Makefile.PL | 10 +- Mwrap.xs | 23 +- lib/Devel/Mwrap/Rproxy.pm | 78 +++ mwrap_core.h | 77 ++- mwrap_httpd.h | 1119 +++++++++++++++++++++++++++++++++++++ mymalloc.h | 10 + picohttpparser.h | 92 +++ picohttpparser_c.h | 670 ++++++++++++++++++++++ script/mwrap-rproxy | 29 + t/httpd-unit.t | 98 ++++ t/mwrap-httpd.t | 129 +++++ 13 files changed, 2314 insertions(+), 29 deletions(-) create mode 100644 lib/Devel/Mwrap/Rproxy.pm create mode 100644 mwrap_httpd.h create mode 100644 picohttpparser.h create mode 100644 picohttpparser_c.h create mode 100644 script/mwrap-rproxy create mode 100644 t/httpd-unit.t create mode 100644 t/mwrap-httpd.t diff --git a/.gitignore b/.gitignore index c228ee1..71cb379 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ /pm_to_blib /config.mak /_Inline +/build.env diff --git a/MANIFEST b/MANIFEST index c732c56..39473a5 100644 --- a/MANIFEST +++ b/MANIFEST @@ -11,10 +11,17 @@ gcc.h jhash.h lib/Devel/Mwrap.pm lib/Devel/Mwrap/PSGI.pm +lib/Devel/Mwrap/Rproxy.pm mwrap_core.h +mwrap_httpd.h mymalloc.h +picohttpparser.h +picohttpparser_c.h ppport.h script/mwrap-perl +script/mwrap-rproxy +t/httpd-unit.t +t/mwrap-httpd.t t/mwrap.t t/source_location.perl t/test_common.perl diff --git a/Makefile.PL b/Makefile.PL index 
eab1edb..3f1200b 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -20,6 +20,7 @@ END exit 0; } +chomp($LIBS); if ($Config{usemymalloc} eq 'y') { print STDERR < "$Config{ccflags} $ccflags", PREREQ_PM => {}, ABSTRACT_FROM => 'lib/Devel/Mwrap.pm', - EXE_FILES => [qw(script/mwrap-perl)], + EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy)], AUTHOR => 'mwrap hackers ', LIBS => $LIBS, # e.g. -lurcu-cds LICENSE => 'gpl_2', # GPL-2.0+, CPAN::Meta::Spec limitation @@ -111,6 +112,13 @@ N = \$\$(( \$\$(nproc 2>/dev/null || gnproc 2>/dev/null || echo 2) + 1 )) check-manifest :: MANIFEST if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi +build.env :: Makefile + echo >\$\@+ extra_linker_flags=$LIBS -lpthread + echo >>\$\@+ extra_compiler_flags=-I. $INC $Config{ccflags} $ccflags + mv \$\@+ \$\@ + +pure_all :: build.env + check:: all check-manifest prove -bvw -j\$(N) EOF diff --git a/Mwrap.xs b/Mwrap.xs index 9f105c6..5541559 100644 --- a/Mwrap.xs +++ b/Mwrap.xs @@ -139,34 +139,15 @@ mwrap_get(loc) PREINIT: STRLEN len; const char *str; - uintptr_t p; struct src_loc *l; CODE: ++locating; if (!SvPOK(loc)) XSRETURN_UNDEF; str = SvPV(loc, len); - if (len >= PATH_MAX) + l = mwrap_get(str, len); + if (!l) XSRETURN_UNDEF; - if (extract_addr(str, len, (void **)&p)) { - struct cds_lfht *t = CMM_LOAD_SHARED(totals); - struct src_loc *k; - - if (!t) - XSRETURN_UNDEF; - k = &tsd.src_loc; - - k->f = NULL; - k->lineno = 0; - k->bt[0] = p; - k->bt_len = 1; - hash_src_loc(k); - rcu_read_lock(); - l = src_loc_get(t, k); - rcu_read_unlock(); - } else { - l = src_loc_lookup(str, len); - } RETVAL = l; OUTPUT: RETVAL diff --git a/lib/Devel/Mwrap/Rproxy.pm b/lib/Devel/Mwrap/Rproxy.pm new file mode 100644 index 0000000..7955f55 --- /dev/null +++ b/lib/Devel/Mwrap/Rproxy.pm @@ -0,0 +1,78 @@ +# Copyright (C) mwrap hackers +# License: GPL-2.0+ + +# minimal reverse proxy to expose the embedded mwrap_httpd.h UNIX sockets +# via PSGI (and thus TCP HTTP/1.x). 
This does not have a hard dependency +# on Mwrap.so. +# +# Warning: this has a synchronous wait dependency, so isn't suited for +# non-blocking async HTTP servers. +package Devel::Mwrap::Rproxy; +use v5.12; # strict +use Fcntl qw(SEEK_SET); +use IO::Socket::UNIX; +use Plack::Util; + +sub new { bless { socket_dir => $_[1]}, $_[0] } + +sub r { + [ $_[0], [ + 'Expires' => 'Fri, 01 Jan 1980 00:00:00 GMT', + 'Pragma' => 'no-cache', + 'Cache-Control' => 'no-cache, max-age=0, must-revalidate', + 'Content-Type' => 'text/html; charset=UTF-8', + 'Content-Length' => length($_[1]), + ], [ $_[1] ] ]; +} + +my $valid_pid = $^O eq 'linux' ? sub { + my ($pid) = @_; + if (open(my $fh, '<', "/proc/$pid/cmdline")) { + local $/; + my $str = <$fh>; + $str =~ tr/\0/ /; + Plack::Util::encode_html($str); + } +} : sub { kill(0, $_[0]) ? "PID: $_[0]" : undef }; + +sub list { + my ($self, $env) = @_; + state $t = 'mwrap reverse proxy endpoints'; + open(my $fh, '+>', \(my $str)) or die "open: $!"; + print $fh '', $t, '
', $t,
+		"\n\n";
+	opendir(my $dh, $self->{socket_dir}) or return r(500, "socket_dir: $!");
+	my @pids = grep(/\A[0-9]+\.sock\z/, readdir($dh));
+	for (@pids) {
+		substr($_, -5, 5, ''); # chop off .sock
+		my $cmd = $valid_pid->($_) // next;
+		$_ .= '/each/2000';
+		say $fh qq(', $_, "\t", $cmd;
+	}
+	print $fh '
'; + r(200, $str); +} + +sub call { # PSGI entry point + my ($self, $env) = @_; + my $uri = $env->{REQUEST_URI}; + my $method = $env->{REQUEST_METHOD}; + return list(@_) if $uri eq '/' && $method eq 'GET'; + + # must have /$PID/ prefix to map socket + $uri =~ m!\A/([0-9]+)/! or return r(404, 'not found'); + my $s = "$self->{socket_dir}/$1.sock"; + my %o = (Peer => $s, Type => SOCK_STREAM); + my $c = IO::Socket::UNIX->new(%o) or return r(500, "connect: $!"); + my $h = "$method $uri HTTP/1.0\n\n"; + $s = send($c, $h, MSG_NOSIGNAL) // return r(500, "send: $!"); + $s == length($h) or return r(500, "send $s <".length($h)); + # this only expects mwrap_httpd.h output, so no continuation lines: + $h = do { local $/ = "\r\n\r\n"; <$c> } // return r(500, "read: $!"); + my ($code, @hdr) = split(/\r\n/, $h); + (undef, $code, undef) = split(/ /, $code); + @hdr = map { split(/: /, $_, 2) } @hdr; + [ $code, \@hdr, $c ]; +} + +1; diff --git a/mwrap_core.h b/mwrap_core.h index 730699a..bed43d6 100644 --- a/mwrap_core.h +++ b/mwrap_core.h @@ -9,6 +9,10 @@ # define MWRAP_PERL 0 #endif +#if !MWRAP_PERL +typedef void COP; +#endif + #ifndef MWRAP_BT_MAX # define MWRAP_BT_MAX 32 #endif @@ -37,6 +41,7 @@ #include #include #include +#include /* * XXH3 (truncated to 32-bits) seems to provide a ~2% speedup. 
@@ -713,14 +718,35 @@ out_unlock: return 0; } +/* extract from backtrace_symbols(3) output */ static int extract_addr(const char *str, size_t len, void **p) { - const char *c; -#if defined(__GLIBC__) - return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p)); -#else /* TODO: test FreeBSD */ - return ((c = strstr(str, "0x")) && sscanf(c, "%p", p)); + unsigned long x; + char *e; +#if defined(__GLIBC__) /* str = "/path/to/foo.so(+0x123) [0xdeadbeefcafe]" */ + const char *end = str + len; + const char *c = memrchr(str, '[', len); + if (c && (c + 2) < end && c[1] == '0' && c[2] == 'x') { + errno = 0; + x = strtoul(c + 3, &e, 16); + if (!errno && *e == ']') { + *p = (void *)x; + return 1; + } + } +#elif defined(__FreeBSD__) /* str = "0xdeadbeefcafe <%n%D> at %f" */ + const char *c = memchr(str, ' ', len); + errno = 0; + if (len > 4 && c && str[0] == '0' && str[1] == 'x') { + errno = 0; + x = strtoul(str + 2, &e, 16); /* digits start right after "0x" */ + if (!errno && *e == ' ') { + *p = (void *)x; + return 1; + } + } #endif + return 0; } /* str is $PATHNAME:$LINENO, len is strlen(str) */ @@ -734,7 +760,6 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len) struct src_loc *l = NULL; struct cds_lfht *t = CMM_LOAD_SHARED(files); - mwrap_assert(str[len] == 0); if (!c || c == end || !t) return NULL; @@ -744,7 +769,7 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len) lineno = UINT_MAX; } else { lineno = 0; - for (; *c; c++) { + for (; c < end; c++) { if (*c < '0' || *c > '9') return NULL; lineno *= 10; @@ -770,6 +795,7 @@ static struct src_loc *src_loc_lookup(const char *str, size_t len) #ifndef O_CLOEXEC # define O_CLOEXEC 0 #endif +static void h1d_atexit(void); __attribute__ ((destructor)) static void dump_destructor(void) { @@ -781,6 +807,9 @@ static void dump_destructor(void) char *dump_path; char *s; + /* n.b. 
unsetenv("MWRAP") may be called, so run this unconditionally */ + h1d_atexit(); + if (!opt) return; @@ -858,10 +887,43 @@ static void mwrap_reset(void) rcu_read_unlock(); } +static struct src_loc *mwrap_get(const char *str, size_t len) +{ + struct src_loc *l = NULL; + uintptr_t p; + + if (len >= PATH_MAX) + return l; + if (extract_addr(str, len, (void **)&p)) { + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + struct src_loc *k; + + if (!t) + return l; + k = &tsd.src_loc; + + k->f = NULL; + k->lineno = 0; + k->bt[0] = p; + k->bt_len = 1; + hash_src_loc(k); + rcu_read_lock(); + l = src_loc_get(t, k); + rcu_read_unlock(); + } else { + l = src_loc_lookup(str, len); + } + return l; +} + +static const char *mwrap_env; +#include "mwrap_httpd.h" + __attribute__((constructor)) static void mwrap_ctor(void) { sigset_t set, old; struct alloc_hdr *h; + mwrap_env = getenv("MWRAP"); ++locating; @@ -891,6 +953,7 @@ __attribute__((constructor)) static void mwrap_ctor(void) } else fprintf(stderr, "malloc failed: %s\n", strerror(errno)); + h1d_start(); CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL)); CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent, atfork_child)); diff --git a/mwrap_httpd.h b/mwrap_httpd.h new file mode 100644 index 0000000..fe9f292 --- /dev/null +++ b/mwrap_httpd.h @@ -0,0 +1,1119 @@ +/* + * Copyright (C) mwrap hackers + * License: GPL-2.0+ + * + * Single-threaded multiplexing HTTP/1.x AF_UNIX server. + * Not using epoll|kqueue here since we don't want to be wasting another + * FD for a few clients. 
+ * + * stdio (via open_memstream) is used for all vector management, + * thus everything is a `FILE *' + */ +#ifndef _DEFAULT_SOURCE +# define _DEFAULT_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "picohttpparser.h" +#include "picohttpparser_c.h" +#include +#include + +enum mw_qev { + MW_QEV_IGNORE = 0, + MW_QEV_RD = POLLIN, + MW_QEV_WR = POLLOUT +}; + +struct mw_membuf { /* for open_memstream */ + char *ptr; + size_t len; +}; + +struct mw_wbuf { /* for response headers + bodies */ + struct iovec iov[2]; + unsigned iov_nr; + unsigned iov_written; + char bytes[]; +}; + +#define MW_RBUF_SIZE 8192 +#define MW_NR_NAME 8 +struct mw_h1req { /* HTTP/1.x request (TSD in common (fast) case) */ + const char *method, *path, *qstr; + size_t method_len, path_len, qlen; + uint16_t rbuf_len; + int pret, minor_ver; + size_t nr_hdr; + struct phr_header hdr[MW_NR_NAME]; + char rbuf[MW_RBUF_SIZE]; /* read(2) in to thi */ +}; + +struct mw_h1 { /* each HTTP/1.x client (heap) */ + int fd; + short events; /* for poll */ + unsigned prev_len:13; /* capped by MW_RBUF_SIZE */ + unsigned persist:1; /* HTTP/1.1 */ + unsigned has_input:1; + unsigned unused_:1; + struct mw_h1req *h1r; /* only for slow clients */ + unsigned long in_len; + struct mw_wbuf *wbuf; + struct cds_list_head nd; /* <=> mw_h1d.conn */ +}; + +struct mw_h1d { /* the daemon + listener, a singleton */ + int lfd; + unsigned alive; + struct cds_list_head conn; /* <=> mw_h1.nd */ + /* use open_memstream + fwrite to implement a growing pollfd array */ + FILE *pfp; /* see poll_add, poll_detach */ + struct mw_membuf pbuf; /* pollfd vector */ + pthread_t tid; + size_t pid_len; + char pid_str[10]; +}; + +union mw_sockaddr { /* cast-avoiding convenience :> */ + struct sockaddr_un un; + struct sockaddr any; +}; + +static struct mw_h1d g_h1d = { .lfd = -1 }; +static MWRAP_TSD struct mw_h1req *tsd_h1r; + +/* sortable snapshot 
version of struct src_loc */ +struct h1_src_loc { + long double mean_life; + size_t bytes; + size_t allocations; + size_t frees; + size_t live; + size_t max_life; + union { + const struct src_loc *src_loc; + char *loc_name; + } as; + size_t lname_len; +}; + +/* sort numeric stuff descending */ +#define CMP_FN(F) static int cmp_##F(const void *x, const void *y) \ +{ \ + const struct h1_src_loc *a = x, *b = y; \ + if (a->F < b->F) return 1; \ + return (a->F > b->F) ? -1 : 0; \ +} +CMP_FN(bytes) +CMP_FN(allocations) +CMP_FN(frees) +CMP_FN(live) +CMP_FN(max_life) +CMP_FN(mean_life) +#undef CMP_FN + +static int cmp_location(const void *x, const void *y) +{ + const struct h1_src_loc *a = x, *b = y; + return strcmp(a->as.loc_name, b->as.loc_name); +} + +/* fields for /each/$MIN/ endpoint */ +struct h1_tbl { + const char *fname; + size_t flen; + int (*cmp)(const void *, const void *); +} fields[] = { +#define F(n) { #n, sizeof(#n) - 1, cmp_##n } + F(bytes), + F(allocations), + F(frees), + F(live), + F(mean_life), + F(max_life), + F(location) +#undef F +}; + +static enum mw_qev h1_close(struct mw_h1 *h1) +{ + mwrap_assert(h1->fd >= 0); + cds_list_del(&h1->nd); /* drop from h1d->conn */ + close(h1->fd); + free(h1->wbuf); + free(h1->h1r); + free(h1); + return MW_QEV_IGNORE; +} + +static enum mw_qev h1_400(struct mw_h1 *h1) +{ + /* best-effort response, so no checking send() */ + static const char r400[] = "HTTP/1.1 400 Bad Request\r\n" + "Content-Type: text/html\r\n" + "Content-Length: 12\r\n" + "Connection: close\r\n\r\n" "Bad Request\n"; + (void)send(h1->fd, r400, sizeof(r400) - 1, MSG_NOSIGNAL); + return h1_close(h1); +} + +static enum mw_qev h1_send_flush(struct mw_h1 *h1) +{ + struct mw_wbuf *wbuf = h1->wbuf; + struct msghdr mh = { 0 }; + + mh.msg_iov = wbuf->iov + wbuf->iov_written; + mh.msg_iovlen = wbuf->iov_nr; + do { + ssize_t w = sendmsg(h1->fd, &mh, MSG_NOSIGNAL); + if (w < 0) + return errno == EAGAIN ? 
MW_QEV_WR : h1_close(h1); + if (w == 0) + return h1_close(h1); + while (w > 0) { + if ((size_t)w >= mh.msg_iov->iov_len) { + w -= mh.msg_iov->iov_len; + ++mh.msg_iov; + --mh.msg_iovlen; + ++wbuf->iov_written; + --wbuf->iov_nr; + } else { + uintptr_t x = (uintptr_t)mh.msg_iov->iov_base; + mh.msg_iov->iov_base = (void *)(x + w); + mh.msg_iov->iov_len -= w; + w = 0; + } + } + } while (mh.msg_iovlen); + free(wbuf); + h1->wbuf = NULL; + return h1->persist ? MW_QEV_RD : h1_close(h1); +} + +static FILE *wbuf_new(struct mw_membuf *mb) +{ + static const struct mw_wbuf pad; + FILE *fp = open_memstream(&mb->ptr, &mb->len); + if (!fp) + fprintf(stderr, "open_memstream: %m\n"); + fwrite(&pad, 1, sizeof(pad), fp); /* populated before h1_send_flush */ + return fp; +} + +static enum mw_qev h1_res_oneshot(struct mw_h1 *h1, const char *buf, size_t len) +{ + struct mw_membuf mb; + FILE *fp = wbuf_new(&mb); + + if (!fp) + return h1_close(h1); + fwrite(buf, 1, len, fp); + if (ferror(fp) | fclose(fp)) { + fprintf(stderr, "ferror|fclose: %m\n"); + return h1_close(h1); + } + + /* fill in the zero padding we added at wbuf_new */ + mwrap_assert(!h1->wbuf); + struct mw_wbuf *wbuf = h1->wbuf = (struct mw_wbuf *)mb.ptr; + wbuf->iov_nr = 1; + wbuf->iov[0].iov_len = mb.len - sizeof(*wbuf); + wbuf->iov[0].iov_base = wbuf->bytes; + return h1_send_flush(h1); +} + +#define FPUTS(STR, fp) fwrite(STR, sizeof(STR) - 1, 1, fp) +static enum mw_qev h1_200(struct mw_h1 *h1, FILE *fp, struct mw_membuf *mb) +{ + /* + * the HTTP header goes at the END of the body buffer, + * we'll rely on iovecs via sendmsg(2) to reorder and clamp it + */ + off_t clen = ftello(fp); + if (clen < 0) { + fprintf(stderr, "ftello: %m\n"); + fclose(fp); + return h1_close(h1); + } + clen -= sizeof(struct mw_wbuf); + mwrap_assert(clen >= 0); + FPUTS("HTTP/1.1 200 OK\r\n" + "Expires: Fri, 01 Jan 1980 00:00:00 GMT\r\n" + "Pragma: no-cache\r\n" + "Cache-Control: no-cache, max-age=0, must-revalidate\r\n" + "Content-Type: text/html; 
charset=UTF-8\r\n" + "Content-Length: ", fp); + fprintf(fp, "%zu", (size_t)clen); + FPUTS("\r\n\r\n", fp); + + if (ferror(fp) | fclose(fp)) { + fprintf(stderr, "ferror|fclose: %m\n"); + return h1_close(h1); + } + + /* fill in the zero-padding we added at wbuf_new */ + mwrap_assert(!h1->wbuf); + struct mw_wbuf *wbuf = h1->wbuf = (struct mw_wbuf *)mb->ptr; + wbuf->iov_nr = 2; + wbuf->iov[0].iov_len = mb->len - ((size_t)clen + sizeof(*wbuf)); + wbuf->iov[0].iov_base = wbuf->bytes + (size_t)clen; + wbuf->iov[1].iov_len = clen; + wbuf->iov[1].iov_base = wbuf->bytes; + return h1_send_flush(h1); +} + +static enum mw_qev h1_404(struct mw_h1 *h1) +{ + static const char r404[] = "HTTP/1.1 404 Not Found\r\n" + "Content-Type: text/html\r\n" + "Content-Length: 10\r\n\r\n" "Not Found\n"; + return h1_res_oneshot(h1, r404, sizeof(r404) - 1); +} + +#define NAME_EQ(h, NAME) name_eq(h, NAME, sizeof(NAME)-1) +static int name_eq(const struct phr_header *h, const char *name, size_t len) +{ + return h->name_len == len && !strncasecmp(name, h->name, len); +} +#define VAL_EQ(h, VAL) val_eq(h, VAL, sizeof(VAL)-1) +static int val_eq(const struct phr_header *h, const char *val, size_t len) +{ + return h->value_len == len && !strncasecmp(val, h->value, len); +} + +static enum mw_qev h1_do_reset(struct mw_h1 *h1) +{ + static const char r200[] = "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Content-Length: 6\r\n\r\n" "reset\n"; + mwrap_reset(); + return h1_res_oneshot(h1, r200, sizeof(r200) - 1); +} + +#define PATH_SKIP(h1r, pfx) path_skip(h1r, pfx, sizeof(pfx) - 1) +static const char *path_skip(struct mw_h1req *h1r, const char *pfx, size_t len) +{ + if (h1r->path_len > len && !memcmp(pfx, h1r->path, len)) + return h1r->path + len; + return NULL; +} + +static void write_html(FILE *fp, const char *s, size_t len) +{ + for (; len--; ++s) { + switch (*s) { + case '&': FPUTS("&", fp); break; + case '<': FPUTS("<", fp); break; + case '>': FPUTS(">", fp); break; + case '"': FPUTS(""", fp); 
break; + case '\'': FPUTS("'", fp); break; + case '\n': FPUTS("
", fp); break; + default: fputc(*s, fp); + } + } +} + +static void write_uri(FILE *fp, const char *s, size_t len) +{ +#define CGI_URI_OK(x) \ + ((x >= 'a' && x <= 'z') || \ + (x >= 'A' && x <= 'Z') || \ + (x >= '0' && x <= '9') || \ + (x == '.' || x == '-' || x == '_' || x == '~')) + for (; len--; ++s) { + if (caa_likely(CGI_URI_OK(*s))) { + fputc(*s, fp); + } else { + static const char cgi_digitmap[] = "0123456789ABCDEF"; + unsigned char c = *s; + char x[3]; + + x[2] = cgi_digitmap[(c % 16)]; + x[1] = cgi_digitmap[((c / 16) % 16)]; + x[0] = '%'; + fwrite(x, sizeof(x), 1, fp); + } + } +#undef CGI_URI_OK +} + +static bool is_hex(int x) +{ + return (((x) >= '0' && (x) <= '9') || + ((x) >= 'a' && (x) <= 'f') || + ((x) >= 'A' && (x) <= 'F')); +} + +static int xtoupper(int x) +{ + return (x >= 'a' && x <= 'f') ? (x & ~0x20) : x; +} + +static int hexchar_to_int(int x) +{ + return (x < 'A') ? (x - '0') : (xtoupper(x) - 'A' + 10); +} + +static int hexpair_to_int(int x1, int x2) +{ + return ((hexchar_to_int(x1) << 4) | hexchar_to_int(x2)); +} + +/* unescapes @s in-place and adjusts @len */ +static const char *uri_unescape(const char *s, size_t *len) +{ + union { const char *in; char *out; } deconst; + size_t orig = *len; + char *out; + + deconst.in = s; + out = deconst.out; + for (; orig--; s++) { + if (caa_unlikely(*s == '%') && orig > 1 && + caa_likely(is_hex(s[1])) && + caa_likely(is_hex(s[2]))) { + orig -= 2; + *len -= 2; + *out++ = hexpair_to_int(s[1], s[2]); + s += 2; + } else { + *out++ = *s; + } + } + return deconst.in; +} + +/* result must be freed */ +static char *loc2name(const struct src_loc *l, size_t *len) +{ + char *ptr; + FILE *fp = open_memstream(&ptr, len); + if (!fp) { + fprintf(stderr, "open_memstream: %m\n"); + return NULL; + } + if (l->f) { + fputs(l->f->fn, fp); + if (l->lineno == UINT_MAX) + FPUTS(":-", fp); + else + fprintf(fp, ":%zu", l->lineno); + } else { + size_t i; + char **s = backtrace_symbols((void *)l->bt, (int)l->bt_len); + + if (!s) { + 
fprintf(stderr, "backtrace_symbols: %m\n"); + fclose(fp); + return NULL; + } + fputs(s[0], fp); + for (i = 1; i < l->bt_len; i++) { + fputc('\n', fp); + fputs(s[i], fp); + } + } + if (ferror(fp) | fclose(fp)) { + fprintf(stderr, "ferror|fclose: %m\n"); + return NULL; + } + return ptr; +} + +static struct h1_src_loc *accumulate(unsigned long min, size_t *hslc) +{ + struct mw_membuf mb; + FILE *fp = open_memstream(&mb.ptr, &mb.len); + if (!fp) { + fprintf(stderr, "open_memstream: %m\n"); + return NULL; + } + rcu_read_lock(); + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + struct cds_lfht_iter iter; + struct src_loc *l; + ++locating; + if (t) cds_lfht_for_each_entry(t, &iter, l, hnode) { + size_t freed = uatomic_read(&l->freed_bytes); + size_t total = uatomic_read(&l->total); + struct h1_src_loc hsl; + + if (total < min) continue; + hsl.as.src_loc = l; + hsl.bytes = total - freed; + hsl.allocations = uatomic_read(&l->allocations); + hsl.frees = uatomic_read(&l->frees); + hsl.live = hsl.allocations - hsl.frees; + hsl.mean_life = hsl.frees ? 
+ ((long double)uatomic_read(&l->age_total) / + (long double)hsl.frees) : + HUGE_VAL; + hsl.max_life = uatomic_read(&l->max_lifespan); + fwrite(&hsl, sizeof(hsl), 1, fp); + } + --locating; + mwrap_assert(!locating); + rcu_read_unlock(); + + struct h1_src_loc *hslv; + if (ferror(fp) | fclose(fp)) { + fprintf(stderr, "ferror|fclose: %m\n"); + hslv = NULL; + } else { + *hslc = mb.len / sizeof(*hslv); + mwrap_assert((mb.len % sizeof(*hslv)) == 0); + hslv = (struct h1_src_loc *)mb.ptr; + for (size_t i = 0; i++ < *hslc; ++hslv) + hslv->as.loc_name = loc2name(hslv->as.src_loc, + &hslv->lname_len); + hslv = (struct h1_src_loc *)mb.ptr; + } + return hslv; +} + +/* /$PID/at/$LOCATION endpoint */ +static enum mw_qev each_at(struct mw_h1 *h1, struct mw_h1req *h1r) +{ + const char *loc = h1r->path + sizeof("/at/") - 1; + size_t len = h1r->path_len - (sizeof("/at/") - 1); + size_t min = 0; + + loc = uri_unescape(loc, &len); + if (len >= PATH_MAX) return h1_400(h1); + struct src_loc *l = mwrap_get(loc, len); + if (!l) return h1_404(h1); + size_t lname_len; + char *name = loc2name(l, &lname_len); + struct mw_membuf mb; + FILE *fp = wbuf_new(&mb); + FPUTS("", fp); + write_html(fp, name, lname_len); + FPUTS("

live allocations at ", fp); + write_html(fp, name, lname_len); + free(name); + size_t age = uatomic_read(&total_bytes_inc); + fprintf(fp, "

Current age: %zu (live: %zu)", + age, age - uatomic_read(&total_bytes_dec)); + FPUTS("" + "", fp); + + rcu_read_lock(); + ++locating; + struct alloc_hdr *h; + cds_list_for_each_entry_rcu(h, &l->allocs, anode) { + size_t size = uatomic_read(&h->size); + if (size > min) + fprintf(fp, "\n", + size, h->as.live.gen, h->real); + } + --locating; + mwrap_assert(!locating); + rcu_read_unlock(); + FPUTS("
sizegenerationaddress
%zu%zu%p
", fp); + return h1_200(h1, fp, &mb); +} + +/* /$PID/each/$MIN endpoint */ +static enum mw_qev each_gt(struct mw_h1 *h1, struct mw_h1req *h1r, + unsigned long min) +{ + static const char default_sort[] = "bytes"; + const char *sort = default_sort; + size_t sort_len = sizeof(default_sort) - 1; + + if (h1r->qstr && h1r->qlen > 5 && !memcmp(h1r->qstr, "sort=", 5)) { + sort = h1r->qstr + 5; + sort_len = h1r->qlen - 5; + } + + size_t hslc; + struct h1_src_loc *hslv = accumulate(min, &hslc); + if (!hslv) + return h1_close(h1); + + struct mw_membuf mb; + FILE *fp = wbuf_new(&mb); + fprintf(fp, "mwrap each >%lu" + "

mwrap each >%lu " + "(change `%lu' in URL to adjust filtering)", min, min, min); + size_t age = uatomic_read(&total_bytes_inc); + fprintf(fp, "

Current age: %zu (live: %zu)", + age, age - uatomic_read(&total_bytes_dec)); + FPUTS("", fp); + + int (*cmp)(const void *, const void *) = NULL; + for (size_t i = 0; i < CAA_ARRAY_SIZE(fields); i++) { + FPUTS("", fp); + } + FPUTS("", fp); + if (cmp) + qsort(hslv, hslc, sizeof(*hslv), cmp); + else + FPUTS("", fp); + for (size_t i = 0; i < hslc; i++) { + struct h1_src_loc *hsl = &hslv[i]; + + fprintf(fp, "" + "", + hsl->bytes, hsl->allocations, hsl->frees, + hsl->live, hsl->mean_life, hsl->max_life); + FPUTS("", fp); + } + free(hslv); + FPUTS("
", fp); + if (fields[i].flen == sort_len && + !memcmp(fields[i].fname, sort, sort_len)) { + cmp = fields[i].cmp; + fprintf(fp, "%s", fields[i].fname); + } else { + fprintf(fp, + "%s", + min, fields[i].fname, fields[i].fname); + } + FPUTS("
sort= not understood
%zu%zu%zu%zu%0.3Lf%zuas.loc_name, hsl->lname_len); + FPUTS("\">", fp); + write_html(fp, hsl->as.loc_name, hsl->lname_len); + free(hsl->as.loc_name); + FPUTS("
", fp); + return h1_200(h1, fp, &mb); +} + +static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r) +{ + if (h1r->method_len == 3 && !memcmp(h1r->method, "GET", 3)) { + const char *c; + + if ((c = PATH_SKIP(h1r, "/each/"))) { + errno = 0; + char *end; + unsigned long min = strtoul(c, &end, 10); + if ((*end == ' ' || *end == '?') && !errno) + return each_gt(h1, h1r, min); + } else if ((PATH_SKIP(h1r, "/at/"))) { + return each_at(h1, h1r); + } + } else if (h1r->method_len == 4 && !memcmp(h1r->method, "POST", 4)) { + if (h1r->path_len == 6 && !memcmp(h1r->path, "/reset", 6)) + return h1_do_reset(h1); + } + return h1_404(h1); +} + +/* + * nothing in the PSGI app actually reads input, but clients tend + * to send something in the body of POST requests anyways, so we + * just drain it + */ +static enum mw_qev h1_drain_input(struct mw_h1 *h1, struct mw_h1req *h1r) +{ + if (h1r) { /* initial */ + ssize_t overread = h1r->rbuf_len - h1r->pret; + mwrap_assert(overread >= 0); + if ((size_t)overread <= h1->in_len) { + h1->in_len -= overread; + } else { /* TODO: deal with pipelined requests */ + return h1_400(h1); + } + } else { /* continue dealing with a trickle */ + h1r = h1->h1r; + mwrap_assert(h1r); + } + while (h1->in_len > 0) { + char ibuf[BUFSIZ]; + size_t len = h1->in_len; + ssize_t r; + + mwrap_assert(h1->has_input); + if (len > sizeof(ibuf)) + len = sizeof(ibuf); + + r = read(h1->fd, ibuf, len); + if (r > 0) { /* just discard the input */ + h1->in_len -= r; + } else if (r == 0) { + return h1_close(h1); + } else { + switch (errno) { + case EAGAIN: + if (!h1->h1r) { + h1->h1r = h1r; + mwrap_assert(tsd_h1r == h1r); + tsd_h1r = NULL; + } + return MW_QEV_RD; + case ECONNRESET: /* common */ + case ENOTCONN: + return h1_close(h1); + default: /* ENOMEM, ENOBUFS, ... 
*/ + assert(errno != EBADF); + fprintf(stderr, "read: %m\n"); + return h1_close(h1); + } + } + } + h1->has_input = 0; /* all done with input */ + return h1_dispatch(h1, h1r); +} + +static enum mw_qev h1_parse_harder(struct mw_h1 *h1, struct mw_h1req *h1r) +{ + enum { HDR_IGN, HDR_CONN, HDR_XENC, HDR_CLEN } cur = HDR_IGN; + bool conn_set = false; + char *end; + struct phr_header *hdr = h1r->hdr; + + h1->has_input = 0; + h1->persist = h1r->minor_ver >= 1 ? 1 : 0; + h1->in_len = 0; + + for (hdr = h1r->hdr; h1r->nr_hdr--; hdr++) { + if (NAME_EQ(hdr, "Transfer-Encoding")) + cur = HDR_XENC; + else if (NAME_EQ(hdr, "Content-Length")) + cur = HDR_CLEN; + else if (NAME_EQ(hdr, "Connection")) + cur = HDR_CONN; + else if (NAME_EQ(hdr, "Trailer")) + return h1_400(h1); + else if (hdr->name) + cur = HDR_IGN; + /* else: continuation line */ + if (!hdr->value_len) + continue; + switch (cur) { + case HDR_CONN: + if (conn_set) return h1_400(h1); + conn_set = true; + if (VAL_EQ(hdr, "close")) + h1->persist = 0; + else if (VAL_EQ(hdr, "keep-alive")) + h1->persist = 1; + else + return h1_400(h1); + break; + case HDR_XENC: + return h1_400(h1); + case HDR_CLEN: + if (h1->has_input) return h1_400(h1); + h1->has_input = 1; + errno = 0; + h1->in_len = strtoul(hdr->value, &end, 10); + if (errno) + return h1_400(h1); + switch (*end) { + case '\r': case ' ': case '\t': case '\n': break; + default: return h1_400(h1); + } + break; + case HDR_IGN: + break; + } + } + if (h1r->path_len < (g_h1d.pid_len + 2)) + return h1_404(h1); + + /* skip "/$PID" prefix */ + if (*h1r->path == '/' && + !memcmp(h1r->path+1, g_h1d.pid_str, g_h1d.pid_len) && + h1r->path[1 + g_h1d.pid_len] == '/') { + h1r->path += 1 + g_h1d.pid_len; + h1r->path_len -= 1 + g_h1d.pid_len; + } else { + return h1_404(h1); + } + h1r->qstr = memchr(h1r->path, '?', h1r->path_len); + if (h1r->qstr) { + ++h1r->qstr; /* ignore '?' 
*/ + h1r->qlen = h1r->path + h1r->path_len - h1r->qstr; + h1r->path_len -= (h1r->qlen + 1); + } + return h1_drain_input(h1, h1r); +} + +static enum mw_qev h1_event_step(struct mw_h1 *h1) +{ + struct mw_h1req *h1r; + + /* + * simple rule to avoid trivial DoS in HTTP/1.x: never process a + * new request until you've written out your previous response + * (and this is why I'm too stupid to do HTTP/2) + */ + if (h1->wbuf) + return h1_send_flush(h1); + + if (h1->has_input) + return h1_drain_input(h1, NULL); + /* + * The majority of requests can be served using TSD rbuf, + * no need for per-client allocations unless a client trickles + */ + h1r = h1->h1r ? h1->h1r : tsd_h1r; + if (!h1r) { + h1r = tsd_h1r = malloc(sizeof(*h1r)); + if (!h1r) { + fprintf(stderr, "h1r malloc: %m\n"); + return h1_close(h1); + } + } + for (;;) { + size_t n = MW_RBUF_SIZE - h1->prev_len; + ssize_t r = read(h1->fd, &h1r->rbuf[h1->prev_len], n); + + if (r > 0) { + h1r->rbuf_len = h1->prev_len + r; + h1r->nr_hdr = MW_NR_NAME; + h1r->pret = phr_parse_request(h1r->rbuf, h1r->rbuf_len, + &h1r->method, &h1r->method_len, + &h1r->path, &h1r->path_len, + &h1r->minor_ver, h1r->hdr, + &h1r->nr_hdr, h1->prev_len); + if (h1r->pret > 0) + return h1_parse_harder(h1, h1r); + if (h1r->pret == -1) + return h1_400(h1); /* parser error */ + + mwrap_assert(h1r->pret == -2); /* incomplete */ + mwrap_assert(h1r->rbuf_len <= MW_RBUF_SIZE && + "bad math"); + + /* this should be 413 or 414, don't need the bloat */ + if (h1r->rbuf_len == MW_RBUF_SIZE) + return h1_400(h1); + mwrap_assert(h1r->rbuf_len < MW_RBUF_SIZE); + h1->prev_len = h1r->rbuf_len; + /* loop again */ + } else if (r == 0) { + return h1_close(h1); + } else { /* r < 0 */ + switch (errno) { + case EAGAIN: /* likely, detach to per-client buffer */ + if (h1->prev_len && !h1->h1r) { + h1->h1r = h1r; + mwrap_assert(tsd_h1r == h1r); + tsd_h1r = NULL; + } + return MW_QEV_RD; + case ECONNRESET: /* common */ + case ENOTCONN: + return h1_close(h1); + default: /* 
ENOMEM, ENOBUFS, ... */ + assert(errno != EBADF); + fprintf(stderr, "read: %m\n"); + return h1_close(h1); + } + } + } + + return MW_QEV_RD; +} + +static int poll_add(struct mw_h1d *h1d, int fd, short events) +{ + struct pollfd pfd; + + if (!h1d->pfp) { + h1d->pfp = open_memstream(&h1d->pbuf.ptr, &h1d->pbuf.len); + if (!h1d->pfp) { + fprintf(stderr, "open_memstream: %m\n"); + return -1; + } + } + pfd.fd = fd; + pfd.events = events; + fwrite(&pfd, 1, sizeof(pfd), h1d->pfp); + return 0; /* success */ +} + +static struct pollfd *poll_detach(struct mw_h1d *h1d, nfds_t *nfds) +{ + struct pollfd *pfd = NULL; /* our return value */ + + /* not sure how to best recover from ENOMEM errors in stdio */ + if (h1d->pfp) { + if (ferror(h1d->pfp) | fclose(h1d->pfp)) { + fprintf(stderr, "ferror|fclose: %m\n"); + exit(EXIT_FAILURE); + } else { + mwrap_assert(h1d->pbuf.len % sizeof(*pfd) == 0); + pfd = (struct pollfd *)h1d->pbuf.ptr; + *nfds = h1d->pbuf.len / sizeof(*pfd); + } + } + + /* prepare a new poll buffer the next loop */ + h1d->pbuf.len = 0; + h1d->pbuf.ptr = NULL; + h1d->pfp = NULL; + + return pfd; +} + +static void non_fatal_pause(const char *fail_fn) +{ + fprintf(stderr, +"%s: %m (non-fatal, pausing mwrap-httpd thread)\n", fail_fn); + poll(NULL, 0, 1000); +} + +static void h1d_event_step(struct mw_h1d *h1d) +{ + union mw_sockaddr sa; + const char *fail_fn = NULL; + + while (!fail_fn) { + socklen_t len = (socklen_t)sizeof(sa); + int fd = accept4(h1d->lfd, &sa.any, &len, + SOCK_NONBLOCK|SOCK_CLOEXEC); + + if (fd >= 0) { + struct mw_h1 *h1 = calloc(1, sizeof(*h1)); + + if (h1) { + h1->fd = fd; + h1->events = POLLIN; + cds_list_add_tail(&h1->nd, &h1d->conn); + } else { + int err = errno; + fail_fn = "malloc"; + close(fd); + errno = err; + } + } else { + switch (errno) { + case EAGAIN: /* likely */ + return; + case ECONNABORTED: /* common w/ TCP */ + continue; + case EMFILE: + case ENFILE: + case ENOBUFS: + case ENOMEM: + case EPERM: + fail_fn = "accept4"; + break; + /* hope 
other cleanup work gets done: */ + default: + fprintf(stderr, "accept4: %m (fatal)\n"); + abort(); + } + } + } + non_fatal_pause(fail_fn); +} + +/* @menv is getenv("MWRAP"); returns 0 once the listen socket is ready, non-zero otherwise (including when no `socket_dir:' directive is present) */ +static int h1d_init(struct mw_h1d *h1d, const char *menv) +{ + union mw_sockaddr sa = { .un = { .sun_family = AF_UNIX } }; + + const char *env = strstr(menv, "socket_dir:"); + if (!env) return 1; + if (env != menv && env[-1] != ',') + return 1; + env += sizeof("socket_dir"); /* sizeof counts the NUL, so this also steps over the ':' */ + if (!*env) return 1; + const char *end = strchr(env, ','); + size_t len = end ? (size_t)(end - env) : strlen(env); + if (len == 0) + return fprintf(stderr, "socket_dir: cannot be empty\n"); + if (len >= sizeof(sa.un.sun_path)) + return fprintf(stderr, "socket_dir:%s too long(%zu)\n", + env, len); + + char *p = mempcpy(sa.un.sun_path, env, len); + if (p[-1] != '/') + *p++ = '/'; + struct stat sb; + if (stat(sa.un.sun_path, &sb) < 0) { + if (errno != ENOENT) + return fprintf(stderr, "stat(%s): %m\n", + sa.un.sun_path); + if (mkdir(sa.un.sun_path, 0700) < 0) + return fprintf(stderr, "mkdir(%s): %m\n", + sa.un.sun_path); + } else if (!S_ISDIR(sb.st_mode)) { + return fprintf(stderr, "socket_dir:%s is not a directory\n", + sa.un.sun_path); + } + len = sizeof(sa.un.sun_path) - (p - sa.un.sun_path); + int rc = snprintf(p, len, "%d.sock", (int)getpid()); + if (rc >= (int)len) + return fprintf(stderr, + "socket_dir too long rc=%d > len=%zu\n", rc, len); + if (rc < 0) + return fprintf(stderr, "we suck at snprintf: %m\n"); + h1d->pid_len = rc - sizeof(".sock") + 1; /* length of the PID digits alone, without ".sock" */ + memcpy(h1d->pid_str, p, h1d->pid_len); + if (unlink(sa.un.sun_path) < 0 && errno != ENOENT) + return fprintf(stderr, "unlink(%s): %m\n", sa.un.sun_path); + h1d->lfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0); + if (h1d->lfd < 0) + return fprintf(stderr, "socket: %m\n"); + if (bind(h1d->lfd, &sa.any, (socklen_t)sizeof(sa)) < 0) { + fprintf(stderr, "bind: %m\n"); + goto close_fail; + } + if (listen(h1d->lfd, 1024) < 0) { + fprintf(stderr, "listen:
%m\n"); + goto close_fail; + } + h1d->alive = 1; /* runs in parent, before pthread_create */ + CDS_INIT_LIST_HEAD(&h1d->conn); + return 0; +close_fail: + close(h1d->lfd); + h1d->lfd = -1; + return 1; +} + +/* + * epoll|kqueue would make this O(n) function unnecessary, but our (n) is + * expected to be tiny (<10): no need to waste kernel memory on epoll|kqueue + */ +static struct mw_h1 *h1_lookup(const struct mw_h1d *h1d, int fd) +{ + struct mw_h1 *h1 = NULL; + + cds_list_for_each_entry(h1, &h1d->conn, nd) + if (h1->fd == fd) + break; + mwrap_assert(h1 && h1->fd == fd && "bad FD"); + return h1; +} + +static void *h1d_run(void *x) /* pthread_create cb */ +{ + struct mw_h1d *h1d = x; + nfds_t i, nfds; + struct pollfd *pfd; + int rc; + struct mw_h1 *h1, *nxt; + enum mw_qev ev; + + for (; uatomic_read(&h1d->alive); ) { + if (poll_add(h1d, h1d->lfd, POLLIN)) + exit(EXIT_FAILURE); + cds_list_for_each_entry_safe(h1, nxt, &h1d->conn, nd) + if (poll_add(h1d, h1->fd, h1->events)) + h1_close(h1); + pfd = poll_detach(h1d, &nfds); + rc = pfd ? 
poll(pfd, nfds, -1) : -1; + + if (rc < 0) { + switch (errno) { + case EINTR: break; + case ENOMEM: /* may be common */ + case EINVAL: /* RLIMIT_NOFILE hit */ + non_fatal_pause("poll"); + break; /* to forloop where rc<0 */ + default: + fprintf(stderr, "poll: %m (fatal)\n"); + abort(); + } + } else { + for (i = 0; i < nfds && + uatomic_read(&h1d->alive); i++) { + if (!pfd[i].revents) + continue; + if (pfd[i].fd == h1d->lfd) { + h1d_event_step(h1d); + } else { + h1 = h1_lookup(h1d, pfd[i].fd); + ev = h1_event_step(h1); + if (ev == MW_QEV_IGNORE) + continue; + h1->events = ev; + } + } + } + free(pfd); + } + pfd = poll_detach(h1d, &nfds); + free(pfd); + cds_list_for_each_entry_safe(h1, nxt, &h1d->conn, nd) + h1_close(h1); + return NULL; +} + +static void h1d_atexit(void) +{ + union mw_sockaddr sa; + socklen_t len = (socklen_t)sizeof(sa); + + if (g_h1d.lfd < 0 || !g_h1d.pid_len) + return; + if (getsockname(g_h1d.lfd, &sa.any, &len) < 0) + return; + + char p[sizeof(g_h1d.pid_str)]; + int rc = snprintf(p, sizeof(p), "%d", (int)getpid()); + + if (rc == (int)g_h1d.pid_len && !memcmp(p, g_h1d.pid_str, rc)) + unlink(sa.un.sun_path); +} + +static void h1d_stop_join(struct mw_h1d *h1d) +{ + union mw_sockaddr sa; + socklen_t len = (socklen_t)sizeof(sa); + int e, sfd; + void *ret; + + mwrap_assert(uatomic_read(&h1d->alive) == 0); + if (getsockname(h1d->lfd, &sa.any, &len) < 0) { + fprintf(stderr, "getsockname: %m\n"); + abort(); /* TODO: graceful fallback (ENOBUFS) */ + } + sfd = socket(AF_UNIX, SOCK_STREAM|SOCK_CLOEXEC, 0); + if (sfd < 0) { + fprintf(stderr, "socket: %m\n"); + abort(); /* TODO: graceful fallback (ENOMEM, EMFILE, ...) */ + } + if (connect(sfd, &sa.any, len) < 0) { + fprintf(stderr, "connect: %m\n"); + /* TODO: graceful fallback (EAGAIN, ...) 
*/ + } + (void)close(sfd); + e = pthread_join(h1d->tid, &ret); + if (e) { + fprintf(stderr, "pthread_join: %s\n", strerror(e)); + abort(); + } + (void)close(h1d->lfd); + h1d->lfd = -1; +} + +static void h1d_atfork_prepare(void) +{ + if (uatomic_cmpxchg(&g_h1d.alive, 1, 0)) + h1d_stop_join(&g_h1d); +} + +static void h1d_start(void) /* may be called as pthread_atfork child cb */ +{ + if (mwrap_env && !h1d_init(&g_h1d, mwrap_env) && g_h1d.alive) { + int rc = pthread_create(&g_h1d.tid, NULL, h1d_run, &g_h1d); + if (rc) /* non-fatal */ + fprintf(stderr, "pthread_create: %s\n", strerror(rc)); + } +} + +/* must be called with global_mtx held */ +static void h1d_atfork_parent(void) +{ + if (g_h1d.lfd < 0) + h1d_start(); +} diff --git a/mymalloc.h b/mymalloc.h index 4f504b7..7d94246 100644 --- a/mymalloc.h +++ b/mymalloc.h @@ -113,8 +113,14 @@ ATTR_COLD static void mstate_tsd_dtor(void *p) CHECK(int, 0, pthread_mutex_unlock(&global_mtx)); } +/* see mwrap_httpd.h */ +static void h1d_atfork_prepare(void); +static void h1d_atfork_parent(void); +static void h1d_start(void); + ATTR_COLD static void atfork_prepare(void) { + h1d_atfork_prepare(); call_rcu_before_fork(); CHECK(int, 0, pthread_mutex_lock(&global_mtx)); } @@ -123,6 +129,9 @@ ATTR_COLD static void atfork_parent(void) { CHECK(int, 0, pthread_mutex_unlock(&global_mtx)); call_rcu_after_fork_parent(); + CHECK(int, 0, pthread_mutex_lock(&global_mtx)); + h1d_atfork_parent(); + CHECK(int, 0, pthread_mutex_unlock(&global_mtx)); } ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */ @@ -145,6 +154,7 @@ ATTR_COLD static void atfork_child(void) } reset_mutexes(); call_rcu_after_fork_child(); + h1d_start(); } #if defined(__GLIBC__) diff --git a/picohttpparser.h b/picohttpparser.h new file mode 100644 index 0000000..0927985 --- /dev/null +++ b/picohttpparser.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either 
the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef picohttpparser_h +#define picohttpparser_h + +#include + +#ifdef _MSC_VER +#define ssize_t intptr_t +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* contains name and value of a header (name == NULL if is a continuing line + * of a multiline header */ +struct phr_header { + const char *name; + size_t name_len; + const char *value; + size_t value_len; +}; + +/* returns number of bytes consumed if successful, -2 if request is partial, + * -1 if failed */ +static +int phr_parse_request(const char *buf, size_t len, const char **method, size_t *method_len, const char **path, size_t *path_len, + int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +static inline +int phr_parse_response(const char *_buf, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* ditto */ +static inline +int phr_parse_headers(const char *buf, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len); + +/* should be zero-filled before start */ +struct phr_chunked_decoder { + size_t bytes_left_in_chunk; /* number of bytes left in current chunk */ + char consume_trailer; /* if trailing headers should be consumed */ + char _hex_count; + char _state; +}; + +/* the function rewrites the buffer given as (buf, bufsz) removing the chunked- + * encoding headers. When the function returns without an error, bufsz is + * updated to the length of the decoded data available. Applications should + * repeatedly call the function while it returns -2 (incomplete) every time + * supplying newly arrived data. If the end of the chunked-encoded data is + * found, the function returns a non-negative number indicating the number of + * octets left undecoded, that starts from the offset returned by `*bufsz`. + * Returns -1 on error. 
+ */ +static inline +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *bufsz); + +/* returns if the chunked decoder is in middle of chunked data */ +static inline +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/picohttpparser_c.h b/picohttpparser_c.h new file mode 100644 index 0000000..0db7dcb --- /dev/null +++ b/picohttpparser_c.h @@ -0,0 +1,670 @@ +/* + * Copyright (c) 2009-2014 Kazuho Oku, Tokuhiro Matsuno, Daisuke Murase, + * Shigeo Mitsunari + * + * The software is licensed under either the MIT License (below) or the Perl + * license. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include +#include +#ifdef __SSE4_2__ +#ifdef _MSC_VER +#include +#else +#include +#endif +#endif +#include "picohttpparser.h" + +#if __GNUC__ >= 3 +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define likely(x) (x) +#define unlikely(x) (x) +#endif + +#ifdef _MSC_VER +#define ALIGNED(n) _declspec(align(n)) +#else +#define ALIGNED(n) __attribute__((aligned(n))) +#endif + +#define IS_PRINTABLE_ASCII(c) ((unsigned char)(c)-040u < 0137u) + +#define CHECK_EOF() \ + if (buf == buf_end) { \ + *ret = -2; \ + return NULL; \ + } + +#define EXPECT_CHAR_NO_CHECK(ch) \ + if (*buf++ != ch) { \ + *ret = -1; \ + return NULL; \ + } + +#define EXPECT_CHAR(ch) \ + CHECK_EOF(); \ + EXPECT_CHAR_NO_CHECK(ch); + +#define ADVANCE_TOKEN(tok, toklen) \ + do { \ + const char *tok_start = buf; \ + static const char ALIGNED(16) ranges2[16] = "\000\040\177\177"; \ + int found2; \ + buf = findchar_fast(buf, buf_end, ranges2, 4, &found2); \ + if (!found2) { \ + CHECK_EOF(); \ + } \ + while (1) { \ + if (*buf == ' ') { \ + break; \ + } else if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { \ + if ((unsigned char)*buf < '\040' || *buf == '\177') { \ + *ret = -1; \ + return NULL; \ + } \ + } \ + ++buf; \ + CHECK_EOF(); \ + } \ + tok = tok_start; \ + toklen = buf - tok_start; \ + } while (0) + +static const char *token_char_map = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\1\0\1\1\1\1\1\0\0\1\1\0\1\1\0\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0" + "\0\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\1\1" + "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\1\0\1\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; + +static const char *findchar_fast(const char *buf, const char 
*buf_end, const char *ranges, size_t ranges_size, int *found) +{ + *found = 0; +#if __SSE4_2__ + if (likely(buf_end - buf >= 16)) { + __m128i ranges16 = _mm_loadu_si128((const __m128i *)ranges); + + size_t left = (buf_end - buf) & ~15; + do { + __m128i b16 = _mm_loadu_si128((const __m128i *)buf); + int r = _mm_cmpestri(ranges16, ranges_size, b16, 16, _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS); + if (unlikely(r != 16)) { + buf += r; + *found = 1; + break; + } + buf += 16; + left -= 16; + } while (likely(left != 0)); + } +#else + /* suppress unused parameter warning */ + (void)buf_end; + (void)ranges; + (void)ranges_size; +#endif + return buf; +} + +static const char *get_token_to_eol(const char *buf, const char *buf_end, const char **token, size_t *token_len, int *ret) +{ + const char *token_start = buf; + +#ifdef __SSE4_2__ + static const char ALIGNED(16) ranges1[16] = "\0\010" /* allow HT */ + "\012\037" /* allow SP and up to but not including DEL */ + "\177\177"; /* allow chars w. 
MSB set */ + int found; + buf = findchar_fast(buf, buf_end, ranges1, 6, &found); + if (found) + goto FOUND_CTL; +#else + /* find non-printable char within the next 8 bytes, this is the hottest code; manually inlined */ + while (likely(buf_end - buf >= 8)) { +#define DOIT() \ + do { \ + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) \ + goto NonPrintable; \ + ++buf; \ + } while (0) + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); + DOIT(); +#undef DOIT + continue; + NonPrintable: + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + ++buf; + } +#endif + for (;; ++buf) { + CHECK_EOF(); + if (unlikely(!IS_PRINTABLE_ASCII(*buf))) { + if ((likely((unsigned char)*buf < '\040') && likely(*buf != '\011')) || unlikely(*buf == '\177')) { + goto FOUND_CTL; + } + } + } +FOUND_CTL: + if (likely(*buf == '\015')) { + ++buf; + EXPECT_CHAR('\012'); + *token_len = buf - 2 - token_start; + } else if (*buf == '\012') { + *token_len = buf - token_start; + ++buf; + } else { + *ret = -1; + return NULL; + } + *token = token_start; + + return buf; +} + +static const char *is_complete(const char *buf, const char *buf_end, size_t last_len, int *ret) +{ + int ret_cnt = 0; + buf = last_len < 3 ? 
buf : buf + last_len - 3; + + while (1) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + CHECK_EOF(); + EXPECT_CHAR('\012'); + ++ret_cnt; + } else if (*buf == '\012') { + ++buf; + ++ret_cnt; + } else { + ++buf; + ret_cnt = 0; + } + if (ret_cnt == 2) { + return buf; + } + } + + *ret = -2; + return NULL; +} + +#define PARSE_INT(valp_, mul_) \ + if (*buf < '0' || '9' < *buf) { \ + buf++; \ + *ret = -1; \ + return NULL; \ + } \ + *(valp_) = (mul_) * (*buf++ - '0'); + +#define PARSE_INT_3(valp_) \ + do { \ + int res_ = 0; \ + PARSE_INT(&res_, 100) \ + *valp_ = res_; \ + PARSE_INT(&res_, 10) \ + *valp_ += res_; \ + PARSE_INT(&res_, 1) \ + *valp_ += res_; \ + } while (0) + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_token(const char *buf, const char *buf_end, const char **token, size_t *token_len, char next_char, + int *ret) +{ + /* We use pcmpestri to detect non-token characters. This instruction can take no more than eight character ranges (8*2*8=128 + * bits that is the size of a SSE register). Due to this restriction, characters `|` and `~` are handled in the slow loop. */ + static const char ALIGNED(16) ranges[] = "\x00 " /* control chars and up to SP */ + "\"\"" /* 0x22 */ + "()" /* 0x28,0x29 */ + ",," /* 0x2c */ + "//" /* 0x2f */ + ":@" /* 0x3a-0x40 */ + "[]" /* 0x5b-0x5d */ + "{\xff"; /* 0x7b-0xff */ + const char *buf_start = buf; + int found; + buf = findchar_fast(buf, buf_end, ranges, sizeof(ranges) - 1, &found); + if (!found) { + CHECK_EOF(); + } + while (1) { + if (*buf == next_char) { + break; + } else if (!token_char_map[(unsigned char)*buf]) { + *ret = -1; + return NULL; + } + ++buf; + CHECK_EOF(); + } + *token = buf_start; + *token_len = buf - buf_start; + return buf; +} + +/* returned pointer is always within [buf, buf_end), or null */ +static const char *parse_http_version(const char *buf, const char *buf_end, int *minor_version, int *ret) +{ + /* we want at least [HTTP/1.] 
to try to parse */ + if (buf_end - buf < 9) { + *ret = -2; + return NULL; + } + EXPECT_CHAR_NO_CHECK('H'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('T'); + EXPECT_CHAR_NO_CHECK('P'); + EXPECT_CHAR_NO_CHECK('/'); + EXPECT_CHAR_NO_CHECK('1'); + EXPECT_CHAR_NO_CHECK('.'); + PARSE_INT(minor_version, 1); + return buf; +} + +static const char *parse_headers(const char *buf, const char *buf_end, struct phr_header *headers, size_t *num_headers, + size_t max_headers, int *ret) +{ + for (;; ++*num_headers) { + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + break; + } else if (*buf == '\012') { + ++buf; + break; + } + if (*num_headers == max_headers) { + *ret = -1; + return NULL; + } + if (!(*num_headers != 0 && (*buf == ' ' || *buf == '\t'))) { + /* parsing name, but do not discard SP before colon, see + * http://www.mozilla.org/security/announce/2006/mfsa2006-33.html */ + if ((buf = parse_token(buf, buf_end, &headers[*num_headers].name, &headers[*num_headers].name_len, ':', ret)) == NULL) { + return NULL; + } + if (headers[*num_headers].name_len == 0) { + *ret = -1; + return NULL; + } + ++buf; + for (;; ++buf) { + CHECK_EOF(); + if (!(*buf == ' ' || *buf == '\t')) { + break; + } + } + } else { + headers[*num_headers].name = NULL; + headers[*num_headers].name_len = 0; + } + const char *value; + size_t value_len; + if ((buf = get_token_to_eol(buf, buf_end, &value, &value_len, ret)) == NULL) { + return NULL; + } + /* remove trailing SPs and HTABs */ + const char *value_end = value + value_len; + for (; value_end != value; --value_end) { + const char c = *(value_end - 1); + if (!(c == ' ' || c == '\t')) { + break; + } + } + headers[*num_headers].value = value; + headers[*num_headers].value_len = value_end - value; + } + return buf; +} + +static const char *parse_request(const char *buf, const char *buf_end, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t 
*num_headers, + size_t max_headers, int *ret) +{ + /* skip first empty line (some clients add CRLF after POST content) */ + CHECK_EOF(); + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } + + /* parse request line */ + if ((buf = parse_token(buf, buf_end, method, method_len, ' ', ret)) == NULL) { + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + ADVANCE_TOKEN(*path, *path_len); + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + if (*method_len == 0 || *path_len == 0) { + *ret = -1; + return NULL; + } + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + if (*buf == '\015') { + ++buf; + EXPECT_CHAR('\012'); + } else if (*buf == '\012') { + ++buf; + } else { + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +static +int phr_parse_request(const char *buf_start, size_t len, const char **method, size_t *method_len, const char **path, + size_t *path_len, int *minor_version, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf_start + len; + size_t max_headers = *num_headers; + int r; + + *method = NULL; + *method_len = 0; + *path = NULL; + *path_len = 0; + *minor_version = -1; + *num_headers = 0; + + /* if last_len != 0, check if the request is complete (a fast countermeasure + against slowloris) */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_request(buf, buf_end, method, method_len, path, path_len, minor_version, headers, num_headers, max_headers, + &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static const char *parse_response(const char *buf, const char *buf_end, int *minor_version, int *status, const char **msg, + size_t *msg_len, struct phr_header *headers, size_t *num_headers, size_t max_headers, int *ret) +{ + /* parse
"HTTP/1.x" */ + if ((buf = parse_http_version(buf, buf_end, minor_version, ret)) == NULL) { + return NULL; + } + /* skip space */ + if (*buf != ' ') { + *ret = -1; + return NULL; + } + do { + ++buf; + CHECK_EOF(); + } while (*buf == ' '); + /* parse status code, we want at least [:digit:][:digit:][:digit:] to try to parse */ + if (buf_end - buf < 4) { + *ret = -2; + return NULL; + } + PARSE_INT_3(status); + + /* get message including preceding space */ + if ((buf = get_token_to_eol(buf, buf_end, msg, msg_len, ret)) == NULL) { + return NULL; + } + if (*msg_len == 0) { + /* ok */ + } else if (**msg == ' ') { + /* Remove preceding space. Successful return from `get_token_to_eol` guarantees that we would hit something other than SP + * before running past the end of the given buffer. */ + do { + ++*msg; + --*msg_len; + } while (**msg == ' '); + } else { + /* garbage found after status code */ + *ret = -1; + return NULL; + } + + return parse_headers(buf, buf_end, headers, num_headers, max_headers, ret); +} + +static inline +int phr_parse_response(const char *buf_start, size_t len, int *minor_version, int *status, const char **msg, size_t *msg_len, + struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; + size_t max_headers = *num_headers; + int r; + + *minor_version = -1; + *status = 0; + *msg = NULL; + *msg_len = 0; + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_response(buf, buf_end, minor_version, status, msg, msg_len, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +static inline +int phr_parse_headers(const char *buf_start, size_t len, struct phr_header *headers, size_t *num_headers, size_t last_len) +{ + const char *buf = buf_start, *buf_end = buf + len; 
+ size_t max_headers = *num_headers; + int r; + + *num_headers = 0; + + /* if last_len != 0, check if the response is complete (a fast countermeasure + against slowloris */ + if (last_len != 0 && is_complete(buf, buf_end, last_len, &r) == NULL) { + return r; + } + + if ((buf = parse_headers(buf, buf_end, headers, num_headers, max_headers, &r)) == NULL) { + return r; + } + + return (int)(buf - buf_start); +} + +enum { + CHUNKED_IN_CHUNK_SIZE, + CHUNKED_IN_CHUNK_EXT, + CHUNKED_IN_CHUNK_DATA, + CHUNKED_IN_CHUNK_CRLF, + CHUNKED_IN_TRAILERS_LINE_HEAD, + CHUNKED_IN_TRAILERS_LINE_MIDDLE +}; + +static int decode_hex(int ch) +{ + if ('0' <= ch && ch <= '9') { + return ch - '0'; + } else if ('A' <= ch && ch <= 'F') { + return ch - 'A' + 0xa; + } else if ('a' <= ch && ch <= 'f') { + return ch - 'a' + 0xa; + } else { + return -1; + } +} + +static inline +ssize_t phr_decode_chunked(struct phr_chunked_decoder *decoder, char *buf, size_t *_bufsz) +{ + size_t dst = 0, src = 0, bufsz = *_bufsz; + ssize_t ret = -2; /* incomplete */ + + while (1) { + switch (decoder->_state) { + case CHUNKED_IN_CHUNK_SIZE: + for (;; ++src) { + int v; + if (src == bufsz) + goto Exit; + if ((v = decode_hex(buf[src])) == -1) { + if (decoder->_hex_count == 0) { + ret = -1; + goto Exit; + } + break; + } + if (decoder->_hex_count == sizeof(size_t) * 2) { + ret = -1; + goto Exit; + } + decoder->bytes_left_in_chunk = decoder->bytes_left_in_chunk * 16 + v; + ++decoder->_hex_count; + } + decoder->_hex_count = 0; + decoder->_state = CHUNKED_IN_CHUNK_EXT; + /* fallthru */ + case CHUNKED_IN_CHUNK_EXT: + /* RFC 7230 A.2 "Line folding in chunk extensions is disallowed" */ + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + if (decoder->bytes_left_in_chunk == 0) { + if (decoder->consume_trailer) { + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + } else { + goto Complete; + } + } + decoder->_state = CHUNKED_IN_CHUNK_DATA; + /* fallthru */ + case 
CHUNKED_IN_CHUNK_DATA: { + size_t avail = bufsz - src; + if (avail < decoder->bytes_left_in_chunk) { + if (dst != src) + memmove(buf + dst, buf + src, avail); + src += avail; + dst += avail; + decoder->bytes_left_in_chunk -= avail; + goto Exit; + } + if (dst != src) + memmove(buf + dst, buf + src, decoder->bytes_left_in_chunk); + src += decoder->bytes_left_in_chunk; + dst += decoder->bytes_left_in_chunk; + decoder->bytes_left_in_chunk = 0; + decoder->_state = CHUNKED_IN_CHUNK_CRLF; + } + /* fallthru */ + case CHUNKED_IN_CHUNK_CRLF: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src] != '\012') { + ret = -1; + goto Exit; + } + ++src; + decoder->_state = CHUNKED_IN_CHUNK_SIZE; + break; + case CHUNKED_IN_TRAILERS_LINE_HEAD: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] != '\015') + break; + } + if (buf[src++] == '\012') + goto Complete; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_MIDDLE; + /* fallthru */ + case CHUNKED_IN_TRAILERS_LINE_MIDDLE: + for (;; ++src) { + if (src == bufsz) + goto Exit; + if (buf[src] == '\012') + break; + } + ++src; + decoder->_state = CHUNKED_IN_TRAILERS_LINE_HEAD; + break; + default: + assert(!"decoder is corrupt"); + } + } + +Complete: + ret = bufsz - src; +Exit: + if (dst != src) + memmove(buf + dst, buf + src, bufsz - src); + *_bufsz = dst; + return ret; +} + +static inline +int phr_decode_chunked_is_in_data(struct phr_chunked_decoder *decoder) +{ + return decoder->_state == CHUNKED_IN_CHUNK_DATA; +} + +#undef CHECK_EOF +#undef EXPECT_CHAR +#undef ADVANCE_TOKEN diff --git a/script/mwrap-rproxy b/script/mwrap-rproxy new file mode 100644 index 0000000..be6dcbe --- /dev/null +++ b/script/mwrap-rproxy @@ -0,0 +1,29 @@ +#!perl -w +# Copyright (C) mwrap hackers +# License: GPL-2.0+ +# thin wrapper for Devel::Mwrap::Rproxy +use v5.12; # strict +use Devel::Mwrap::Rproxy; +use Plack::Runner; +use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through); +my 
$usage = "$0 --socket-dir=/path/to/socket-dir [PLACKUP_OPTIONS]\n"; +my $socket_dir; +GetOptions('socket-dir=s' => \$socket_dir) or die $usage; +$socket_dir //= ($ENV{MWRAP} // '') =~ m!\bsocket_dir:([^,]+)! ? $1 : undef; +$socket_dir // die $usage; +my $app = Devel::Mwrap::Rproxy->new($socket_dir); +my $runner = Plack::Runner->new; +$runner->parse_options(@ARGV); +if (($ENV{LISTEN_PID} // 0) == $$) { + my $fds = $ENV{LISTEN_FDS} // ''; + die "only one LISTEN_FDS=1 supported (got `$fds')\n" if $fds ne '1'; + if (open(my $s, '<&=', 3)) { + my $prev_was_blocking = $s->blocking(1); + warn <<"" unless $prev_was_blocking; +Inherited socket (fd=3) is non-blocking, making it blocking. + + bless $s, 'IO::Socket::INET'; + $runner->set_options(listen_sock => $s); + } +} +$runner->run(sub { $app->call(@_) }); diff --git a/t/httpd-unit.t b/t/httpd-unit.t new file mode 100644 index 0000000..049d5fc --- /dev/null +++ b/t/httpd-unit.t @@ -0,0 +1,98 @@ +#!perl -w +# Copyright (C) mwrap hackers +# License: GPL-2.0+ +use v5.12; +use autodie; +use Test::More; +use ExtUtils::CBuilder; +use File::Spec; +use File::Temp; +use File::Path; +my ($n) = (__FILE__ =~ m!/([^/]+)\.t\z!); +open my $fh, '<', 'build.env'; +my %build_env = map { chomp; ( split(/=/, $_, 2) ) } (<$fh>); +my $tmp = File::Temp->newdir("$n-XXXX"); +my $err = "$tmp/err.log"; +open my $olderr, '+>&', *STDERR{IO}; +my $end_err = sub { + STDERR->autoflush(1); + open STDERR, '+>&', $olderr; + open my $eh, '+<', $err; + local $/; + my $buf = <$eh> // BAIL_OUT "$!"; + truncate($eh, 0); + diag "err=$buf" if $ENV{V}; + $buf; +}; + +my @vg = split(/ /, $ENV{VALGRIND} // ''); + +# using predictable pathnames but outside of working directory. +# This gives ccache-friendliness while staying clear of MakeMaker +# aggressively trying to include every *.c file +my $d = File::Spec->tmpdir . 
"/$>.mwrap-test"; +if (!-d $d) { + diag "# mkdir $d"; + mkdir($d, 0700); +} +my $f = "$d/$n.c"; +open $fh, '>', $f; +print $fh < +#include +#define getpid() my_getpid() +static pid_t my_getpid(void) +{ + return TEST_PID; +} +#include "mwrap_core.h" + +int main(int argc, const char *argv[]) +{ + struct mw_h1d h1d; + return h1d_init(&h1d, argv[1]); +} +C +close $fh; +my $TEST_PID = 10; +my $cb = ExtUtils::CBuilder->new(quiet => $ENV{V} ? 0 : 1); +my ($obj, $exe); +{ + my %be = %build_env; + $be{extra_compiler_flags} .= " -DTEST_PID=$TEST_PID -Wall "; + $obj = $cb->compile(source => $f, %be); + $exe = $cb->link_executable(exe_file => "$d/$n", objects => $obj, %be); +} +open STDERR, '>', $err; +is(system(@vg, $exe, "socket_dir:$d"), 0, "$exe"); +is($end_err->(), '', 'silence is golden'); + +my $s = "$d/$TEST_PID.sock"; +ok(-S $s, 'sock created'); +unlink($s); + +mkdir($s); +open STDERR, '>', $err; +isnt(system(@vg, $exe, "socket_dir:$d"), 0, "won't clobber dir"); +like($end_err->(), qr/unlink/, 'unlink fails for dir'); +rmdir($s); + +open STDERR, '>', $err; +is(system(@vg, $exe, "socket_dir:$d/"), 0, "listen again"); +is($end_err->(), '', 'silence is golden'); + +{ + my $t_mkdir = "$d/mkdir"; + File::Path::rmtree($t_mkdir) if -d $t_mkdir; + open STDERR, '>', $err; + is(system(@vg, $exe, "socket_dir:$t_mkdir"), 0, "listen in new dir"); + is($end_err->(), '', 'listened quietly on extra dir'); + File::Path::rmtree($t_mkdir); +} + +ok(-S $s, 'socket untouched'); +open STDERR, '>', $err; +isnt(system(@vg, $exe, "socket_dir:$s"), 0, "listen dir on socket fails"); +like($end_err->(), qr/stat.*directory/, 'stat failure shown'); + +done_testing; diff --git a/t/mwrap-httpd.t b/t/mwrap-httpd.t new file mode 100644 index 0000000..a1bf333 --- /dev/null +++ b/t/mwrap-httpd.t @@ -0,0 +1,129 @@ +#!perl -w +# Copyright (C) mwrap hackers +# License: GPL-2.0+ +use v5.12; +use IO::Socket::UNIX; +use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC); +use POSIX qw(dup2 _exit mkfifo); +BEGIN { require 
'./t/test_common.perl' }; +my $env = { MWRAP => "socket_dir:$mwrap_tmp" }; +my $fifo = "$mwrap_tmp/fifo"; +mkfifo($fifo, 0600) // plan(skip_all => "mkfifo: $!"); +my $pid = mwrap_run('httpd test', $env, '-e', "open my \$fh, '<', '$fifo'"); +my $spid; +my $mw_exit; +my $cleanup = sub { + if (defined $spid) { + if (kill('TERM', $spid)) { + waitpid($spid, 0); + $? == 15 or warn "rproxy died with \$?=$?"; + } else { + warn "kill $spid: $!"; + } + undef $spid; + } + use autodie; + if (defined $pid) { + my $exit = $?; + open my $fh, '>', $fifo; + close $fh; + waitpid($pid, 0); + $mw_exit = $?; + undef $pid; + diag "err: ".slurp($mwrap_err); + $? = $exit; + } +}; +END { $cleanup->() } + +my $sock = "$mwrap_tmp/$pid.sock"; +my $n = 10000; +my $c; +my %o = (Peer => $sock , Type => SOCK_STREAM); +while (!$c && --$n > 0) { + $c = IO::Socket::UNIX->new(%o) or + select undef, undef, undef, 0.011; +} +ok(-S $sock, 'socket created'); +ok($c, 'socket connected'); +is(syswrite($c, 'GET'), 3, 'trickled 3 bytes'); + +my $cout = "$mwrap_tmp/cout"; +my $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout, + "http://0/$pid/each/2000"); +SKIP: { + skip 'curl lacks --unix-socket support', 1 if $rc == 512; + is($rc, 0, 'curl /each'); + unlink($cout); + + $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout, + "http://0/$pid/each/2000"); + is($rc, 0, 'curl /each'); + unlink($cout); + + skip 'no reset on if interactive', 1 if $ENV{INTERACTIVE}; + $rc = system(qw(curl -vsSf --unix-socket), $sock, '-o', $cout, + '-d', 'x=y', "http://0/$pid/reset"); + is($rc, 0, 'curl /reset'); +}; + +local $SIG{PIPE} = 'IGNORE'; +{ + my $req = " /$pid/each/20000 HTTP/1.0\r\n\r\n"; + is(syswrite($c, $req), length($req), 'wrote rest of response') or + diag "syswrite: $!"; + my $x = do { local $/; <$c> } or diag "read: $!"; + like($x, qr!</html>\n?\z!s, 'got complete HTML response'); +} + +SKIP: { + eval { require HTTP::Tiny } or skip 'HTTP::Tiny missing', 1; + my $srv = IO::Socket::INET->new(LocalAddr 
=> '127.0.0.1', + ReuseAddr => 1, Proto => 'tcp', + Type => SOCK_STREAM, + Listen => 1024); + $spid = fork; + if ($spid == 0) { + local $ENV{LISTEN_PID} = $$; + local $ENV{LISTEN_FDS} = 1; + my $fl = fcntl($srv, F_GETFD, 0); + fcntl($srv, F_SETFD, $fl &= ~FD_CLOEXEC); + if (fileno($srv) != 3) { + dup2(fileno($srv), 3) or die "dup2: $!"; + } + no warnings 'exec'; + exec $^X, '-w', './blib/script/mwrap-rproxy', + "--socket-dir=$mwrap_tmp"; + _exit(1); + } + my $http = HTTP::Tiny->new; + my ($h, $p) = ($srv->sockhost, $srv->sockport); + undef $srv; + my $res = $http->get("http://$h:$p/"); + ok($res->{success}, 'listing success'); + like($res->{content}, qr!/$pid/each/\d+!, 'got listing for each'); + $res = $http->get("http://$h:$p/$pid/each/1"); + ok($res->{success}, 'each/1 success'); + my $t = '/at/$LOCATION link in /each/$NUM'; + if ($res->{content} =~ m!href="\.\./at/([^"]+)"!) { + my $loc = $1; + ok($t); + $res = $http->get("http://$h:$p/$pid/at/$1"); + ok($res->{success}, '/at/$LOCATION endpoint'); + like($res->{content}, qr!\blive allocations at !, + 'live allocations shown'); + } else { + fail($t); + } + if ($ENV{INTERACTIVE}) { + diag "http://$h:$p/$pid/each/1 up for interactive testing"; + diag "- press Enter when done -"; + my $ok = <STDIN>; + } +} + +diag slurp($cout) if $ENV{V}; +$cleanup->(); +ok(!-e $sock, 'socket unlinked after cleanup'); +is($mw_exit, 0, 'perl exited with $?==0'); +done_testing;