dumping ground for random patches and texts
 help / color / mirror / Atom feed
* [PATCH 1/4] support malloc tracing
@ 2024-04-05 21:05 Eric Wong
  2024-04-05 21:05 ` [PATCH 2/4] realloc Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2024-04-05 21:05 UTC (permalink / raw)
  To: spew

This doesn't work reliably in multi-threaded code, but I have
fragmentation problems in single-threaded code.
---
 MANIFEST                                     |   8 +-
 Makefile.PL                                  |   2 +-
 httpd.h                                      |  75 ++-
 lib/Devel/Mwrap/TraceReplay.pm               |  80 ++++
 dlmalloc_c.h => lib/Devel/Mwrap/dlmalloc_c.h |   2 +
 lib/Devel/Mwrap/khashl.h                     | 454 +++++++++++++++++++
 lib/Devel/Mwrap/trace-replay.h               | 232 ++++++++++
 lib/Devel/Mwrap/trace_struct.h               |  34 ++
 mwrap_core.h                                 | 118 +++--
 mymalloc.h                                   |  10 +-
 script/mwrap-trace-replay                    |   7 +
 t/httpd.t                                    |  32 +-
 trace.h                                      |  66 +++
 13 files changed, 1085 insertions(+), 35 deletions(-)
 create mode 100644 lib/Devel/Mwrap/TraceReplay.pm
 rename dlmalloc_c.h => lib/Devel/Mwrap/dlmalloc_c.h (99%)
 create mode 100644 lib/Devel/Mwrap/khashl.h
 create mode 100644 lib/Devel/Mwrap/trace-replay.h
 create mode 100644 lib/Devel/Mwrap/trace_struct.h
 create mode 100644 script/mwrap-trace-replay
 create mode 100644 trace.h

diff --git a/MANIFEST b/MANIFEST
index cf42979..5af61f4 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -6,7 +6,6 @@ Makefile.PL
 Mwrap.xs
 README
 check.h
-dlmalloc_c.h
 examples/mwrap.psgi
 exe.sh
 gcc.h
@@ -15,6 +14,11 @@ jhash.h
 lib/Devel/Mwrap.pm
 lib/Devel/Mwrap/PSGI.pm
 lib/Devel/Mwrap/Rproxy.pm
+lib/Devel/Mwrap/TraceReplay.pm
+lib/Devel/Mwrap/dlmalloc_c.h
+lib/Devel/Mwrap/khashl.h
+lib/Devel/Mwrap/trace-replay.h
+lib/Devel/Mwrap/trace_struct.h
 mwrap_core.h
 mymalloc.h
 picohttpparser.h
@@ -23,9 +27,11 @@ ppport.h
 script/mwrap-decode-csv
 script/mwrap-perl
 script/mwrap-rproxy
+script/mwrap-trace-replay
 t/httpd-unit.t
 t/httpd.t
 t/mwrap.t
 t/source_location.perl
 t/test_common.perl
+trace.h
 typemap
diff --git a/Makefile.PL b/Makefile.PL
index 41e8f03..c1c21de 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -84,7 +84,7 @@ push @writemakefile_args, (
 	PREREQ_PM => {},
 	ABSTRACT_FROM => 'lib/Devel/Mwrap.pm',
 	EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy
-		script/mwrap-decode-csv)],
+		script/mwrap-decode-csv script/mwrap-trace-replay)],
 	AUTHOR => 'mwrap hackers <mwrap-perl@80x24.org>',
 	LIBS => $LIBS, # e.g. -lurcu-cds
 	LICENSE => 'gpl_2', # GPL-3.0+, CPAN::Meta::Spec limitation
diff --git a/httpd.h b/httpd.h
index 8a105aa..3594eb4 100644
--- a/httpd.h
+++ b/httpd.h
@@ -332,6 +332,72 @@ static enum mw_qev h1_do_reset(struct mw_h1 *h1)
 	return h1_res_oneshot(h1, r200, sizeof(r200) - 1);
 }
 
+static enum mw_qev h1_trace_too_long(struct mw_h1 *h1)
+{
+	static const char r500[] = "HTTP/1.1 500 Error\r\n"
+		"Content-Type: text/plain\r\n"
+		"Connection: close\r\n"
+		"Content-Length: 9\r\n\r\n" "too long\n";
+	return h1_res_oneshot(h1, r500, sizeof(r500) - 1);
+}
+
+static enum mw_qev h1_trace_on(struct mw_h1 *h1)
+{
+	int e = trace_on(getenv("MWRAP"));
+	if (e == ENAMETOOLONG) {
+		return h1_trace_too_long(h1);
+	} else if (e == EBUSY) {
+		static const char r500[] = "HTTP/1.1 500 Error\r\n"
+			"Content-Type: text/plain\r\n"
+			"Connection: close\r\n"
+			"Content-Length: 12\r\n\r\n" "double open\n";
+		return h1_res_oneshot(h1, r500, sizeof(r500) - 1);
+	} else if (e) {
+		static const char r500[] = "HTTP/1.1 500 Error\r\n"
+			"Content-Type: text/plain\r\n"
+			"Connection: close\r\n"
+			"Content-Length: 11\r\n\r\n" "open error\n";
+		return h1_res_oneshot(h1, r500, sizeof(r500) - 1);
+	}
+	static const char r200[] = "HTTP/1.1 200 OK\r\n"
+		"Content-Type: text/plain\r\n"
+		"Connection: close\r\n"
+		"Content-Length: 8\r\n\r\n" "tracing\n";
+	return h1_res_oneshot(h1, r200, sizeof(r200) - 1);
+}
+
+static enum mw_qev h1_trace_off(struct mw_h1 *h1)
+{
+	int fd = uatomic_xchg(&mwrap_trace_fd, -1); /* take the fd, store -1 */
+	if (fd >= 0) {
+		synchronize_rcu(); /* presumably lets in-flight tracers finish */
+		CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+		mstate ms;
+		cds_list_for_each_entry(ms, &arenas_active, arena_node)
+			trace_flush_fd(fd, ms);
+		cds_list_for_each_entry(ms, &arenas_unused, arena_node)
+			trace_flush_fd(fd, ms);
+		CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+		close(fd); /* only after both arena lists were flushed to it */
+
+		static const char r200[] = "HTTP/1.1 200 OK\r\n"
+			"Content-Type: text/plain\r\n"
+			"Connection: close\r\n"
+			"Content-Length: 10\r\n\r\n" "trace off\n";
+		return h1_res_oneshot(h1, r200, sizeof(r200) - 1);
+	}
+	static const char r500[] = "HTTP/1.1 500 Error\r\n" /* fd was already -1 */
+		"Content-Type: text/plain\r\n"
+		"Connection: close\r\n"
+		"Content-Length: 9\r\n\r\n" "not open\n";
+	return h1_res_oneshot(h1, r500, sizeof(r500) - 1);
+}
+
+static enum mw_qev h1_toggle_trace(struct mw_h1 *h1)
+{
+	return mwrap_trace_fd < 0 ? h1_trace_on(h1) : h1_trace_off(h1);
+}
+
 static enum mw_qev h1_do_trim(struct mw_h1 *h1)
 {
 	static const char r200[] = "HTTP/1.1 200 OK\r\n"
@@ -806,8 +872,12 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r)
 			return pid_root(h1, h1r);
 		}
 	} else if (h1r->method_len == 4 && !memcmp(h1r->method, "POST", 4)) {
-		if (h1r->path_len == 6 && !memcmp(h1r->path, "/reset", 6))
-			return h1_do_reset(h1);
+		if (h1r->path_len == 6) {
+			if (!memcmp(h1r->path, "/reset", 6))
+				return h1_do_reset(h1);
+			if (!memcmp(h1r->path, "/trace", 6))
+				return h1_toggle_trace(h1);
+		}
 		if (h1r->path_len == 5 && !memcmp(h1r->path, "/trim", 5))
 			return h1_do_trim(h1);
 		if (h1r->path_len == 4 && !memcmp(h1r->path, "/ctl", 4))
@@ -1376,3 +1446,4 @@ static void h1d_atfork_parent(void)
 	if (g_h1d.lfd < 0)
 		h1d_start();
 }
+
diff --git a/lib/Devel/Mwrap/TraceReplay.pm b/lib/Devel/Mwrap/TraceReplay.pm
new file mode 100644
index 0000000..bb2551b
--- /dev/null
+++ b/lib/Devel/Mwrap/TraceReplay.pm
@@ -0,0 +1,80 @@
+# Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+#
+# Just-ahead-of-time builder for lib/Devel/Mwrap/trace-replay.h
+# I never want users to be without source code for repairs, so this
+# aims to replicate the feel of a scripting language using C.
+# The resulting executable is not linked to Perl in any way.
+package Devel::Mwrap::TraceReplay;
+use v5.12;
+use autodie;
+use Config;
+use Fcntl qw(LOCK_EX);
+my $dir = ($ENV{XDG_CACHE_HOME} //
+  (($ENV{HOME} // die('HOME unset')).'/.cache')).'/mwrap/trace-replay';
+my $bin = "$dir/trace-replay-$Config{archname}";
+my ($srcpfx) = (__FILE__ =~ m!\A(.+/)[^/]+\z!);
+my @srcs = map { $srcpfx.$_ } qw(trace-replay.h
+		dlmalloc_c.h khashl.h trace_struct.h);
+my $ldflags = '-Wl,-O1';
+$ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd';
+
+my $xflags = ($ENV{CFLAGS} // '-Wall -ggdb3 -pipe') . ' ' .
+	($ENV{LDFLAGS} // $ldflags);
+substr($xflags, 0, 0, '-O2 ') if !defined($ENV{CFLAGS}) && !-w __FILE__;
+my $cc = $ENV{CC} // $Config{cc} // 'c99';
+
+sub build () {
+	if (!-d $dir) {
+		require File::Path;
+		File::Path::make_path($dir);
+	}
+	my ($prog) = ($bin =~ m!/([^/]+)\z!);
+	my $fn = "$dir/$prog.c";
+	open my $fh, '>', $fn;
+	until (flock($fh, LOCK_EX)) { die "LOCK_EX: $fn: $!" if !$!{EINTR} }
+	say $fh qq{#include "trace-replay.h"};
+	$fh->flush or die "flush: $!";
+	my $pkg_config = $ENV{PKG_CONFIG} // 'pkg-config';
+	chomp(my $fl = `$pkg_config  liburcu-cds --libs --cflags`);
+	$^O eq 'netbsd' and $fl =~ s/(\A|[ \t])\-L([^ \t]+)([ \t]|\z)/
+				"$1-L$2 -Wl,-rpath=$2$3"/egsx;
+	my @xflags = split(' ', "$fl $xflags"); # ' ' awk-mode eats leading WS
+	my @cflags = ('-I', $srcpfx, grep(!/\A-(?:Wl|l|L)/, @xflags));
+	my @cmd = ($cc, '-o', "$dir/$prog.o", '-c', $fn, @cflags);
+	system(@cmd) and die "E: @cmd: \$?=$?";
+	@cmd = ($cc, '-o', "$dir/$prog.tmp", "$dir/$prog.o", @xflags);
+	system(@cmd) and die "E: @cmd: \$?=$?";
+	unlink $fn, "$dir/$prog.o";
+	open my $xfh, '>', "$dir/XFLAGS.tmp";
+	say $xfh $xflags;
+	close $xfh;
+	rename("$dir/$_.tmp", "$dir/$_") for ($prog, qw(XFLAGS));
+}
+
+sub needs_rebuild () { # true if XFLAGS is missing or differs from $xflags
+	CORE::open(my $fh, '<', "$dir/XFLAGS") or return 1; # autodie would die
+	chomp(my $prev = <$fh>);
+	$prev ne $xflags;
+}
+
+sub check_build () { # (re)build $bin when missing, stale, or flags changed
+	use Time::HiRes qw(stat); # compile-time import of the hi-res stat()
+	my $ctime = 0;
+	my @bin = stat($bin) or return build(); # no binary yet
+	for (@srcs) {
+		my @st = stat($_) or die "stat $_: $!";
+		if ($st[10] > $ctime) { # [10] is ctime
+			$ctime = $st[10];
+			return build() if $ctime > $bin[10]; # source changed after binary
+		}
+	}
+	needs_rebuild() ? build() : 0; # sources unchanged; compare saved flags
+}
+
+sub run (@) { # build if needed, then replace this process with $bin(@_)
+	check_build();
+	exec { $bin } $bin, @_ or die "exec $bin: $!"; # {block} form: no shell
+}
+
+1;
diff --git a/dlmalloc_c.h b/lib/Devel/Mwrap/dlmalloc_c.h
similarity index 99%
rename from dlmalloc_c.h
rename to lib/Devel/Mwrap/dlmalloc_c.h
index 5aa9e94..cd2f7f5 100644
--- a/dlmalloc_c.h
+++ b/lib/Devel/Mwrap/dlmalloc_c.h
@@ -2607,6 +2607,8 @@ struct malloc_state {
   MLOCK_T    mutex;     /* locate lock among fields that rarely change */
 #endif /* USE_LOCKS */
   msegment   seg;
+  size_t trace_wfill;
+  char trace_wbuf[BUFSIZ];
   struct cds_list_head arena_node;	/* cold */
   struct cds_wfcq_tail remote_free_tail;
 };
diff --git a/lib/Devel/Mwrap/khashl.h b/lib/Devel/Mwrap/khashl.h
new file mode 100644
index 0000000..474f675
--- /dev/null
+++ b/lib/Devel/Mwrap/khashl.h
@@ -0,0 +1,454 @@
+/* The MIT License
+
+   Copyright (c) 2019-2023 by Attractive Chaos <attractor@live.co.uk>
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+#ifndef __AC_KHASHL_H
+#define __AC_KHASHL_H
+
+#define AC_VERSION_KHASHL_H "0.2"
+
+typedef uint32_t khint32_t;
+typedef uint64_t khint64_t;
+
+typedef khint32_t khint_t;
+typedef khint_t khiter_t;
+
+#define kh_inline inline /* portably handled elsewhere */
+#define KH_LOCAL static kh_inline
+
+/****************************
+ * Simple private functions *
+ ****************************/
+
+#define __kh_used(flag, i)       (flag[i>>5] >> (i&0x1fU) & 1U)
+#define __kh_set_used(flag, i)   (flag[i>>5] |= 1U<<(i&0x1fU))
+#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU)))
+
+#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5)
+
+static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); }
+
+/*******************
+ * Hash table base *
+ *******************/
+
+#define __KHASHL_TYPE(HType, khkey_t) \
+	typedef struct HType { \
+		khint_t bits, count; \
+		khint32_t *used; \
+		khkey_t *keys; \
+	} HType;
+
+#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \
+	extern HType *prefix##_init(void); \
+	extern void prefix##_destroy(HType *h); \
+	extern void prefix##_clear(HType *h); \
+	extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \
+	extern void prefix##_resize(HType *h, khint_t new_n_buckets); \
+	extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \
+	extern int prefix##_del(HType *h, khint_t k); /* int: matches __KHASHL_IMPL_DEL */
+
+#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+	SCOPE HType *prefix##_init(void) { \
+		return (HType*)kcalloc(1, sizeof(HType)); \
+	} \
+	SCOPE void prefix##_release(HType *h) { \
+		kfree((void *)h->keys); kfree(h->used); \
+	} \
+	SCOPE void prefix##_destroy(HType *h) { \
+		if (!h) return; \
+		prefix##_release(h); \
+		kfree(h); \
+	} \
+	SCOPE void prefix##_clear(HType *h) { \
+		if (h && h->used) { \
+			khint_t n_buckets = (khint_t)1U << h->bits; \
+			memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \
+			h->count = 0; \
+		} \
+	}
+
+#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \
+		khint_t i, last, n_buckets, mask; \
+		if (h->keys == 0) return 0; \
+		n_buckets = (khint_t)1U << h->bits; \
+		mask = n_buckets - 1U; \
+		i = last = __kh_h2b(hash, h->bits); \
+		while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+			i = (i + 1U) & mask; \
+			if (i == last) return n_buckets; \
+		} \
+		return !__kh_used(h->used, i)? n_buckets : i; \
+	} \
+	SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \
+	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); }
+
+#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \
+		khint32_t *new_used = 0; \
+		khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \
+		while ((x >>= 1) != 0) ++j; \
+		if (new_n_buckets & (new_n_buckets - 1)) ++j; \
+		new_bits = j > 2? j : 2; \
+		new_n_buckets = (khint_t)1U << new_bits; \
+		if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \
+		new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \
+		n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+		if (n_buckets < new_n_buckets) { /* expand */ \
+			REALLOC_ARRAY(h->keys, new_n_buckets); \
+		} /* otherwise shrink */ \
+		new_mask = new_n_buckets - 1; \
+		for (j = 0; j != n_buckets; ++j) { \
+			khkey_t key; \
+			if (!__kh_used(h->used, j)) continue; \
+			key = h->keys[j]; \
+			__kh_set_unused(h->used, j); \
+			while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \
+				khint_t i; \
+				i = __kh_h2b(__hash_fn(key), new_bits); \
+				while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \
+				__kh_set_used(new_used, i); \
+				if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \
+					{ khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
+					__kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \
+				} else { /* write the element and jump out of the loop */ \
+					h->keys[i] = key; \
+					break; \
+				} \
+			} \
+		} \
+		if (n_buckets > new_n_buckets) /* shrink the hash table */ \
+			REALLOC_ARRAY(h->keys, new_n_buckets); \
+		kfree(h->used); /* free the working space */ \
+		h->used = new_used, h->bits = new_bits; \
+	}
+
+#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \
+		khint_t n_buckets, i, last, mask; \
+		n_buckets = h->keys? (khint_t)1U<<h->bits : 0U; \
+		*absent = -1; \
+		if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \
+			prefix##_resize(h, n_buckets + 1U); \
+			n_buckets = (khint_t)1U<<h->bits; \
+		} /* TODO: to implement automatically shrinking; resize() already support shrinking */ \
+		mask = n_buckets - 1; \
+		i = last = __kh_h2b(hash, h->bits); \
+		while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \
+			i = (i + 1U) & mask; \
+			if (i == last) break; \
+		} \
+		if (!__kh_used(h->used, i)) { /* not present at all */ \
+			h->keys[i] = *key; \
+			__kh_set_used(h->used, i); \
+			++h->count; \
+			*absent = 1; \
+		} else *absent = 0; /* Don't touch h->keys[i] if present */ \
+		return i; \
+	} \
+	SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \
+	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); }
+
+#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \
+	SCOPE int prefix##_del(HType *h, khint_t i) { \
+		khint_t j = i, k, mask, n_buckets; \
+		if (h->keys == 0) return 0; \
+		n_buckets = (khint_t)1U<<h->bits; \
+		mask = n_buckets - 1U; \
+		while (1) { \
+			j = (j + 1U) & mask; \
+			if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \
+			k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \
+			if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \
+				h->keys[i] = h->keys[j], i = j; \
+		} \
+		__kh_set_unused(h->used, i); \
+		--h->count; \
+		return 1; \
+	}
+
+#define KHASHL_DECLARE(HType, prefix, khkey_t) \
+	__KHASHL_TYPE(HType, khkey_t) \
+	__KHASHL_PROTOTYPES(HType, prefix, khkey_t)
+
+/* compatibility wrappers to make khash -> khashl migration easier */
+#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \
+	typedef HType HType##_t; \
+	SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \
+	SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \
+	SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \
+	SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \
+	SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t key) { \
+		return prefix##_get(h, key); \
+	} \
+	SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \
+		prefix##_resize(h, new_n_buckets); \
+	} \
+	SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \
+		return prefix##_put(h, key, absent); \
+	} \
+	SCOPE int kh_del_##prefix(HType *h, khint_t i) { \
+		return prefix##_del(h, i); \
+	}
+
+#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	__KHASHL_TYPE(HType, khkey_t) \
+	__KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \
+	__KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	__KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	__KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	__KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn)
+
+/***************************
+ * Ensemble of hash tables *
+ ***************************/
+
+typedef struct {
+	khint_t sub, pos;
+} kh_ensitr_t;
+
+#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \
+	typedef struct HType { \
+		khint64_t count:54, bits:8; \
+		HType##_sub *sub; \
+	} HType; \
+	SCOPE HType *prefix##_init(int bits) { \
+		HType *g; \
+		g = (HType*)kcalloc(1, sizeof(*g)); \
+		g->bits = bits; \
+		g->sub = (HType##_sub*)kcalloc(1U<<bits, sizeof(*g->sub)); \
+		return g; \
+	} \
+	SCOPE void prefix##_destroy(HType *g) { \
+		int t; \
+		if (!g) return; \
+		for (t = 0; t < 1<<g->bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \
+		kfree(g->sub); kfree(g); \
+	} \
+	SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \
+		khint_t hash, low, ret; \
+		kh_ensitr_t r; \
+		HType##_sub *h; \
+		hash = __hash_fn(*key); \
+		low = hash & ((1U<<g->bits) - 1); \
+		h = &g->sub[low]; \
+		ret = prefix##_sub_getp_core(h, key, hash); \
+		if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \
+		else r.sub = low, r.pos = ret; \
+		return r; \
+	} \
+	SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \
+	SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \
+		khint_t hash, low, ret; \
+		kh_ensitr_t r; \
+		HType##_sub *h; \
+		hash = __hash_fn(*key); \
+		low = hash & ((1U<<g->bits) - 1); \
+		h = &g->sub[low]; \
+		ret = prefix##_sub_putp_core(h, key, hash, absent); \
+		if (*absent) ++g->count; \
+		if (ret == 1U<<h->bits) r.sub = low, r.pos = (khint_t)-1; \
+		else r.sub = low, r.pos = ret; \
+		return r; \
+	} \
+	SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \
+	SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \
+		HType##_sub *h = &g->sub[itr.sub]; \
+		int ret; \
+		ret = prefix##_sub_del(h, itr.pos); \
+		if (ret) --g->count; \
+		return ret; \
+	}
+
+/*****************************
+ * More convenient interface *
+ *****************************/
+
+#define __kh_packed /* noop, we use -Werror=address-of-packed-member */
+#define __kh_cached_hash(x) ((x).hash)
+
+#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \
+	static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \
+	static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \
+	KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \
+	SCOPE HType *prefix##_init(void) { return prefix##_s_init(); } \
+	SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \
+	SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \
+	SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \
+	SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \
+	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \
+	SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \
+	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \
+	__KHASH_COMPAT(SCOPE, HType, prefix, khkey_t)
+
+#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+	typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+	static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+	static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+	KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+	SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \
+	SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \
+	SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+	SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \
+	SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \
+	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+	SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \
+	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \
+	__KHASH_COMPAT(SCOPE, HType, prefix, khkey_t)
+
+#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \
+	typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \
+	static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+	KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \
+	SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \
+	SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \
+	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \
+	SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \
+	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); }
+
+#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+	typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \
+	static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \
+	KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \
+	SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \
+	SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \
+	SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \
+	SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \
+	SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); }
+
+#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \
+	typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \
+	static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \
+	static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \
+	KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \
+	SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \
+	SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \
+	SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \
+	SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \
+	SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); }
+
+/**************************
+ * Public macro functions *
+ **************************/
+
+#define kh_bucket(h, x) ((h)->keys[x])
+
+/*! @function
+  @abstract     Get the number of elements in the hash table
+  @param  h     Pointer to the hash table
+  @return       Number of elements in the hash table [khint_t]
+ */
+#define kh_size(h) ((h)->count)
+
+#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U)
+
+/*! @function
+  @abstract     Get the end iterator
+  @param  h     Pointer to the hash table
+  @return       The end iterator [khint_t]
+ */
+#define kh_end(h) kh_capacity(h)
+
+/*! @function
+  @abstract     Get key given an iterator
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Key [type of keys]
+ */
+#define kh_key(h, x) ((h)->keys[x].key)
+
+/*! @function
+  @abstract     Get value given an iterator
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       Value [type of values]
+  @discussion   For hash sets, calling this results in segfault.
+ */
+#define kh_val(h, x) ((h)->keys[x].val)
+
+/*! @function
+  @abstract     Test whether a bucket contains data.
+  @param  h     Pointer to the hash table
+  @param  x     Iterator to the bucket [khint_t]
+  @return       1 if containing data; 0 otherwise [int]
+ */
+#define kh_exist(h, x) __kh_used((h)->used, (x))
+
+#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos)
+#define kh_ens_is_end(x) ((x).pos == (khint_t)-1)
+#define kh_ens_size(g) ((g)->count)
+
+/**************************************
+ * Common hash and equality functions *
+ **************************************/
+
+#define kh_eq_generic(a, b) ((a) == (b))
+#define kh_eq_str(a, b) (strcmp((a), (b)) == 0)
+#define kh_hash_dummy(x) ((khint_t)(x))
+
+static kh_inline khint_t kh_hash_uint32(khint_t key) {
+	key += ~(key << 15);
+	key ^=  (key >> 10);
+	key +=  (key << 3);
+	key ^=  (key >> 6);
+	key += ~(key << 11);
+	key ^=  (key >> 16);
+	return key;
+}
+
+static kh_inline khint_t kh_hash_uint64(khint64_t key) {
+	key = ~key + (key << 21);
+	key = key ^ key >> 24;
+	key = (key + (key << 3)) + (key << 8);
+	key = key ^ key >> 14;
+	key = (key + (key << 2)) + (key << 4);
+	key = key ^ key >> 28;
+	key = key + (key << 31);
+	return (khint_t)key;
+}
+
+#define KH_FNV_SEED 11
+
+static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */
+	khint_t h = KH_FNV_SEED ^ 2166136261U;
+	const unsigned char *t = (const unsigned char*)s;
+	for (; *t; ++t)
+		h ^= *t, h *= 16777619;
+	return h;
+}
+
+static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) {
+	khint_t h = KH_FNV_SEED ^ 2166136261U;
+	int i;
+	for (i = 0; i < len; ++i)
+		h ^= s[i], h *= 16777619;
+	return h;
+}
+
+#endif /* __AC_KHASHL_H */
diff --git a/lib/Devel/Mwrap/trace-replay.h b/lib/Devel/Mwrap/trace-replay.h
new file mode 100644
index 0000000..dfd443e
--- /dev/null
+++ b/lib/Devel/Mwrap/trace-replay.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+ * single-threaded trace replayer, no runtime dependency on Perl
+ * nor the rest of mwrap (aside from the hacked up dlmalloc).
+ */
+#define _LGPL_SOURCE /* allows URCU to inline some stuff */
+#define _GNU_SOURCE
+/* knobs for dlmalloc */
+#define HAVE_MORECORE 0
+#define DEFAULT_GRANULARITY (8U * 1024U * 1024U)
+#define FOOTERS 1 /* required for remote_free_* stuff */
+#define USE_DL_PREFIX
+#define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+#define DLMALLOC_EXPORT static inline
+/* #define NO_MALLOC_STATS 1 */
+#define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+#include <errno.h>
+#include <err.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+#include <sys/types.h>
+#ifdef __GLIBC__
+extern void __attribute__((weak)) malloc_stats(void);
+extern void __attribute__((weak)) malloc_info(int, FILE *);
+#	define MALLOC_STATS() do { \
+		if (malloc_stats) malloc_stats(); \
+		if (malloc_info) malloc_info(0, stderr); \
+	} while (0)
+#else // TODO: jemalloc
+#	define MALLOC_STATS() do {} while (0)
+#endif
+#include <urcu/rculist.h>
+#include <urcu/wfcqueue.h>
+#include "dlmalloc_c.h"
+static mstate tr_ms;
+
+static void *my_calloc(size_t nmemb, size_t size)
+{
+	void *p = mspace_calloc(tr_ms, nmemb, size);
+	if (!p) err(1, "calloc");
+	return p;
+}
+
+#define kcalloc(N,Z) my_calloc(N, Z)
+#define kfree(P) mspace_free(tr_ms, P)
+#define REALLOC_ARRAY(x, nmemb) do { \
+	size_t asize; \
+	if (__builtin_mul_overflow(sizeof(*(x)), nmemb, &asize)) \
+		errx(1, "mul_overflow"); \
+	(x) = mspace_realloc(tr_ms, (x), asize); \
+	if (!x) err(1, "realloc"); \
+} while (0)
+#include "khashl.h"
+#include "trace_struct.h"
+
+static inline khint_t hash_uptr(uintptr_t p)
+{
+	return sizeof(uintptr_t) == 4 ? kh_hash_uint32(p) : kh_hash_uint64(p);
+}
+
+KHASHL_MAP_INIT(KH_LOCAL, kh_ptrmap, ptrmap, uintptr_t, uintptr_t,
+		hash_uptr, kh_eq_generic)
+
+static kh_ptrmap *old2cur;
+
+static void store_ptr(uintptr_t old, void *cur)
+{
+	int absent;
+	khint_t k = ptrmap_put(old2cur, old, &absent);
+	if (absent)
+		kh_val(old2cur, k) = (uintptr_t)cur;
+}
+
+int main(int argc, char *argv[])
+{
+	tr_ms = create_mspace(0, 0);
+	tr_ms->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+	disable_contiguous(tr_ms);
+	size_t realloc_miss = 0, free_miss = 0, bad_entry = 0;
+	size_t realloc_hit = 0;
+	union {
+		struct tr_memalign do_memalign;
+		struct tr_free do_free;
+		struct tr_malloc do_malloc;
+		struct tr_calloc do_calloc;
+		struct tr_realloc do_realloc;
+	} as;
+	int truncated = 0;
+
+	old2cur = ptrmap_init();
+
+	// don't fill buf all the way so we can do small reads in ENSURE_FILL:
+	while (!feof(stdin) && !truncated) {
+
+#define CONSUME(dst, required) { /* bare block so `break' reaches the caller */ \
+	size_t need = sizeof(dst); \
+	char *buf = (char *)&dst; \
+	int done = 0; \
+	while (need) { \
+		size_t n = fread(buf, 1, need, stdin); \
+		if (n > 0) { \
+			need -= n; buf += n; /* resume after a short read */ \
+		} else if (n == 0 && !required) { \
+			done = 1; /* EOF on an optional field: stop quietly */ \
+			break; \
+		} else { \
+			warnx("TRUNCATED: %zu != %zu", n, need); \
+			done = truncated = 1; \
+			break; \
+		} \
+	} \
+	if (done) break; /* exits the enclosing while loop or switch case */ \
+}
+		CONSUME(as.do_free.ptr, false);
+		enum tr_fn fn = as.do_free.ptr & TR_MASK;
+		as.do_free.ptr &= ~TR_MASK;
+		khint_t k;
+		void *cur;
+
+		switch (fn) {
+		case TR_FREE:
+			k = ptrmap_get(old2cur, as.do_free.ptr);
+			if (k >= kh_end(old2cur)) {
+				++free_miss;
+			} else {
+				free((void *)kh_val(old2cur, k));
+				ptrmap_del(old2cur, k);
+			}
+			break;
+		case TR_MALLOC:
+			CONSUME(as.do_malloc.size, true);
+			cur = malloc(as.do_malloc.size);
+			if (!cur)
+				err(1, "malloc(%zu) => %p",
+					as.do_malloc.size,
+					(void *)as.do_malloc.ret);
+			store_ptr(as.do_malloc.ret, cur);
+
+			break;
+		case TR_CALLOC:
+			CONSUME(as.do_calloc.size, true);
+			cur = calloc(as.do_calloc.size, 1);
+			if (!cur)
+				err(1, "calloc(%zu) => %p",
+					as.do_calloc.size,
+					(void *)as.do_calloc.ret);
+			store_ptr(as.do_calloc.ret, cur);
+
+			break;
+		case TR_REALLOC:
+			cur = NULL;
+			CONSUME(as.do_realloc.ptr, true);
+			CONSUME(as.do_realloc.size, true);
+			if (as.do_realloc.ptr) {
+				k = ptrmap_get(old2cur,
+						as.do_realloc.ptr);
+				if (k >= kh_end(old2cur)) {
+					realloc_miss++;
+				} else {
+					realloc_hit++;
+					cur = (void *)
+						kh_val(old2cur, k);
+					ptrmap_del(old2cur, k);
+				}
+			}
+			void *rp = realloc(cur, as.do_realloc.size);
+			if (!rp)
+				err(1, "realloc(%p => %p, %zu) => %p",
+					(void *)as.do_realloc.ptr,
+					cur,
+					as.do_realloc.size,
+					(void *)as.do_realloc.ret);
+			store_ptr(as.do_realloc.ret, rp);
+			break;
+		case TR_MEMALIGN:
+			cur = NULL;
+			CONSUME(as.do_memalign.alignment, true);
+			CONSUME(as.do_memalign.size, true);
+			int rc = posix_memalign(&cur,
+					as.do_memalign.alignment,
+					as.do_memalign.size);
+			if (rc) {
+				errno = rc;
+				err(1, "posix_memalign(%zu, %zu) => %p",
+					as.do_memalign.alignment,
+					as.do_memalign.size,
+					(void *)as.do_memalign.ret);
+			}
+			store_ptr(as.do_memalign.ret, cur);
+			break;
+		default:
+			bad_entry++;
+		}
+	}
+
+	if (free_miss || realloc_miss || bad_entry)
+		fprintf(stderr, "miss free=%zu realloc=%zu bad=%zu\n",
+			free_miss, realloc_miss, bad_entry);
+	fprintf(stderr, "realloc_hit=%zu\n", realloc_hit);
+	MALLOC_STATS();
+
+	int c;
+	char *end;
+	long sec = 0;
+	while ((c = getopt(argc, argv, "s:")) != -1) {
+		switch (c) {
+		case 's':
+			sec = strtol(optarg, &end, 10);
+			if (*end != 0)
+				errx(1, "`-s %s' invalid seconds", optarg);
+			break;
+		default: warnx("bad switch `-%c'", c);
+		}
+	}
+	if (sec < 0) {
+		fprintf(stderr, "# PID:%d sleeping indefinitely\n",
+			(int)getpid());
+		pause();
+	}
+	if (sec > 0) {
+		unsigned s = sec > UINT_MAX ? UINT_MAX : sec;
+		fprintf(stderr, "# PID:%d sleeping %u seconds\n",
+			(int)getpid(), s);
+		sleep(s);
+	}
+	fprintf(stderr, "truncated=%d\n", truncated);
+
+	return truncated;
+}
diff --git a/lib/Devel/Mwrap/trace_struct.h b/lib/Devel/Mwrap/trace_struct.h
new file mode 100644
index 0000000..e5fe622
--- /dev/null
+++ b/lib/Devel/Mwrap/trace_struct.h
@@ -0,0 +1,34 @@
+enum tr_fn {
+	TR_FREE = 0,
+	TR_MEMALIGN = 1,
+	TR_MALLOC = 2,
+	TR_REALLOC = 3,
+	TR_CALLOC = 4,
+};
+static const uintptr_t TR_MASK = 7;
+
+struct tr_memalign {
+	uintptr_t ret;
+	size_t alignment;
+	size_t size;
+};
+
+struct tr_free {
+	uintptr_t ptr;
+};
+
+struct tr_malloc {
+	uintptr_t ret;
+	size_t size;
+};
+
+struct tr_realloc {
+	uintptr_t ret;
+	uintptr_t ptr;
+	size_t size;
+};
+
+struct tr_calloc {
+	uintptr_t ret;
+	size_t size;
+};
diff --git a/mwrap_core.h b/mwrap_core.h
index 78c14e3..af73057 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -64,6 +64,8 @@
 
 #define U24_MAX (1U << 24)
 
+#include "trace.h"
+
 /*
  * Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
  * Instead, the relative age of an object is the number of total bytes
@@ -498,31 +500,37 @@ static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l)
 	return mtx;
 }
 
+static void free_notrace(void *p)
+{
+	struct alloc_hdr *h = ptr2hdr(p);
+	struct src_loc *l = h->as.live.loc;
+
+	if (l) {
+		size_t current_bytes = uatomic_read(&total_bytes_inc);
+		size_t age = current_bytes - h->as.live.gen;
+		uatomic_add(&total_bytes_dec, h->size);
+		uatomic_add(&l->freed_bytes, h->size);
+		uatomic_set(&h->size, 0);
+		uatomic_inc(&l->frees);
+		uatomic_add(&l->age_total, age);
+
+		pthread_mutex_t *mtx = src_loc_mutex_lock(l);
+		cds_list_del_rcu(&h->anode);
+		if (age > l->max_lifespan)
+			l->max_lifespan = age;
+		CHECK(int, 0, pthread_mutex_unlock(mtx));
+
+		call_rcu(&h->as.dead, free_hdr_rcu);
+	} else {
+		real_free(h->real);
+	}
+}
+
 void free(void *p)
 {
 	if (p) {
-		struct alloc_hdr *h = ptr2hdr(p);
-		struct src_loc *l = h->as.live.loc;
-
-		if (l) {
-			size_t current_bytes = uatomic_read(&total_bytes_inc);
-			size_t age = current_bytes - h->as.live.gen;
-			uatomic_add(&total_bytes_dec, h->size);
-			uatomic_add(&l->freed_bytes, h->size);
-			uatomic_set(&h->size, 0);
-			uatomic_inc(&l->frees);
-			uatomic_add(&l->age_total, age);
-
-			pthread_mutex_t *mtx = src_loc_mutex_lock(l);
-			cds_list_del_rcu(&h->anode);
-			if (age > l->max_lifespan)
-				l->max_lifespan = age;
-			CHECK(int, 0, pthread_mutex_unlock(mtx));
-
-			call_rcu(&h->as.dead, free_hdr_rcu);
-		} else {
-			real_free(h->real);
-		}
+		trace_free(p);
+		free_notrace(p);
 	}
 }
 
@@ -589,6 +597,7 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl)
 			p = ptr_align(p, alignment);
 		struct alloc_hdr *h = ptr2hdr(p);
 		alloc_insert_rcu(sl, h, size, real);
+		trace_memalign(p, alignment, size);
 		*pp = p;
 	}
 
@@ -701,7 +710,9 @@ void *malloc(size_t size)
 		SRC_LOC_BT(bt);
 		struct alloc_hdr *h = p;
 		alloc_insert_rcu(&bt.sl, h, size, h);
-		return hdr2ptr(h);
+		p = hdr2ptr(h);
+		trace_malloc(p, size);
+		return p;
 	}
 enomem:
 	errno = ENOMEM;
@@ -723,7 +734,9 @@ void *calloc(size_t nmemb, size_t size)
 		struct alloc_hdr *h = p;
 		SRC_LOC_BT(bt);
 		alloc_insert_rcu(&bt.sl, h, size, h);
-		return memset(hdr2ptr(h), 0, size);
+		p = hdr2ptr(h);
+		trace_calloc(p, size);
+		return memset(p, 0, size);
 	}
 enomem:
 	errno = ENOMEM;
@@ -747,10 +760,11 @@ void *realloc(void *ptr, size_t size)
 		SRC_LOC_BT(bt);
 		alloc_insert_rcu(&bt.sl, h, size, h);
 		p = hdr2ptr(h);
+		trace_realloc(p, ptr, size);
 		if (ptr) {
 			struct alloc_hdr *old = ptr2hdr(ptr);
 			memcpy(p, ptr, old->size < size ? old->size : size);
-			free(ptr);
+			free_notrace(ptr);
 		}
 		return p;
 	}
@@ -782,7 +796,8 @@ char **bt_syms(void * const *addrlist, uint32_t size)
 static void cleanup_free(void *any)
 {
 	void **p = any;
-	free(*p);
+	if (*p)
+		free_notrace(*p);
 }
 
 static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len);
@@ -1060,6 +1075,51 @@ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
 }
 
 static const char *mwrap_env;
+
+static int trace_on(const char *env)
+{
+	char trace_path[PATH_MAX];
+	size_t len = 0;
+
+	if (env) {
+		const char *td = strstr(env, "trace_dir:");
+		if (td) {
+			td += sizeof("trace_dir"); /* sizeof counts the NUL, so ':' is skipped */
+			const char *end = strchrnul(td, ',');
+
+			len = end - td; /* 0 for an empty value => TMPDIR fallback below */
+			if ((len + 32) >= sizeof(trace_path)) /* 32: room for file name */
+				return ENAMETOOLONG;
+			memcpy(trace_path, td, len);
+		}
+	}
+	if (!len) {
+		env = getenv("TMPDIR");
+		if (!env || !*env) { /* unset or empty: avoid trace_path[-1] below */
+			memcpy(trace_path, "/tmp", len = 4);
+		} else {
+			len = strlen(env);
+			if ((len + 32) >= sizeof(trace_path))
+				return ENAMETOOLONG;
+			memcpy(trace_path, env, len);
+		}
+	}
+	if (trace_path[len - 1] != '/')
+		trace_path[len++] = '/';
+	int rc = snprintf(trace_path + len, 32,
+			"mwrap.%d.trace", (int)getpid());
+	if (rc < 0 || rc >= 32)
+		return ENAMETOOLONG;
+	int fd = open(trace_path, O_CLOEXEC|O_CREAT|O_APPEND|O_WRONLY, 0666);
+	if (fd < 0)
+		return errno;
+	if (uatomic_cmpxchg(&mwrap_trace_fd, -1, fd) != -1) {
+		close(fd);
+		return EBUSY;
+	}
+	return 0;
+}
+
 #include "httpd.h"
 
 __attribute__((constructor)) static void mwrap_ctor(void)
@@ -1089,7 +1149,11 @@ __attribute__((constructor)) static void mwrap_ctor(void)
 		call_rcu(&h->as.dead, free_hdr_rcu);
 	} else
 		perror("malloc");
-
+	if (mwrap_env && strstr(mwrap_env, "trace:1")) {
+		int e = trace_on(mwrap_env);
+		if (e)
+			fprintf(stderr, "trace failed: %s\n", strerror(e));
+	}
 	h1d_start();
 	CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
 	CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
diff --git a/mymalloc.h b/mymalloc.h
index 4dd2ee6..8acf573 100644
--- a/mymalloc.h
+++ b/mymalloc.h
@@ -50,7 +50,7 @@
 #define DLMALLOC_EXPORT static inline
 /* #define NO_MALLOC_STATS 1 */
 #define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
-#include "dlmalloc_c.h"
+#include "lib/Devel/Mwrap/dlmalloc_c.h"
 #undef ABORT /* conflicts with Perl */
 #undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
 #undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
@@ -64,9 +64,17 @@ static CDS_LIST_HEAD(arenas_active);
 static CDS_LIST_HEAD(arenas_unused);
 
 /* called on pthread exit */
+static void trace_flush_fd(int, mstate);
+static int mwrap_trace_fd = -1; // httpd.h sets this
+
 ATTR_COLD static void mstate_tsd_dtor(void *p)
 {
 	mstate ms = p;
+	if (ms) {
+		int fd = uatomic_read(&mwrap_trace_fd);
+		if (fd >= 0)
+			trace_flush_fd(fd, ms);
+	}
 
 	/*
 	 * In case another destructor calls free (or any allocation function,
diff --git a/script/mwrap-trace-replay b/script/mwrap-trace-replay
new file mode 100644
index 0000000..433480b
--- /dev/null
+++ b/script/mwrap-trace-replay
@@ -0,0 +1,7 @@
+#!perl -w
+# Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
+use v5.12;
+use Devel::Mwrap::TraceReplay;
+(-f STDIN || -p STDIN) or die "Usage: $0 </path/to/mwrap.\$PID.trace\n";
+Devel::Mwrap::TraceReplay::run @ARGV;
diff --git a/t/httpd.t b/t/httpd.t
index 76fe7d1..244f8da 100644
--- a/t/httpd.t
+++ b/t/httpd.t
@@ -6,7 +6,9 @@ use IO::Socket::UNIX;
 use Fcntl qw(F_GETFD F_SETFD FD_CLOEXEC);
 use POSIX qw(dup2 _exit mkfifo);
 BEGIN { require './t/test_common.perl' };
-my $env = { MWRAP => "socket_dir:$mwrap_tmp" };
+use autodie qw(mkdir fork);
+mkdir "$mwrap_tmp/tr";
+my $env = { MWRAP => "socket_dir:$mwrap_tmp,trace_dir:$mwrap_tmp/tr" };
 my $f1 = "$mwrap_tmp/f1";
 my $f2 = "$mwrap_tmp/f2";
 mkfifo($f1, 0600) // plan(skip_all => "mkfifo: $!");
@@ -57,7 +59,7 @@ my $cout = "$mwrap_tmp/cout";
 my @curl = (qw(curl -sf --unix-socket), $sock, '-o', $cout);
 push @curl, '-vS' if $ENV{V};
 my $rc = system(@curl, "http://0/$pid/each/2000");
-my $curl_unix;
+my ($curl_unix, $trace_file);
 SKIP: {
 	skip 'curl lacks --unix-socket support', 1 if $rc == 512;
 	is($rc, 0, 'curl /each');
@@ -76,6 +78,12 @@ SKIP: {
 	is($rc, 0, 'curl / (PID root)');
 	like(slurp($cout), qr/trimming/, 'trim started');
 	unlink($cout);
+
+	$rc = system(@curl, '-v', '-XPOST', "http://0/$pid/trace");
+	is $rc, 0, 'trace ok';
+	like(slurp($cout), qr/tracing/, 'tracing enabled');
+	$trace_file = "$mwrap_tmp/tr/mwrap.$pid.trace";
+	ok -f $trace_file, 'trace enabled';
 };
 
 {
@@ -181,8 +189,26 @@ SKIP: {
 
 	$rc = system(@curl, qw(-HX-Mwrap-BT:10 -d blah http://0/ctl));
 	is($rc >> 8, 22, '404 w/o PID prefix');
-};
 
+	$rc = system(@curl, '-v', '-XPOST', "http://0/$pid/trace");
+	is $rc, 0, 'trace disabled';
+	like(slurp($cout), qr/trace off/, 'tracing disabled');
+	ok -s $trace_file, 'trace file data';
+
+	my @replay = ($^X, '-w', './blib/script/mwrap-trace-replay');
+	my $trace_out = "$mwrap_tmp/tr.out";
+	my $tr_pid = fork;
+	if ($tr_pid == 0) {
+		open STDIN, '<', $trace_file;
+		open STDOUT, '+>>', $trace_out;
+		open STDERR, '+>>', $trace_out;
+		exec @replay;
+		die "exec: $!";
+	}
+	waitpid($tr_pid, 0);
+	is $?, 0, 'trace replay';
+	diag slurp($trace_out);
+};
 
 diag slurp($cout) if $ENV{V};
 $cleanup->();
diff --git a/trace.h b/trace.h
new file mode 100644
index 0000000..7b6946d
--- /dev/null
+++ b/trace.h
@@ -0,0 +1,66 @@
+#include "lib/Devel/Mwrap/trace_struct.h"
+
+static void trace_flush_fd(int fd, mstate ms)
+{
+	size_t n = uatomic_xchg(&ms->trace_wfill, 0);
+	if (n) write(fd, &ms->trace_wbuf, n);
+}
+
+#define TRACE_WRITE(buf) do { \
+	rcu_read_lock(); \
+	int fd = uatomic_read(&mwrap_trace_fd); \
+	if (fd >= 0) { \
+		mstate ms = ms_tsd; \
+		if ((ms->trace_wfill + (sizeof(uintptr_t) * 4)) >= \
+						sizeof(ms->trace_wbuf)) \
+			trace_flush_fd(fd, ms); \
+		size_t n = ms->trace_wfill; \
+		memcpy(ms->trace_wbuf + n, &buf, sizeof(buf)); \
+		uatomic_add(&ms->trace_wfill, sizeof(buf)); \
+	} \
+	rcu_read_unlock(); \
+} while (0)
+
+static void trace_memalign(const void *ret, size_t alignment, size_t size)
+{
+	struct tr_memalign buf = {
+		.ret = (uintptr_t)ret | TR_MEMALIGN,
+		.alignment = alignment,
+		.size = size
+	};
+	TRACE_WRITE(buf);
+}
+
+static void trace_free(const void *ptr)
+{
+	struct tr_free buf = { .ptr = (uintptr_t)ptr | TR_FREE };
+	TRACE_WRITE(buf);
+}
+
+static void trace_malloc(const void *ret, size_t size)
+{
+	struct tr_malloc buf = {
+		.ret = (uintptr_t)ret | TR_MALLOC,
+		.size = size
+	};
+	TRACE_WRITE(buf);
+}
+
+static void trace_realloc(const void *ret, const void *ptr, size_t size)
+{
+	struct tr_realloc buf = {
+		.ret = (uintptr_t)ret | TR_REALLOC,
+		.ptr = (uintptr_t)ptr,
+		.size = size
+	};
+	TRACE_WRITE(buf);
+}
+
+static void trace_calloc(const void *ret, size_t size)
+{
+	struct tr_calloc buf = {
+		.ret = (uintptr_t)ret | TR_CALLOC,
+		.size = size
+	};
+	TRACE_WRITE(buf);
+}

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/4] realloc
  2024-04-05 21:05 [PATCH 1/4] support malloc tracing Eric Wong
@ 2024-04-05 21:05 ` Eric Wong
  2024-04-05 21:05 ` [PATCH 3/4] gzip Eric Wong
  2024-04-05 21:05 ` [PATCH 4/4] tracecompress Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-04-05 21:05 UTC (permalink / raw)
  To: spew

---
 lib/Devel/Mwrap/trace-replay.h | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/lib/Devel/Mwrap/trace-replay.h b/lib/Devel/Mwrap/trace-replay.h
index dfd443e..a2b0e50 100644
--- a/lib/Devel/Mwrap/trace-replay.h
+++ b/lib/Devel/Mwrap/trace-replay.h
@@ -25,13 +25,17 @@
 #ifdef __GLIBC__
 extern void __attribute__((weak)) malloc_stats(void);
 extern void __attribute__((weak)) malloc_info(int, FILE *);
-#	define MALLOC_STATS() do { \
-		if (malloc_stats) malloc_stats(); \
+#	define GLIBC_MALLOC_STATS() do { \
 		if (malloc_info) malloc_info(0, stderr); \
+		if (malloc_stats) malloc_stats(); \
 	} while (0)
-#else // TODO: jemalloc
-#	define MALLOC_STATS() do {} while (0)
+#else
+#	define GLIBC_MALLOC_STATS() do {} while (0)
 #endif
+
+extern void __attribute__((weak)) malloc_stats_print(
+	void (*wcb)(void *, const char *), void *, const char *opts);
+
 #include <urcu/rculist.h>
 #include <urcu/wfcqueue.h>
 #include "dlmalloc_c.h"
@@ -80,7 +84,6 @@ int main(int argc, char *argv[])
 	tr_ms->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
 	disable_contiguous(tr_ms);
 	size_t realloc_miss = 0, free_miss = 0, bad_entry = 0;
-	size_t realloc_hit = 0;
 	union {
 		struct tr_memalign do_memalign;
 		struct tr_free do_free;
@@ -160,9 +163,7 @@ int main(int argc, char *argv[])
 				if (k >= kh_end(old2cur)) {
 					realloc_miss++;
 				} else {
-					realloc_hit++;
-					cur = (void *)
-						kh_val(old2cur, k);
+					cur = (void *)kh_val(old2cur, k);
 					ptrmap_del(old2cur, k);
 				}
 			}
@@ -199,8 +200,11 @@ int main(int argc, char *argv[])
 	if (free_miss || realloc_miss || bad_entry)
 		fprintf(stderr, "miss free=%zu realloc=%zu bad=%zu\n",
 			free_miss, realloc_miss, bad_entry);
-	fprintf(stderr, "realloc_hit=%zu\n", realloc_hit);
-	MALLOC_STATS();
+
+	if (malloc_stats_print) // jemalloc loaded
+		malloc_stats_print(NULL, NULL, NULL);
+	else
+		GLIBC_MALLOC_STATS();
 
 	int c;
 	char *end;
@@ -226,7 +230,6 @@ int main(int argc, char *argv[])
 			(int)getpid(), s);
 		sleep(s);
 	}
-	fprintf(stderr, "truncated=%d\n", truncated);
 
 	return truncated;
 }

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/4] gzip
  2024-04-05 21:05 [PATCH 1/4] support malloc tracing Eric Wong
  2024-04-05 21:05 ` [PATCH 2/4] realloc Eric Wong
@ 2024-04-05 21:05 ` Eric Wong
  2024-04-05 21:05 ` [PATCH 4/4] tracecompress Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-04-05 21:05 UTC (permalink / raw)
  To: spew

---
 httpd.h                        |  8 ++++++-
 lib/Devel/Mwrap/TraceReplay.pm |  2 +-
 lib/Devel/Mwrap/dlmalloc_c.h   |  4 +++-
 lib/Devel/Mwrap/trace-replay.h |  7 ++++--
 mwrap_core.h                   | 40 +++++++++++++++++++++++++++++++---
 mymalloc.h                     |  4 ++--
 script/mwrap-trace-replay      | 15 ++++++++++++-
 t/httpd.t                      |  2 +-
 8 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/httpd.h b/httpd.h
index 3594eb4..a097e0e 100644
--- a/httpd.h
+++ b/httpd.h
@@ -1423,6 +1423,8 @@ join_thread:
 
 static void h1d_atfork_prepare(void)
 {
+	if (pthread_equal(g_h1d.tid, pthread_self()))
+		return;
 	if (uatomic_cmpxchg(&g_h1d.alive, 1, 0))
 		h1d_stop_join(&g_h1d);
 }
@@ -1443,7 +1445,11 @@ static void h1d_start(void) /* may be called as pthread_atfork child cb */
 /* must be called with global_mtx held */
 static void h1d_atfork_parent(void)
 {
-	if (g_h1d.lfd < 0)
+	if (!pthread_equal(g_h1d.tid, pthread_self()) && g_h1d.lfd < 0)
 		h1d_start();
 }
 
+static void h1d_atfork_child(void)
+{
+	if (!pthread_equal(g_h1d.tid, pthread_self())) h1d_start();
+}
diff --git a/lib/Devel/Mwrap/TraceReplay.pm b/lib/Devel/Mwrap/TraceReplay.pm
index bb2551b..fa3af7c 100644
--- a/lib/Devel/Mwrap/TraceReplay.pm
+++ b/lib/Devel/Mwrap/TraceReplay.pm
@@ -74,7 +74,7 @@ sub check_build () {
 
 sub run (@) {
 	check_build();
-	exec $bin, @_;
+	system $bin, @_;
 }
 
 1;
diff --git a/lib/Devel/Mwrap/dlmalloc_c.h b/lib/Devel/Mwrap/dlmalloc_c.h
index cd2f7f5..398f376 100644
--- a/lib/Devel/Mwrap/dlmalloc_c.h
+++ b/lib/Devel/Mwrap/dlmalloc_c.h
@@ -590,6 +590,8 @@ MAX_RELEASE_CHECK_RATE   default: 4095 unless not HAVE_MMAP
 #include <sys/types.h>  /* For size_t */
 #endif  /* LACKS_SYS_TYPES_H */
 
+#include <limits.h>
+
 /* The maximum possible size_t value has all bits set */
 #define MAX_SIZE_T           (~(size_t)0)
 
@@ -2608,7 +2610,7 @@ struct malloc_state {
 #endif /* USE_LOCKS */
   msegment   seg;
   size_t trace_wfill;
-  char trace_wbuf[BUFSIZ];
+  char trace_wbuf[PIPE_BUF];
   struct cds_list_head arena_node;	/* cold */
   struct cds_wfcq_tail remote_free_tail;
 };
diff --git a/lib/Devel/Mwrap/trace-replay.h b/lib/Devel/Mwrap/trace-replay.h
index a2b0e50..c43cc0f 100644
--- a/lib/Devel/Mwrap/trace-replay.h
+++ b/lib/Devel/Mwrap/trace-replay.h
@@ -8,7 +8,7 @@
 #define _GNU_SOURCE
 /* knobs for dlmalloc */
 #define HAVE_MORECORE 0
-#define DEFAULT_GRANULARITY (8U * 1024U * 1024U)
+#define DEFAULT_GRANULARITY (2U * 1024U * 1024U)
 #define FOOTERS 1 /* required for remote_free_* stuff */
 #define USE_DL_PREFIX
 #define ONLY_MSPACES 1 /* aka per-thread "arenas" */
@@ -198,9 +198,12 @@ int main(int argc, char *argv[])
 	}
 
 	if (free_miss || realloc_miss || bad_entry)
-		fprintf(stderr, "miss free=%zu realloc=%zu bad=%zu\n",
+		fprintf(stderr, "W: miss free=%zu realloc=%zu bad=%zu\n",
 			free_miss, realloc_miss, bad_entry);
 
+	fprintf(stderr, "# ptrmap .size=%zu capa=%zu\n",
+		(size_t)kh_size(old2cur), (size_t)kh_capacity(old2cur));
+
 	if (malloc_stats_print) // jemalloc loaded
 		malloc_stats_print(NULL, NULL, NULL);
 	else
diff --git a/mwrap_core.h b/mwrap_core.h
index af73057..2910a5a 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -36,6 +36,7 @@
 #include <urcu/rculfhash.h>
 #include <urcu/rculist.h>
 #include <limits.h>
+#include <err.h>
 
 #if MWRAP_PERL
 #	include "EXTERN.h"
@@ -1076,6 +1077,7 @@ static struct src_loc *mwrap_get_bin(const char *buf, size_t len)
 
 static const char *mwrap_env;
 
+// n.b. signals are always blocked by the caller(s) when calling this
 static int trace_on(const char *env)
 {
 	char trace_path[PATH_MAX];
@@ -1107,14 +1109,46 @@ static int trace_on(const char *env)
 	if (trace_path[len - 1] != '/')
 		trace_path[len++] = '/';
 	int rc = snprintf(trace_path + len, 32,
-			"mwrap.%d.trace", (int)getpid());
+			"mwrap.%d.trace.gz", (int)getpid());
 	if (rc < 0 || rc >= 32)
 		return ENAMETOOLONG;
 	int fd = open(trace_path, O_CLOEXEC|O_CREAT|O_APPEND|O_WRONLY, 0666);
 	if (fd < 0)
 		return errno;
-	if (uatomic_cmpxchg(&mwrap_trace_fd, -1, fd) != -1) {
-		close(fd);
+	int pfds[2];
+	if (pipe2(pfds, O_CLOEXEC) < 0)
+		return errno;
+	pid_t pid_a = fork();
+	if (pid_a < 0) {
+		err(1, "fork");
+	} else if (pid_a == 0) { // child
+		if (setsid() < 0) err(1, "setsid");
+		pid_t pid_b = fork();
+		if (pid_b < 0) {
+			err(1, "fork");
+		} else if (pid_b == 0) { // grandchild
+			unsetenv("LD_PRELOAD");
+
+			close(pfds[1]);
+			if (dup2(pfds[0], 0) < 0) err(1, "dup2");
+			close(pfds[0]);
+			if (dup2(fd, 1) < 1) err(1, "dup2");
+			close(fd);
+
+			execlp("gzip", "gzip", "-c", NULL);
+			err(1, "execlp");
+		} else {
+			_exit(0);
+		}
+	}
+	close(pfds[0]);
+	close(fd);
+	int st;
+	pid_t wpid = waitpid(pid_a, &st, 0);
+	if (wpid != pid_a) err(1, "waitpid(a)");
+	if (st) errx(1, "gzip parent failed %d", st);
+	if (uatomic_cmpxchg(&mwrap_trace_fd, -1, pfds[1]) != -1) {
+		close(pfds[1]);
 		return EBUSY;
 	}
 	return 0;
diff --git a/mymalloc.h b/mymalloc.h
index 8acf573..37771d4 100644
--- a/mymalloc.h
+++ b/mymalloc.h
@@ -94,7 +94,7 @@ ATTR_COLD static void mstate_tsd_dtor(void *p)
 /* see httpd.h */
 static void h1d_atfork_prepare(void);
 static void h1d_atfork_parent(void);
-static void h1d_start(void);
+static void h1d_atfork_child(void);
 
 ATTR_COLD static void atfork_prepare(void)
 {
@@ -132,7 +132,7 @@ ATTR_COLD static void atfork_child(void)
 	}
 	reset_mutexes();
 	call_rcu_after_fork_child();
-	h1d_start();
+	h1d_atfork_child();
 }
 
 #if defined(__GLIBC__)
diff --git a/script/mwrap-trace-replay b/script/mwrap-trace-replay
index 433480b..22cbc82 100644
--- a/script/mwrap-trace-replay
+++ b/script/mwrap-trace-replay
@@ -2,6 +2,19 @@
 # Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
 # License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt>
 use v5.12;
+use autodie;
 use Devel::Mwrap::TraceReplay;
-(-f STDIN || -p STDIN) or die "Usage: $0 </path/to/mwrap.\$PID.trace\n";
+(-f STDIN || -p STDIN) or die "Usage: $0 </path/to/mwrap.\$PID.trace.gz\n";
+pipe(my $r, my $w);
+my $gzip = $ENV{GZIP} // 'gzip';
+my $pid = fork;
+if ($pid == 0) {
+	open STDOUT, '>&', $w;
+	close $_ for ($r, $w);
+	exec $gzip, '-dc';
+	die "exec: $!";
+}
+open STDIN, '<&', $r;
+close $_ for ($r, $w);
 Devel::Mwrap::TraceReplay::run @ARGV;
+waitpid $pid, 0;
diff --git a/t/httpd.t b/t/httpd.t
index 244f8da..d7006d5 100644
--- a/t/httpd.t
+++ b/t/httpd.t
@@ -82,7 +82,7 @@ SKIP: {
 	$rc = system(@curl, '-v', '-XPOST', "http://0/$pid/trace");
 	is $rc, 0, 'trace ok';
 	like(slurp($cout), qr/tracing/, 'tracing enabled');
-	$trace_file = "$mwrap_tmp/tr/mwrap.$pid.trace";
+	$trace_file = "$mwrap_tmp/tr/mwrap.$pid.trace.gz";
 	ok -f $trace_file, 'trace enabled';
 };
 

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 4/4] tracecompress
  2024-04-05 21:05 [PATCH 1/4] support malloc tracing Eric Wong
  2024-04-05 21:05 ` [PATCH 2/4] realloc Eric Wong
  2024-04-05 21:05 ` [PATCH 3/4] gzip Eric Wong
@ 2024-04-05 21:05 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2024-04-05 21:05 UTC (permalink / raw)
  To: spew

---
 lib/Devel/Mwrap/TraceReplay.pm |  2 +-
 mwrap_core.h                   | 33 ++++++++++++++++++---
 script/mwrap-trace-replay      | 53 ++++++++++++++++++++++++++--------
 t/httpd.t                      |  4 +--
 4 files changed, 73 insertions(+), 19 deletions(-)

diff --git a/lib/Devel/Mwrap/TraceReplay.pm b/lib/Devel/Mwrap/TraceReplay.pm
index fa3af7c..bb2551b 100644
--- a/lib/Devel/Mwrap/TraceReplay.pm
+++ b/lib/Devel/Mwrap/TraceReplay.pm
@@ -74,7 +74,7 @@ sub check_build () {
 
 sub run (@) {
 	check_build();
-	system $bin, @_;
+	exec $bin, @_;
 }
 
 1;
diff --git a/mwrap_core.h b/mwrap_core.h
index 2910a5a..d236a3a 100644
--- a/mwrap_core.h
+++ b/mwrap_core.h
@@ -1082,6 +1082,9 @@ static int trace_on(const char *env)
 {
 	char trace_path[PATH_MAX];
 	size_t len = 0;
+	const char *cmpr = NULL;
+	const char *sfx = ".gz";
+	char cmpr_cmd[32];
 
 	if (env) {
 		const char *td = strstr(env, "trace_dir:");
@@ -1094,6 +1097,17 @@ static int trace_on(const char *env)
 				return ENAMETOOLONG;
 			memcpy(trace_path, td, len);
 		}
+		cmpr = strstr(env, "trace_compress:");
+		if (cmpr) {
+			cmpr += sizeof("trace_compress"); /* NUL in sizeof skips ':' */
+			const char *end = strchrnul(cmpr, ',');
+			size_t clen = end - cmpr; /* own length; `len' belongs to trace_path */
+			if (clen >= sizeof(cmpr_cmd))
+				return ENAMETOOLONG;
+			memcpy(cmpr_cmd, cmpr, clen); /* stop at ',' not at end of env */
+			cmpr_cmd[clen] = '\0';
+			cmpr = cmpr_cmd;
+		}
 	}
 	if (!len) {
 		env = getenv("TMPDIR");
@@ -1108,8 +1122,17 @@ static int trace_on(const char *env)
 	}
 	if (trace_path[len - 1] != '/')
 		trace_path[len++] = '/';
+	if (cmpr) {
+		if (strstr(cmpr, "zstd")) {
+			sfx = ".zst";
+		} else if (strstr(cmpr, "bzip2")) {
+			sfx = ".bz2";
+		}
+	} else {
+		cmpr = "gzip";
+	}
 	int rc = snprintf(trace_path + len, 32,
-			"mwrap.%d.trace.gz", (int)getpid());
+			"mwrap.%d.trace%s", (int)getpid(), sfx);
 	if (rc < 0 || rc >= 32)
 		return ENAMETOOLONG;
 	int fd = open(trace_path, O_CLOEXEC|O_CREAT|O_APPEND|O_WRONLY, 0666);
@@ -1134,9 +1157,11 @@ static int trace_on(const char *env)
 			close(pfds[0]);
 			if (dup2(fd, 1) < 1) err(1, "dup2");
 			close(fd);
-
-			execlp("gzip", "gzip", "-c", NULL);
-			err(1, "execlp");
+			if (strchr(cmpr, ' ') || strchr(cmpr, '\t'))
+				execl("/bin/sh", "sh", "-c", cmpr, NULL);
+			else
+				execlp(cmpr, cmpr, "-c", NULL);
+			err(1, "execl(p) %s", cmpr);
 		} else {
 			_exit(0);
 		}
diff --git a/script/mwrap-trace-replay b/script/mwrap-trace-replay
index 22cbc82..e1feb23 100644
--- a/script/mwrap-trace-replay
+++ b/script/mwrap-trace-replay
@@ -4,17 +4,46 @@
 use v5.12;
 use autodie;
 use Devel::Mwrap::TraceReplay;
-(-f STDIN || -p STDIN) or die "Usage: $0 </path/to/mwrap.\$PID.trace.gz\n";
-pipe(my $r, my $w);
-my $gzip = $ENV{GZIP} // 'gzip';
-my $pid = fork;
-if ($pid == 0) {
-	open STDOUT, '>&', $w;
+my (@files, @opt);
+for (@ARGV) {
+	if (-f $_) {
+		push @files, $_;
+	} else {
+		push @opt, $_;
+	}
+}
+
+warn "opt=@opt f=@files";
+if (@files) {
+	pipe(my $r, my $w);
+	my $tpid = fork;
+	if ($tpid == 0) {
+		open STDIN, '<&', $r;
+		close $_ for ($r, $w);
+		Devel::Mwrap::TraceReplay::run @opt;
+		die "exec trace-replay: $!";
+	}
+	for my $f (@files) {
+		my $dc = 'gzip';
+		if ($f =~ /\.zst\z/i) {
+			$dc = 'zstd';
+		} elsif ($f =~ /\.bz2\z/i) {
+			$dc = 'bzip2';
+		}
+		my $pid = fork;
+		if ($pid == 0) {
+			open STDOUT, '>&', $w;
+			open STDIN, '<', $f;
+			close $_ for ($r, $w);
+			exec $dc, '-dc';
+			die "exec: $dc: $!";
+		}
+		waitpid($pid, 0);
+	}
 	close $_ for ($r, $w);
-	exec $gzip, '-dc';
-	die "exec: $!";
+	waitpid($tpid, 0);
+} else {
+	(-f STDIN || -p STDIN) or
+		die "Usage: $0 </path/to/mwrap.\$PID.trace\n";
+	Devel::Mwrap::TraceReplay::run @opt;
 }
-open STDIN, '<&', $r;
-close $_ for ($r, $w);
-Devel::Mwrap::TraceReplay::run @ARGV;
-waitpid $pid, 0;
diff --git a/t/httpd.t b/t/httpd.t
index d7006d5..4b8ed82 100644
--- a/t/httpd.t
+++ b/t/httpd.t
@@ -194,15 +194,15 @@ SKIP: {
 	is $rc, 0, 'trace disabled';
 	like(slurp($cout), qr/trace off/, 'tracing disabled');
 	ok -s $trace_file, 'trace file data';
+	ok -f $trace_file, 'trace file data';
 
 	my @replay = ($^X, '-w', './blib/script/mwrap-trace-replay');
 	my $trace_out = "$mwrap_tmp/tr.out";
 	my $tr_pid = fork;
 	if ($tr_pid == 0) {
-		open STDIN, '<', $trace_file;
 		open STDOUT, '+>>', $trace_out;
 		open STDERR, '+>>', $trace_out;
-		exec @replay;
+		exec @replay, $trace_file;
 		die "exec: $!";
 	}
 	waitpid($tr_pid, 0);

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-04-05 21:05 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-05 21:05 [PATCH 1/4] support malloc tracing Eric Wong
2024-04-05 21:05 ` [PATCH 2/4] realloc Eric Wong
2024-04-05 21:05 ` [PATCH 3/4] gzip Eric Wong
2024-04-05 21:05 ` [PATCH 4/4] tracecompress Eric Wong

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).