From: Eric Wong <e@80x24.org>
To: spew@80x24.org
Subject: [PATCH] support malloc tracing and replay
Date: Fri, 5 Apr 2024 21:29:00 +0000
Message-ID: <20240405212900.3140401-1-e@80x24.org>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit

To test and provide reproducible behavior of different mallocs,
provide an architecture-specific tracing mechanism to write trace
files.  Since these traces can get large, they're compressed by
gzip(1) by default to avoid filling up the hard drives of
long-lived daemons.  The compressor can be replaced with zstd or
bzip2 via "trace_compress:zstd" in the comma-delimited MWRAP
environment variable.

The new mwrap-trace-replay command is designed to run with either
jemalloc or glibc malloc to replay trace files.  It can read
uncompressed output via stdin, or compressed files via
gzip/zstd/bzip2.

This doesn't work reliably in multi-threaded code, but the
fragmentation problems I need to diagnose occur in
single-threaded code.
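For illustration (this sketch is not part of the patch): every trace
record starts with a machine word whose low three bits tag the
operation, mirroring enum tr_fn and TR_MASK in trace_struct.h below;
the pointer survives in the upper bits since allocations are at least
8-byte aligned.  Decoding that first word looks like:

#include <stdint.h>
#include <stdio.h>

/* mirrors trace_struct.h: low 3 bits of the first word tag the op */
enum tr_fn { TR_FREE = 0, TR_MEMALIGN = 1, TR_MALLOC = 2,
	     TR_REALLOC = 3, TR_CALLOC = 4 };
#define TR_MASK ((uintptr_t)7)

static void decode_first_word(uintptr_t w)
{
	enum tr_fn fn = (enum tr_fn)(w & TR_MASK); /* operation tag */
	uintptr_t ptr = w & ~TR_MASK; /* pointer or return address */

	printf("fn=%d ptr=0x%jx\n", (int)fn, (uintmax_t)ptr);
}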
--- MANIFEST | 8 +- Makefile.PL | 2 +- httpd.h | 83 +++- lib/Devel/Mwrap/TraceReplay.pm | 80 ++++ dlmalloc_c.h => lib/Devel/Mwrap/dlmalloc_c.h | 4 + lib/Devel/Mwrap/khashl.h | 454 +++++++++++++++++++ lib/Devel/Mwrap/trace-replay.h | 238 ++++++++++ lib/Devel/Mwrap/trace_struct.h | 34 ++ mwrap_core.h | 179 ++++++-- mymalloc.h | 14 +- script/mwrap-trace-replay | 49 ++ t/httpd.t | 32 +- trace.h | 66 +++ 13 files changed, 1205 insertions(+), 38 deletions(-) create mode 100644 lib/Devel/Mwrap/TraceReplay.pm rename dlmalloc_c.h => lib/Devel/Mwrap/dlmalloc_c.h (99%) create mode 100644 lib/Devel/Mwrap/khashl.h create mode 100644 lib/Devel/Mwrap/trace-replay.h create mode 100644 lib/Devel/Mwrap/trace_struct.h create mode 100644 script/mwrap-trace-replay create mode 100644 trace.h diff --git a/MANIFEST b/MANIFEST index cf42979..5af61f4 100644 --- a/MANIFEST +++ b/MANIFEST @@ -6,7 +6,6 @@ Makefile.PL Mwrap.xs README check.h -dlmalloc_c.h examples/mwrap.psgi exe.sh gcc.h @@ -15,6 +14,11 @@ jhash.h lib/Devel/Mwrap.pm lib/Devel/Mwrap/PSGI.pm lib/Devel/Mwrap/Rproxy.pm +lib/Devel/Mwrap/TraceReplay.pm +lib/Devel/Mwrap/dlmalloc_c.h +lib/Devel/Mwrap/khashl.h +lib/Devel/Mwrap/trace-replay.h +lib/Devel/Mwrap/trace_struct.h mwrap_core.h mymalloc.h picohttpparser.h @@ -23,9 +27,11 @@ ppport.h script/mwrap-decode-csv script/mwrap-perl script/mwrap-rproxy +script/mwrap-trace-replay t/httpd-unit.t t/httpd.t t/mwrap.t t/source_location.perl t/test_common.perl +trace.h typemap diff --git a/Makefile.PL b/Makefile.PL index 41e8f03..c1c21de 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -84,7 +84,7 @@ push @writemakefile_args, ( PREREQ_PM => {}, ABSTRACT_FROM => 'lib/Devel/Mwrap.pm', EXE_FILES => [qw(script/mwrap-perl script/mwrap-rproxy - script/mwrap-decode-csv)], + script/mwrap-decode-csv script/mwrap-trace-replay)], AUTHOR => 'mwrap hackers ', LIBS => $LIBS, # e.g. 
-lurcu-cds LICENSE => 'gpl_2', # GPL-3.0+, CPAN::Meta::Spec limitation diff --git a/httpd.h b/httpd.h index 8a105aa..a097e0e 100644 --- a/httpd.h +++ b/httpd.h @@ -332,6 +332,72 @@ static enum mw_qev h1_do_reset(struct mw_h1 *h1) return h1_res_oneshot(h1, r200, sizeof(r200) - 1); } +static enum mw_qev h1_trace_too_long(struct mw_h1 *h1) +{ + static const char r500[] = "HTTP/1.1 500 Error\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 9\r\n\r\n" "too long\n"; + return h1_res_oneshot(h1, r500, sizeof(r500) - 1); +} + +static enum mw_qev h1_trace_on(struct mw_h1 *h1) +{ + int e = trace_on(getenv("MWRAP")); + if (e == ENAMETOOLONG) { + return h1_trace_too_long(h1); + } else if (e == EBUSY) { + static const char r500[] = "HTTP/1.1 500 Error\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 12\r\n\r\n" "double open\n"; + return h1_res_oneshot(h1, r500, sizeof(r500) - 1); + } else if (e) { + static const char r500[] = "HTTP/1.1 500 Error\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 11\r\n\r\n" "open error\n"; + return h1_res_oneshot(h1, r500, sizeof(r500) - 1); + } + static const char r200[] = "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 8\r\n\r\n" "tracing\n"; + return h1_res_oneshot(h1, r200, sizeof(r200) - 1); +} + +static enum mw_qev h1_trace_off(struct mw_h1 *h1) +{ + int fd = uatomic_xchg(&mwrap_trace_fd, -1); + if (fd >= 0) { + synchronize_rcu(); + CHECK(int, 0, pthread_mutex_lock(&global_mtx)); + mstate ms; + cds_list_for_each_entry(ms, &arenas_active, arena_node) + trace_flush_fd(fd, ms); + cds_list_for_each_entry(ms, &arenas_unused, arena_node) + trace_flush_fd(fd, ms); + CHECK(int, 0, pthread_mutex_unlock(&global_mtx)); + close(fd); + + static const char r200[] = "HTTP/1.1 200 OK\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 10\r\n\r\n" "trace off\n"; + return h1_res_oneshot(h1, r200, sizeof(r200) - 1); + } + static const char r500[] = "HTTP/1.1 500 Error\r\n" + "Content-Type: text/plain\r\n" + "Connection: close\r\n" + "Content-Length: 9\r\n\r\n" "not open\n"; + return h1_res_oneshot(h1, r500, sizeof(r500) - 1); +} + +static enum mw_qev h1_toggle_trace(struct mw_h1 *h1) +{ + return mwrap_trace_fd < 0 ? 
h1_trace_on(h1) : h1_trace_off(h1); +} + static enum mw_qev h1_do_trim(struct mw_h1 *h1) { static const char r200[] = "HTTP/1.1 200 OK\r\n" @@ -806,8 +872,12 @@ static enum mw_qev h1_dispatch(struct mw_h1 *h1, struct mw_h1req *h1r) return pid_root(h1, h1r); } } else if (h1r->method_len == 4 && !memcmp(h1r->method, "POST", 4)) { - if (h1r->path_len == 6 && !memcmp(h1r->path, "/reset", 6)) - return h1_do_reset(h1); + if (h1r->path_len == 6) { + if (!memcmp(h1r->path, "/reset", 6)) + return h1_do_reset(h1); + if (!memcmp(h1r->path, "/trace", 6)) + return h1_toggle_trace(h1); + } if (h1r->path_len == 5 && !memcmp(h1r->path, "/trim", 5)) return h1_do_trim(h1); if (h1r->path_len == 4 && !memcmp(h1r->path, "/ctl", 4)) @@ -1353,6 +1423,8 @@ join_thread: static void h1d_atfork_prepare(void) { + if (pthread_equal(g_h1d.tid, pthread_self())) + return; if (uatomic_cmpxchg(&g_h1d.alive, 1, 0)) h1d_stop_join(&g_h1d); } @@ -1373,6 +1445,11 @@ static void h1d_start(void) /* may be called as pthread_atfork child cb */ /* must be called with global_mtx held */ static void h1d_atfork_parent(void) { - if (g_h1d.lfd < 0) + if (!pthread_equal(g_h1d.tid, pthread_self()) && g_h1d.lfd < 0) h1d_start(); } + +static void h1d_atfork_child(void) +{ + if (!pthread_equal(g_h1d.tid, pthread_self())) h1d_start(); +} diff --git a/lib/Devel/Mwrap/TraceReplay.pm b/lib/Devel/Mwrap/TraceReplay.pm new file mode 100644 index 0000000..bb2551b --- /dev/null +++ b/lib/Devel/Mwrap/TraceReplay.pm @@ -0,0 +1,80 @@ +# Copyright (C) mwrap hackers +# License: GPL-3.0+ +# +# Just-ahead-of-time builder for lib/Devel/Mwrap/trace-replay.h +# I never want users to be without source code for repairs, so this +# aims to replicate the feel of a scripting language using C. +# The resulting executable is not linked to Perl in any way. +package Devel::Mwrap::TraceReplay; +use v5.12; +use autodie; +use Config; +use Fcntl qw(LOCK_EX); +my $dir = ($ENV{XDG_CACHE_HOME} // + (($ENV{HOME} // die('HOME unset')).'/.cache')).'/mwrap/trace-replay'; +my $bin = "$dir/trace-replay-$Config{archname}"; +my ($srcpfx) = (__FILE__ =~ m!\A(.+/)[^/]+\z!); +my @srcs = map { $srcpfx.$_ } qw(trace-replay.h + dlmalloc_c.h khashl.h trace_struct.h); +my $ldflags = '-Wl,-O1'; +$ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd'; + +my $xflags = ($ENV{CFLAGS} // '-Wall -ggdb3 -pipe') . ' ' . + ($ENV{LDFLAGS} // $ldflags); +substr($xflags, 0, 0, '-O2 ') if !defined($ENV{CFLAGS}) && !-w __FILE__; +my $cc = $ENV{CC} // $Config{cc} // 'c99'; + +sub build () { + if (!-d $dir) { + require File::Path; + File::Path::make_path($dir); + } + my ($prog) = ($bin =~ m!/([^/]+)\z!); + my $fn = "$dir/$prog.c"; + open my $fh, '>', $fn; + until (flock($fh, LOCK_EX)) { die "LOCK_EX: $fn: $!" 
if !$!{EINTR} } + say $fh qq{#include "trace-replay.h"}; + $fh->flush or die "flush: $!"; + my $pkg_config = $ENV{PKG_CONFIG} // 'pkg-config'; + chomp(my $fl = `$pkg_config liburcu-cds --libs --cflags`); + $^O eq 'netbsd' and $fl =~ s/(\A|[ \t])\-L([^ \t]+)([ \t]|\z)/ + "$1-L$2 -Wl,-rpath=$2$3"/egsx; + my @xflags = split(' ', "$fl $xflags"); # ' ' awk-mode eats leading WS + my @cflags = ('-I', $srcpfx, grep(!/\A-(?:Wl|l|L)/, @xflags)); + my @cmd = ($cc, '-o', "$dir/$prog.o", '-c', $fn, @cflags); + system(@cmd) and die "E: @cmd: \$?=$?"; + @cmd = ($cc, '-o', "$dir/$prog.tmp", "$dir/$prog.o", @xflags); + system(@cmd) and die "E: @cmd: \$?=$?"; + unlink $fn, "$dir/$prog.o"; + open my $xfh, '>', "$dir/XFLAGS.tmp"; + say $xfh $xflags; + close $xfh; + rename("$dir/$_.tmp", "$dir/$_") for ($prog, qw(XFLAGS)); +} + +sub needs_rebuild () { + open my $fh, '<', "$dir/XFLAGS" or return 1; + chomp(my $prev = <$fh>); + $prev ne $xflags; +} + +sub check_build () { + use Time::HiRes qw(stat); + my $ctime = 0; + my @bin = stat($bin) or return build(); + for (@srcs) { + my @st = stat($_) or die "stat $_: $!"; + if ($st[10] > $ctime) { + $ctime = $st[10]; + return build() if $ctime > $bin[10]; + } + } + needs_rebuild() ? build() : 0; +} + +sub run (@) { + check_build(); + exec $bin, @_; +} + +1; diff --git a/dlmalloc_c.h b/lib/Devel/Mwrap/dlmalloc_c.h similarity index 99% rename from dlmalloc_c.h rename to lib/Devel/Mwrap/dlmalloc_c.h index 5aa9e94..398f376 100644 --- a/dlmalloc_c.h +++ b/lib/Devel/Mwrap/dlmalloc_c.h @@ -590,6 +590,8 @@ MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP #include /* For size_t */ #endif /* LACKS_SYS_TYPES_H */ +#include + /* The maximum possible size_t value has all bits set */ #define MAX_SIZE_T (~(size_t)0) @@ -2607,6 +2609,8 @@ struct malloc_state { MLOCK_T mutex; /* locate lock among fields that rarely change */ #endif /* USE_LOCKS */ msegment seg; + size_t trace_wfill; + char trace_wbuf[PIPE_BUF]; struct cds_list_head arena_node; /* cold */ struct cds_wfcq_tail remote_free_tail; }; diff --git a/lib/Devel/Mwrap/khashl.h b/lib/Devel/Mwrap/khashl.h new file mode 100644 index 0000000..474f675 --- /dev/null +++ b/lib/Devel/Mwrap/khashl.h @@ -0,0 +1,454 @@ +/* The MIT License + + Copyright (c) 2019-2023 by Attractive Chaos + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef __AC_KHASHL_H +#define __AC_KHASHL_H + +#define AC_VERSION_KHASHL_H "0.2" + +typedef uint32_t khint32_t; +typedef uint64_t khint64_t; + +typedef khint32_t khint_t; +typedef khint_t khiter_t; + +#define kh_inline inline /* portably handled elsewhere */ +#define KH_LOCAL static kh_inline + +/**************************** + * Simple private functions * + ****************************/ + +#define __kh_used(flag, i) (flag[i>>5] >> (i&0x1fU) & 1U) +#define __kh_set_used(flag, i) (flag[i>>5] |= 1U<<(i&0x1fU)) +#define __kh_set_unused(flag, i) (flag[i>>5] &= ~(1U<<(i&0x1fU))) + +#define __kh_fsize(m) ((m) < 32? 1 : (m)>>5) + +static kh_inline khint_t __kh_h2b(khint_t hash, khint_t bits) { return hash * 2654435769U >> (32 - bits); } + +/******************* + * Hash table base * + *******************/ + +#define __KHASHL_TYPE(HType, khkey_t) \ + typedef struct HType { \ + khint_t bits, count; \ + khint32_t *used; \ + khkey_t *keys; \ + } HType; + +#define __KHASHL_PROTOTYPES(HType, prefix, khkey_t) \ + extern HType *prefix##_init(void); \ + extern void prefix##_destroy(HType *h); \ + extern void prefix##_clear(HType *h); \ + extern khint_t prefix##_getp(const HType *h, const khkey_t *key); \ + extern void prefix##_resize(HType *h, khint_t new_n_buckets); \ + extern khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent); \ + extern void prefix##_del(HType *h, khint_t k); + +#define __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + SCOPE HType *prefix##_init(void) { \ + return (HType*)kcalloc(1, sizeof(HType)); \ + } \ + SCOPE void prefix##_release(HType *h) { \ + kfree((void *)h->keys); kfree(h->used); \ + } \ + SCOPE void prefix##_destroy(HType *h) { \ + if (!h) return; \ + prefix##_release(h); \ + kfree(h); \ + } \ + SCOPE void prefix##_clear(HType *h) { \ + if (h && h->used) { \ + khint_t n_buckets = (khint_t)1U << h->bits; \ + memset(h->used, 0, __kh_fsize(n_buckets) * sizeof(khint32_t)); \ + h->count = 0; \ + } \ + } + +#define __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_getp_core(const HType *h, const khkey_t *key, khint_t hash) { \ + khint_t i, last, n_buckets, mask; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U << h->bits; \ + mask = n_buckets - 1U; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) return n_buckets; \ + } \ + return !__kh_used(h->used, i)? n_buckets : i; \ + } \ + SCOPE khint_t prefix##_getp(const HType *h, const khkey_t *key) { return prefix##_getp_core(h, key, __hash_fn(*key)); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { return prefix##_getp_core(h, &key, __hash_fn(key)); } + +#define __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { \ + khint32_t *new_used = 0; \ + khint_t j = 0, x = new_n_buckets, n_buckets, new_bits, new_mask; \ + while ((x >>= 1) != 0) ++j; \ + if (new_n_buckets & (new_n_buckets - 1)) ++j; \ + new_bits = j > 2? j : 2; \ + new_n_buckets = (khint_t)1U << new_bits; \ + if (h->count > (new_n_buckets>>1) + (new_n_buckets>>2)) return; /* noop, requested size is too small */ \ + new_used = (khint32_t*)kcalloc(__kh_fsize(new_n_buckets), sizeof(khint32_t)); \ + n_buckets = h->keys? 
(khint_t)1U<bits : 0U; \ + if (n_buckets < new_n_buckets) { /* expand */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + } /* otherwise shrink */ \ + new_mask = new_n_buckets - 1; \ + for (j = 0; j != n_buckets; ++j) { \ + khkey_t key; \ + if (!__kh_used(h->used, j)) continue; \ + key = h->keys[j]; \ + __kh_set_unused(h->used, j); \ + while (1) { /* kick-out process; sort of like in Cuckoo hashing */ \ + khint_t i; \ + i = __kh_h2b(__hash_fn(key), new_bits); \ + while (__kh_used(new_used, i)) i = (i + 1) & new_mask; \ + __kh_set_used(new_used, i); \ + if (i < n_buckets && __kh_used(h->used, i)) { /* kick out the existing element */ \ + { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \ + __kh_set_unused(h->used, i); /* mark it as deleted in the old hash table */ \ + } else { /* write the element and jump out of the loop */ \ + h->keys[i] = key; \ + break; \ + } \ + } \ + } \ + if (n_buckets > new_n_buckets) /* shrink the hash table */ \ + REALLOC_ARRAY(h->keys, new_n_buckets); \ + kfree(h->used); /* free the working space */ \ + h->used = new_used, h->bits = new_bits; \ + } + +#define __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + SCOPE khint_t prefix##_putp_core(HType *h, const khkey_t *key, khint_t hash, int *absent) { \ + khint_t n_buckets, i, last, mask; \ + n_buckets = h->keys? (khint_t)1U<bits : 0U; \ + *absent = -1; \ + if (h->count >= (n_buckets>>1) + (n_buckets>>2)) { /* rehashing */ \ + prefix##_resize(h, n_buckets + 1U); \ + n_buckets = (khint_t)1U<bits; \ + } /* TODO: to implement automatically shrinking; resize() already support shrinking */ \ + mask = n_buckets - 1; \ + i = last = __kh_h2b(hash, h->bits); \ + while (__kh_used(h->used, i) && !__hash_eq(h->keys[i], *key)) { \ + i = (i + 1U) & mask; \ + if (i == last) break; \ + } \ + if (!__kh_used(h->used, i)) { /* not present at all */ \ + h->keys[i] = *key; \ + __kh_set_used(h->used, i); \ + ++h->count; \ + *absent = 1; \ + } else *absent = 0; /* Don't touch h->keys[i] if present */ \ + return i; \ + } \ + SCOPE khint_t prefix##_putp(HType *h, const khkey_t *key, int *absent) { return prefix##_putp_core(h, key, __hash_fn(*key), absent); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { return prefix##_putp_core(h, &key, __hash_fn(key), absent); } + +#define __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) \ + SCOPE int prefix##_del(HType *h, khint_t i) { \ + khint_t j = i, k, mask, n_buckets; \ + if (h->keys == 0) return 0; \ + n_buckets = (khint_t)1U<bits; \ + mask = n_buckets - 1U; \ + while (1) { \ + j = (j + 1U) & mask; \ + if (j == i || !__kh_used(h->used, j)) break; /* j==i only when the table is completely full */ \ + k = __kh_h2b(__hash_fn(h->keys[j]), h->bits); \ + if ((j > i && (k <= i || k > j)) || (j < i && (k <= i && k > j))) \ + h->keys[i] = h->keys[j], i = j; \ + } \ + __kh_set_unused(h->used, i); \ + --h->count; \ + return 1; \ + } + +#define KHASHL_DECLARE(HType, prefix, khkey_t) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_PROTOTYPES(HType, prefix, khkey_t) + +/* compatibility wrappers to make khash -> khashl migration easier */ +#define __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) \ + typedef HType HType##_t; \ + SCOPE HType *kh_init_##prefix(void) { return prefix##_init(); } \ + SCOPE void kh_release_##prefix(HType *h) { prefix##_release(h); } \ + SCOPE void kh_destroy_##prefix(HType *h) { prefix##_destroy(h); } \ + SCOPE void kh_clear_##prefix(HType *h) { prefix##_clear(h); } \ + SCOPE khint_t kh_get_##prefix(const HType *h, khkey_t 
key) { \ + return prefix##_get(h, key); \ + } \ + SCOPE void kh_resize_##prefix(HType *h, khint_t new_n_buckets) { \ + prefix##_resize(h, new_n_buckets); \ + } \ + SCOPE khint_t kh_put_##prefix(HType *h, khkey_t key, int *absent) { \ + return prefix##_put(h, key, absent); \ + } \ + SCOPE int kh_del_##prefix(HType *h, khint_t i) { \ + return prefix##_del(h, i); \ + } + +#define KHASHL_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_TYPE(HType, khkey_t) \ + __KHASHL_IMPL_BASIC(SCOPE, HType, prefix) \ + __KHASHL_IMPL_GET(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_RESIZE(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_PUT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + __KHASHL_IMPL_DEL(SCOPE, HType, prefix, khkey_t, __hash_fn) + +/*************************** + * Ensemble of hash tables * + ***************************/ + +typedef struct { + khint_t sub, pos; +} kh_ensitr_t; + +#define KHASHE_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + KHASHL_INIT(KH_LOCAL, HType##_sub, prefix##_sub, khkey_t, __hash_fn, __hash_eq) \ + typedef struct HType { \ + khint64_t count:54, bits:8; \ + HType##_sub *sub; \ + } HType; \ + SCOPE HType *prefix##_init(int bits) { \ + HType *g; \ + g = (HType*)kcalloc(1, sizeof(*g)); \ + g->bits = bits; \ + g->sub = (HType##_sub*)kcalloc(1U<sub)); \ + return g; \ + } \ + SCOPE void prefix##_destroy(HType *g) { \ + int t; \ + if (!g) return; \ + for (t = 0; t < 1<bits; ++t) { kfree((void*)g->sub[t].keys); kfree(g->sub[t].used); } \ + kfree(g->sub); kfree(g); \ + } \ + SCOPE kh_ensitr_t prefix##_getp(const HType *g, const khkey_t *key) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_getp_core(h, key, hash); \ + if (ret == 1U<bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_get(const HType *g, const khkey_t key) { return prefix##_getp(g, &key); } \ + SCOPE kh_ensitr_t prefix##_putp(HType *g, const khkey_t *key, int *absent) { \ + khint_t hash, low, ret; \ + kh_ensitr_t r; \ + HType##_sub *h; \ + hash = __hash_fn(*key); \ + low = hash & ((1U<bits) - 1); \ + h = &g->sub[low]; \ + ret = prefix##_sub_putp_core(h, key, hash, absent); \ + if (*absent) ++g->count; \ + if (ret == 1U<bits) r.sub = low, r.pos = (khint_t)-1; \ + else r.sub = low, r.pos = ret; \ + return r; \ + } \ + SCOPE kh_ensitr_t prefix##_put(HType *g, const khkey_t key, int *absent) { return prefix##_putp(g, &key, absent); } \ + SCOPE int prefix##_del(HType *g, kh_ensitr_t itr) { \ + HType##_sub *h = &g->sub[itr.sub]; \ + int ret; \ + ret = prefix##_sub_del(h, itr.pos); \ + if (ret) --g->count; \ + return ret; \ + } + +/***************************** + * More convenient interface * + *****************************/ + +#define __kh_packed /* noop, we use -Werror=address-of-packed-member */ +#define __kh_cached_hash(x) ((x).hash) + +#define KHASHL_SET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; } __kh_packed HType##_s_bucket_t; \ + static kh_inline khint_t prefix##_s_hash(HType##_s_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_s_eq(HType##_s_bucket_t x, HType##_s_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_s, HType##_s_bucket_t, prefix##_s_hash, prefix##_s_eq) \ + SCOPE HType *prefix##_init(void) { 
return prefix##_s_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_s_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_s_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_s_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_s_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_s_bucket_t t; t.key = key; return prefix##_s_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_s_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_s_bucket_t t; t.key = key; return prefix##_s_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_m_init(); } \ + SCOPE void prefix##_release(HType *h) { prefix##_m_release(h); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE void prefix##_clear(HType *h) { prefix##_m_clear(h); } \ + SCOPE void prefix##_resize(HType *h, khint_t new_n_buckets) { prefix##_m_resize(h, new_n_buckets); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_m_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } \ + __KHASH_COMPAT(SCOPE, HType, prefix, khkey_t) + +#define KHASHL_CSET_INIT(SCOPE, HType, prefix, khkey_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; khint_t hash; } __kh_packed HType##_cs_bucket_t; \ + static kh_inline int prefix##_cs_eq(HType##_cs_bucket_t x, HType##_cs_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cs, HType##_cs_bucket_t, __kh_cached_hash, prefix##_cs_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cs_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cs_destroy(h); } \ + SCOPE khint_t prefix##_get(const HType *h, khkey_t key) { HType##_cs_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cs_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cs_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cs_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cs_putp(h, &t, absent); } + +#define KHASHL_CMAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; khint_t hash; } __kh_packed HType##_cm_bucket_t; \ + static kh_inline int prefix##_cm_eq(HType##_cm_bucket_t x, HType##_cm_bucket_t y) { return x.hash == y.hash && __hash_eq(x.key, y.key); } \ + KHASHL_INIT(KH_LOCAL, HType, prefix##_cm, HType##_cm_bucket_t, __kh_cached_hash, prefix##_cm_eq) \ + SCOPE HType *prefix##_init(void) { return prefix##_cm_init(); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_cm_destroy(h); } \ + SCOPE khint_t 
prefix##_get(const HType *h, khkey_t key) { HType##_cm_bucket_t t; t.key = key; t.hash = __hash_fn(key); return prefix##_cm_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, khint_t k) { return prefix##_cm_del(h, k); } \ + SCOPE khint_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_cm_bucket_t t; t.key = key, t.hash = __hash_fn(key); return prefix##_cm_putp(h, &t, absent); } + +#define KHASHE_MAP_INIT(SCOPE, HType, prefix, khkey_t, kh_val_t, __hash_fn, __hash_eq) \ + typedef struct { khkey_t key; kh_val_t val; } __kh_packed HType##_m_bucket_t; \ + static kh_inline khint_t prefix##_m_hash(HType##_m_bucket_t x) { return __hash_fn(x.key); } \ + static kh_inline int prefix##_m_eq(HType##_m_bucket_t x, HType##_m_bucket_t y) { return __hash_eq(x.key, y.key); } \ + KHASHE_INIT(KH_LOCAL, HType, prefix##_m, HType##_m_bucket_t, prefix##_m_hash, prefix##_m_eq) \ + SCOPE HType *prefix##_init(int bits) { return prefix##_m_init(bits); } \ + SCOPE void prefix##_destroy(HType *h) { prefix##_m_destroy(h); } \ + SCOPE kh_ensitr_t prefix##_get(const HType *h, khkey_t key) { HType##_m_bucket_t t; t.key = key; return prefix##_m_getp(h, &t); } \ + SCOPE int prefix##_del(HType *h, kh_ensitr_t k) { return prefix##_m_del(h, k); } \ + SCOPE kh_ensitr_t prefix##_put(HType *h, khkey_t key, int *absent) { HType##_m_bucket_t t; t.key = key; return prefix##_m_putp(h, &t, absent); } + +/************************** + * Public macro functions * + **************************/ + +#define kh_bucket(h, x) ((h)->keys[x]) + +/*! @function + @abstract Get the number of elements in the hash table + @param h Pointer to the hash table + @return Number of elements in the hash table [khint_t] + */ +#define kh_size(h) ((h)->count) + +#define kh_capacity(h) ((h)->keys? 1U<<(h)->bits : 0U) + +/*! @function + @abstract Get the end iterator + @param h Pointer to the hash table + @return The end iterator [khint_t] + */ +#define kh_end(h) kh_capacity(h) + +/*! @function + @abstract Get key given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Key [type of keys] + */ +#define kh_key(h, x) ((h)->keys[x].key) + +/*! @function + @abstract Get value given an iterator + @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return Value [type of values] + @discussion For hash sets, calling this results in segfault. + */ +#define kh_val(h, x) ((h)->keys[x].val) + +/*! @function + @abstract Test whether a bucket contains data. 
+ @param h Pointer to the hash table + @param x Iterator to the bucket [khint_t] + @return 1 if containing data; 0 otherwise [int] + */ +#define kh_exist(h, x) __kh_used((h)->used, (x)) + +#define kh_ens_key(g, x) kh_key(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_val(g, x) kh_val(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_exist(g, x) kh_exist(&(g)->sub[(x).sub], (x).pos) +#define kh_ens_is_end(x) ((x).pos == (khint_t)-1) +#define kh_ens_size(g) ((g)->count) + +/************************************** + * Common hash and equality functions * + **************************************/ + +#define kh_eq_generic(a, b) ((a) == (b)) +#define kh_eq_str(a, b) (strcmp((a), (b)) == 0) +#define kh_hash_dummy(x) ((khint_t)(x)) + +static kh_inline khint_t kh_hash_uint32(khint_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +static kh_inline khint_t kh_hash_uint64(khint64_t key) { + key = ~key + (key << 21); + key = key ^ key >> 24; + key = (key + (key << 3)) + (key << 8); + key = key ^ key >> 14; + key = (key + (key << 2)) + (key << 4); + key = key ^ key >> 28; + key = key + (key << 31); + return (khint_t)key; +} + +#define KH_FNV_SEED 11 + +static kh_inline khint_t kh_hash_str(const char *s) { /* FNV1a */ + khint_t h = KH_FNV_SEED ^ 2166136261U; + const unsigned char *t = (const unsigned char*)s; + for (; *t; ++t) + h ^= *t, h *= 16777619; + return h; +} + +static kh_inline khint_t kh_hash_bytes(int len, const unsigned char *s) { + khint_t h = KH_FNV_SEED ^ 2166136261U; + int i; + for (i = 0; i < len; ++i) + h ^= s[i], h *= 16777619; + return h; +} + +#endif /* __AC_KHASHL_H */ diff --git a/lib/Devel/Mwrap/trace-replay.h b/lib/Devel/Mwrap/trace-replay.h new file mode 100644 index 0000000..c43cc0f --- /dev/null +++ b/lib/Devel/Mwrap/trace-replay.h @@ -0,0 +1,238 @@ +/* + * Copyright (C) mwrap hackers + * License: GPL-3.0+ + * single-threaded trace replayer, no runtime dependency on Perl + * nor the rest of mwrap (aside from the hacked up dlmalloc). 
+ */ +#define _LGPL_SOURCE /* allows URCU to inline some stuff */ +#define _GNU_SOURCE +/* knobs for dlmalloc */ +#define HAVE_MORECORE 0 +#define DEFAULT_GRANULARITY (2U * 1024U * 1024U) +#define FOOTERS 1 /* required for remote_free_* stuff */ +#define USE_DL_PREFIX +#define ONLY_MSPACES 1 /* aka per-thread "arenas" */ +#define DLMALLOC_EXPORT static inline +/* #define NO_MALLOC_STATS 1 */ +#define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */ +#include +#include +#include +#include +#include +#include +#include +#ifdef __GLIBC__ +extern void __attribute__((weak)) malloc_stats(void); +extern void __attribute__((weak)) malloc_info(int, FILE *); +# define GLIBC_MALLOC_STATS() do { \ + if (malloc_info) malloc_info(0, stderr); \ + if (malloc_stats) malloc_stats(); \ + } while (0) +#else +# define GLIBC_MALLOC_STATS() do {} while (0) +#endif + +extern void __attribute__((weak)) malloc_stats_print( + void (*wcb)(void *, const char *), void *, const char *opts); + +#include +#include +#include "dlmalloc_c.h" +static mstate tr_ms; + +static void *my_calloc(size_t nmemb, size_t size) +{ + void *p = mspace_calloc(tr_ms, nmemb, size); + if (!p) err(1, "calloc"); + return p; +} + +#define kcalloc(N,Z) my_calloc(N, Z) +#define kfree(P) mspace_free(tr_ms, P) +#define REALLOC_ARRAY(x, nmemb) do { \ + size_t asize; \ + if (__builtin_mul_overflow(sizeof(*(x)), nmemb, &asize)) \ + errx(1, "mul_overflow"); \ + (x) = mspace_realloc(tr_ms, (x), asize); \ + if (!x) err(1, "realloc"); \ +} while (0) +#include "khashl.h" +#include "trace_struct.h" + +static inline khint_t hash_uptr(uintptr_t p) +{ + return sizeof(uintptr_t) == 4 ? kh_hash_uint32(p) : kh_hash_uint64(p); +} + +KHASHL_MAP_INIT(KH_LOCAL, kh_ptrmap, ptrmap, uintptr_t, uintptr_t, + hash_uptr, kh_eq_generic) + +static kh_ptrmap *old2cur; + +static void store_ptr(uintptr_t old, void *cur) +{ + int absent; + khint_t k = ptrmap_put(old2cur, old, &absent); + if (absent) + kh_val(old2cur, k) = (uintptr_t)cur; +} + +int main(int argc, char *argv[]) +{ + tr_ms = create_mspace(0, 0); + tr_ms->seg.sflags = EXTERN_BIT | USE_MMAP_BIT; + disable_contiguous(tr_ms); + size_t realloc_miss = 0, free_miss = 0, bad_entry = 0; + union { + struct tr_memalign do_memalign; + struct tr_free do_free; + struct tr_malloc do_malloc; + struct tr_calloc do_calloc; + struct tr_realloc do_realloc; + } as; + int truncated = 0; + + old2cur = ptrmap_init(); + + // don't fill buf all the way so we can do small reads in ENSURE_FILL: + while (!feof(stdin) && !truncated) { + +#define CONSUME(dst, required) do { \ + size_t need = sizeof(dst); \ + char *buf = (char *)&dst; \ + int done = 0; \ + while (need) { \ + size_t n = fread(buf, 1, need, stdin); \ + if (n > 0) { \ + need -= n; \ + } else if (n == 0 && !required) { \ + done = 1; \ + break; \ + } else { \ + warnx("TRUNCATED: %zu != %zu", n, need); \ + done = truncated = 1; \ + break; \ + } \ + } \ + if (done) break; \ +} while (0) + CONSUME(as.do_free.ptr, false); + enum tr_fn fn = as.do_free.ptr & TR_MASK; + as.do_free.ptr &= ~TR_MASK; + khint_t k; + void *cur; + + switch (fn) { + case TR_FREE: + k = ptrmap_get(old2cur, as.do_free.ptr); + if (k >= kh_end(old2cur)) { + ++free_miss; + } else { + free((void *)kh_val(old2cur, k)); + ptrmap_del(old2cur, k); + } + break; + case TR_MALLOC: + CONSUME(as.do_malloc.size, true); + cur = malloc(as.do_malloc.size); + if (!cur) + err(1, "malloc(%zu) => %p", + as.do_malloc.size, + (void *)as.do_malloc.ret); + store_ptr(as.do_malloc.ret, cur); + + break; + case TR_CALLOC: + 
CONSUME(as.do_calloc.size, true); + cur = calloc(as.do_calloc.size, 1); + if (!cur) + err(1, "calloc(%zu) => %p", + as.do_calloc.size, + (void *)as.do_calloc.ret); + store_ptr(as.do_calloc.ret, cur); + + break; + case TR_REALLOC: + cur = NULL; + CONSUME(as.do_realloc.ptr, true); + CONSUME(as.do_realloc.size, true); + if (as.do_realloc.ptr) { + k = ptrmap_get(old2cur, + as.do_realloc.ptr); + if (k >= kh_end(old2cur)) { + realloc_miss++; + } else { + cur = (void *)kh_val(old2cur, k); + ptrmap_del(old2cur, k); + } + } + void *rp = realloc(cur, as.do_realloc.size); + if (!rp) + err(1, "realloc(%p => %p, %zu) => %p", + (void *)as.do_realloc.ptr, + cur, + as.do_realloc.size, + (void *)as.do_realloc.ret); + store_ptr(as.do_realloc.ret, rp); + break; + case TR_MEMALIGN: + cur = NULL; + CONSUME(as.do_memalign.alignment, true); + CONSUME(as.do_memalign.size, true); + int rc = posix_memalign(&cur, + as.do_memalign.alignment, + as.do_memalign.size); + if (rc) { + errno = rc; + err(1, "posix_memalign(%zu, %zu) => %p", + as.do_memalign.alignment, + as.do_memalign.size, + (void *)as.do_memalign.ret); + } + store_ptr(as.do_memalign.ret, cur); + break; + default: + bad_entry++; + } + } + + if (free_miss || realloc_miss || bad_entry) + fprintf(stderr, "W: miss free=%zu realloc=%zu bad=%zu\n", + free_miss, realloc_miss, bad_entry); + + fprintf(stderr, "# ptrmap .size=%zu capa=%zu\n", + (size_t)kh_size(old2cur), (size_t)kh_capacity(old2cur)); + + if (malloc_stats_print) // jemalloc loaded + malloc_stats_print(NULL, NULL, NULL); + else + GLIBC_MALLOC_STATS(); + + int c; + char *end; + long sec = 0; + while ((c = getopt(argc, argv, "s:")) != -1) { + switch (c) { + case 's': + sec = strtol(optarg, &end, 10); + if (*end != 0) + errx(1, "`-s %s' invalid seconds", optarg); + break; + default: warnx("bad switch `-%c'", c); + } + } + if (sec < 0) { + fprintf(stderr, "# PID:%d sleeping indefinitely\n", + (int)getpid()); + pause(); + } + if (sec > 0) { + unsigned s = sec > UINT_MAX ? UINT_MAX : sec; + fprintf(stderr, "# PID:%d sleeping %u seconds\n", + (int)getpid(), s); + sleep(s); + } + + return truncated; +} diff --git a/lib/Devel/Mwrap/trace_struct.h b/lib/Devel/Mwrap/trace_struct.h new file mode 100644 index 0000000..e5fe622 --- /dev/null +++ b/lib/Devel/Mwrap/trace_struct.h @@ -0,0 +1,34 @@ +enum tr_fn { + TR_FREE = 0, + TR_MEMALIGN = 1, + TR_MALLOC = 2, + TR_REALLOC = 3, + TR_CALLOC = 4, +}; +static const uintptr_t TR_MASK = 7; + +struct tr_memalign { + uintptr_t ret; + size_t alignment; + size_t size; +}; + +struct tr_free { + uintptr_t ptr; +}; + +struct tr_malloc { + uintptr_t ret; + size_t size; +}; + +struct tr_realloc { + uintptr_t ret; + uintptr_t ptr; + size_t size; +}; + +struct tr_calloc { + uintptr_t ret; + size_t size; +}; diff --git a/mwrap_core.h b/mwrap_core.h index 78c14e3..6467f1c 100644 --- a/mwrap_core.h +++ b/mwrap_core.h @@ -36,6 +36,7 @@ #include #include #include +#include #if MWRAP_PERL # include "EXTERN.h" @@ -64,6 +65,8 @@ #define U24_MAX (1U << 24) +#include "trace.h" + /* * Perl doesn't have a GC the same way (C) Ruby does, so no GC count. 
* Instead, the relative age of an object is the number of total bytes @@ -498,31 +501,37 @@ static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l) return mtx; } +static void free_notrace(void *p) +{ + struct alloc_hdr *h = ptr2hdr(p); + struct src_loc *l = h->as.live.loc; + + if (l) { + size_t current_bytes = uatomic_read(&total_bytes_inc); + size_t age = current_bytes - h->as.live.gen; + uatomic_add(&total_bytes_dec, h->size); + uatomic_add(&l->freed_bytes, h->size); + uatomic_set(&h->size, 0); + uatomic_inc(&l->frees); + uatomic_add(&l->age_total, age); + + pthread_mutex_t *mtx = src_loc_mutex_lock(l); + cds_list_del_rcu(&h->anode); + if (age > l->max_lifespan) + l->max_lifespan = age; + CHECK(int, 0, pthread_mutex_unlock(mtx)); + + call_rcu(&h->as.dead, free_hdr_rcu); + } else { + real_free(h->real); + } +} + void free(void *p) { if (p) { - struct alloc_hdr *h = ptr2hdr(p); - struct src_loc *l = h->as.live.loc; - - if (l) { - size_t current_bytes = uatomic_read(&total_bytes_inc); - size_t age = current_bytes - h->as.live.gen; - uatomic_add(&total_bytes_dec, h->size); - uatomic_add(&l->freed_bytes, h->size); - uatomic_set(&h->size, 0); - uatomic_inc(&l->frees); - uatomic_add(&l->age_total, age); - - pthread_mutex_t *mtx = src_loc_mutex_lock(l); - cds_list_del_rcu(&h->anode); - if (age > l->max_lifespan) - l->max_lifespan = age; - CHECK(int, 0, pthread_mutex_unlock(mtx)); - - call_rcu(&h->as.dead, free_hdr_rcu); - } else { - real_free(h->real); - } + trace_free(p); + free_notrace(p); } } @@ -589,6 +598,7 @@ mwrap_memalign(void **pp, size_t alignment, size_t size, struct src_loc *sl) p = ptr_align(p, alignment); struct alloc_hdr *h = ptr2hdr(p); alloc_insert_rcu(sl, h, size, real); + trace_memalign(p, alignment, size); *pp = p; } @@ -701,7 +711,9 @@ void *malloc(size_t size) SRC_LOC_BT(bt); struct alloc_hdr *h = p; alloc_insert_rcu(&bt.sl, h, size, h); - return hdr2ptr(h); + p = hdr2ptr(h); + trace_malloc(p, size); + return p; } enomem: errno = ENOMEM; @@ -723,7 +735,9 @@ void *calloc(size_t nmemb, size_t size) struct alloc_hdr *h = p; SRC_LOC_BT(bt); alloc_insert_rcu(&bt.sl, h, size, h); - return memset(hdr2ptr(h), 0, size); + p = hdr2ptr(h); + trace_calloc(p, size); + return memset(p, 0, size); } enomem: errno = ENOMEM; @@ -747,10 +761,11 @@ void *realloc(void *ptr, size_t size) SRC_LOC_BT(bt); alloc_insert_rcu(&bt.sl, h, size, h); p = hdr2ptr(h); + trace_realloc(p, ptr, size); if (ptr) { struct alloc_hdr *old = ptr2hdr(ptr); memcpy(p, ptr, old->size < size ? old->size : size); - free(ptr); + free_notrace(ptr); } return p; } @@ -782,7 +797,8 @@ char **bt_syms(void * const *addrlist, uint32_t size) static void cleanup_free(void *any) { void **p = any; - free(*p); + if (*p) + free_notrace(*p); } static void *write_csv(FILE *, size_t min, const char *sort, size_t sort_len); @@ -1060,6 +1076,111 @@ static struct src_loc *mwrap_get_bin(const char *buf, size_t len) } static const char *mwrap_env; + +// n.b. 
signals are always blocked by the caller(s) when calling this +static int trace_on(const char *env) +{ + char trace_path[PATH_MAX]; + size_t len = 0; + const char *cmpr = NULL; + const char *sfx = ".gz"; + char cmpr_cmd[32]; + + if (env) { + const char *td = strstr(env, "trace_dir:"); + if (td) { + td += sizeof("trace_dir"); + const char *end = strchrnul(td, ','); + + len = end - td; + if ((len + 50) >= sizeof(trace_path)) + return ENAMETOOLONG; + if (len) memcpy(trace_path, td, len); + } + cmpr = strstr(env, "trace_compress:"); + if (cmpr) { + cmpr += sizeof("trace_compress"); + const char *end = strchrnul(cmpr, ','); + + size_t n = end - cmpr; + if (n) { + if (n >= sizeof(cmpr_cmd)) + return ENAMETOOLONG; + strcpy(cmpr_cmd, cmpr); + cmpr = cmpr_cmd; + } + } + } + if (!len) { + env = getenv("TMPDIR"); + if (!env) { + memcpy(trace_path, "/tmp", len = 4); + } else { + len = strlen(env); + if ((len + 50) >= sizeof(trace_path)) + return ENAMETOOLONG; + if (len) memcpy(trace_path, env, len); + } + } + if (trace_path[len - 1] != '/') + trace_path[len++] = '/'; + if (cmpr) { + if (strstr(cmpr, "zstd")) { + sfx = ".zst"; + } else if (strstr(cmpr, "bzip2")) { + sfx = ".bz2"; + } + } else { + cmpr = "gzip"; + } + int rc = snprintf(trace_path + len, 50, + "mwrap.%d.trace%s", (int)getpid(), sfx); + if (rc < 0 || rc >= 50) + return ENAMETOOLONG; + int fd = open(trace_path, O_CLOEXEC|O_CREAT|O_APPEND|O_WRONLY, 0666); + if (fd < 0) + return errno; + int pfds[2]; + if (pipe2(pfds, O_CLOEXEC) < 0) + return errno; + pid_t pid_a = fork(); + if (pid_a < 0) { + err(1, "fork"); + } else if (pid_a == 0) { // child + if (setsid() < 0) err(1, "setsid"); + pid_t pid_b = fork(); + if (pid_b < 0) { + err(1, "fork"); + } else if (pid_b == 0) { // grandchild + unsetenv("LD_PRELOAD"); + + close(pfds[1]); + if (dup2(pfds[0], 0) < 0) err(1, "dup2"); + close(pfds[0]); + if (dup2(fd, 1) < 1) err(1, "dup2"); + close(fd); + if (strchr(cmpr, ' ') || strchr(cmpr, '\t')) + execl("/bin/sh", "sh", "-c", cmpr, NULL); + else + execlp(cmpr, cmpr, "-c", NULL); + err(1, "execl(p) %s", cmpr); + } else { + _exit(0); + } + } + close(pfds[0]); + close(fd); + int st; + pid_t wpid = waitpid(pid_a, &st, 0); + if (wpid != pid_a) err(1, "waitpid(a)"); + if (st) errx(1, "gzip parent failed %d", st); + if (uatomic_cmpxchg(&mwrap_trace_fd, -1, pfds[1]) != -1) { + close(pfds[1]); + return EBUSY; + } + return 0; +} + #include "httpd.h" __attribute__((constructor)) static void mwrap_ctor(void) @@ -1089,7 +1210,11 @@ __attribute__((constructor)) static void mwrap_ctor(void) call_rcu(&h->as.dead, free_hdr_rcu); } else perror("malloc"); - + if (mwrap_env && strstr(mwrap_env, "trace:1")) { + int e = trace_on(mwrap_env); + if (e) + fprintf(stderr, "trace failed: %s\n", strerror(e)); + } h1d_start(); CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL)); CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent, diff --git a/mymalloc.h b/mymalloc.h index 4dd2ee6..37771d4 100644 --- a/mymalloc.h +++ b/mymalloc.h @@ -50,7 +50,7 @@ #define DLMALLOC_EXPORT static inline /* #define NO_MALLOC_STATS 1 */ #define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */ -#include "dlmalloc_c.h" +#include "lib/Devel/Mwrap/dlmalloc_c.h" #undef ABORT /* conflicts with Perl */ #undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */ #undef HAVE_MREMAP /* conflicts with Ruby 3.2 */ @@ -64,9 +64,17 @@ static CDS_LIST_HEAD(arenas_active); static CDS_LIST_HEAD(arenas_unused); /* called on pthread exit */ +static void trace_flush_fd(int, mstate); +static int 
mwrap_trace_fd = -1; // httpd.h sets this + ATTR_COLD static void mstate_tsd_dtor(void *p) { mstate ms = p; + if (ms) { + int fd = uatomic_read(&mwrap_trace_fd); + if (fd >= 0) + trace_flush_fd(fd, ms); + } /* * In case another destructor calls free (or any allocation function, @@ -86,7 +94,7 @@ ATTR_COLD static void mstate_tsd_dtor(void *p) /* see httpd.h */ static void h1d_atfork_prepare(void); static void h1d_atfork_parent(void); -static void h1d_start(void); +static void h1d_atfork_child(void); ATTR_COLD static void atfork_prepare(void) { @@ -124,7 +132,7 @@ ATTR_COLD static void atfork_child(void) } reset_mutexes(); call_rcu_after_fork_child(); - h1d_start(); + h1d_atfork_child(); } #if defined(__GLIBC__) diff --git a/script/mwrap-trace-replay b/script/mwrap-trace-replay new file mode 100644 index 0000000..e1feb23 --- /dev/null +++ b/script/mwrap-trace-replay @@ -0,0 +1,49 @@ +#!perl -w +# Copyright (C) mwrap hackers +# License: GPL-3.0+ +use v5.12; +use autodie; +use Devel::Mwrap::TraceReplay; +my (@files, @opt); +for (@ARGV) { + if (-f $_) { + push @files, $_; + } else { + push @opt, $_; + } +} + +warn "opt=@opt f=@files"; +if (@files) { + pipe(my $r, my $w); + my $tpid = fork; + if ($tpid == 0) { + open STDIN, '<&', $r; + close $_ for ($r, $w); + Devel::Mwrap::TraceReplay::run @opt; + die "exec trace-replay: $!"; + } + for my $f (@files) { + my $dc = 'gzip'; + if ($f =~ /\.zst\z/i) { + $dc = 'zstd'; + } elsif ($f =~ /\.bz2\z/i) { + $dc = 'bzip2'; + } + my $pid = fork; + if ($pid == 0) { + open STDOUT, '>&', $w; + open STDIN, '<', $f; + close $_ for ($r, $w); + exec $dc, '-dc'; + die "exec: $dc: $!"; + } + waitpid($pid, 0); + } + close $_ for ($r, $w); + waitpid($tpid, 0); +} else { + (-f STDIN || -p STDIN) or + die "Usage: $0 "socket_dir:$mwrap_tmp" }; +use autodie qw(mkdir fork); +mkdir "$mwrap_tmp/tr"; +my $env = { MWRAP => "socket_dir:$mwrap_tmp,trace_dir:$mwrap_tmp/tr" }; my $f1 = "$mwrap_tmp/f1"; my $f2 = "$mwrap_tmp/f2"; mkfifo($f1, 0600) // plan(skip_all => "mkfifo: $!"); @@ -57,7 +59,7 @@ my $cout = "$mwrap_tmp/cout"; my @curl = (qw(curl -sf --unix-socket), $sock, '-o', $cout); push @curl, '-vS' if $ENV{V}; my $rc = system(@curl, "http://0/$pid/each/2000"); -my $curl_unix; +my ($curl_unix, $trace_file); SKIP: { skip 'curl lacks --unix-socket support', 1 if $rc == 512; is($rc, 0, 'curl /each'); @@ -76,6 +78,12 @@ SKIP: { is($rc, 0, 'curl / (PID root)'); like(slurp($cout), qr/trimming/, 'trim started'); unlink($cout); + + $rc = system(@curl, '-v', '-XPOST', "http://0/$pid/trace"); + is $rc, 0, 'trace ok'; + like(slurp($cout), qr/tracing/, 'tracing enabled'); + $trace_file = "$mwrap_tmp/tr/mwrap.$pid.trace.gz"; + ok -f $trace_file, 'trace enabled'; }; { @@ -181,8 +189,26 @@ SKIP: { $rc = system(@curl, qw(-HX-Mwrap-BT:10 -d blah http://0/ctl)); is($rc >> 8, 22, '404 w/o PID prefix'); -}; + $rc = system(@curl, '-v', '-XPOST', "http://0/$pid/trace"); + is $rc, 0, 'trace disabled'; + like(slurp($cout), qr/trace off/, 'tracing disabled'); + ok -s $trace_file, 'trace file data'; + ok -f $trace_file, 'trace file data'; + + my @replay = ($^X, '-w', './blib/script/mwrap-trace-replay'); + my $trace_out = "$mwrap_tmp/tr.out"; + my $tr_pid = fork; + if ($tr_pid == 0) { + open STDOUT, '+>>', $trace_out; + open STDERR, '+>>', $trace_out; + exec @replay, $trace_file; + die "exec: $!"; + } + waitpid($tr_pid, 0); + is $?, 0, 'trace replay'; + diag slurp($trace_out); +}; diag slurp($cout) if $ENV{V}; $cleanup->(); diff --git a/trace.h b/trace.h new file mode 100644 index 0000000..7b6946d --- 
/dev/null +++ b/trace.h @@ -0,0 +1,66 @@ +#include "lib/Devel/Mwrap/trace_struct.h" + +static void trace_flush_fd(int fd, mstate ms) +{ + size_t n = uatomic_xchg(&ms->trace_wfill, 0); + if (n) write(fd, &ms->trace_wbuf, n); +} + +#define TRACE_WRITE(buf) do { \ + rcu_read_lock(); \ + int fd = uatomic_read(&mwrap_trace_fd); \ + if (fd >= 0) { \ + mstate ms = ms_tsd; \ + if ((ms->trace_wfill + (sizeof(uintptr_t) * 4)) >= \ + sizeof(ms->trace_wbuf)) \ + trace_flush_fd(fd, ms); \ + size_t n = ms->trace_wfill; \ + memcpy(ms->trace_wbuf + n, &buf, sizeof(buf)); \ + uatomic_add(&ms->trace_wfill, sizeof(buf)); \ + } \ + rcu_read_unlock(); \ +} while (0) + +static void trace_memalign(const void *ret, size_t alignment, size_t size) +{ + struct tr_memalign buf = { + .ret = (uintptr_t)ret | TR_MEMALIGN, + .alignment = alignment, + .size = size + }; + TRACE_WRITE(buf); +} + +static void trace_free(const void *ptr) +{ + struct tr_free buf = { .ptr = (uintptr_t)ptr | TR_FREE }; + TRACE_WRITE(buf); +} + +static void trace_malloc(const void *ret, size_t size) +{ + struct tr_malloc buf = { + .ret = (uintptr_t)ret | TR_MALLOC, + .size = size + }; + TRACE_WRITE(buf); +} + +static void trace_realloc(const void *ret, const void *ptr, size_t size) +{ + struct tr_realloc buf = { + .ret = (uintptr_t)ret | TR_REALLOC, + .ptr = (uintptr_t)ptr, + .size = size + }; + TRACE_WRITE(buf); +} + +static void trace_calloc(const void *ret, size_t size) +{ + struct tr_calloc buf = { + .ret = (uintptr_t)ret | TR_CALLOC, + .size = size + }; + TRACE_WRITE(buf); +}
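An illustrative note on the buffering above (again, not part of the
patch): the largest record written by TRACE_WRITE (struct tr_realloc
or struct tr_memalign) is three machine words, so flushing whenever
fewer than four words of space remain guarantees no record ever
straddles a flush; and because trace_wbuf is PIPE_BUF bytes, each
trace_flush_fd() write(2) to the compressor pipe is atomic per POSIX.
A standalone sketch of that invariant:

#include <assert.h>
#include <limits.h>	/* PIPE_BUF */
#include <stddef.h>
#include <stdint.h>

/* mirrors the space check in TRACE_WRITE: flush when fewer than
 * four words remain so any record of up to three words still fits */
static size_t append_record(size_t wfill, size_t recsize)
{
	assert(recsize <= 3 * sizeof(uintptr_t));
	if (wfill + 4 * sizeof(uintptr_t) >= PIPE_BUF)
		wfill = 0; /* trace_flush_fd() resets trace_wfill to 0 */
	assert(wfill + recsize <= PIPE_BUF); /* never overflows wbuf */
	return wfill + recsize;
}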