* [PATCH] port to Perl5 and XS
@ 2019-10-31 20:03 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2019-10-31 20:03 UTC (permalink / raw)
To: mwrap-perl
I mainly use Perl5 (again :P), and sometimes tracking down where
malloc calls happen is necessary. I don't know of any malloc
wrapper interface which is aware of Perl source locations.
Valgrind and similar tools can only figure out C source
locations, which isn't very useful when hacking in Perl.
---
.document | 2 -
.gitignore | 12 +-
.olddoc.yml | 8 -
MANIFEST | 18 +-
Makefile.PL | 72 ++
Mwrap.xs | 891 +++++++++++++++++++++
README | 86 +-
Rakefile | 16 -
bin/mwrap | 36 -
ext/mwrap/extconf.rb | 28 -
ext/mwrap/mwrap.c | 1464 ----------------------------------
ext/mwrap/jhash.h => jhash.h | 0
lib/Devel/Mwrap.pm | 15 +
lib/mwrap_rack.rb | 172 ----
mwrap.gemspec | 32 -
script/mwrap-perl | 34 +
t/mwrap.t | 85 ++
t/source_location.perl | 9 +
test/test_mwrap.rb | 322 --------
typemap | 4 +
20 files changed, 1163 insertions(+), 2143 deletions(-)
delete mode 100644 .document
delete mode 100644 .olddoc.yml
create mode 100644 Makefile.PL
create mode 100644 Mwrap.xs
delete mode 100644 Rakefile
delete mode 100755 bin/mwrap
delete mode 100644 ext/mwrap/extconf.rb
delete mode 100644 ext/mwrap/mwrap.c
rename ext/mwrap/jhash.h => jhash.h (100%)
create mode 100644 lib/Devel/Mwrap.pm
delete mode 100644 lib/mwrap_rack.rb
delete mode 100644 mwrap.gemspec
create mode 100644 script/mwrap-perl
create mode 100644 t/mwrap.t
create mode 100644 t/source_location.perl
delete mode 100644 test/test_mwrap.rb
create mode 100644 typemap
diff --git a/.document b/.document
deleted file mode 100644
index 4ca33e3..0000000
--- a/.document
+++ /dev/null
@@ -1,2 +0,0 @@
-ext/mwrap/mwrap.c
-lib/mwrap_rack.rb
diff --git a/.gitignore b/.gitignore
index aa3606c..81948b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,10 @@
-/tmp
*.o
*.so
-/pkg
-/*.gem
-/doc
+/MYMETA.
+/MYMETA.*
+/MANIFEST.gen
+/Makefile
+/Mwrap.bs
+/Mwrap.c
+/blib
+/pm_to_blib
diff --git a/.olddoc.yml b/.olddoc.yml
deleted file mode 100644
index dac0353..0000000
--- a/.olddoc.yml
+++ /dev/null
@@ -1,8 +0,0 @@
----
-cgit_url: https://80x24.org/mwrap.git
-git_url: https://80x24.org/mwrap.git
-rdoc_url: https://80x24.org/mwrap/
-ml_url: https://80x24.org/mwrap-public/
-public_email: mwrap-public@80x24.org
-nntp_url:
- - nntp://news.public-inbox.org/inbox.comp.lang.ruby.mwrap
diff --git a/MANIFEST b/MANIFEST
index e6d8964..2fa42b1 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -1,14 +1,12 @@
-.document
.gitignore
-.olddoc.yml
COPYING
MANIFEST
+Makefile.PL
+Mwrap.xs
README
-Rakefile
-bin/mwrap
-ext/mwrap/extconf.rb
-ext/mwrap/jhash.h
-ext/mwrap/mwrap.c
-lib/mwrap_rack.rb
-mwrap.gemspec
-test/test_mwrap.rb
+jhash.h
+lib/Devel/Mwrap.pm
+script/mwrap-perl
+t/mwrap.t
+t/source_location.perl
+typemap
diff --git a/Makefile.PL b/Makefile.PL
new file mode 100644
index 0000000..1ae3080
--- /dev/null
+++ b/Makefile.PL
@@ -0,0 +1,72 @@
+use strict;
+use ExtUtils::MakeMaker;
+use Config;
+my $pkg_config = $ENV{PKG_CONFIG} // 'pkg-config';
+my $LIBS = `$pkg_config --libs liburcu-cds liburcu-bp`;
+if ($?) {
+ print STDERR <<END;
+`$pkg_config --libs liburcu-cds liburcu-bp` failed (\$?=$?)
+
+You need to install pkg-config and liburcu <https://liburcu.org/>
+before you can build Devel::Mwrap.
+
+On Debian:
+
+ apt-get install pkg-config liburcu-dev
+END
+ # tell CPAN testing to indicate missing deps
+ exit 0;
+}
+
+if ($Config{usemymalloc} eq 'y') {
+ print STDERR <<END;
+Devel::Mwrap requires `usemymalloc=n'. malloc and related functions
+must be dynamically-linked.
+END
+ exit 0;
+}
+
+# may be empty
+chomp(my $INC = `$pkg_config --cflags liburcu-cds liburcu-bp`);
+my @writemakefile_args = ();
+# Filter out some gcc options which g++ doesn't support.
+my $CCFLAGS = $Config{ccflags};
+
+if (defined $ENV{CPPFLAGS}) {
+ $CCFLAGS .= ' ' . $ENV{CPPFLAGS};
+}
+
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+push @writemakefile_args, (
+ NAME => 'Devel::Mwrap',
+ VERSION_FROM => 'lib/Devel/Mwrap.pm',
+ PREREQ_PM => {},
+ ABSTRACT_FROM => 'lib/Devel/Mwrap.pm',
+ EXE_FILES => [qw(script/mwrap-perl)],
+ AUTHOR => 'mwrap hackers <mwrap-perl@80x24.org>',
+ LIBS => $LIBS, # e.g. -lurcu-cds
+ LICENSE => 'gpl_2', # GPL-2.0+, CPAN::Meta::Spec limitation
+ MIN_PERL_VERSION => '5.14.0', # for caller_cx
+ BUILD_REQUIRES => {},
+ CCFLAGS => $CCFLAGS, # e.g -I/usr/include/$ARCH
+ INC => $INC,
+ depend => {
+ Makefile => 'lib/Devel/Mwrap.pm',
+ }
+);
+
+WriteMakefile(@writemakefile_args);
+
+sub MY::postamble {
+ <<EOF;
+N = \$\$(( \$\$(nproc 2>/dev/null || gnproc 2>/dev/null || echo 2) + 1 ))
+-include config.mak
+
+check-manifest :: MANIFEST
+ if git ls-files >\$?.gen 2>&1; then diff -u \$? \$?.gen; fi
+
+check:: all check-manifest
+ PERL5LIB=blib/lib:blib/arch prove -vw -j\$(N)
+EOF
+}
diff --git a/Mwrap.xs b/Mwrap.xs
new file mode 100644
index 0000000..f196b1a
--- /dev/null
+++ b/Mwrap.xs
@@ -0,0 +1,891 @@
+/*
+ * Copyright (C) 2018-2019 mwrap hackers <mwrap-perl@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ * Disclaimer: I don't really know my way around XS or Perl internals well
+ */
+#define _LGPL_SOURCE /* allows URCU to inline some stuff */
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "embed.h"
+
+#include <execinfo.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <urcu-bp.h>
+#include <urcu/rculfhash.h>
+#include <urcu/rculist.h>
+#include "jhash.h"
+
+static size_t total_bytes_inc, total_bytes_dec;
+
+extern pthread_key_t __attribute__((weak)) PL_thr_key;
+
+/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
+#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
+
+#ifdef __FreeBSD__
+void *__malloc(size_t);
+void __free(void *);
+# define real_malloc __malloc
+# define real_free __free
+#else
+static void *(*real_malloc)(size_t);
+static void (*real_free)(void *);
+static int resolving_malloc;
+#endif /* !FreeBSD */
+
+/*
+ * we need to fake an OOM condition while dlsym is running,
+ * as that calls calloc under glibc, but we don't have the
+ * symbol for the jemalloc calloc, yet
+ */
+# define RETURN_IF_NOT_READY() do { \
+ if (!real_malloc) { \
+ errno = ENOMEM; \
+ return NULL; \
+ } \
+} while (0)
+
+static __thread size_t locating;
+static size_t page_size;
+static struct cds_lfht *totals;
+union padded_mutex {
+ pthread_mutex_t mtx;
+ char pad[64];
+};
+
+/* a round-robin pool of mutexes */
+#define MUTEX_NR (1 << 6)
+#define MUTEX_MASK (MUTEX_NR - 1)
+static size_t mutex_i;
+static union padded_mutex mutexes[MUTEX_NR] = {
+ [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
+};
+
+static pthread_mutex_t *mutex_assign(void)
+{
+ return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+}
+
+static struct cds_lfht *
+lfht_new(void)
+{
+ return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+}
+
+__attribute__((constructor)) static void resolve_malloc(void)
+{
+ int err;
+ ++locating;
+
+#ifdef __FreeBSD__
+ /*
+ * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
+ * which happens at pthread_mutex_lock, and that calls calloc
+ */
+ {
+ size_t i;
+
+ for (i = 0; i < MUTEX_NR; i++) {
+ err = pthread_mutex_init(&mutexes[i].mtx, 0);
+ if (err) {
+ fprintf(stderr, "error: %s\n", strerror(err));
+ _exit(1);
+ }
+ }
+ /* initialize mutexes used by urcu-bp */
+ rcu_read_lock();
+ rcu_read_unlock();
+ }
+#else /* !FreeBSD (tested on GNU/Linux) */
+ if (!real_malloc) {
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+ real_free = dlsym(RTLD_NEXT, "free");
+ if (!real_malloc || !real_free) {
+		fprintf(stderr, "missing malloc/free\n"
+ "\t%p %p\n", real_malloc, real_free);
+ _exit(1);
+ }
+#endif /* !FreeBSD */
+ err = pthread_atfork(call_rcu_before_fork,
+ call_rcu_after_fork_parent,
+ call_rcu_after_fork_child);
+ if (err)
+ fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
+ page_size = sysconf(_SC_PAGESIZE);
+ --locating;
+}
+
+static void
+mutex_lock(pthread_mutex_t *m)
+{
+ int err = pthread_mutex_lock(m);
+ assert(err == 0);
+}
+
+static void
+mutex_unlock(pthread_mutex_t *m)
+{
+ int err = pthread_mutex_unlock(m);
+ assert(err == 0);
+}
+
+#ifndef HAVE_MEMPCPY
+static void *
+my_mempcpy(void *dest, const void *src, size_t n)
+{
+ return (char *)memcpy(dest, src, n) + n;
+}
+#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
+#endif
+
+/* stolen from glibc: */
+#define RETURN_ADDRESS(nr) \
+ (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
+
+#define INT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19)
+static char *int2str(unsigned num, char *dst, size_t * size)
+{
+ if (num <= 9) {
+ *size -= 1;
+ *dst++ = (char)(num + '0');
+ return dst;
+ } else {
+ char buf[INT2STR_MAX];
+ char *end = buf + sizeof(buf);
+ char *p = end;
+ size_t adj;
+
+ do {
+ *size -= 1;
+ *--p = (char)((num % 10) + '0');
+ num /= 10;
+ } while (num && *size);
+
+ if (!num) {
+ adj = end - p;
+ return mempcpy(dst, p, adj);
+ }
+ }
+ return NULL;
+}
+
+/* allocated via real_malloc/real_free */
+struct src_loc {
+ pthread_mutex_t *mtx;
+ size_t total;
+ size_t allocations;
+ size_t frees;
+ struct cds_lfht_node hnode;
+ struct cds_list_head allocs; /* <=> alloc_hdr.node */
+ uint32_t hval;
+ uint32_t capa;
+ char k[];
+};
+
+/*
+ * I hate typedefs, especially when they're hiding the fact that there's
+ * a pointer, but XS needs this, apparently, and it does s/__/::/g
+ */
+typedef struct src_loc * Devel__Mwrap__SrcLoc;
+
+/* every allocation has this in the header, maintain alignment with malloc */
+struct alloc_hdr {
+ struct cds_list_head anode; /* <=> src_loc.allocs */
+ union {
+ struct {
+ struct src_loc *loc;
+ } live;
+ struct rcu_head dead;
+ } as;
+ void *real; /* what to call real_free on */
+ size_t size;
+};
+
+static __thread char kbuf[
+ PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2
+];
+
+static struct alloc_hdr *ptr2hdr(void *p)
+{
+ return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+}
+
+static void *hdr2ptr(struct alloc_hdr *h)
+{
+ return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+}
+
+static int loc_is_addr(const struct src_loc *l)
+{
+ return l->capa == 0;
+}
+
+static size_t loc_size(const struct src_loc *l)
+{
+ return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+}
+
+static int loc_eq(struct cds_lfht_node *node, const void *key)
+{
+ const struct src_loc *existing;
+ const struct src_loc *k = key;
+
+ existing = caa_container_of(node, struct src_loc, hnode);
+
+ return (k->hval == existing->hval &&
+ k->capa == existing->capa &&
+ memcmp(k->k, existing->k, loc_size(k)) == 0);
+}
+
+static struct src_loc *totals_add_rcu(struct src_loc *k)
+{
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *cur;
+ struct src_loc *l = 0;
+ struct cds_lfht *t;
+
+again:
+ t = rcu_dereference(totals);
+ if (!t) goto out_unlock;
+ cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+ cur = cds_lfht_iter_get_node(&iter);
+ if (cur) {
+ l = caa_container_of(cur, struct src_loc, hnode);
+ uatomic_add(&l->total, k->total);
+ uatomic_add(&l->allocations, 1);
+ } else {
+ size_t n = loc_size(k);
+ l = real_malloc(sizeof(*l) + n);
+ if (!l) goto out_unlock;
+ memcpy(l, k, sizeof(*l) + n);
+ l->mtx = mutex_assign();
+ l->frees = 0;
+ l->allocations = 1;
+ CDS_INIT_LIST_HEAD(&l->allocs);
+ cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+ if (cur != &l->hnode) { /* lost race */
+ rcu_read_unlock();
+ real_free(l);
+ rcu_read_lock();
+ goto again;
+ }
+ }
+out_unlock:
+ return l;
+}
+
+static void update_stats_rcu_unlock(const struct src_loc *l)
+{
+ if (caa_likely(l)) rcu_read_unlock();
+}
+
+static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
+{
+ const PERL_CONTEXT *cx = NULL;
+ static const size_t xlen = sizeof(caller);
+ struct src_loc *k, *ret = 0;
+ char *dst;
+
+ if (caa_unlikely(!totals)) return 0;
+ if (locating++) goto out; /* do not recurse into another *alloc */
+
+ uatomic_add(&total_bytes_inc, size);
+
+ rcu_read_lock();
+ cx = caller_cx(0, NULL);
+ if (cx) {
+ const char *ptr = OutCopFILE(cx->blk_oldcop);
+ const COP *lcop;
+ unsigned line;
+ size_t len;
+ size_t int_size = INT2STR_MAX;
+
+ if (!ptr) goto unknown;
+
+ lcop = Perl_closest_cop(aTHX_ cx->blk_oldcop,
+ OpSIBLING(cx->blk_oldcop),
+ cx->blk_sub.retop, TRUE);
+ if (!lcop)
+ lcop = cx->blk_oldcop;
+ line = CopLINE(lcop);
+
+ /* avoid vsnprintf or anything which could call malloc here: */
+ len = strlen(ptr);
+ if (len > PATH_MAX)
+ len = PATH_MAX;
+ k = (void *)kbuf;
+ k->total = size;
+ dst = mempcpy(k->k, ptr, len);
+ *dst++ = ':';
+
+ if (line == UINT_MAX) /* no line number */
+ *dst++ = '-';
+ else
+ dst = int2str(line, dst, &int_size);
+
+ assert(dst && "bad math");
+ *dst = 0; /* terminate string */
+ k->capa = (uint32_t)(dst - k->k + 1);
+ k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+ ret = totals_add_rcu(k);
+ } else {
+unknown:
+ k = alloca(sizeof(*k) + xlen);
+ k->total = size;
+ memcpy(k->k, &caller, xlen);
+ k->capa = 0;
+ k->hval = jhash(k->k, xlen, 0xdeadbeef);
+ ret = totals_add_rcu(k);
+ }
+out:
+ --locating;
+ return ret;
+}
+
+size_t malloc_usable_size(void *p)
+{
+ return ptr2hdr(p)->size;
+}
+
+static void
+free_hdr_rcu(struct rcu_head *dead)
+{
+ struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
+ real_free(h->real);
+}
+
+void free(void *p)
+{
+ if (p) {
+ struct alloc_hdr *h = ptr2hdr(p);
+ struct src_loc *l = h->as.live.loc;
+
+ if (!real_free) return; /* oh well, leak a little */
+ if (l) {
+ uatomic_add(&total_bytes_dec, h->size);
+ uatomic_set(&h->size, 0);
+ uatomic_add(&l->frees, 1);
+
+ mutex_lock(l->mtx);
+ cds_list_del_rcu(&h->anode);
+ mutex_unlock(l->mtx);
+
+ call_rcu(&h->as.dead, free_hdr_rcu);
+ } else {
+ real_free(h->real);
+ }
+ }
+}
+
+static void
+alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
+{
+ /* we need src_loc to remain alive for the duration of this call */
+ if (!h) return;
+ h->size = size;
+ h->real = real;
+ h->as.live.loc = l;
+ if (l) {
+ mutex_lock(l->mtx);
+ cds_list_add_rcu(&h->anode, &l->allocs);
+ mutex_unlock(l->mtx);
+ }
+}
+
+static size_t size_align(size_t size, size_t alignment)
+{
+ return ((size + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool ptr_is_aligned(const void *ptr, size_t alignment)
+{
+ return ((uintptr_t)ptr & (alignment - 1)) == 0;
+}
+
+static void *ptr_align(void *ptr, size_t alignment)
+{
+ return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
+}
+
+static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
+
+static int
+internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
+{
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ void *real;
+ size_t asize;
+ size_t d = alignment / sizeof(void*);
+ size_t r = alignment % sizeof(void*);
+
+ if (!real_malloc) return ENOMEM;
+
+ if (r != 0 || d == 0 || !is_power_of_two(d))
+ return EINVAL;
+
+ if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
+ void *p = malloc(size);
+ if (!p) return ENOMEM;
+ *pp = p;
+ return 0;
+ }
+ for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
+ ; /* double alignment until >= sizeof(struct alloc_hdr) */
+ if (__builtin_add_overflow(size, alignment, &asize) ||
+ __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
+ return ENOMEM;
+
+ l = update_stats_rcu_lock(size, caller);
+
+ real = real_malloc(asize);
+ if (real) {
+ void *p = hdr2ptr(real);
+ if (!ptr_is_aligned(p, alignment))
+ p = ptr_align(p, alignment);
+ h = ptr2hdr(p);
+ alloc_insert_rcu(l, h, size, real);
+ update_stats_rcu_unlock(l);
+ *pp = p;
+ }
+
+ return real ? 0 : ENOMEM;
+}
+
+static void *
+memalign_result(int err, void *p)
+{
+ if (caa_unlikely(err)) {
+ errno = err;
+ return 0;
+ }
+ return p;
+}
+
+void *memalign(size_t alignment, size_t size)
+{
+ void *p;
+ int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+}
+
+int posix_memalign(void **p, size_t alignment, size_t size)
+{
+ return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
+}
+
+void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
+void cfree(void *) __attribute__((alias("free")));
+
+void *valloc(size_t size)
+{
+ void *p;
+ int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+}
+
+#if __GNUC__ < 7
+# define add_overflow_p(a,b) __extension__({ \
+ __typeof__(a) _c; \
+ __builtin_add_overflow(a,b,&_c); \
+ })
+#else
+# define add_overflow_p(a,b) \
+ __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
+#endif
+
+void *pvalloc(size_t size)
+{
+ size_t alignment = page_size;
+ void *p;
+ int err;
+
+ if (add_overflow_p(size, alignment)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ size = size_align(size, alignment);
+ err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
+ return memalign_result(err, p);
+}
+
+void *malloc(size_t size)
+{
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+ void *p;
+
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
+ goto enomem;
+
+ /*
+ * Needed for C++ global declarations using "new",
+ * which happens before our constructor
+ */
+#ifndef __FreeBSD__
+ if (!real_malloc) {
+ if (resolving_malloc) goto enomem;
+ resolving_malloc = 1;
+ real_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+#endif
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (h) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
+enomem:
+ errno = ENOMEM;
+ return 0;
+}
+
+void *calloc(size_t nmemb, size_t size)
+{
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (__builtin_mul_overflow(size, nmemb, &size)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ RETURN_IF_NOT_READY();
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ memset(p, 0, size);
+ }
+ update_stats_rcu_unlock(l);
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
+}
+
+void *realloc(void *ptr, size_t size)
+{
+ void *p;
+ struct src_loc *l;
+ struct alloc_hdr *h;
+ size_t asize;
+
+ if (!size) {
+ free(ptr);
+ return 0;
+ }
+ if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
+ errno = ENOMEM;
+ return 0;
+ }
+ RETURN_IF_NOT_READY();
+
+ l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
+ p = h = real_malloc(asize);
+ if (p) {
+ alloc_insert_rcu(l, h, size, h);
+ p = hdr2ptr(h);
+ }
+ update_stats_rcu_unlock(l);
+
+ if (ptr && p) {
+ struct alloc_hdr *old = ptr2hdr(ptr);
+ memcpy(p, ptr, old->size < size ? old->size : size);
+ free(ptr);
+ }
+ if (caa_unlikely(!p)) errno = ENOMEM;
+ return p;
+}
+
+struct dump_arg {
+ FILE *fp;
+ size_t min;
+};
+
+static void *dump_to_file(struct dump_arg *a)
+{
+ struct cds_lfht_iter iter;
+ struct src_loc *l;
+ struct cds_lfht *t;
+
+ ++locating;
+ rcu_read_lock();
+ t = rcu_dereference(totals);
+ if (!t)
+ goto out_unlock;
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
+ const void *p = l->k;
+ char **s = 0;
+ if (l->total <= a->min) continue;
+
+ if (loc_is_addr(l)) {
+ s = backtrace_symbols(p, 1);
+ p = s[0];
+ }
+ fprintf(a->fp, "%16zu %12zu %s\n",
+ l->total, l->allocations, (const char *)p);
+ if (s) free(s);
+ }
+out_unlock:
+ rcu_read_unlock();
+ --locating;
+ return 0;
+}
+
+static SV *location_string(struct src_loc *l)
+{
+ SV *ret;
+
+ if (loc_is_addr(l)) {
+ char **s = backtrace_symbols((void *)l->k, 1);
+
+ ret = newSVpvn(s[0], strlen(s[0]));
+ }
+ else {
+ ret = newSVpvn(l->k, l->capa - 1);
+ }
+
+ return ret;
+}
+
+static int
+extract_addr(const char *str, size_t len, void **p)
+{
+ const char *c;
+#if defined(__GLIBC__)
+ return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
+#else /* TODO: test FreeBSD */
+ return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
+#endif
+}
+
+#ifndef O_CLOEXEC
+# define O_CLOEXEC 0
+#endif
+__attribute__ ((destructor))
+static void dump_destructor(void)
+{
+ const char *opt = getenv("MWRAP");
+ const char *modes[] = { "a", "a+", "w", "w+", "r+" };
+ struct dump_arg a = { .min = 0 };
+ size_t i;
+ int dump_fd;
+ char *dump_path;
+ char *s;
+
+ if (!opt)
+ return;
+
+ ++locating;
+ if ((dump_path = strstr(opt, "dump_path:")) &&
+ (dump_path += sizeof("dump_path")) &&
+ *dump_path) {
+ char *end = strchr(dump_path, ',');
+ if (end) {
+ char *tmp = alloca(end - dump_path + 1);
+ end = mempcpy(tmp, dump_path, end - dump_path);
+ *end = 0;
+ dump_path = tmp;
+ }
+ dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
+ 0666);
+ if (dump_fd < 0) {
+ fprintf(stderr, "open %s failed: %s\n", dump_path,
+ strerror(errno));
+ goto out;
+ }
+ }
+ else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
+ goto out;
+
+ if ((s = strstr(opt, "dump_min:")))
+ sscanf(s, "dump_min:%zu", &a.min);
+
+ switch (dump_fd) {
+ case 0: goto out;
+ case 1: a.fp = stdout; break;
+ case 2: a.fp = stderr; break;
+ default:
+ if (dump_fd < 0)
+ goto out;
+ a.fp = 0;
+
+ for (i = 0; !a.fp && i < 5; i++)
+ a.fp = fdopen(dump_fd, modes[i]);
+
+ if (!a.fp) {
+ fprintf(stderr, "failed to open fd=%d: %s\n",
+ dump_fd, strerror(errno));
+ goto out;
+ }
+ /* we'll leak some memory here, but this is a destructor */
+ }
+ dump_to_file(&a);
+out:
+ --locating;
+}
+
+MODULE = Devel::Mwrap PACKAGE = Devel::Mwrap PREFIX = mwrap_
+
+BOOT:
+ totals = lfht_new();
+ if (!totals)
+ fprintf(stderr, "failed to allocate totals table\n");
+
+PROTOTYPES: ENABLE
+
+size_t
+mwrap_total_bytes_allocated()
+CODE:
+ RETVAL = total_bytes_inc;
+OUTPUT:
+ RETVAL
+
+size_t
+mwrap_total_bytes_freed()
+CODE:
+ RETVAL = total_bytes_dec;
+OUTPUT:
+ RETVAL
+
+void
+mwrap_reset()
+PREINIT:
+ struct cds_lfht *t;
+ struct cds_lfht_iter iter;
+ struct src_loc *l;
+CODE:
+ uatomic_set(&total_bytes_inc, 0);
+ uatomic_set(&total_bytes_dec, 0);
+
+ rcu_read_lock();
+ t = rcu_dereference(totals);
+ cds_lfht_for_each_entry(t, &iter, l, hnode) {
+ uatomic_set(&l->total, 0);
+ uatomic_set(&l->allocations, 0);
+ uatomic_set(&l->frees, 0);
+ }
+ rcu_read_unlock();
+
+Devel::Mwrap::SrcLoc
+mwrap_get(loc)
+ SV *loc;
+PREINIT:
+ STRLEN len;
+ const char *str;
+ struct src_loc *k = 0;
+ uintptr_t p;
+ struct cds_lfht_iter iter;
+ struct cds_lfht_node *cur;
+ struct cds_lfht *t;
+ struct src_loc *l = NULL;
+ ++locating;
+CODE:
+ if (!SvPOK(loc))
+ XSRETURN_UNDEF;
+ str = SvPV(loc, len);
+ if (len > PATH_MAX)
+ XSRETURN_UNDEF;
+ if (extract_addr(str, len, (void **)&p)) {
+ k = (void *)kbuf;
+ memcpy(k->k, &p, sizeof(p));
+ k->capa = 0;
+ k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
+ } else {
+ k = (void *)kbuf;
+ memcpy(k->k, str, len + 1);
+ k->capa = len + 1;
+ k->hval = jhash(k->k, k->capa, 0xdeadbeef);
+ }
+
+ if (!k)
+ XSRETURN_UNDEF;
+
+ rcu_read_lock();
+ t = rcu_dereference(totals);
+ if (!t) goto out_unlock;
+
+ cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+ cur = cds_lfht_iter_get_node(&iter);
+ if (cur)
+ l = caa_container_of(cur, struct src_loc, hnode);
+out_unlock:
+ rcu_read_unlock();
+ RETVAL = l;
+OUTPUT:
+ RETVAL
+CLEANUP:
+ --locating;
+
+MODULE = Devel::Mwrap PACKAGE = Devel::Mwrap::SrcLoc PREFIX = src_loc_
+
+PROTOTYPES: ENABLE
+
+size_t
+src_loc_frees(self)
+ Devel::Mwrap::SrcLoc self
+PREINIT:
+ ++locating;
+CODE:
+ RETVAL = uatomic_read(&self->frees);
+OUTPUT:
+ RETVAL
+CLEANUP:
+ --locating;
+
+size_t
+src_loc_allocations(self)
+ Devel::Mwrap::SrcLoc self
+PREINIT:
+ ++locating;
+CODE:
+ RETVAL = uatomic_read(&self->allocations);
+OUTPUT:
+ RETVAL
+CLEANUP:
+ --locating;
+
+size_t
+src_loc_total(self)
+ Devel::Mwrap::SrcLoc self
+PREINIT:
+ ++locating;
+CODE:
+ RETVAL = uatomic_read(&self->total);
+OUTPUT:
+ RETVAL
+CLEANUP:
+ --locating;
+
+SV *
+src_loc_name(self)
+ Devel::Mwrap::SrcLoc self
+PREINIT:
+ ++locating;
+CODE:
+ RETVAL = location_string(self);
+OUTPUT:
+ RETVAL
+CLEANUP:
+ --locating;
diff --git a/README b/README
index 3a20258..97ff4ea 100644
--- a/README
+++ b/README
@@ -1,95 +1,83 @@
-= mwrap - LD_PRELOAD malloc wrapper + line stats for Ruby
+Devel::Mwrap - LD_PRELOAD malloc wrapper + line stats for Perl
-mwrap is designed to answer the question:
+Devel::Mwrap is designed to answer the question:
- Which lines of Ruby are hitting malloc the most?
+ Which lines of Perl are hitting malloc the most?
-mwrap wraps all malloc-family calls to trace the Ruby source
-location of such calls and bytes allocated at each callsite.
-As of mwrap 2.0.0, it can also function as a leak detector
-and show live allocations at every call site. Depending on
-your application and workload, the overhead is roughly a 50%
-increase memory and runtime.
+Devel::Mwrap wraps all malloc-family calls to trace the Perl source
+location of such calls and bytes allocated at each callsite. It
+can also function as a leak detector and show live allocations
+at every call site. Depending on your application and workload,
+the overhead is roughly a 50%-100% increase in memory and runtime.
-It works best for allocations under GVL, but tries to track
-numeric caller addresses for allocations made without GVL so you
-can get an idea of how much memory usage certain extensions and
-native libraries use.
+It is thread-safe and requires the concurrent lock-free hash table
+from the Userspace RCU project: https://liburcu.org/
-It requires the concurrent lock-free hash table from the
-Userspace RCU project: https://liburcu.org/
+It relies on dynamic linking to a malloc(3) implementation. If
+you got Perl from your OS distribution, this typically does not
+require rebuilding Perl.
-It does not require recompiling or rebuilding Ruby, but only
-supports Ruby trunk (2.6.0dev+) on a few platforms:
+Tested on the perl package distributed with:
-* GNU/Linux
-* FreeBSD (tested 11.1)
+* Debian GNU/Linux 9, 10
-It may work on NetBSD, OpenBSD and DragonFly BSD.
+It may work on FreeBSD, NetBSD, OpenBSD and DragonFly BSD.
== Install
- # FreeBSD: pkg install liburcu
+ # FreeBSD: pkg install pkg-config liburcu
- # Debian-based systems: apt-get liburcu-dev
-
- # Install mwrap via RubyGems.org
- gem install mwrap
+ # Debian-based systems: apt-get install pkg-config liburcu-dev
== Usage
-mwrap works as an LD_PRELOAD and supplies a mwrap RubyGem executable to
+Devel::Mwrap works as an LD_PRELOAD and supplies a mwrap-perl script to
improve ease-of-use. You can set dump_path: in the MWRAP environment
variable to append the results to a log file:
- MWRAP=dump_path:/path/to/log mwrap RUBY_COMMAND
+ MWRAP=dump_path:/path/to/log mwrap-perl PERL_COMMAND
# And to display the locations with the most allocations:
sort -k1,1rn </path/to/log | $PAGER
-You may also `require "mwrap"' in your Ruby code and use
-Mwrap.dump, Mwrap.reset, Mwrap.each, etc.
+You may also `use Devel::Mwrap' in your Perl code and use
+Devel::Mwrap->dump, Devel::Mwrap->reset, Devel::Mwrap->each, etc.
-However, mwrap MUST be loaded via LD_PRELOAD to have any
+However, Devel::Mwrap MUST be loaded via LD_PRELOAD to have any
effect in tracking malloc use. However, it is safe to keep
-"require 'mwrap'" in performance-critical deployments,
+"use Devel::Mwrap" in performance-critical deployments,
as overhead is only incurred when used as an LD_PRELOAD.
-The output of the mwrap dump is a text file with 3 columns:
+The output of the Devel::Mwrap->dump is a text file with 3 columns:
total_bytes call_count location
-Where location is a Ruby source location (if made under GVL)
-or an address retrieved by backtrace_symbols(3). It is
-recommended to use the sort(1) command on either of the
-first two columns to find the hottest malloc locations.
-
-mwrap 2.0.0+ also supports a Rack application endpoint,
-it is documented at:
-
- https://80x24.org/mwrap/MwrapRack.html
+Where location is a Perl source location or an address retrieved
+by backtrace_symbols(3). It is recommended to use the sort(1)
+command on either of the first two columns to find the hottest
+malloc locations.
== Known problems
* 32-bit machines are prone to overflow (WONTFIX)
-== Mail archives and list:
+== Mail archives and newsgroup:
- https://80x24.org/mwrap-public/
- nntp://80x24.org/inbox.comp.lang.ruby.mwrap
+ https://80x24.org/mwrap-perl/
+ nntp://80x24.org/inbox.comp.lang.perl.mwrap
No subscription will ever be required to post, but HTML mail
will be rejected:
- mwrap-public@80x24.org
+ mwrap-perl@80x24.org
== Hacking
- git clone https://80x24.org/mwrap.git
+ git clone https://80x24.org/mwrap-perl.git
-Send all patches and pull requests (use "git request-pull" to format) to
-the mailing list. We do not use centralized or proprietary messaging
-systems.
+Send all patches and pull requests (use "git request-pull" to format)
+via email to mwrap-perl@80x24.org. We do not and will not use
+proprietary messaging systems.
== License
diff --git a/Rakefile b/Rakefile
deleted file mode 100644
index 50bfa89..0000000
--- a/Rakefile
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
-# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
-require 'rake/testtask'
-begin
- require 'rake/extensiontask'
- Rake::ExtensionTask.new('mwrap')
-rescue LoadError
- warn 'rake-compiler not available, cross compiling disabled'
-end
-
-Rake::TestTask.new(:test)
-task :test => :compile
-task :default => :compile
-
-c_files = File.readlines('MANIFEST').grep(%r{ext/.*\.[ch]$}).map!(&:chomp!)
-task 'compile:mwrap' => c_files
diff --git a/bin/mwrap b/bin/mwrap
deleted file mode 100755
index 9f67dab..0000000
--- a/bin/mwrap
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/ruby
-# frozen_string_literal: true
-# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
-# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
-require 'mwrap'
-mwrap_so = $".grep(%r{/mwrap\.so\z})[0] or abort "mwrap.so not loaded"
-cur = ENV['LD_PRELOAD']
-if cur
- cur = cur.split(/[:\s]+/)
- if !cur.include?(mwrap_so)
- # drop old versions
- cur.delete_if { |path| path.end_with?('/mwrap.so') }
- cur.unshift(mwrap_so)
- ENV['LD_PRELOAD'] = cur.join(':')
- end
-else
- ENV['LD_PRELOAD'] = mwrap_so
-end
-
-# work around close-on-exec by default behavior in Ruby:
-opts = {}
-if ENV['MWRAP'] =~ /dump_fd:(\d+)/
- dump_fd = $1.to_i
- if dump_fd > 2
- dump_io = IO.new(dump_fd)
- opts[dump_fd] = dump_io
- end
-end
-
-# allow inheriting FDs from systemd
-n = ENV['LISTEN_FDS']
-if n && ENV['LISTEN_PID'].to_i == $$
- n = 3 + n.to_i
- (3...n).each { |fd| opts[fd] = IO.new(fd) }
-end
-exec *ARGV, opts
diff --git a/ext/mwrap/extconf.rb b/ext/mwrap/extconf.rb
deleted file mode 100644
index e9dbb1e..0000000
--- a/ext/mwrap/extconf.rb
+++ /dev/null
@@ -1,28 +0,0 @@
-# frozen_string_literal: true
-# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
-# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
-require 'mkmf'
-
-have_func 'mempcpy'
-have_library 'urcu-cds' or abort 'userspace RCU not installed'
-have_header 'urcu/rculfhash.h' or abort 'rculfhash.h not found'
-have_library 'urcu-bp' or abort 'liburcu-bp not found'
-have_library 'dl'
-have_library 'c'
-have_library 'execinfo' # FreeBSD
-
-if try_link(<<'')
-int main(void) { return __builtin_add_overflow_p(0,0,(int)1); }
-
- $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
-end
-
-if try_link(<<'')
-int main(int a) { return __builtin_add_overflow(0,0,&a); }
-
- $defs << '-DHAVE_BUILTIN_ADD_OVERFLOW_P'
-else
- abort 'missing __builtin_add_overflow'
-end
-
-create_makefile 'mwrap'
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
deleted file mode 100644
index 5174127..0000000
--- a/ext/mwrap/mwrap.c
+++ /dev/null
@@ -1,1464 +0,0 @@
-/*
- * Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
- * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
- */
-#define _LGPL_SOURCE /* allows URCU to inline some stuff */
-#include <ruby/ruby.h>
-#include <ruby/thread.h>
-#include <ruby/io.h>
-#include <execinfo.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <dlfcn.h>
-#include <assert.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <pthread.h>
-#include <urcu-bp.h>
-#include <urcu/rculfhash.h>
-#include <urcu/rculist.h>
-#include "jhash.h"
-
-static ID id_uminus;
-const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
-extern int __attribute__((weak)) ruby_thread_has_gvl_p(void);
-extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
-extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
-extern size_t __attribute__((weak)) rb_gc_count(void);
-extern VALUE __attribute__((weak)) rb_cObject;
-extern VALUE __attribute__((weak)) rb_eTypeError;
-extern VALUE __attribute__((weak)) rb_yield(VALUE);
-
-static size_t total_bytes_inc, total_bytes_dec;
-
-/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
-#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
-
-/* match values in Ruby gc.c */
-#define HEAP_PAGE_ALIGN_LOG 14
-enum {
- HEAP_PAGE_ALIGN = (1UL << HEAP_PAGE_ALIGN_LOG),
- REQUIRED_SIZE_BY_MALLOC = (sizeof(size_t) * 5),
- HEAP_PAGE_SIZE = (HEAP_PAGE_ALIGN - REQUIRED_SIZE_BY_MALLOC)
-};
-
-#define IS_HEAP_PAGE_BODY ((struct src_loc *)-1)
-
-int __attribute__((weak)) ruby_thread_has_gvl_p(void)
-{
- return 0;
-}
-
-#ifdef __FreeBSD__
-void *__malloc(size_t);
-void __free(void *);
-# define real_malloc __malloc
-# define real_free __free
-#else
-static void *(*real_malloc)(size_t);
-static void (*real_free)(void *);
-static int resolving_malloc;
-#endif /* !FreeBSD */
-
-/*
- * we need to fake an OOM condition while dlsym is running,
- * as that calls calloc under glibc, but we don't have the
- * symbol for the jemalloc calloc, yet
- */
-# define RETURN_IF_NOT_READY() do { \
- if (!real_malloc) { \
- errno = ENOMEM; \
- return NULL; \
- } \
-} while (0)
-
-static __thread size_t locating;
-static size_t generation;
-static size_t page_size;
-static struct cds_lfht *totals;
-union padded_mutex {
- pthread_mutex_t mtx;
- char pad[64];
-};
-
-/* a round-robin pool of mutexes */
-#define MUTEX_NR (1 << 6)
-#define MUTEX_MASK (MUTEX_NR - 1)
-static size_t mutex_i;
-static union padded_mutex mutexes[MUTEX_NR] = {
- [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
-};
-
-static pthread_mutex_t *mutex_assign(void)
-{
- return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
-}
-
-static struct cds_lfht *
-lfht_new(void)
-{
- return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
-}
-
-__attribute__((constructor)) static void resolve_malloc(void)
-{
- int err;
- ++locating;
-
-#ifdef __FreeBSD__
- /*
- * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
- * which happens at pthread_mutex_lock, and that calls calloc
- */
- {
- size_t i;
-
- for (i = 0; i < MUTEX_NR; i++) {
- err = pthread_mutex_init(&mutexes[i].mtx, 0);
- if (err) {
- fprintf(stderr, "error: %s\n", strerror(err));
- _exit(1);
- }
- }
- /* initialize mutexes used by urcu-bp */
- rcu_read_lock();
- rcu_read_unlock();
- }
-#else /* !FreeBSD (tested on GNU/Linux) */
- if (!real_malloc) {
- resolving_malloc = 1;
- real_malloc = dlsym(RTLD_NEXT, "malloc");
- }
- real_free = dlsym(RTLD_NEXT, "free");
- if (!real_malloc || !real_free) {
- fprintf(stderr, "missing malloc/aligned_alloc/free\n"
- "\t%p %p\n", real_malloc, real_free);
- _exit(1);
- }
-#endif /* !FreeBSD */
- totals = lfht_new();
- if (!totals)
- fprintf(stderr, "failed to allocate totals table\n");
-
- err = pthread_atfork(call_rcu_before_fork,
- call_rcu_after_fork_parent,
- call_rcu_after_fork_child);
- if (err)
- fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
- page_size = sysconf(_SC_PAGESIZE);
- --locating;
-}
-
-static void
-mutex_lock(pthread_mutex_t *m)
-{
- int err = pthread_mutex_lock(m);
- assert(err == 0);
-}
-
-static void
-mutex_unlock(pthread_mutex_t *m)
-{
- int err = pthread_mutex_unlock(m);
- assert(err == 0);
-}
-
-#ifndef HAVE_MEMPCPY
-static void *
-my_mempcpy(void *dest, const void *src, size_t n)
-{
- return (char *)memcpy(dest, src, n) + n;
-}
-#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
-#endif
-
-/* stolen from glibc: */
-#define RETURN_ADDRESS(nr) \
- (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
-
-#define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
-static char *int2str(int num, char *dst, size_t * size)
-{
- if (num <= 9) {
- *size -= 1;
- *dst++ = (char)(num + '0');
- return dst;
- } else {
- char buf[INT2STR_MAX];
- char *end = buf + sizeof(buf);
- char *p = end;
- size_t adj;
-
- do {
- *size -= 1;
- *--p = (char)((num % 10) + '0');
- num /= 10;
- } while (num && *size);
-
- if (!num) {
- adj = end - p;
- return mempcpy(dst, p, adj);
- }
- }
- return NULL;
-}
-
-/*
- * rb_source_location_cstr relies on GET_EC(), and it's possible
- * to have a native thread but no EC during the early and late
- * (teardown) phases of the Ruby process
- */
-static int has_ec_p(void)
-{
- return (ruby_thread_has_gvl_p() && ruby_current_vm_ptr &&
- ruby_current_execution_context_ptr);
-}
-
-struct acc {
- uint64_t nr;
- int64_t min;
- int64_t max;
- double m2;
- double mean;
-};
-
-#define ACC_INIT(name) { .nr=0, .min=INT64_MAX, .max=-1, .m2=0, .mean=0 }
-
-/* for tracking 16K-aligned heap page bodies (protected by GVL) */
-struct {
- pthread_mutex_t lock;
- struct cds_list_head bodies;
- struct cds_list_head freed;
-
- struct acc alive;
- struct acc reborn;
-} hpb_stats = {
- .lock = PTHREAD_MUTEX_INITIALIZER,
- .bodies = CDS_LIST_HEAD_INIT(hpb_stats.bodies),
- .freed = CDS_LIST_HEAD_INIT(hpb_stats.freed),
- .alive = ACC_INIT(hpb_stats.alive),
- .reborn = ACC_INIT(hpb_stats.reborn)
-};
-
-/* allocated via real_malloc/real_free */
-struct src_loc {
- pthread_mutex_t *mtx;
- size_t total;
- size_t allocations;
- size_t frees;
- size_t age_total; /* (age_total / frees) => mean age at free */
- size_t max_lifespan;
- struct cds_lfht_node hnode;
- struct cds_list_head allocs; /* <=> alloc_hdr.node */
- uint32_t hval;
- uint32_t capa;
- char k[];
-};
-
-/* every allocation has this in the header, maintain alignment with malloc */
-struct alloc_hdr {
- struct cds_list_head anode; /* <=> src_loc.allocs */
- union {
- struct {
- size_t gen; /* rb_gc_count() */
- struct src_loc *loc;
- } live;
- struct rcu_head dead;
- struct {
- size_t at; /* rb_gc_count() */
- } hpb_freed;
- } as;
- void *real; /* what to call real_free on */
- size_t size;
-};
-
-static char kbuf[PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2];
-
-static struct alloc_hdr *ptr2hdr(void *p)
-{
- return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
-}
-
-static void *hdr2ptr(struct alloc_hdr *h)
-{
- return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
-}
-
-static int loc_is_addr(const struct src_loc *l)
-{
- return l->capa == 0;
-}
-
-static size_t loc_size(const struct src_loc *l)
-{
- return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
-}
-
-static int loc_eq(struct cds_lfht_node *node, const void *key)
-{
- const struct src_loc *existing;
- const struct src_loc *k = key;
-
- existing = caa_container_of(node, struct src_loc, hnode);
-
- return (k->hval == existing->hval &&
- k->capa == existing->capa &&
- memcmp(k->k, existing->k, loc_size(k)) == 0);
-}
-
-/* note: not atomic */
-static void
-acc_add(struct acc *acc, size_t val)
-{
- double delta = val - acc->mean;
- uint64_t nr = ++acc->nr;
-
- /* just don't divide-by-zero if we ever hit this (unlikely :P) */
- if (nr)
- acc->mean += delta / nr;
-
- acc->m2 += delta * (val - acc->mean);
- if ((int64_t)val < acc->min)
- acc->min = (int64_t)val;
- if ((int64_t)val > acc->max)
- acc->max = (int64_t)val;
-}
-
-#if SIZEOF_LONG == 8
-# define INT64toNUM(x) LONG2NUM((long)x)
-#elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
-# define INT64toNUM(x) LL2NUM((LONG_LONG)x)
-#endif
-
-static VALUE
-acc_max(const struct acc *acc)
-{
- return INT64toNUM(acc->max);
-}
-
-static VALUE
-acc_min(const struct acc *acc)
-{
- return acc->min == INT64_MAX ? INT2FIX(-1) : INT64toNUM(acc->min);
-}
-
-static VALUE
-acc_mean(const struct acc *acc)
-{
- return DBL2NUM(acc->nr ? acc->mean : HUGE_VAL);
-}
-
-static double
-acc_stddev_dbl(const struct acc *acc)
-{
- if (acc->nr > 1) {
- double variance = acc->m2 / (acc->nr - 1);
- return sqrt(variance);
- }
- return 0.0;
-}
-
-static VALUE
-acc_stddev(const struct acc *acc)
-{
- return DBL2NUM(acc_stddev_dbl(acc));
-}
-
-static struct src_loc *totals_add_rcu(struct src_loc *k)
-{
- struct cds_lfht_iter iter;
- struct cds_lfht_node *cur;
- struct src_loc *l = 0;
- struct cds_lfht *t;
-
-again:
- t = rcu_dereference(totals);
- if (!t) goto out_unlock;
- cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
- cur = cds_lfht_iter_get_node(&iter);
- if (cur) {
- l = caa_container_of(cur, struct src_loc, hnode);
- uatomic_add(&l->total, k->total);
- uatomic_add(&l->allocations, 1);
- } else {
- size_t n = loc_size(k);
- l = real_malloc(sizeof(*l) + n);
- if (!l) goto out_unlock;
- memcpy(l, k, sizeof(*l) + n);
- l->mtx = mutex_assign();
- l->age_total = 0;
- l->max_lifespan = 0;
- l->frees = 0;
- l->allocations = 1;
- CDS_INIT_LIST_HEAD(&l->allocs);
- cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
- if (cur != &l->hnode) { /* lost race */
- rcu_read_unlock();
- real_free(l);
- rcu_read_lock();
- goto again;
- }
- }
-out_unlock:
- return l;
-}
-
-static void update_stats_rcu_unlock(const struct src_loc *l)
-{
- if (caa_likely(l)) rcu_read_unlock();
-}
-
-static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
-{
- struct src_loc *k, *ret = 0;
- static const size_t xlen = sizeof(caller);
- char *dst;
-
- if (caa_unlikely(!totals)) return 0;
- if (locating++) goto out; /* do not recurse into another *alloc */
-
- uatomic_add(&total_bytes_inc, size);
-
- rcu_read_lock();
- if (has_ec_p()) {
- int line;
- const char *ptr = rb_source_location_cstr(&line);
- size_t len;
- size_t int_size = INT2STR_MAX;
-
- generation = rb_gc_count();
-
- if (!ptr) goto unknown;
-
- /* avoid vsnprintf or anything which could call malloc here: */
- len = strlen(ptr);
- k = (void *)kbuf;
- k->total = size;
- dst = mempcpy(k->k, ptr, len);
- *dst++ = ':';
- dst = int2str(line, dst, &int_size);
- if (dst) {
- *dst = 0; /* terminate string */
- k->capa = (uint32_t)(dst - k->k + 1);
- k->hval = jhash(k->k, k->capa, 0xdeadbeef);
- ret = totals_add_rcu(k);
- } else {
- rb_bug("bad math making key from location %s:%d\n",
- ptr, line);
- }
- } else {
-unknown:
- k = alloca(sizeof(*k) + xlen);
- k->total = size;
- memcpy(k->k, &caller, xlen);
- k->capa = 0;
- k->hval = jhash(k->k, xlen, 0xdeadbeef);
- ret = totals_add_rcu(k);
- }
-out:
- --locating;
- return ret;
-}
-
-size_t malloc_usable_size(void *p)
-{
- return ptr2hdr(p)->size;
-}
-
-static void
-free_hdr_rcu(struct rcu_head *dead)
-{
- struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
- real_free(h->real);
-}
-
-void free(void *p)
-{
- if (p) {
- struct alloc_hdr *h = ptr2hdr(p);
- struct src_loc *l = h->as.live.loc;
-
- if (!real_free) return; /* oh well, leak a little */
- if (l && l != IS_HEAP_PAGE_BODY) {
- size_t age = generation - h->as.live.gen;
-
- uatomic_add(&total_bytes_dec, h->size);
- uatomic_set(&h->size, 0);
- uatomic_add(&l->frees, 1);
- uatomic_add(&l->age_total, age);
-
- mutex_lock(l->mtx);
- cds_list_del_rcu(&h->anode);
- if (age > l->max_lifespan)
- l->max_lifespan = age;
- mutex_unlock(l->mtx);
-
- call_rcu(&h->as.dead, free_hdr_rcu);
- } else if (l == IS_HEAP_PAGE_BODY) {
- size_t gen = generation;
- size_t age = gen - h->as.live.gen;
-
- h->as.hpb_freed.at = gen;
-
- mutex_lock(&hpb_stats.lock);
- acc_add(&hpb_stats.alive, age);
-
- /* hpb_stats.bodies => hpb_stats.freed */
- cds_list_move(&h->anode, &hpb_stats.freed);
-
- mutex_unlock(&hpb_stats.lock);
- } else {
- real_free(h->real);
- }
- }
-}
-
-static void
-alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
-{
- /* we need src_loc to remain alive for the duration of this call */
- if (!h) return;
- h->size = size;
- h->real = real;
- h->as.live.loc = l;
- h->as.live.gen = generation;
- if (l) {
- mutex_lock(l->mtx);
- cds_list_add_rcu(&h->anode, &l->allocs);
- mutex_unlock(l->mtx);
- }
-}
-
-static size_t size_align(size_t size, size_t alignment)
-{
- return ((size + (alignment - 1)) & ~(alignment - 1));
-}
-
-static bool ptr_is_aligned(const void *ptr, size_t alignment)
-{
- return ((uintptr_t)ptr & (alignment - 1)) == 0;
-}
-
-static void *ptr_align(void *ptr, size_t alignment)
-{
- return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
-}
-
-static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
-
-static int
-internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
-{
- struct src_loc *l;
- struct alloc_hdr *h;
- void *real;
- size_t asize;
- size_t d = alignment / sizeof(void*);
- size_t r = alignment % sizeof(void*);
-
- if (!real_malloc) return ENOMEM;
-
- if (r != 0 || d == 0 || !is_power_of_two(d))
- return EINVAL;
-
- if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
- void *p = malloc(size);
- if (!p) return ENOMEM;
- *pp = p;
- return 0;
- }
- for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
- ; /* double alignment until >= sizeof(struct alloc_hdr) */
- if (__builtin_add_overflow(size, alignment, &asize) ||
- __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
- return ENOMEM;
-
-
- if (alignment == HEAP_PAGE_ALIGN && size == HEAP_PAGE_SIZE) {
- if (has_ec_p()) generation = rb_gc_count();
- l = IS_HEAP_PAGE_BODY;
- } else {
- l = update_stats_rcu_lock(size, caller);
- }
-
- if (l == IS_HEAP_PAGE_BODY) {
- void *p;
- size_t gen = generation;
-
- mutex_lock(&hpb_stats.lock);
-
- /* reuse existing entry */
- if (!cds_list_empty(&hpb_stats.freed)) {
- size_t deathspan;
-
- h = cds_list_first_entry(&hpb_stats.freed,
- struct alloc_hdr, anode);
- /* hpb_stats.freed => hpb_stats.bodies */
- cds_list_move(&h->anode, &hpb_stats.bodies);
- assert(h->size == size);
- assert(h->real);
- real = h->real;
- p = hdr2ptr(h);
- assert(ptr_is_aligned(p, alignment));
-
- deathspan = gen - h->as.hpb_freed.at;
- acc_add(&hpb_stats.reborn, deathspan);
- }
- else {
- real = real_malloc(asize);
- if (!real) return ENOMEM;
-
- p = hdr2ptr(real);
- if (!ptr_is_aligned(p, alignment))
- p = ptr_align(p, alignment);
- h = ptr2hdr(p);
- h->size = size;
- h->real = real;
- cds_list_add(&h->anode, &hpb_stats.bodies);
- }
- mutex_unlock(&hpb_stats.lock);
- h->as.live.loc = l;
- h->as.live.gen = gen;
- *pp = p;
- }
- else {
- real = real_malloc(asize);
- if (real) {
- void *p = hdr2ptr(real);
- if (!ptr_is_aligned(p, alignment))
- p = ptr_align(p, alignment);
- h = ptr2hdr(p);
- alloc_insert_rcu(l, h, size, real);
- update_stats_rcu_unlock(l);
- *pp = p;
- }
- }
-
- return real ? 0 : ENOMEM;
-}
-
-static void *
-memalign_result(int err, void *p)
-{
- if (caa_unlikely(err)) {
- errno = err;
- return 0;
- }
- return p;
-}
-
-void *memalign(size_t alignment, size_t size)
-{
- void *p;
- int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
- return memalign_result(err, p);
-}
-
-int posix_memalign(void **p, size_t alignment, size_t size)
-{
- return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
-}
-
-void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
-void cfree(void *) __attribute__((alias("free")));
-
-void *valloc(size_t size)
-{
- void *p;
- int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
- return memalign_result(err, p);
-}
-
-#if __GNUC__ < 7
-# define add_overflow_p(a,b) __extension__({ \
- __typeof__(a) _c; \
- __builtin_add_overflow(a,b,&_c); \
- })
-#else
-# define add_overflow_p(a,b) \
- __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
-#endif
-
-void *pvalloc(size_t size)
-{
- size_t alignment = page_size;
- void *p;
- int err;
-
- if (add_overflow_p(size, alignment)) {
- errno = ENOMEM;
- return 0;
- }
- size = size_align(size, alignment);
- err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
- return memalign_result(err, p);
-}
-
-void *malloc(size_t size)
-{
- struct src_loc *l;
- struct alloc_hdr *h;
- size_t asize;
- void *p;
-
- if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
- goto enomem;
-
- /*
- * Needed for C++ global declarations using "new",
- * which happens before our constructor
- */
-#ifndef __FreeBSD__
- if (!real_malloc) {
- if (resolving_malloc) goto enomem;
- resolving_malloc = 1;
- real_malloc = dlsym(RTLD_NEXT, "malloc");
- }
-#endif
- l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
- p = h = real_malloc(asize);
- if (h) {
- alloc_insert_rcu(l, h, size, h);
- p = hdr2ptr(h);
- }
- update_stats_rcu_unlock(l);
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
-enomem:
- errno = ENOMEM;
- return 0;
-}
-
-void *calloc(size_t nmemb, size_t size)
-{
- void *p;
- struct src_loc *l;
- struct alloc_hdr *h;
- size_t asize;
-
- if (__builtin_mul_overflow(size, nmemb, &size)) {
- errno = ENOMEM;
- return 0;
- }
- if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
- errno = ENOMEM;
- return 0;
- }
- RETURN_IF_NOT_READY();
- l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
- p = h = real_malloc(asize);
- if (p) {
- alloc_insert_rcu(l, h, size, h);
- p = hdr2ptr(h);
- memset(p, 0, size);
- }
- update_stats_rcu_unlock(l);
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
-}
-
-void *realloc(void *ptr, size_t size)
-{
- void *p;
- struct src_loc *l;
- struct alloc_hdr *h;
- size_t asize;
-
- if (!size) {
- free(ptr);
- return 0;
- }
- if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
- errno = ENOMEM;
- return 0;
- }
- RETURN_IF_NOT_READY();
-
- l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
- p = h = real_malloc(asize);
- if (p) {
- alloc_insert_rcu(l, h, size, h);
- p = hdr2ptr(h);
- }
- update_stats_rcu_unlock(l);
-
- if (ptr && p) {
- struct alloc_hdr *old = ptr2hdr(ptr);
- memcpy(p, ptr, old->size < size ? old->size : size);
- free(ptr);
- }
- if (caa_unlikely(!p)) errno = ENOMEM;
- return p;
-}
-
-struct dump_arg {
- FILE *fp;
- size_t min;
-};
-
-static void *dump_to_file(void *x)
-{
- struct dump_arg *a = x;
- struct cds_lfht_iter iter;
- struct src_loc *l;
- struct cds_lfht *t;
-
- ++locating;
- rcu_read_lock();
- t = rcu_dereference(totals);
- if (!t)
- goto out_unlock;
- cds_lfht_for_each_entry(t, &iter, l, hnode) {
- const void *p = l->k;
- char **s = 0;
- if (l->total <= a->min) continue;
-
- if (loc_is_addr(l)) {
- s = backtrace_symbols(p, 1);
- p = s[0];
- }
- fprintf(a->fp, "%16zu %12zu %s\n",
- l->total, l->allocations, (const char *)p);
- if (s) free(s);
- }
-out_unlock:
- rcu_read_unlock();
- --locating;
- return 0;
-}
-
-/*
- * call-seq:
- *
- * Mwrap.dump([[io] [, min]] -> nil
- *
- * Dumps the current totals to +io+ which must be an IO object
- * (StringIO and similar are not supported). Total sizes smaller
- * than or equal to +min+ are skipped.
- *
- * The output is space-delimited by 3 columns:
- *
- * total_size call_count location
- */
-static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
-{
- VALUE io, min;
- struct dump_arg a;
- rb_io_t *fptr;
-
- rb_scan_args(argc, argv, "02", &io, &min);
-
- if (NIL_P(io))
- /* library may be linked w/o Ruby */
- io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
-
- a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
- io = rb_io_get_io(io);
- io = rb_io_get_write_io(io);
- GetOpenFile(io, fptr);
- a.fp = rb_io_stdio_file(fptr);
-
- rb_thread_call_without_gvl(dump_to_file, &a, 0, 0);
- RB_GC_GUARD(io);
- return Qnil;
-}
-
-/* The whole operation is not remotely atomic... */
-static void *totals_reset(void *ign)
-{
- struct cds_lfht *t;
- struct cds_lfht_iter iter;
- struct src_loc *l;
-
- uatomic_set(&total_bytes_inc, 0);
- uatomic_set(&total_bytes_dec, 0);
-
- rcu_read_lock();
- t = rcu_dereference(totals);
- cds_lfht_for_each_entry(t, &iter, l, hnode) {
- uatomic_set(&l->total, 0);
- uatomic_set(&l->allocations, 0);
- uatomic_set(&l->frees, 0);
- uatomic_set(&l->age_total, 0);
- uatomic_set(&l->max_lifespan, 0);
- }
- rcu_read_unlock();
- return 0;
-}
-
-/*
- * call-seq:
- *
- * Mwrap.reset -> nil
- *
- * Resets the the total tables by zero-ing all counters.
- * This resets all statistics. This is not an atomic operation
- * as other threads (outside of GVL) may increment counters.
- */
-static VALUE mwrap_reset(VALUE mod)
-{
- rb_thread_call_without_gvl(totals_reset, 0, 0, 0);
- return Qnil;
-}
-
-/* :nodoc: */
-static VALUE mwrap_clear(VALUE mod)
-{
- return mwrap_reset(mod);
-}
-
-static VALUE rcu_unlock_ensure(VALUE ignored)
-{
- rcu_read_unlock();
- --locating;
- return Qfalse;
-}
-
-static VALUE location_string(struct src_loc *l)
-{
- VALUE ret, tmp;
-
- if (loc_is_addr(l)) {
- char **s = backtrace_symbols((void *)l->k, 1);
- tmp = rb_str_new_cstr(s[0]);
- free(s);
- }
- else {
- tmp = rb_str_new(l->k, l->capa - 1);
- }
-
- /* deduplicate and try to free up some memory */
- ret = rb_funcall(tmp, id_uminus, 0);
- if (!OBJ_FROZEN_RAW(tmp))
- rb_str_resize(tmp, 0);
-
- return ret;
-}
-
-static VALUE dump_each_rcu(VALUE x)
-{
- struct dump_arg *a = (struct dump_arg *)x;
- struct cds_lfht *t;
- struct cds_lfht_iter iter;
- struct src_loc *l;
-
- t = rcu_dereference(totals);
- cds_lfht_for_each_entry(t, &iter, l, hnode) {
- VALUE v[6];
- if (l->total <= a->min) continue;
-
- v[0] = location_string(l);
- v[1] = SIZET2NUM(l->total);
- v[2] = SIZET2NUM(l->allocations);
- v[3] = SIZET2NUM(l->frees);
- v[4] = SIZET2NUM(l->age_total);
- v[5] = SIZET2NUM(l->max_lifespan);
-
- rb_yield_values2(6, v);
- assert(rcu_read_ongoing());
- }
- return Qnil;
-}
-
-/*
- * call-seq:
- *
- * Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
- * ...
- * end
- *
- * Yields each entry of the of the table to a caller-supplied block.
- * +min+ may be specified to filter out lines with +total+ bytes
- * equal-to-or-smaller-than the supplied minimum.
- */
-static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
-{
- VALUE min;
- struct dump_arg a;
-
- rb_scan_args(argc, argv, "01", &min);
- a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
-
- ++locating;
- rcu_read_lock();
-
- return rb_ensure(dump_each_rcu, (VALUE)&a, rcu_unlock_ensure, 0);
-}
-
-static size_t
-src_loc_memsize(const void *p)
-{
- return sizeof(struct src_loc);
-}
-
-static const rb_data_type_t src_loc_type = {
- "source_location",
- /* no marking, no freeing */
- { 0, 0, src_loc_memsize, /* reserved */ },
- /* parent, data, [ flags ] */
-};
-
-static VALUE cSrcLoc;
-
-static int
-extract_addr(const char *str, size_t len, void **p)
-{
- const char *c;
-#if defined(__GLIBC__)
- return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
-#else /* tested FreeBSD */
- return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
-#endif
-}
-
-/*
- * call-seq:
- * Mwrap[location] -> Mwrap::SourceLocation
- *
- * Returns the associated Mwrap::SourceLocation given the +location+
- * String. +location+ is either a Ruby source location path:line
- * (e.g. "/path/to/foo.rb:5") or a hexadecimal memory address with
- * square-braces part yielded by Mwrap.dump (e.g. "[0xdeadbeef]")
- */
-static VALUE mwrap_aref(VALUE mod, VALUE loc)
-{
- const char *str = StringValueCStr(loc);
- int len = RSTRING_LENINT(loc);
- struct src_loc *k = 0;
- uintptr_t p;
- struct cds_lfht_iter iter;
- struct cds_lfht_node *cur;
- struct cds_lfht *t;
- struct src_loc *l;
- VALUE val = Qnil;
-
- if (extract_addr(str, len, (void **)&p)) {
- k = (void *)kbuf;
- memcpy(k->k, &p, sizeof(p));
- k->capa = 0;
- k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
- } else {
- k = (void *)kbuf;
- memcpy(k->k, str, len + 1);
- k->capa = len + 1;
- k->hval = jhash(k->k, k->capa, 0xdeadbeef);
- }
-
- if (!k) return val;
-
- rcu_read_lock();
- t = rcu_dereference(totals);
- if (!t) goto out_unlock;
-
- cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
- cur = cds_lfht_iter_get_node(&iter);
- if (cur) {
- l = caa_container_of(cur, struct src_loc, hnode);
- val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
- }
-out_unlock:
- rcu_read_unlock();
- return val;
-}
-
-static VALUE src_loc_each_i(VALUE p)
-{
- struct alloc_hdr *h;
- struct src_loc *l = (struct src_loc *)p;
-
- cds_list_for_each_entry_rcu(h, &l->allocs, anode) {
- size_t gen = uatomic_read(&h->as.live.gen);
- size_t size = uatomic_read(&h->size);
-
- if (size) {
- VALUE v[2];
- v[0] = SIZET2NUM(size);
- v[1] = SIZET2NUM(gen);
-
- rb_yield_values2(2, v);
- }
- }
-
- return Qfalse;
-}
-
-static struct src_loc *src_loc_get(VALUE self)
-{
- struct src_loc *l;
- TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
- assert(l);
- return l;
-}
-
-/*
- * call-seq:
- * loc = Mwrap[location]
- * loc.each { |size,generation| ... }
- *
- * Iterates through live allocations for a given Mwrap::SourceLocation,
- * yielding the +size+ (in bytes) and +generation+ of each allocation.
- * The +generation+ is the value of the GC.count method at the time
- * the allocation was made.
- *
- * This functionality is only available in mwrap 2.0.0+
- */
-static VALUE src_loc_each(VALUE self)
-{
- struct src_loc *l = src_loc_get(self);
-
- assert(locating == 0 && "forgot to clear locating");
- ++locating;
- rcu_read_lock();
- rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
- return self;
-}
-
-/*
- * The the mean lifespan (in GC generations) of allocations made from this
- * location. This does not account for live allocations.
- */
-static VALUE src_loc_mean_lifespan(VALUE self)
-{
- struct src_loc *l = src_loc_get(self);
- size_t tot, frees;
-
- frees = uatomic_read(&l->frees);
- tot = uatomic_read(&l->age_total);
- return DBL2NUM(frees ? ((double)tot/(double)frees) : HUGE_VAL);
-}
-
-/* The number of frees made from this location */
-static VALUE src_loc_frees(VALUE self)
-{
- return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
-}
-
-/* The number of allocations made from this location */
-static VALUE src_loc_allocations(VALUE self)
-{
- return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
-}
-
-/* The total number of bytes allocated from this location */
-static VALUE src_loc_total(VALUE self)
-{
- return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
-}
-
-/*
- * The maximum age (in GC generations) of an allocation before it was freed.
- * This does not account for live allocations.
- */
-static VALUE src_loc_max_lifespan(VALUE self)
-{
- return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
-}
-
-/*
- * Returns a frozen String location of the given SourceLocation object.
- */
-static VALUE src_loc_name(VALUE self)
-{
- struct src_loc *l = src_loc_get(self);
- VALUE ret;
-
- ++locating;
- ret = location_string(l);
- --locating;
- return ret;
-}
-
-static VALUE reset_locating(VALUE ign) { --locating; return Qfalse; }
-
-/*
- * call-seq:
- *
- * Mwrap.quiet do |depth|
- * # expensive sort/calculate/emitting results of Mwrap.each
- * # affecting statistics of the rest of the app
- * end
- *
- * Stops allocation tracking inside the block. This is useful for
- * monitoring code which calls other Mwrap (or ObjectSpace/GC)
- * functions which unavoidably allocate memory.
- *
- * This feature was added in mwrap 2.0.0+
- */
-static VALUE mwrap_quiet(VALUE mod)
-{
- size_t cur = ++locating;
- return rb_ensure(rb_yield, SIZET2NUM(cur), reset_locating, 0);
-}
-
-static VALUE total_inc(VALUE mod)
-{
- return SIZET2NUM(total_bytes_inc);
-}
-
-static VALUE total_dec(VALUE mod)
-{
- return SIZET2NUM(total_bytes_dec);
-}
-
-static VALUE hpb_each_yield(VALUE ignore)
-{
- struct alloc_hdr *h, *next;
-
- cds_list_for_each_entry_safe(h, next, &hpb_stats.bodies, anode) {
- VALUE v[2]; /* [ generation, address ] */
- void *addr = hdr2ptr(h);
- assert(ptr_is_aligned(addr, HEAP_PAGE_ALIGN));
- v[0] = LONG2NUM((long)addr);
- v[1] = SIZET2NUM(h->as.live.gen);
- rb_yield_values2(2, v);
- }
- return Qnil;
-}
-
-/*
- * call-seq:
- *
- * Mwrap::HeapPageBody.each { |gen, addr| } -> Integer
- *
- * Yields the generation (GC.count) the heap page body was created
- * and address of the heap page body as an Integer. Returns the
- * number of allocated pages as an Integer. This return value should
- * match the result of GC.stat(:heap_allocated_pages)
- */
-static VALUE hpb_each(VALUE mod)
-{
- ++locating;
- return rb_ensure(hpb_each_yield, Qfalse, reset_locating, 0);
-}
-
-/*
- * call-seq:
- *
- * Mwrap::HeapPageBody.stat -> Hash
- * Mwrap::HeapPageBody.stat(hash) -> hash
- *
- * The maximum lifespan of a heap page body in the Ruby VM.
- * This may be Infinity if no heap page bodies were ever freed.
- */
-static VALUE hpb_stat(int argc, VALUE *argv, VALUE hpb)
-{
- VALUE h;
-
- rb_scan_args(argc, argv, "01", &h);
- if (NIL_P(h))
- h = rb_hash_new();
- else if (!RB_TYPE_P(h, T_HASH))
- rb_raise(rb_eTypeError, "not a hash %+"PRIsVALUE, h);
-
- ++locating;
-#define S(x) ID2SYM(rb_intern(#x))
- rb_hash_aset(h, S(lifespan_max), acc_max(&hpb_stats.alive));
- rb_hash_aset(h, S(lifespan_min), acc_min(&hpb_stats.alive));
- rb_hash_aset(h, S(lifespan_mean), acc_mean(&hpb_stats.alive));
- rb_hash_aset(h, S(lifespan_stddev), acc_stddev(&hpb_stats.alive));
- rb_hash_aset(h, S(deathspan_max), acc_max(&hpb_stats.reborn));
- rb_hash_aset(h, S(deathspan_min), acc_min(&hpb_stats.reborn));
- rb_hash_aset(h, S(deathspan_mean), acc_mean(&hpb_stats.reborn));
- rb_hash_aset(h, S(deathspan_stddev), acc_stddev(&hpb_stats.reborn));
- rb_hash_aset(h, S(resurrects), SIZET2NUM(hpb_stats.reborn.nr));
-#undef S
- --locating;
-
- return h;
-}
-
-/*
- * Document-module: Mwrap
- *
- * require 'mwrap'
- *
- * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD
- * wrapper. As a Ruby C extension, it exposes a limited Ruby API.
- * To be effective at gathering status, mwrap must be loaded as a
- * LD_PRELOAD (using the mwrap(1) executable makes it easy)
- *
- * ENVIRONMENT
- *
- * The "MWRAP" environment variable contains a comma-delimited list
- * of key:value options for automatically dumping at program exit.
- *
- * * dump_fd: a writable FD to dump to
- * * dump_path: a path to dump to, the file is opened in O_APPEND mode
- * * dump_min: the minimum allocation size (total) to dump
- * * dump_heap: mask of heap_page_body statistics to dump
- *
- * If both `dump_fd' and `dump_path' are specified, dump_path takes
- * precedence.
- *
- * dump_heap bitmask
- * * 0x01 - summary stats (same info as HeapPageBody.stat)
- * * 0x02 - all live heaps (similar to HeapPageBody.each)
- * * 0x04 - skip non-heap_page_body-related output
- */
-void Init_mwrap(void)
-{
- VALUE mod, hpb;
-
- ++locating;
- mod = rb_define_module("Mwrap");
- id_uminus = rb_intern("-@");
-
- /*
- * Represents a location in source code or library
- * address which calls a memory allocation. It is
- * updated automatically as allocations are made, so
- * there is no need to reload or reread it from Mwrap#[].
- * This class is only available since mwrap 2.0.0+.
- */
- cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
- rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
- rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
- rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
- rb_define_singleton_method(mod, "each", mwrap_each, -1);
- rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
- rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
- rb_define_singleton_method(mod, "total_bytes_allocated", total_inc, 0);
- rb_define_singleton_method(mod, "total_bytes_freed", total_dec, 0);
-
-
- rb_define_method(cSrcLoc, "each", src_loc_each, 0);
- rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
- rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
- rb_define_method(cSrcLoc, "total", src_loc_total, 0);
- rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
- rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
- rb_define_method(cSrcLoc, "name", src_loc_name, 0);
-
- /*
- * Information about "struct heap_page_body" allocations from
- * Ruby gc.c. This can be useful for tracking fragmentation
- * from posix_memalign(3) use in mainline Ruby:
- *
- * https://sourceware.org/bugzilla/show_bug.cgi?id=14581
- */
- hpb = rb_define_class_under(mod, "HeapPageBody", rb_cObject);
- rb_define_singleton_method(hpb, "stat", hpb_stat, -1);
- rb_define_singleton_method(hpb, "each", hpb_each, 0);
-
- --locating;
-}
-
-enum {
- DUMP_HPB_STATS = 0x1,
- DUMP_HPB_EACH = 0x2,
- DUMP_HPB_EXCL = 0x4,
-};
-
-static void dump_hpb(FILE *fp, unsigned flags)
-{
- if (flags & DUMP_HPB_STATS) {
- fprintf(fp,
- "lifespan_max: %zu\n"
- "lifespan_min:%s%zu\n"
- "lifespan_mean: %0.3f\n"
- "lifespan_stddev: %0.3f\n"
- "deathspan_max: %zu\n"
- "deathspan_min:%s%zu\n"
- "deathspan_mean: %0.3f\n"
- "deathspan_stddev: %0.3f\n"
- "gc_count: %zu\n",
- hpb_stats.alive.max,
- hpb_stats.alive.min == INT64_MAX ? " -" : " ",
- hpb_stats.alive.min,
- hpb_stats.alive.mean,
- acc_stddev_dbl(&hpb_stats.alive),
- hpb_stats.reborn.max,
- hpb_stats.reborn.min == INT64_MAX ? " -" : " ",
- hpb_stats.reborn.min,
- hpb_stats.reborn.mean,
- acc_stddev_dbl(&hpb_stats.reborn),
- /* n.b.: unsafe to call rb_gc_count() in destructor */
- generation);
- }
- if (flags & DUMP_HPB_EACH) {
- struct alloc_hdr *h;
-
- cds_list_for_each_entry(h, &hpb_stats.bodies, anode) {
- void *addr = hdr2ptr(h);
-
- fprintf(fp, "%p\t%zu\n", addr, h->as.live.gen);
- }
- }
-}
-
-/* rb_cloexec_open isn't usable by non-Ruby processes */
-#ifndef O_CLOEXEC
-# define O_CLOEXEC 0
-#endif
-
-__attribute__ ((destructor))
-static void mwrap_dump_destructor(void)
-{
- const char *opt = getenv("MWRAP");
- const char *modes[] = { "a", "a+", "w", "w+", "r+" };
- struct dump_arg a = { .min = 0 };
- size_t i;
- int dump_fd;
- unsigned dump_heap = 0;
- char *dump_path;
- char *s;
-
- if (!opt)
- return;
-
- ++locating;
- if ((dump_path = strstr(opt, "dump_path:")) &&
- (dump_path += sizeof("dump_path")) &&
- *dump_path) {
- char *end = strchr(dump_path, ',');
- if (end) {
- char *tmp = alloca(end - dump_path + 1);
- end = mempcpy(tmp, dump_path, end - dump_path);
- *end = 0;
- dump_path = tmp;
- }
- dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
- 0666);
- if (dump_fd < 0) {
- fprintf(stderr, "open %s failed: %s\n", dump_path,
- strerror(errno));
- goto out;
- }
- }
- else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
- goto out;
-
- if ((s = strstr(opt, "dump_min:")))
- sscanf(s, "dump_min:%zu", &a.min);
-
- if ((s = strstr(opt, "dump_heap:")))
- sscanf(s, "dump_heap:%u", &dump_heap);
-
- switch (dump_fd) {
- case 0: goto out;
- case 1: a.fp = stdout; break;
- case 2: a.fp = stderr; break;
- default:
- if (dump_fd < 0)
- goto out;
- a.fp = 0;
-
- for (i = 0; !a.fp && i < 5; i++)
- a.fp = fdopen(dump_fd, modes[i]);
-
- if (!a.fp) {
- fprintf(stderr, "failed to open fd=%d: %s\n",
- dump_fd, strerror(errno));
- goto out;
- }
- /* we'll leak some memory here, but this is a destructor */
- }
- if ((dump_heap & DUMP_HPB_EXCL) == 0)
- dump_to_file(&a);
- dump_hpb(a.fp, dump_heap);
-out:
- --locating;
-}
diff --git a/ext/mwrap/jhash.h b/jhash.h
similarity index 100%
rename from ext/mwrap/jhash.h
rename to jhash.h
diff --git a/lib/Devel/Mwrap.pm b/lib/Devel/Mwrap.pm
new file mode 100644
index 0000000..f74f7d1
--- /dev/null
+++ b/lib/Devel/Mwrap.pm
@@ -0,0 +1,15 @@
+# Copyright (C) 2019 all contributors <mwrap-perl@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+package Devel::Mwrap;
+use strict;
+our $VERSION = '0.0.0';
+use XSLoader;
+XSLoader::load(__PACKAGE__, $VERSION);
+
+1;
+__END__
+=pod
+
+=head1 NAME
+
+Devel::Mwrap - LD_PRELOAD malloc wrapper + line stats for Perl
diff --git a/lib/mwrap_rack.rb b/lib/mwrap_rack.rb
deleted file mode 100644
index e45b26d..0000000
--- a/lib/mwrap_rack.rb
+++ /dev/null
@@ -1,172 +0,0 @@
-# Copyright (C) 2018 all contributors <mwrap@80x24.org>
-# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
-# frozen_string_literal: true
-require 'mwrap'
-require 'rack'
-require 'cgi'
-
-# MwrapRack is a standalone Rack application which can be
-# mounted to run within your application process.
-#
-# Using the Rack::Builder API in config.ru, you can map it to
-# the "/MWRAP/" endpoint. As with the rest of the Mwrap API,
-# your Rack server needs to be spawned with the mwrap(1)
-# wrapper to enable the LD_PRELOAD.
-#
-# require 'mwrap_rack'
-# map('/MWRAP') { run(MwrapRack.new) }
-# map('/') { run(your_normal_app) }
-#
-# A live demo is available at https://80x24.org/MWRAP/
-# (warning the demo machine is 32-bit, so counters will overflow)
-#
-# This module is only available in mwrap 2.0.0+
-class MwrapRack
- module HtmlResponse # :nodoc:
- def response
- [ 200, {
- 'Expires' => 'Fri, 01 Jan 1980 00:00:00 GMT',
- 'Pragma' => 'no-cache',
- 'Cache-Control' => 'no-cache, max-age=0, must-revalidate',
- 'Content-Type' => 'text/html; charset=UTF-8',
- }, self ]
- end
- end
-
- class Each < Struct.new(:script_name, :min, :sort) # :nodoc:
- include HtmlResponse
- HEADER = '<tr><th>' + %w(total allocations frees mean_life max_life
- location).join('</th><th>') + '</th></tr>'
- FIELDS = %w(total allocations frees mean_life max_life location)
- def each
- Mwrap.quiet do
- t = -"Mwrap.each(#{min})"
- sn = script_name
- all = []
- f = FIELDS.dup
- sc = FIELDS.index(sort || 'total') || 0
- f[sc] = -"<b>#{f[sc]}</b>"
- f.map! do |hdr|
- if hdr.start_with?('<b>')
- hdr
- else
- -%Q(<a\nhref="#{sn}/each/#{min}?sort=#{hdr}">#{hdr}</a>)
- end
- end
- Mwrap.each(min) do |loc, total, allocations, frees, age_sum, max_life|
- mean_life = frees == 0 ? Float::INFINITY : age_sum/frees.to_f
- all << [total,allocations,frees,mean_life,max_life,loc]
- end
- all.sort_by! { |cols| -cols[sc] }
-
- yield(-"<html><head><title>#{t}</title></head>" \
- "<body><h1>#{t}</h1>\n" \
- "<h2>Current generation: #{GC.count}</h2>\n<table>\n" \
- "<tr><th>#{f.join('</th><th>')}</th></tr>\n")
- all.each do |cols|
- loc = cols.pop
- cols[3] = sprintf('%0.3f', cols[3]) # mean_life
- href = -(+"#{sn}/at/#{CGI.escape(loc)}").encode!(xml: :attr)
- yield(%Q(<tr><td>#{cols.join('</td><td>')}<td><a\nhref=#{
- href}>#{-loc.encode(xml: :text)}</a></td></tr>\n))
- cols.clear
- end.clear
- yield "</table></body></html>\n"
- end
- end
- end
-
- class EachAt < Struct.new(:loc) # :nodoc:
- include HtmlResponse
- HEADER = '<tr><th>size</th><th>generation</th></tr>'
-
- def each
- t = loc.name.encode(xml: :text)
- yield(-"<html><head><title>#{t}</title></head>" \
- "<body><h1>live allocations at #{t}</h1>" \
- "<h2>Current generation: #{GC.count}</h2>\n<table>#{HEADER}")
- loc.each do |size, generation|
- yield("<tr><td>#{size}</td><td>#{generation}</td></tr>\n")
- end
- yield "</table></body></html>\n"
- end
- end
-
- class HeapPages # :nodoc:
- include HtmlResponse
- HEADER = '<tr><th>address</th><th>generation</th></tr>'
-
- def hpb_rows
- Mwrap::HeapPageBody.stat(stat = Thread.current[:mwrap_hpb_stat] ||= {})
- %i(lifespan_max lifespan_min lifespan_mean lifespan_stddev
- deathspan_max deathspan_min deathspan_mean deathspan_stddev
- resurrects
- ).map! do |k|
- "<tr><td>#{k}</td><td>#{stat[k]}</td></tr>\n"
- end.join
- end
-
- def gc_stat_rows
- GC.stat(stat = Thread.current[:mwrap_gc_stat] ||= {})
- %i(count heap_allocated_pages heap_eden_pages heap_tomb_pages
- total_allocated_pages total_freed_pages).map do |k|
- "<tr><td>GC.stat(:#{k})</td><td>#{stat[k]}</td></tr>\n"
- end.join
- end
-
- GC_STAT_URL = 'https://docs.ruby-lang.org/en/trunk/GC.html#method-c-stat'
- GC_STAT_HELP = <<~""
- <p>Non-Infinity lifespans can indicate fragmentation.
- <p>See <a
- href="#{GC_STAT_URL}">#{GC_STAT_URL}</a> for info on GC.stat values.
-
- def each
- Mwrap.quiet do
- yield("<html><head><title>heap pages</title></head>" \
- "<body><h1>heap pages</h1>" \
- "<table><tr><th>stat</th><th>value</th></tr>\n" \
- "#{hpb_rows}" \
- "#{gc_stat_rows}" \
- "</table>\n" \
- "#{GC_STAT_HELP}" \
- "<table>#{HEADER}")
- Mwrap::HeapPageBody.each do |addr, generation|
- addr = -sprintf('0x%x', addr)
- yield(-"<tr><td>#{addr}</td><td>#{generation}</td></tr>\n")
- end
- yield "</table></body></html>\n"
- end
- end
- end
-
- def r404 # :nodoc:
- [404,{'Content-Type'=>'text/plain'},["Not found\n"]]
- end
-
- # The standard Rack application endpoint for MwrapRack
- def call(env)
- case env['PATH_INFO']
- when %r{\A/each/(\d+)\z}
- min = $1.to_i
- m = env['QUERY_STRING'].match(/\bsort=(\w+)/)
- Each.new(env['SCRIPT_NAME'], min, m ? m[1] : nil).response
- when %r{\A/at/(.*)\z}
- loc = -CGI.unescape($1)
- loc = Mwrap[loc] or return r404
- EachAt.new(loc).response
- when '/heap_pages'
- HeapPages.new.response
- when '/'
- n = 2000
- u = 'https://80x24.org/mwrap/README.html'
- b = -('<html><head><title>Mwrap demo</title></head>' \
- "<body><p><a href=\"each/#{n}\">allocations >#{n} bytes</a>" \
- "<p><a href=\"#{u}\">#{u}</a>" \
- "<p><a href=\"heap_pages\">heap pages</a>" \
- "</body></html>\n")
- [ 200, {'Content-Type'=>'text/html','Content-Length'=>-b.size.to_s},[b]]
- else
- r404
- end
- end
-end
diff --git a/mwrap.gemspec b/mwrap.gemspec
deleted file mode 100644
index 2c01a68..0000000
--- a/mwrap.gemspec
+++ /dev/null
@@ -1,32 +0,0 @@
-git_manifest = `git ls-files 2>/dev/null`.split("\n")
-manifest = File.exist?('MANIFEST') ?
- File.readlines('MANIFEST').map!(&:chomp).delete_if(&:empty?) : git_manifest
-if git_manifest[0] && manifest != git_manifest
- tmp = "MANIFEST.#$$.tmp"
- File.open(tmp, 'w') { |fp| fp.puts(git_manifest.join("\n")) }
- File.rename(tmp, 'MANIFEST')
- system('git add MANIFEST')
-end
-
-desc = `git describe --abbrev=4 HEAD`.strip.tr('-', '.').delete_prefix('v')
-
-Gem::Specification.new do |s|
- s.name = 'mwrap'
- s.version = desc.empty? ? '2.0.0' : desc
- s.homepage = 'https://80x24.org/mwrap/'
- s.authors = ["Ruby hackers"]
- s.summary = 'LD_PRELOAD malloc wrapper for Ruby'
- s.executables = %w(mwrap)
- s.files = manifest
- s.description = <<~EOF
-mwrap wraps all malloc, calloc, and realloc calls to trace the Ruby
-source location of such calls and bytes allocated at each callsite.
- EOF
- s.email = %q{e@80x24.org}
- s.test_files = Dir['test/test_*.rb']
- s.extensions = %w(ext/mwrap/extconf.rb)
-
- s.add_development_dependency('test-unit', '~> 3.0')
- s.add_development_dependency('rake-compiler', '~> 1.0')
- s.licenses = %w(GPL-2.0+)
-end
diff --git a/script/mwrap-perl b/script/mwrap-perl
new file mode 100644
index 0000000..5e5eec4
--- /dev/null
+++ b/script/mwrap-perl
@@ -0,0 +1,34 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2019 mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+use strict;
+use Devel::Mwrap;
+my $so;
+if ($^O eq 'linux') {
+ my $maps = do {
+ open my $fh, '<', "/proc/$$/maps" or
+ die "/proc/$$/maps not accessible: $!\n";
+ local $/;
+ <$fh>;
+ };
+ if ($maps =~ m![ \t](/[^\n]+?/Mwrap\.so)$!sm) {
+ $so = $1;
+ } else {
+ die "Mwrap.so not found in /proc/$$/maps\n";
+ }
+} else {
+ die "unsupported OS ($^O ne 'linux')";
+}
+my $cur = $ENV{LD_PRELOAD};
+if (defined $cur) {
+ my @cur = split(/[: \t]+/, $cur);
+ my %cur = map { $_ => 1 } @cur;
+ if (!$cur{$so}) {
+ # drop old redundant versions
+ my @keep = grep(!m!/Mwrap\.so\z!, @cur);
+ $ENV{LD_PRELOAD} = join(':', $so, @keep);
+ }
+} else {
+ $ENV{LD_PRELOAD} = $so;
+}
+exec @ARGV;
diff --git a/t/mwrap.t b/t/mwrap.t
new file mode 100644
index 0000000..5bcc285
--- /dev/null
+++ b/t/mwrap.t
@@ -0,0 +1,85 @@
+#!perl -w
+# Copyright (C) 2019 mwrap hackers <mwrap-perl@80x24.org>
+# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+use strict;
+use Test::More;
+use File::Temp qw(tempdir);
+use_ok 'Devel::Mwrap';
+
+my $tmpdir = tempdir('mwrap-perl-XXXXXX', TMPDIR => 1, CLEANUP => 1);
+my $dump = "$tmpdir/dump";
+my $out = "$tmpdir/out";
+my $err = "$tmpdir/err";
+my $src = slurp('blib/script/mwrap-perl');
+
+{
+ my $env = { MWRAP => "dump_path:$dump,dump_min:10000" };
+ my $nr = 1000;
+ mwrap_run('dump test', $env, '-e', '$x = "hello world" x '.$nr);
+ ok(-s $dump, "dump file written to");
+ my $s = slurp($dump);
+ my $re = qr/([0-9]+)[ \t]+([0-9]+)[ \t]+-e:1[ \t]*\n/sm;
+ my ($bytes, $n);
+ if ($s =~ $re) {
+ ($bytes, $n) = ($1, $2);
+ ok($bytes >= (length('hello world') * $nr),
+ "counted 'hello world' x $nr");
+ ok($n >= 1, 'allocation counted');
+ } else {
+ fail("$s failed to match $re");
+ }
+}
+
+SKIP: { # C++ program which uses malloc via "new"
+ my $exp = `cmake -h`;
+ skip 'cmake missing', 2 if $?;
+ skip "`cmake -h' gave no output", 2 unless $exp =~ /\S/s;
+ open my $truncate, '>', $dump or die;
+ close $truncate or die;
+ my $env = { MWRAP => "dump_path:$dump" };
+ mwrap_run('cmake (C++ new)', $env, '-e',
+ 'system(qw(cmake -h)); exit $?');
+ my $res = slurp($out);
+ is($res, $exp, "`cmake -h' works");
+};
+
+{
+ my $env = { MWRAP => "dump_path:$dump" };
+ mwrap_run('total_bytes*', $env, '-e', <<'E1');
+my $A = Devel::Mwrap::total_bytes_allocated();
+my $f = Devel::Mwrap::total_bytes_freed();
+print("$A - $f\n");
+E1
+ my $o = slurp($out);
+ like($o, qr/^([0-9]+) - ([0-9]+)\n/s, 'got allocated & freed bytes');
+}
+
+{
+ my $env = { MWRAP => "dump_path:$dump" };
+ mwrap_run('source location', $env, 't/source_location.perl');
+}
+
+done_testing();
+
+sub slurp {
+ open my $fh, '<', $_[0] or die "open($_[0]): $!";
+ local $/;
+ <$fh>;
+}
+
+sub mwrap_run {
+ my ($msg, $env, @args) = @_;
+ my $pid = fork; defined $pid or die "fork: $!";
+ if ($pid == 0) {
+ while (my ($k, $v) = each %$env) {
+ $ENV{$k} = $v;
+ }
+ open STDERR, '>', $err or die "open: $!";
+ open STDOUT, '>', $out or die "open: $!";
+ @ARGV = ($^X, '-MDevel::Mwrap', @args);
+ eval $src;
+ die "fail: $! ($@)";
+ }
+ waitpid($pid, 0);
+ is($?, 0, $msg);
+}
diff --git a/t/source_location.perl b/t/source_location.perl
new file mode 100644
index 0000000..ed81ed8
--- /dev/null
+++ b/t/source_location.perl
@@ -0,0 +1,9 @@
+use Devel::Mwrap;
+my $foo = ('hello world' x 10000);
+my $k = __FILE__ . ":2";
+my $loc = Devel::Mwrap::get($k) or die;
+$loc->name eq $k or die;
+$loc->total >= 10000 or die;
+$loc->allocations >= 1 or die;
+$loc->frees >= 0 or die;
+exit 0;
diff --git a/test/test_mwrap.rb b/test/test_mwrap.rb
deleted file mode 100644
index 48fba23..0000000
--- a/test/test_mwrap.rb
+++ /dev/null
@@ -1,322 +0,0 @@
-# frozen_string_literal: true
-# Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
-# License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
-require 'test/unit'
-require 'mwrap'
-require 'rbconfig'
-require 'tempfile'
-
-class TestMwrap < Test::Unit::TestCase
- RB = "#{RbConfig::CONFIG['bindir']}/#{RbConfig::CONFIG['RUBY_INSTALL_NAME']}"
-
- mwrap_so = $".grep(%r{/mwrap\.so\z})[0]
- env = ENV.to_hash
- cur = env['LD_PRELOAD']
- env['LD_PRELOAD'] = cur ? "#{mwrap_so}:#{cur}".freeze : mwrap_so
- @@env = env.freeze
- inc = File.dirname(mwrap_so)
- @@cmd = %W(#{RB} -w --disable=gems -I#{inc} -rmwrap).freeze
-
- def test_mwrap_preload
- cmd = @@cmd + %w(
- -e ("helloworld"*1000).clear
- -e Mwrap.dump
- )
- Tempfile.create('junk') do |tmp|
- tmp.sync = true
- res = system(@@env, *cmd, err: tmp)
- assert res, $?.inspect
- tmp.rewind
- lines = tmp.readlines
- line_1 = lines.grep(/\s-e:1\b/)[0].strip
- assert_equal '10001', line_1.split(/\s+/)[0]
- end
- end
-
- def test_dump_via_destructor
- env = @@env.dup
- env['MWRAP'] = 'dump_fd:5'
- cmd = @@cmd + %w(-e ("0"*10000).clear)
- Tempfile.create('junk') do |tmp|
- tmp.sync = true
- res = system(env, *cmd, { 5 => tmp })
- assert res, $?.inspect
- tmp.rewind
- assert_match(/\b10001\s+1\s+-e:1$/, tmp.read)
-
- env['MWRAP'] = 'dump_fd:1,dump_min:10000'
- tmp.rewind
- tmp.truncate(0)
- res = system(env, *cmd, { 1 => tmp })
- assert res, $?.inspect
- tmp.rewind
- assert_match(/\b10001\s+1\s+-e:1$/, tmp.read)
-
- tmp.rewind
- tmp.truncate(0)
- env['MWRAP'] = "dump_path:#{tmp.path},dump_min:10000"
- res = system(env, *cmd)
- assert res, $?.inspect
- assert_match(/\b10001\s+1\s+-e:1$/, tmp.read)
-
- tmp.rewind
- tmp.truncate(0)
- env['MWRAP'] = "dump_path:#{tmp.path},dump_heap:5"
- res = system(env, *cmd)
- assert res, $?.inspect
- assert_match %r{lifespan_stddev}, tmp.read
- end
- end
-
- def test_cmake
- begin
- exp = `cmake -h`
- rescue Errno::ENOENT
- warn 'cmake missing'
- return
- end
- assert_not_predicate exp.strip, :empty?
- env = @@env.merge('MWRAP' => 'dump_fd:1')
- out = IO.popen(env, %w(cmake -h), &:read)
- assert out.start_with?(exp), 'original help exists'
- assert_not_equal exp, out, 'includes dump output'
- dump = out.delete_prefix(exp)
- assert_match(/\b0x[a-f0-9]+\b/s, dump, 'dump output has addresses')
- end
-
- def test_clear
- cmd = @@cmd + %w(
- -e ("0"*10000).clear
- -e Mwrap.clear
- -e ("0"*20000).clear
- -e Mwrap.dump($stdout,9999)
- )
- Tempfile.create('junk') do |tmp|
- tmp.sync = true
- res = system(@@env, *cmd, { 1 => tmp })
- assert res, $?.inspect
- tmp.rewind
- buf = tmp.read
- assert_not_match(/\s+-e:1$/, buf)
- assert_match(/\b20001\s+1\s+-e:3$/, buf)
- end
- end
-
- # make sure we don't break commands spawned by an mwrap-ed Ruby process:
- def test_non_ruby_exec
- IO.pipe do |r, w|
- th = Thread.new { r.read }
- Tempfile.create('junk') do |tmp|
- tmp.sync = true
- env = @@env.merge('MWRAP' => "dump_path:#{tmp.path}")
- cmd = %w(perl -e print("HELLO_WORLD"))
- res = system(env, *cmd, out: w)
- w.close
- assert res, $?.inspect
- assert_match(/0x[a-f0-9]+\b/, tmp.read)
- end
- assert_equal "HELLO_WORLD", th.value
- end
- end
-
- # some URCU flavors use USR1, ensure the one we choose does not
- def test_sigusr1_works
- cmd = @@cmd + %w(
- -e STDOUT.sync=true
- -e trap(:USR1){p("HELLO_WORLD")}
- -e END{Mwrap.dump}
- -e puts -e STDIN.read)
- IO.pipe do |r, w|
- IO.pipe do |r2, w2|
- pid = spawn(@@env, *cmd, in: r2, out: w, err: '/dev/null')
- r2.close
- w.close
- assert_equal "\n", r.gets
- buf = +''
- 10.times { Process.kill(:USR1, pid) }
- while IO.select([r], nil, nil, 0.1)
- case tmp = r.read_nonblock(1000, exception: false)
- when String
- buf << tmp
- end
- end
- w2.close
- Process.wait(pid)
- assert_predicate $?, :success?, $?.inspect
- assert_equal(["\"HELLO_WORLD\"\n"], buf.split(/^/).uniq)
- end
- end
- end
-
- def test_reset
- assert_nil Mwrap.reset
- end
-
- def test_each
- cmd = @@cmd + %w(
- -e ("0"*10000).clear
- -e h={}
- -e Mwrap.each(1000){|a,b,c|h[a]=[b,c]}
- -e puts(Marshal.dump(h))
- )
- r = IO.popen(@@env, cmd, 'r')
- h = Marshal.load(r.read)
- assert_not_predicate h, :empty?
- h.each_key { |k| assert_kind_of String, k }
- h.each_value do |total,calls|
- assert_operator total, :>, 0
- assert_operator calls, :>, 0
- assert_operator total, :>=, calls
- end
- end
-
- def test_aref_each
- cmd = @@cmd + %w(
- -e count=GC.count
- -e GC.disable
- -e keep=("0"*10000)
- -e loc=Mwrap["-e:3"]
- -e loc.each{|size,gen|p([size,gen,count])}
- )
- buf = IO.popen(@@env, cmd, &:read)
- assert_predicate $?, :success?
- assert_match(/\A\[\s*\d+,\s*\d+,\s*\d+\]\s*\z/s, buf)
- size, gen, count = eval(buf)
- assert_operator size, :>=, 10000
- assert_operator gen, :>=, count
-
- cmd = @@cmd + %w(
- -e count=GC.count
- -e locs=""
- -e Mwrap.each(1){|loc,tot,calls|locs<<loc}
- -e m=locs.match(/(\[0x[a-f0-9]+\])/i)
- -e m||=locs.match(/\b(0x[a-f0-9]+)\b/i)
- -e p(loc=Mwrap["bobloblaw\t#{m[1]}"])
- -e loc.each{|size,gen|p([size,gen,count])}
- )
- buf = IO.popen(@@env, cmd, &:read)
- assert_predicate $?, :success?
- assert_match(/\bMwrap::SourceLocation\b/, buf)
- end
-
- def test_benchmark
- cmd = @@cmd + %w(-rbenchmark
- -e puts(Benchmark.measure{1000000.times{Time.now}}))
- r = IO.popen(@@env, cmd, 'r')
- require 'benchmark'
- warn Benchmark::Tms::CAPTION
- warn r.read
- end if ENV['BENCHMARK']
-
- def test_mwrap_dump_check
- assert_raise(TypeError) { Mwrap.dump(:bogus) }
- end
-
- def assert_separately(src, *opts)
- Tempfile.create(%w(mwrap .rb)) do |tmp|
- tmp.write(src.lstrip!)
- tmp.flush
- assert(system(@@env, *@@cmd, tmp.path, *opts))
- end
- end
-
- def test_source_location
- assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
- begin;
- require 'mwrap'
- foo = '0' * 10000
- k = -"#{__FILE__}:2"
- loc = Mwrap[k]
- loc.name == k or abort 'SourceLocation#name broken'
- loc.total >= 10000 or abort 'SourceLocation#total broken'
- loc.frees == 0 or abort 'SourceLocation#frees broken'
- loc.allocations == 1 or abort 'SourceLocation#allocations broken'
- seen = false
- loc.each do |*x| seen = x end
- seen[1] == loc.total or 'SourceLocation#each broken'
- foo.clear
-
- # wait for call_rcu to perform real_free
- freed = false
- until freed
- freed = true
- loc.each do freed = false end
- end
- loc.frees == 1 or abort 'SourceLocation#frees broken (after free)'
- Float === loc.mean_lifespan or abort 'mean_lifespan broken'
- Integer === loc.max_lifespan or abort 'max_lifespan broken'
-
- addr = false
- Mwrap.each do |a,|
- if a =~ /0x[a-f0-9]+/
- addr = a
- break
- end
- end
- addr && addr.frozen? or abort 'Mwrap.each returned unfrozen address'
- loc = Mwrap[addr] or abort "Mwrap[#{addr}] broken"
- addr == loc.name or abort 'SourceLocation#name works on address'
- loc.name.frozen? or abort 'SourceLocation#name not frozen'
- end;
- end
-
- def test_quiet
- assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
- begin;
- require 'mwrap'
- before = __LINE__
- res = Mwrap.quiet do |depth|
- depth == 1 or abort 'depth is not 1'
- ('a' * 10000).clear
- Mwrap.quiet { |d| d == 2 or abort 'depth is not 2' }
- :foo
- end
- after = __LINE__ - 1
- (before..after).each do |lineno|
- Mwrap["#{__FILE__}:#{lineno}"] and
- abort "unexpectedly tracked allocation at line #{lineno}"
- end
- res == :foo or abort 'Mwrap.quiet did not return block result'
- end;
- end
-
- def test_total_bytes
- assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
- begin;
- require 'mwrap'
- Mwrap.total_bytes_allocated > 0 or abort 'nothing allocated'
- Mwrap.total_bytes_freed > 0 or abort 'nothing freed'
- Mwrap.total_bytes_allocated > Mwrap.total_bytes_freed or
- abort 'freed more than allocated'
- end;
- end
-
- def test_heap_page_body
- assert_separately(+"#{<<~"begin;"}\n#{<<~'end;'}")
- begin;
- require 'mwrap'
- require 'rubygems' # use up some memory
- ap = GC.stat(:heap_allocated_pages)
- h = {}
- nr = 0
- Mwrap::HeapPageBody.each do |addr, gen|
- nr += 1
- gen <= GC.count && gen >= 0 or abort "bad generation: #{gen}"
- (0 == (addr & 16383)) or abort "addr not aligned: #{'%x' % addr}"
- end
- nr == ap or abort 'HeapPageBody.each missed page'
- 10.times { (1..20000).to_a.map(&:to_s) }
- 3.times { GC.start }
- Mwrap::HeapPageBody.stat(h)
- Integer === h[:lifespan_max] or abort 'lifespan_max not recorded'
- Integer === h[:lifespan_min] or abort 'lifespan_min not recorded'
- Float === h[:lifespan_mean] or abort 'lifespan_mean not recorded'
- 3.times { GC.start }
- 10.times { (1..20000).to_a.map(&:to_s) }
- Mwrap::HeapPageBody.stat(h)
- h[:deathspan_min] <= h[:deathspan_max] or
- abort 'wrong min/max deathtime'
- Float === h[:deathspan_mean] or abort 'deathspan_mean not recorded'
- end;
- end
-end
diff --git a/typemap b/typemap
new file mode 100644
index 0000000..9531289
--- /dev/null
+++ b/typemap
@@ -0,0 +1,4 @@
+TYPEMAP
+size_t T_UV
+const char * T_PV
+Devel::Mwrap::SrcLoc T_PTROBJ
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-10-31 20:03 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-10-31 20:03 [PATCH] port to Perl5 and XS Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/mwrap-perl.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).