Diffstat (limited to 'ext/mwrap/mwrap.c')
-rw-r--r--  ext/mwrap/mwrap.c  890
1 file changed, 40 insertions(+), 850 deletions(-)
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index 160007f..d88fee6 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -2,702 +2,13 @@
  * Copyright (C) mwrap hackers <mwrap-public@80x24.org>
  * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
  */
-#define _LGPL_SOURCE /* allows URCU to inline some stuff */
-#include <ruby.h> /* defines HAVE_RUBY_RACTOR_H on 3.0+ */
-#include <ruby/thread.h>
-#include <ruby/io.h>
-#include <execinfo.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <dlfcn.h>
-#include <assert.h>
-#include <errno.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <pthread.h>
-#include <urcu-bp.h>
-#include <urcu/rculfhash.h>
-#include <urcu/rculist.h>
-#include "jhash.h"
-
-#if __STDC_VERSION__ >= 201112
-#        define MWRAP_TSD _Thread_local
-#elif defined(__GNUC__)
-#        define MWRAP_TSD __thread
-#else
-#        error _Thread_local nor __thread supported
-#endif
+#define MWRAP_RUBY 1
+#include "mwrap_core.h"
 
 static ID id_uminus;
-const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
-
-#ifdef HAVE_RUBY_RACTOR_H /* Ruby 3.0+ */
-extern MWRAP_TSD void * __attribute__((weak)) ruby_current_ec;
-#else /* Ruby 2.6-2.7 */
-extern void * __attribute__((weak)) ruby_current_execution_context_ptr;
-#        define ruby_current_ec ruby_current_execution_context_ptr
-#endif
-extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */
-extern size_t __attribute__((weak)) rb_gc_count(void);
 extern VALUE __attribute__((weak)) rb_cObject;
 extern VALUE __attribute__((weak)) rb_eTypeError;
 extern VALUE __attribute__((weak)) rb_yield(VALUE);
-int __attribute__((weak)) ruby_thread_has_gvl_p(void);
-
-static size_t total_bytes_inc, total_bytes_dec;
-
-/* true for glibc/dlmalloc/ptmalloc, not sure about jemalloc */
-#define ASSUMED_MALLOC_ALIGNMENT (sizeof(void *) * 2)
-
-#ifdef __FreeBSD__
-void *__malloc(size_t);
-void __free(void *);
-#  define real_malloc __malloc
-#  define real_free __free
-#else
-static void *(*real_malloc)(size_t);
-static void (*real_free)(void *);
-static int resolving_malloc;
-#endif /* !FreeBSD */
-
-/*
- * we need to fake an OOM condition while dlsym is running,
- * as that calls calloc under glibc, but we don't have the
- * symbol for the jemalloc calloc, yet
- */
-#  define RETURN_IF_NOT_READY() do { \
-        if (!real_malloc) { \
-                errno = ENOMEM; \
-                return NULL; \
-        } \
-} while (0)
-
-static MWRAP_TSD size_t locating;
-static size_t generation;
-static size_t page_size;
-static struct cds_lfht *totals;
-union padded_mutex {
-        pthread_mutex_t mtx;
-        char pad[64];
-};
-
-/* a round-robin pool of mutexes */
-#define MUTEX_NR   (1 << 6)
-#define MUTEX_MASK (MUTEX_NR - 1)
-#ifdef __FreeBSD__
-#  define STATIC_MTX_INIT_OK (0)
-#else /* only tested on Linux + glibc */
-#  define STATIC_MTX_INIT_OK (1)
-#endif
-static size_t mutex_i;
-static union padded_mutex mutexes[MUTEX_NR] = {
-#if STATIC_MTX_INIT_OK
-        [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
-#endif
-};
-
-static pthread_mutex_t *mutex_assign(void)
-{
-        return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
-}
-
-static struct cds_lfht *
-lfht_new(void)
-{
-        return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
-}
-
-__attribute__((constructor)) static void resolve_malloc(void)
-{
-        int err;
-        ++locating;
-
-        /*
-         * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
-         * which happens at pthread_mutex_lock, and that calls calloc
-         */
-        if (!STATIC_MTX_INIT_OK) {
-                size_t i;
-
-                for (i = 0; i < MUTEX_NR; i++) {
-                        err = pthread_mutex_init(&mutexes[i].mtx, 0);
-                        if (err) {
-                                fprintf(stderr, "error: %s\n", strerror(err));
-                                _exit(1);
-                        }
-                }
-                /* initialize mutexes used by urcu-bp */
-                rcu_read_lock();
-                rcu_read_unlock();
-#ifndef __FreeBSD__
-        } else {
-                if (!real_malloc) {
-                        resolving_malloc = 1;
-                        real_malloc = dlsym(RTLD_NEXT, "malloc");
-                }
-                real_free = dlsym(RTLD_NEXT, "free");
-                if (!real_malloc || !real_free) {
-                        fprintf(stderr, "missing malloc/aligned_alloc/free\n"
-                                "\t%p %p\n", real_malloc, real_free);
-                        _exit(1);
-                }
-#endif /* !__FreeBSD__ */
-        }
-        CMM_STORE_SHARED(totals, lfht_new());
-        if (!CMM_LOAD_SHARED(totals))
-                fprintf(stderr, "failed to allocate totals table\n");
-
-        err = pthread_atfork(call_rcu_before_fork,
-                                call_rcu_after_fork_parent,
-                                call_rcu_after_fork_child);
-        if (err)
-                fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
-        page_size = sysconf(_SC_PAGESIZE);
-        --locating;
-}
-
-#ifdef NDEBUG
-#define QUIET_CC_WARNING(var) (void)var;
-#else
-#define QUIET_CC_WARNING(var)
-#endif
-
-static void
-mutex_lock(pthread_mutex_t *m)
-{
-        int err = pthread_mutex_lock(m);
-        assert(err == 0);
-        QUIET_CC_WARNING(err)
-}
-
-static void
-mutex_unlock(pthread_mutex_t *m)
-{
-        int err = pthread_mutex_unlock(m);
-        assert(err == 0);
-        QUIET_CC_WARNING(err)
-}
-
-#ifndef HAVE_MEMPCPY
-static void *
-my_mempcpy(void *dest, const void *src, size_t n)
-{
-        return (char *)memcpy(dest, src, n) + n;
-}
-#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
-#endif
-
-/* stolen from glibc: */
-#define RETURN_ADDRESS(nr) \
-  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
-
-#define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
-static char *int2str(int num, char *dst, size_t * size)
-{
-        if (num <= 9) {
-                *size -= 1;
-                *dst++ = (char)(num + '0');
-                return dst;
-        } else {
-                char buf[INT2STR_MAX];
-                char *end = buf + sizeof(buf);
-                char *p = end;
-                size_t adj;
-
-                do {
-                        *size -= 1;
-                        *--p = (char)((num % 10) + '0');
-                        num /= 10;
-                } while (num && *size);
-
-                if (!num) {
-                        adj = end - p;
-                        return mempcpy(dst, p, adj);
-                }
-        }
-        return NULL;
-}
-
-/*
- * rb_source_location_cstr relies on GET_EC(), and it's possible
- * to have a native thread but no EC during the early and late
- * (teardown) phases of the Ruby process
- */
-static int has_ec_p(void)
-{
-        return ruby_thread_has_gvl_p && ruby_thread_has_gvl_p() &&
-                ruby_current_vm_ptr && ruby_current_ec;
-}
-
-/* allocated via real_malloc/real_free */
-struct src_loc {
-        pthread_mutex_t *mtx;
-        size_t total;
-        size_t allocations;
-        size_t frees;
-        size_t age_total; /* (age_total / frees) => mean age at free */
-        size_t max_lifespan;
-        struct cds_lfht_node hnode;
-        struct cds_list_head allocs; /* <=> alloc_hdr.node */
-        uint32_t hval;
-        uint32_t capa;
-        char k[];
-};
-
-/* every allocation has this in the header, maintain alignment with malloc  */
-struct alloc_hdr {
-        struct cds_list_head anode; /* <=> src_loc.allocs */
-        union {
-                struct {
-                        size_t gen; /* rb_gc_count() */
-                        struct src_loc *loc;
-                } live;
-                struct rcu_head dead;
-        } as;
-        void *real; /* what to call real_free on */
-        size_t size;
-};
-
-static MWRAP_TSD char kbuf[
-        PATH_MAX + INT2STR_MAX + sizeof(struct alloc_hdr) + 2
-];
-
-static struct alloc_hdr *ptr2hdr(void *p)
-{
-        return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
-}
-
-static void *hdr2ptr(struct alloc_hdr *h)
-{
-        return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
-}
-
-static int loc_is_addr(const struct src_loc *l)
-{
-        return l->capa == 0;
-}
-
-static size_t loc_size(const struct src_loc *l)
-{
-        return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
-}
-
-static int loc_eq(struct cds_lfht_node *node, const void *key)
-{
-        const struct src_loc *existing;
-        const struct src_loc *k = key;
-
-        existing = caa_container_of(node, struct src_loc, hnode);
-
-        return (k->hval == existing->hval &&
-                k->capa == existing->capa &&
-                memcmp(k->k, existing->k, loc_size(k)) == 0);
-}
-
-static struct src_loc *totals_add_rcu(const struct src_loc *k)
-{
-        struct cds_lfht_iter iter;
-        struct cds_lfht_node *cur;
-        struct src_loc *l = 0;
-        struct cds_lfht *t;
-
-again:
-        t = CMM_LOAD_SHARED(totals);
-        if (!t) goto out_unlock;
-        cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
-        cur = cds_lfht_iter_get_node(&iter);
-        if (cur) {
-                l = caa_container_of(cur, struct src_loc, hnode);
-                uatomic_add(&l->total, k->total);
-                uatomic_add(&l->allocations, 1);
-        } else {
-                size_t n = loc_size(k);
-                l = real_malloc(sizeof(*l) + n);
-                if (!l) goto out_unlock;
-                memcpy(l, k, sizeof(*l) + n);
-                l->mtx = mutex_assign();
-                l->age_total = 0;
-                l->max_lifespan = 0;
-                l->frees = 0;
-                l->allocations = 1;
-                CDS_INIT_LIST_HEAD(&l->allocs);
-                cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
-                if (cur != &l->hnode) { /* lost race */
-                        rcu_read_unlock();
-                        real_free(l);
-                        rcu_read_lock();
-                        goto again;
-                }
-        }
-out_unlock:
-        return l;
-}
-
-static void update_stats_rcu_unlock(const struct src_loc *l)
-{
-        if (caa_likely(l)) rcu_read_unlock();
-}
-
-static struct src_loc *update_stats_rcu_lock(size_t size, uintptr_t caller)
-{
-        struct src_loc *k, *ret = 0;
-        static const size_t xlen = sizeof(caller);
-        char *dst;
-
-        if (caa_unlikely(!CMM_LOAD_SHARED(totals))) return 0;
-        if (locating++) goto out; /* do not recurse into another *alloc */
-
-        uatomic_add(&total_bytes_inc, size);
-
-        rcu_read_lock();
-        if (has_ec_p()) {
-                int line;
-                const char *ptr = rb_source_location_cstr(&line);
-                size_t len;
-                size_t int_size = INT2STR_MAX;
-
-                generation = rb_gc_count();
-
-                if (!ptr) goto unknown;
-
-                /* avoid vsnprintf or anything which could call malloc here: */
-                len = strlen(ptr);
-                if (len > PATH_MAX)
-                        len = PATH_MAX;
-                k = (void *)kbuf;
-                k->total = size;
-                dst = mempcpy(k->k, ptr, len);
-                *dst++ = ':';
-                dst = int2str(line, dst, &int_size);
-                if (dst) {
-                        *dst = 0;        /* terminate string */
-                        k->capa = (uint32_t)(dst - k->k + 1);
-                        k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-                        ret = totals_add_rcu(k);
-                } else {
-                        rb_bug("bad math making key from location %s:%d\n",
-                                ptr, line);
-                }
-        } else {
-unknown:
-                k = alloca(sizeof(*k) + xlen);
-                k->total = size;
-                memcpy(k->k, &caller, xlen);
-                k->capa = 0;
-                k->hval = jhash(k->k, xlen, 0xdeadbeef);
-                ret = totals_add_rcu(k);
-        }
-out:
-        --locating;
-        return ret;
-}
-
-size_t malloc_usable_size(void *p)
-{
-        return ptr2hdr(p)->size;
-}
-
-static void
-free_hdr_rcu(struct rcu_head *dead)
-{
-        struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
-        real_free(h->real);
-}
-
-void free(void *p)
-{
-        if (p) {
-                struct alloc_hdr *h = ptr2hdr(p);
-                struct src_loc *l = h->as.live.loc;
-
-                if (!real_free) return; /* oh well, leak a little */
-                if (l) {
-                        size_t age = generation - h->as.live.gen;
-
-                        uatomic_add(&total_bytes_dec, h->size);
-                        uatomic_set(&h->size, 0);
-                        uatomic_add(&l->frees, 1);
-                        uatomic_add(&l->age_total, age);
-
-                        mutex_lock(l->mtx);
-                        cds_list_del_rcu(&h->anode);
-                        if (age > l->max_lifespan)
-                                l->max_lifespan = age;
-                        mutex_unlock(l->mtx);
-
-                        call_rcu(&h->as.dead, free_hdr_rcu);
-                } else {
-                        real_free(h->real);
-                }
-        }
-}
-
-static void
-alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size, void *real)
-{
-        /* we need src_loc to remain alive for the duration of this call */
-        if (!h) return;
-        h->size = size;
-        h->real = real;
-        h->as.live.loc = l;
-        h->as.live.gen = generation;
-        if (l) {
-                mutex_lock(l->mtx);
-                cds_list_add_rcu(&h->anode, &l->allocs);
-                mutex_unlock(l->mtx);
-        }
-}
-
-static size_t size_align(size_t size, size_t alignment)
-{
-        return ((size + (alignment - 1)) & ~(alignment - 1));
-}
-
-static bool ptr_is_aligned(const void *ptr, size_t alignment)
-{
-        return ((uintptr_t)ptr & (alignment - 1)) == 0;
-}
-
-static void *ptr_align(void *ptr, size_t alignment)
-{
-        return (void *)(((uintptr_t)ptr + (alignment - 1)) & ~(alignment - 1));
-}
-
-static bool is_power_of_two(size_t n) { return (n & (n - 1)) == 0; }
-
-static int
-internal_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
-{
-        struct src_loc *l;
-        struct alloc_hdr *h;
-        void *real;
-        size_t asize;
-        size_t d = alignment / sizeof(void*);
-        size_t r = alignment % sizeof(void*);
-
-        if (!real_malloc) return ENOMEM;
-
-        if (r != 0 || d == 0 || !is_power_of_two(d))
-                return EINVAL;
-
-        if (alignment <= ASSUMED_MALLOC_ALIGNMENT) {
-                void *p = malloc(size);
-                if (!p) return ENOMEM;
-                *pp = p;
-                return 0;
-        }
-        for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
-                ; /* double alignment until >= sizeof(struct alloc_hdr) */
-        if (__builtin_add_overflow(size, alignment, &asize) ||
-            __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
-                return ENOMEM;
-
-
-        l = update_stats_rcu_lock(size, caller);
-
-        real = real_malloc(asize);
-        if (real) {
-                void *p = hdr2ptr(real);
-                if (!ptr_is_aligned(p, alignment))
-                        p = ptr_align(p, alignment);
-                h = ptr2hdr(p);
-                alloc_insert_rcu(l, h, size, real);
-                *pp = p;
-        }
-        update_stats_rcu_unlock(l);
-
-        return real ? 0 : ENOMEM;
-}
-
-static void *
-memalign_result(int err, void *p)
-{
-        if (caa_unlikely(err))
-                errno = err;
-        return p;
-}
-
-void *memalign(size_t alignment, size_t size)
-{
-        void *p = NULL;
-        int err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
-        return memalign_result(err, p);
-}
-
-int posix_memalign(void **p, size_t alignment, size_t size)
-{
-        return internal_memalign(p, alignment, size, RETURN_ADDRESS(0));
-}
-
-void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
-void cfree(void *) __attribute__((alias("free")));
-
-void *valloc(size_t size)
-{
-        void *p = NULL;
-        int err = internal_memalign(&p, page_size, size, RETURN_ADDRESS(0));
-        return memalign_result(err, p);
-}
-
-#if __GNUC__ < 7
-#  define add_overflow_p(a,b) __extension__({ \
-                __typeof__(a) _c; \
-                __builtin_add_overflow(a,b,&_c); \
-        })
-#else
-#  define add_overflow_p(a,b) \
-                __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
-#endif
-
-void *pvalloc(size_t size)
-{
-        size_t alignment = page_size;
-        void *p = NULL;
-        int err;
-
-        if (add_overflow_p(size, alignment)) {
-                errno = ENOMEM;
-                return 0;
-        }
-        size = size_align(size, alignment);
-        err = internal_memalign(&p, alignment, size, RETURN_ADDRESS(0));
-        return memalign_result(err, p);
-}
-
-void *malloc(size_t size)
-{
-        struct src_loc *l;
-        struct alloc_hdr *h;
-        size_t asize;
-        void *p;
-
-        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
-                goto enomem;
-
-        /*
-         * Needed for C++ global declarations using "new",
-         * which happens before our constructor
-         */
-#ifndef __FreeBSD__
-        if (!real_malloc) {
-                if (resolving_malloc) goto enomem;
-                resolving_malloc = 1;
-                real_malloc = dlsym(RTLD_NEXT, "malloc");
-        }
-#endif
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
-        if (h) {
-                alloc_insert_rcu(l, h, size, h);
-                p = hdr2ptr(h);
-        }
-        update_stats_rcu_unlock(l);
-        if (caa_unlikely(!p)) errno = ENOMEM;
-        return p;
-enomem:
-        errno = ENOMEM;
-        return 0;
-}
-
-void *calloc(size_t nmemb, size_t size)
-{
-        void *p;
-        struct src_loc *l;
-        struct alloc_hdr *h;
-        size_t asize;
-
-        if (__builtin_mul_overflow(size, nmemb, &size)) {
-                errno = ENOMEM;
-                return 0;
-        }
-        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
-                errno = ENOMEM;
-                return 0;
-        }
-        RETURN_IF_NOT_READY();
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
-        if (p) {
-                alloc_insert_rcu(l, h, size, h);
-                p = hdr2ptr(h);
-                memset(p, 0, size);
-        }
-        update_stats_rcu_unlock(l);
-        if (caa_unlikely(!p)) errno = ENOMEM;
-        return p;
-}
-
-void *realloc(void *ptr, size_t size)
-{
-        void *p;
-        struct src_loc *l;
-        struct alloc_hdr *h;
-        size_t asize;
-
-        if (!size) {
-                free(ptr);
-                return 0;
-        }
-        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
-                errno = ENOMEM;
-                return 0;
-        }
-        RETURN_IF_NOT_READY();
-
-        l = update_stats_rcu_lock(size, RETURN_ADDRESS(0));
-        p = h = real_malloc(asize);
-        if (p) {
-                alloc_insert_rcu(l, h, size, h);
-                p = hdr2ptr(h);
-        }
-        update_stats_rcu_unlock(l);
-
-        if (ptr && p) {
-                struct alloc_hdr *old = ptr2hdr(ptr);
-                memcpy(p, ptr, old->size < size ? old->size : size);
-                free(ptr);
-        }
-        if (caa_unlikely(!p)) errno = ENOMEM;
-        return p;
-}
-
-struct dump_arg {
-        FILE *fp;
-        size_t min;
-};
-
-static void *dump_to_file(void *x)
-{
-        struct dump_arg *a = x;
-        struct cds_lfht_iter iter;
-        struct src_loc *l;
-        struct cds_lfht *t;
-
-        ++locating;
-        rcu_read_lock();
-        t = CMM_LOAD_SHARED(totals);
-        if (!t)
-                goto out_unlock;
-        cds_lfht_for_each_entry(t, &iter, l, hnode) {
-                const void *p = l->k;
-                char **s = 0;
-                if (l->total <= a->min) continue;
-
-                if (loc_is_addr(l)) {
-                        s = backtrace_symbols(p, 1);
-                        p = s[0];
-                }
-                fprintf(a->fp, "%16zu %12zu %s\n",
-                        l->total, l->allocations, (const char *)p);
-                if (s) free(s);
-        }
-out_unlock:
-        rcu_read_unlock();
-        --locating;
-        return 0;
-}
 
 /*
  * call-seq:
@@ -712,7 +23,7 @@ out_unlock:
  *
  * total_size      call_count      location
  */
-static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
+static VALUE mwrap_dump(int argc, VALUE *argv, VALUE mod)
 {
         VALUE io, min;
         struct dump_arg a;
@@ -730,7 +41,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
         GetOpenFile(io, fptr);
         a.fp = rb_io_stdio_file(fptr);
 
-        rb_thread_call_without_gvl(dump_to_file, &a, 0, 0);
+        rb_thread_call_without_gvl((void *(*)(void *))dump_to_file, &a, 0, 0);
         RB_GC_GUARD(io);
         return Qnil;
 }
@@ -738,24 +49,8 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
 /* The whole operation is not remotely atomic... */
 static void *totals_reset(void *ign)
 {
-        struct cds_lfht *t;
-        struct cds_lfht_iter iter;
-        struct src_loc *l;
-
-        uatomic_set(&total_bytes_inc, 0);
-        uatomic_set(&total_bytes_dec, 0);
-
-        rcu_read_lock();
-        t = CMM_LOAD_SHARED(totals);
-        cds_lfht_for_each_entry(t, &iter, l, hnode) {
-                uatomic_set(&l->total, 0);
-                uatomic_set(&l->allocations, 0);
-                uatomic_set(&l->frees, 0);
-                uatomic_set(&l->age_total, 0);
-                uatomic_set(&l->max_lifespan, 0);
-        }
-        rcu_read_unlock();
-        return 0;
+        mwrap_reset();
+        return NULL;
 }
 
 /*
@@ -767,18 +62,12 @@ static void *totals_reset(void *ign)
  * This resets all statistics.  This is not an atomic operation
  * as other threads (outside of GVL) may increment counters.
  */
-static VALUE mwrap_reset(VALUE mod)
+static VALUE reset_m(VALUE mod)
 {
         rb_thread_call_without_gvl(totals_reset, 0, 0, 0);
         return Qnil;
 }
 
-/* :nodoc: */
-static VALUE mwrap_clear(VALUE mod)
-{
-        return mwrap_reset(mod);
-}
-
 static VALUE rcu_unlock_ensure(VALUE ignored)
 {
         rcu_read_unlock();
@@ -786,21 +75,31 @@ static VALUE rcu_unlock_ensure(VALUE ignored)
         return Qfalse;
 }
 
-static VALUE location_string(struct src_loc *l)
+static VALUE location_string(const struct src_loc *l)
 {
-        VALUE ret, tmp;
+        VALUE tmp = rb_str_new(NULL, 0);
 
-        if (loc_is_addr(l)) {
-                char **s = backtrace_symbols((void *)l->k, 1);
-                tmp = rb_str_new_cstr(s[0]);
-                free(s);
+        if (l->f) {
+                rb_str_cat(tmp, l->f->fn, l->f->fn_len);
+                if (l->lineno == U24_MAX)
+                        rb_str_cat_cstr(tmp, ":-");
+                else
+                        rb_str_catf(tmp, ":%u", l->lineno);
         }
-        else {
-                tmp = rb_str_new(l->k, l->capa - 1);
+        if (l->bt_len) {
+                AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
+
+                if (s) {
+                        if (l->f)
+                                rb_str_cat_cstr(tmp, "\n");
+                        rb_str_cat_cstr(tmp, s[0]);
+                        for (uint32_t i = 1; i < l->bt_len; ++i)
+                                rb_str_catf(tmp, "\n%s", s[i]);
+                }
         }
 
         /* deduplicate and try to free up some memory */
-        ret = rb_funcall(tmp, id_uminus, 0);
+        VALUE ret = rb_funcall(tmp, id_uminus, 0);
         if (!OBJ_FROZEN_RAW(tmp))
                 rb_str_resize(tmp, 0);
 
@@ -872,17 +171,6 @@ static const rb_data_type_t src_loc_type = {
 
 static VALUE cSrcLoc;
 
-static int
-extract_addr(const char *str, size_t len, void **p)
-{
-        const char *c;
-#if defined(__GLIBC__)
-        return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
-#else /* tested FreeBSD */
-        return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
-#endif
-}
-
 /*
  * call-seq:
  *        Mwrap[location] -> Mwrap::SourceLocation
@@ -895,41 +183,11 @@ extract_addr(const char *str, size_t len, void **p)
 static VALUE mwrap_aref(VALUE mod, VALUE loc)
 {
         const char *str = StringValueCStr(loc);
-        int len = RSTRING_LENINT(loc);
-        struct src_loc *k = 0;
-        uintptr_t p;
-        struct cds_lfht_iter iter;
-        struct cds_lfht_node *cur;
-        struct cds_lfht *t;
-        struct src_loc *l;
-        VALUE val = Qnil;
+        long len = RSTRING_LEN(loc);
+        assert(len >= 0);
+        struct src_loc *l = mwrap_get(str, (size_t)len);
 
-        if (extract_addr(str, len, (void **)&p)) {
-                k = (void *)kbuf;
-                memcpy(k->k, &p, sizeof(p));
-                k->capa = 0;
-                k->hval = jhash(k->k, sizeof(p), 0xdeadbeef);
-        } else {
-                k = (void *)kbuf;
-                memcpy(k->k, str, len + 1);
-                k->capa = len + 1;
-                k->hval = jhash(k->k, k->capa, 0xdeadbeef);
-        }
-
-        if (!k) return val;
-
-        t = CMM_LOAD_SHARED(totals);
-        if (!t) return val;
-        rcu_read_lock();
-
-        cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
-        cur = cds_lfht_iter_get_node(&iter);
-        if (cur) {
-                l = caa_container_of(cur, struct src_loc, hnode);
-                val = TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l);
-        }
-        rcu_read_unlock();
-        return val;
+        return l ? TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, l) : Qnil;
 }
 
 static VALUE src_loc_each_i(VALUE p)
@@ -953,7 +211,7 @@ static VALUE src_loc_each_i(VALUE p)
         return Qfalse;
 }
 
-static struct src_loc *src_loc_get(VALUE self)
+static struct src_loc *src_loc_of(VALUE self)
 {
         struct src_loc *l;
         TypedData_Get_Struct(self, struct src_loc, &src_loc_type, l);
@@ -975,7 +233,7 @@ static struct src_loc *src_loc_get(VALUE self)
  */
 static VALUE src_loc_each(VALUE self)
 {
-        struct src_loc *l = src_loc_get(self);
+        struct src_loc *l = src_loc_of(self);
 
         assert(locating == 0 && "forgot to clear locating");
         ++locating;
@@ -990,7 +248,7 @@ static VALUE src_loc_each(VALUE self)
  */
 static VALUE src_loc_mean_lifespan(VALUE self)
 {
-        struct src_loc *l = src_loc_get(self);
+        struct src_loc *l = src_loc_of(self);
         size_t tot, frees;
 
         frees = uatomic_read(&l->frees);
@@ -1001,19 +259,19 @@ static VALUE src_loc_mean_lifespan(VALUE self)
 /* The number of frees made from this location */
 static VALUE src_loc_frees(VALUE self)
 {
-        return SIZET2NUM(uatomic_read(&src_loc_get(self)->frees));
+        return SIZET2NUM(uatomic_read(&src_loc_of(self)->frees));
 }
 
 /* The number of allocations made from this location */
 static VALUE src_loc_allocations(VALUE self)
 {
-        return SIZET2NUM(uatomic_read(&src_loc_get(self)->allocations));
+        return SIZET2NUM(uatomic_read(&src_loc_of(self)->allocations));
 }
 
 /* The total number of bytes allocated from this location */
 static VALUE src_loc_total(VALUE self)
 {
-        return SIZET2NUM(uatomic_read(&src_loc_get(self)->total));
+        return SIZET2NUM(uatomic_read(&src_loc_of(self)->total));
 }
 
 /*
@@ -1022,7 +280,7 @@ static VALUE src_loc_total(VALUE self)
  */
 static VALUE src_loc_max_lifespan(VALUE self)
 {
-        return SIZET2NUM(uatomic_read(&src_loc_get(self)->max_lifespan));
+        return SIZET2NUM(uatomic_read(&src_loc_of(self)->max_lifespan));
 }
 
 /*
@@ -1030,7 +288,7 @@ static VALUE src_loc_max_lifespan(VALUE self)
  */
 static VALUE src_loc_name(VALUE self)
 {
-        struct src_loc *l = src_loc_get(self);
+        struct src_loc *l = src_loc_of(self);
         VALUE ret;
 
         ++locating;
@@ -1117,8 +375,8 @@ void Init_mwrap(void)
         cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
         rb_undef_alloc_func(cSrcLoc);
         rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
-        rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
-        rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
+        rb_define_singleton_method(mod, "reset", reset_m, 0);
+        rb_define_singleton_method(mod, "clear", reset_m, 0);
         rb_define_singleton_method(mod, "each", mwrap_each, -1);
         rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
         rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
@@ -1136,71 +394,3 @@ void Init_mwrap(void)
 
         --locating;
 }
-
-/* rb_cloexec_open isn't usable by non-Ruby processes */
-#ifndef O_CLOEXEC
-#  define O_CLOEXEC 0
-#endif
-
-__attribute__ ((destructor))
-static void mwrap_dump_destructor(void)
-{
-        const char *opt = getenv("MWRAP");
-        const char *modes[] = { "a", "a+", "w", "w+", "r+" };
-        struct dump_arg a = { .min = 0 };
-        size_t i;
-        int dump_fd;
-        char *dump_path;
-        char *s;
-
-        if (!opt)
-                return;
-
-        ++locating;
-        if ((dump_path = strstr(opt, "dump_path:")) &&
-                        (dump_path += sizeof("dump_path")) &&
-                        *dump_path) {
-                char *end = strchr(dump_path, ',');
-                if (end) {
-                        char *tmp = alloca(end - dump_path + 1);
-                        end = mempcpy(tmp, dump_path, end - dump_path);
-                        *end = 0;
-                        dump_path = tmp;
-                }
-                dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
-                                0666);
-                if (dump_fd < 0) {
-                        fprintf(stderr, "open %s failed: %s\n", dump_path,
-                                strerror(errno));
-                        goto out;
-                }
-        }
-        else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
-                goto out;
-
-        if ((s = strstr(opt, "dump_min:")))
-                sscanf(s, "dump_min:%zu", &a.min);
-
-        switch (dump_fd) {
-        case 0: goto out;
-        case 1: a.fp = stdout; break;
-        case 2: a.fp = stderr; break;
-        default:
-                if (dump_fd < 0)
-                        goto out;
-                a.fp = 0;
-
-                for (i = 0; !a.fp && i < 5; i++)
-                        a.fp = fdopen(dump_fd, modes[i]);
-
-                if (!a.fp) {
-                        fprintf(stderr, "failed to open fd=%d: %s\n",
-                                dump_fd, strerror(errno));
-                        goto out;
-                }
-                /* we'll leak some memory here, but this is a destructor */
-        }
-        dump_to_file(&a);
-out:
-        --locating;
-}
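
All of the removed allocation wrappers rely on the same header-prefix trick: malloc() over-allocates by sizeof(struct alloc_hdr), the bookkeeping lives in that hidden prefix, and ptr2hdr()/hdr2ptr() convert between the caller-visible pointer and the header. The following is a minimal standalone sketch of that technique only, not mwrap code; the demo_* names are illustrative, and it assumes a GCC/Clang toolchain for __builtin_add_overflow (as the original file does).

/* sketch of the header-prefix bookkeeping used by the removed wrappers */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct demo_hdr {        /* stands in for struct alloc_hdr */
        void *real;      /* pointer to hand back to the real allocator */
        size_t size;     /* size the caller requested */
};

static struct demo_hdr *demo_ptr2hdr(void *p)
{
        return (struct demo_hdr *)((uintptr_t)p - sizeof(struct demo_hdr));
}

static void *demo_hdr2ptr(struct demo_hdr *h)
{
        return (void *)((uintptr_t)h + sizeof(struct demo_hdr));
}

static void *demo_malloc(size_t size)
{
        size_t asize;

        /* reject sizes that would overflow once the header is added */
        if (__builtin_add_overflow(size, sizeof(struct demo_hdr), &asize))
                return NULL;
        struct demo_hdr *h = malloc(asize);
        if (!h)
                return NULL;
        h->real = h;     /* non-aligned case: header sits at the real pointer */
        h->size = size;
        return demo_hdr2ptr(h);
}

static void demo_free(void *p)
{
        if (p)
                free(demo_ptr2hdr(p)->real);
}

int main(void)
{
        char *p = demo_malloc(16);

        if (!p)
                return 1;
        strcpy(p, "hello");
        printf("%s (%zu bytes tracked)\n", p, demo_ptr2hdr(p)->size);
        demo_free(p);
        return 0;
}

Keeping the metadata in a prefix is what lets the removed free() and malloc_usable_size() recover the requested size and the original allocation without any external lookup table; the aligned-allocation paths differ only in that the header may land somewhere after `real`, which is why the header stores `real` separately.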