about summary refs log tree commit homepage
path: root/mwrap_core.h
diff options
context:
space:
mode:
Diffstat (limited to 'mwrap_core.h')
-rw-r--r--mwrap_core.h751
1 files changed, 751 insertions, 0 deletions
diff --git a/mwrap_core.h b/mwrap_core.h
new file mode 100644
index 0000000..09b579d
--- /dev/null
+++ b/mwrap_core.h
@@ -0,0 +1,751 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ * Disclaimer: I don't really know my way around XS or Perl internals well
+ */
+#define _LGPL_SOURCE /* allows URCU to inline some stuff */
+#include "mymalloc.h" /* includes dlmalloc_c.h */
+#ifndef MWRAP_PERL
+#        define MWRAP_PERL 0
+#endif
+
+#if MWRAP_PERL
+#        include "EXTERN.h"
+#        include "perl.h"
+#        include "XSUB.h"
+#        include "embed.h"
+#        include "ppport.h"
+#        ifndef MWRAP_EARLY_THREADS
+#                define MWRAP_EARLY_THREADS 1
+#        endif
+#endif
+
+/*
+ * Start URCU threads early for runtimes (e.g. Perl) which leave
+ * signals unblocked.  This isn't needed for (C)Ruby since it
+ * currently runs with all signals blocked.
+ * Needed for URCU prior to commit ea3a28a3f71dd02fb34ed4e3108f93275dbef89a
+ * ("Disable signals in URCU background threads" 2022-09-23)
+ */
+#ifndef MWRAP_EARLY_THREADS
+#        define MWRAP_EARLY_THREADS 0
+#endif
+
+#include <execinfo.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <urcu-bp.h>
+#include <urcu/rculfhash.h>
+#include <urcu/rculist.h>
+#include "jhash.h"
+
+/*
+ * Perl doesn't have a GC the same way (C) Ruby does, so no GC count.
+ * Instead, the relative age of an object is the number of total bytes
+ * allocated (and we don't care about overflow on 32-bit since
+ * hardly anybody still uses it).
+ */
+static size_t total_bytes_inc, total_bytes_dec;
+
+#if MWRAP_PERL
+extern pthread_key_t __attribute__((weak)) PL_thr_key;
+extern const char __attribute__((weak)) PL_memory_wrap[]; /* needed for -O0 */
+#endif
+
+static MWRAP_TSD size_t locating;
+#ifndef PERL_IMPLICIT_CONTEXT
+static size_t *root_locating; /* determines if PL_curcop is our thread */
+#endif
static struct cds_lfht *totals; /* lock-free hash of per-site src_loc stats */
union padded_mutex {
        pthread_mutex_t mtx;
        char pad[64]; /* padded out to 64 bytes, presumably to avoid false sharing */
};

/* a round-robin pool of mutexes */
#define MUTEX_NR   (1 << 6)
#define MUTEX_MASK (MUTEX_NR - 1)
#ifdef __FreeBSD__
#  define STATIC_MTX_INIT_OK (0)
#else /* only tested on Linux + glibc */
#  define STATIC_MTX_INIT_OK (1)
#endif
static size_t mutex_i; /* cursor for round-robin mutex assignment */
static union padded_mutex mutexes[MUTEX_NR] = {
#if STATIC_MTX_INIT_OK
        [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER
#endif
};
+
+static pthread_mutex_t *mutex_assign(void)
+{
+        return &mutexes[uatomic_add_return(&mutex_i, 1) & MUTEX_MASK].mtx;
+}
+
+static struct cds_lfht *lfht_new(void)
+{
+        unsigned long size = MWRAP_EARLY_THREADS ? 8192 : 16384;
+        return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+}
+
+static void reset_mutexes(void)
+{
+        size_t i;
+
+        for (i = 0; i < MUTEX_NR; i++)
+                CHECK(int, 0, pthread_mutex_init(&mutexes[i].mtx, 0));
+}
+
#ifndef HAVE_MEMPCPY
/* portable mempcpy(3): copy @n bytes and return one past the copied data */
static void *my_mempcpy(void *dest, const void *src, size_t n)
{
        memcpy(dest, src, n);
        return (char *)dest + n;
}
#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
#endif
+
+/* stolen from glibc: */
+#define RETURN_ADDRESS(nr) \
+  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
+
#define UINT2STR_MAX (sizeof(unsigned) == 4 ? 10 : 19)
/*
 * Write the decimal digits of @num to @dst without snprintf (which may
 * allocate).  @*size is decremented once per digit written.  Returns the
 * position one past the last digit written, or NULL if @*size ran out.
 * The output is NOT NUL-terminated.
 */
static char *uint2str(unsigned num, char *dst, size_t *size)
{
        char tmp[UINT2STR_MAX];
        char *end = tmp + sizeof(tmp);
        char *p = end;
        size_t n;

        if (num <= 9) { /* fast path: one digit, no reversal needed */
                *size -= 1;
                *dst++ = (char)(num + '0');
                return dst;
        }
        do { /* emit digits least-significant first into tmp */
                *size -= 1;
                *--p = (char)((num % 10) + '0');
                num /= 10;
        } while (num && *size);

        if (num) /* ran out of space before all digits were written */
                return NULL;
        n = end - p;
        memcpy(dst, p, n);
        return dst + n;
}
+
/* allocated via real_malloc, immortal for safety reasons */
struct src_loc {
        pthread_mutex_t *mtx; /* protects the ->allocs list below */
        size_t total; /* cumulative bytes allocated at this site */
        size_t freed_bytes; /* cumulative bytes freed from this site */
        size_t allocations; /* number of allocations made at this site */
        size_t frees; /* number of frees of blocks from this site */
        size_t age_total; /* (age_total / frees) => mean age at free */
        size_t max_lifespan; /* largest age (in global bytes) seen at free */
        struct cds_lfht_node hnode; /* node in the `totals' table */
        struct cds_list_head allocs; /* <=> alloc_hdr.node */
        uint32_t hval; /* jhash of the key below */
        uint32_t capa; /* key length incl. NUL; 0 => key is a raw address */
        char k[]; /* key: "$PATH:$LINE\0" or a raw return address */
};
+
/*
 * Every allocation has this in the header, maintain alignment with malloc
 * Do not expose this to Perl code because of use-after-free concerns.
 */
struct alloc_hdr {
        struct cds_list_head anode; /* <=> src_loc.allocs */
        union {
                struct {
                        size_t gen; /* global age: total_bytes_inc at alloc */
                        struct src_loc *loc; /* allocation site, may be NULL */
                } live;
                struct rcu_head dead; /* reused for deferred free via RCU */
        } as;
        void *real; /* what to call real_free on (exists for *memalign) */
        size_t size; /* bytes the user requested (header excluded) */
};
+
/*
 * per-thread scratch space for building a src_loc key of the form
 * $PATHNAME:$LINENO (see assign_line).
 * NOTE(review): the slack term is sizeof(struct alloc_hdr), but the
 * buffer actually holds a struct src_loc header which appears larger —
 * verify a PATH_MAX-length path cannot overflow this buffer.
 */
static MWRAP_TSD char kbuf[
        PATH_MAX + sizeof(":") + UINT2STR_MAX + sizeof(struct alloc_hdr)
];
+
+static struct alloc_hdr *ptr2hdr(void *p)
+{
+        return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr));
+}
+
+static void *hdr2ptr(struct alloc_hdr *h)
+{
+        return (void *)((uintptr_t)h + sizeof(struct alloc_hdr));
+}
+
+static int loc_is_addr(const struct src_loc *l)
+{
+        return l->capa == 0;
+}
+
+static size_t loc_size(const struct src_loc *l)
+{
+        return loc_is_addr(l) ? sizeof(uintptr_t) : l->capa;
+}
+
+static int loc_eq(struct cds_lfht_node *node, const void *key)
+{
+        const struct src_loc *existing;
+        const struct src_loc *k = key;
+
+        existing = caa_container_of(node, struct src_loc, hnode);
+
+        return (k->hval == existing->hval &&
+                k->capa == existing->capa &&
+                memcmp(k->k, existing->k, loc_size(k)) == 0);
+}
+
/*
 * Account one allocation of k->total bytes to the site keyed by @k.
 * Returns the (immortal) struct src_loc for the site, or NULL if the
 * totals table is unavailable or real_malloc fails.  Caller must hold
 * rcu_read_lock.
 */
static struct src_loc *totals_add_rcu(struct src_loc *k)
{
        struct cds_lfht_iter iter;
        struct cds_lfht_node *cur;
        struct src_loc *l = 0;
        struct cds_lfht *t;

again:
        t = CMM_LOAD_SHARED(totals);
        if (!t) goto out_unlock;
        cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
        cur = cds_lfht_iter_get_node(&iter);
        if (cur) { /* existing site: bump counters atomically */
                l = caa_container_of(cur, struct src_loc, hnode);
                uatomic_add(&l->total, k->total);
                uatomic_add(&l->allocations, 1);
        } else { /* new site: copy stack/TSD key into immortal storage */
                size_t n = loc_size(k);
                l = real_malloc(sizeof(*l) + n);
                if (!l) goto out_unlock;
                memcpy(l, k, sizeof(*l) + n);
                l->mtx = mutex_assign();
                l->age_total = 0;
                l->max_lifespan = 0;
                l->frees = 0;
                l->allocations = 1;
                CDS_INIT_LIST_HEAD(&l->allocs);
                cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
                if (cur != &l->hnode) { /* lost race */
                        /* drop RCU lock so the grace period can advance */
                        rcu_read_unlock();
                        real_free(l);
                        rcu_read_lock();
                        goto again;
                }
        }
out_unlock:
        return l;
}
+
/* release the RCU read lock taken by update_stats_rcu_lock, if any */
static void update_stats_rcu_unlock(const struct src_loc *l)
{
        if (caa_likely(l))
                rcu_read_unlock();
}
+
/*
 * return the current Perl control op (source of file:line info),
 * or NULL if we are not running inside a Perl thread
 */
static const COP *mwp_curcop(void)
{
#if MWRAP_PERL
        if (&PL_thr_key) { /* are we even in a Perl process? */
#        ifdef PERL_IMPLICIT_CONTEXT
                if (aTHX) return PL_curcop; /* have an interpreter context */
#        else /* !PERL_IMPLICIT_CONTEXT */
                /* single-interpreter build: only trust PL_curcop on the
                 * thread which initialized root_locating */
                if (&locating == root_locating) return PL_curcop;
#        endif /* PERL_IMPLICIT_CONTEXT */
        }
#endif /* MWRAP_PERL */
        return NULL;
}
+
/*
 * Build a "$PATHNAME:$LINENO" key in the per-thread kbuf and account
 * @size bytes to that site.  @line == UINT_MAX means "no line number".
 * Returns NULL if @file is NULL; caller must hold rcu_read_lock.
 */
static struct src_loc *assign_line(size_t size, const char *file, unsigned line)
{
        /* avoid vsnprintf or anything which could call malloc here: */
        size_t len;
        struct src_loc *k;
        char *dst;
        size_t uint_size = UINT2STR_MAX;

        if (!file)
                return NULL;
        len = strlen(file);
        if (len > PATH_MAX) /* truncate overlong paths to fit kbuf */
                len = PATH_MAX;
        k = (void *)kbuf;
        k->total = size;
        dst = mempcpy(k->k, file, len);
        *dst++ = ':';

        if (line == UINT_MAX) /* no line number */
                *dst++ = '-';
        else
                dst = uint2str(line, dst, &uint_size);

        assert(dst && "bad math");
        *dst = 0;        /* terminate string */
        k->capa = (uint32_t)(dst - k->k + 1); /* length includes the NUL */
        k->hval = jhash(k->k, k->capa, 0xdeadbeef);
        return totals_add_rcu(k);
}
+
/*
 * Begin accounting an allocation of @size bytes from @caller.  On
 * success, returns the src_loc with rcu_read_lock held (released via
 * update_stats_rcu_unlock) and stores the new global generation
 * (total bytes ever allocated) in @generation.  Returns NULL if the
 * table is uninitialized or when recursing from another allocator.
 * NOTE(review): if totals_add_rcu fails (OOM), this returns NULL with
 * the RCU read lock still held and update_stats_rcu_unlock(NULL) will
 * not release it — verify that cannot stall grace periods.
 */
static struct src_loc *
update_stats_rcu_lock(size_t *generation, size_t size, uintptr_t caller)
{
        struct src_loc *k, *ret = 0;
        static const size_t xlen = sizeof(caller);
        struct cds_lfht *t = CMM_LOAD_SHARED(totals);
        const COP *cop = NULL;

        if (caa_unlikely(!t)) return 0; /* not initialized */
        if (locating++) goto out; /* do not recurse into another *alloc */

        *generation = uatomic_add_return(&total_bytes_inc, size);
        cop = mwp_curcop();
        rcu_read_lock();
#if MWRAP_PERL
        if (cop) /* prefer Perl file:line over the C return address */
                ret = assign_line(size, OutCopFILE(cop), CopLINE(cop));
#endif /* MWRAP_PERL */
        if (!ret) { /* fall back to keying on the caller's address */
                k = alloca(sizeof(*k) + xlen);
                k->total = size;
                memcpy(k->k, &caller, xlen);
                k->capa = 0; /* capa == 0 marks an address key */
                k->hval = jhash(k->k, xlen, 0xdeadbeef);
                ret = totals_add_rcu(k);
        }
out:
        --locating;
        return ret;
}
+
+size_t malloc_usable_size(void *p)
+{
+        return ptr2hdr(p)->size;
+}
+
/* RCU callback: release the real allocation after a grace period */
static void free_hdr_rcu(struct rcu_head *dead)
{
        struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead);
        real_free(h->real);
}
+
/*
 * free(3) replacement: update per-site stats, then defer releasing the
 * underlying memory to an RCU grace period so readers walking the
 * src_loc.allocs lists never touch freed headers
 */
void free(void *p)
{
        if (p) {
                struct alloc_hdr *h = ptr2hdr(p);
                struct src_loc *l = h->as.live.loc;

                if (l) {
                        /* age = global bytes allocated since this block's birth */
                        size_t current_bytes = uatomic_read(&total_bytes_inc);
                        size_t age = current_bytes - h->as.live.gen;
                        uatomic_add(&total_bytes_dec, h->size);
                        uatomic_add(&l->freed_bytes, h->size);
                        uatomic_set(&h->size, 0);
                        uatomic_add(&l->frees, 1);
                        uatomic_add(&l->age_total, age);

                        /* l->mtx guards the per-site allocation list */
                        CHECK(int, 0, pthread_mutex_lock(l->mtx));
                        cds_list_del_rcu(&h->anode);
                        if (age > l->max_lifespan)
                                l->max_lifespan = age;
                        CHECK(int, 0, pthread_mutex_unlock(l->mtx));

                        call_rcu(&h->as.dead, free_hdr_rcu);
                } else { /* untracked (stats were unavailable at alloc time) */
                        real_free(h->real);
                }
        }
}
+
/*
 * fill in header @h for a freshly-made allocation and, when stats
 * tracking is active (@l non-NULL), link it into @l's allocation list
 */
static void
alloc_insert_rcu(struct src_loc *l, struct alloc_hdr *h, size_t size,
                void *real, size_t generation)
{
        /* we need src_loc to remain alive for the duration of this call */
        if (!h) return;
        h->size = size;
        h->real = real;
        h->as.live.loc = l;
        h->as.live.gen = generation;
        if (l) {
                CHECK(int, 0, pthread_mutex_lock(l->mtx));
                cds_list_add_rcu(&h->anode, &l->allocs);
                CHECK(int, 0, pthread_mutex_unlock(l->mtx));
        }
}
+
/* true if @ptr sits on an @alignment (power-of-two) boundary */
static bool ptr_is_aligned(void *ptr, size_t alignment)
{
        uintptr_t mask = (uintptr_t)alignment - 1;

        return !((uintptr_t)ptr & mask);
}
+
/* round @ptr up to the next @alignment (power-of-two) boundary */
static void *ptr_align(void *ptr, size_t alignment)
{
        uintptr_t mask = alignment - 1;
        uintptr_t aligned = ((uintptr_t)ptr + mask) & ~mask;

        return (void *)aligned;
}
+
/*
 * true if @n is a power of two.  The original `(n & (n - 1)) == 0'
 * wrongly returned true for n == 0 (0 & SIZE_MAX == 0); the only
 * caller guarded n == 0 separately, but the function contract should
 * still reject zero.
 */
static bool is_power_of_two(size_t n)
{
        return n && (n & (n - 1)) == 0;
}
+
/*
 * Common implementation behind all aligned allocators.  Stores the
 * result in *pp and returns 0 on success; returns EINVAL or ENOMEM on
 * failure without touching errno (for posix_memalign compatibility).
 */
static int
mwrap_memalign(void **pp, size_t alignment, size_t size, uintptr_t caller)
{
        struct src_loc *l;
        struct alloc_hdr *h;
        void *real;
        size_t asize;
        size_t generation = 0;
        size_t d = alignment / sizeof(void*);
        size_t r = alignment % sizeof(void*);

        /* POSIX: alignment must be a power-of-two multiple of sizeof(void*) */
        if (r != 0 || d == 0 || !is_power_of_two(d))
                return EINVAL;

        if (alignment <= MALLOC_ALIGNMENT) { /* malloc already aligns enough */
                void *p = malloc(size);
                if (!p) return ENOMEM;
                *pp = p;
                return 0;
        }
        for (; alignment < sizeof(struct alloc_hdr); alignment *= 2)
                ; /* double alignment until >= sizeof(struct alloc_hdr) */
        /* reserve room for the header plus worst-case alignment slack */
        if (__builtin_add_overflow(size, alignment, &asize) ||
            __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize))
                return ENOMEM;

        l = update_stats_rcu_lock(&generation, size, caller);

        real = real_malloc(asize);
        if (real) {
                /* put the user pointer on the first aligned boundary
                 * leaving room for the header just below it */
                void *p = hdr2ptr(real);
                if (!ptr_is_aligned(p, alignment))
                        p = ptr_align(p, alignment);
                h = ptr2hdr(p);
                alloc_insert_rcu(l, h, size, real, generation);
                *pp = p;
        }
        update_stats_rcu_unlock(l);

        return real ? 0 : ENOMEM;
}
+
/* set errno on failure and pass the allocation result through */
static void *memalign_result(int err, void *p)
{
        if (err)
                errno = err;
        return p;
}
+
/* memalign(3): allocate @size bytes aligned to @alignment */
void *memalign(size_t alignment, size_t size)
{
        void *p = NULL;
        int rc = mwrap_memalign(&p, alignment, size, RETURN_ADDRESS(0));

        return memalign_result(rc, p);
}
+
/*
 * posix_memalign(3): returns 0 or an error number without setting
 * errno, per POSIX; *p is only written on success
 */
int posix_memalign(void **p, size_t alignment, size_t size)
{
        return mwrap_memalign(p, alignment, size, RETURN_ADDRESS(0));
}
+
/* these aliases aren't needed for glibc, not sure about other libcs... */
/*
 * NOTE(review): aligning aligned_alloc to memalign skips C11's
 * "size must be a multiple of alignment" requirement — memalign
 * semantics are a superset, so this relaxes, never breaks, callers
 */
void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign")));
void cfree(void *) __attribute__((__nothrow__))
                __attribute__((__leaf__)) __attribute__((alias("free")));
+
/* valloc(3): allocate @size bytes aligned to the page size */
void *valloc(size_t size)
{
        void *p = NULL;
        int err;

        ensure_initialization(); /* makes mparams.page_size valid */
        err = mwrap_memalign(&p, mparams.page_size,
                                size, RETURN_ADDRESS(0));
        return memalign_result(err, p);
}
+
/*
 * add_overflow_p(a, b): non-destructively test whether a + b would
 * overflow.  GCC < 7 lacks __builtin_add_overflow_p, so fall back to
 * __builtin_add_overflow with a throwaway result variable.
 */
#if __GNUC__ < 7
#  define add_overflow_p(a,b) __extension__({ \
                __typeof__(a) _c; \
                __builtin_add_overflow(a,b,&_c); \
        })
#else
#  define add_overflow_p(a,b) \
                __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0)
#endif
+
/* round @size up to the next multiple of @alignment (a power of two) */
static size_t size_align(size_t size, size_t alignment)
{
        size_t mask = alignment - 1;

        return (size + mask) & ~mask;
}
+
/* pvalloc(3): like valloc(3), but round @size up to a whole page */
void *pvalloc(size_t size)
{
        void *p = NULL;
        int err;

        ensure_initialization();

        /* rounding up must not wrap around SIZE_MAX */
        if (add_overflow_p(size, mparams.page_size)) {
                errno = ENOMEM;
                return 0;
        }
        size = size_align(size, mparams.page_size);
        err = mwrap_memalign(&p, mparams.page_size,
                                size, RETURN_ADDRESS(0));
        return memalign_result(err, p);
}
+
/*
 * malloc(3) replacement: allocate @size bytes plus a hidden tracking
 * header; returns the region just past the header
 */
void *malloc(size_t size)
{
        struct src_loc *l;
        struct alloc_hdr *h;
        size_t asize;
        void *p;
        size_t generation = 0;

        /* header + user size must not wrap */
        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize))
                goto enomem;

        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
        p = h = real_malloc(asize);
        if (h) {
                alloc_insert_rcu(l, h, size, h, generation);
                p = hdr2ptr(h); /* the user sees the post-header region */
        }
        update_stats_rcu_unlock(l);
        if (caa_unlikely(!p)) errno = ENOMEM;
        return p;
enomem:
        errno = ENOMEM;
        return 0;
}
+
/*
 * calloc(3) replacement: overflow-checked, zero-filled array
 * allocation with a hidden tracking header
 */
void *calloc(size_t nmemb, size_t size)
{
        void *p;
        struct src_loc *l;
        struct alloc_hdr *h;
        size_t asize;
        size_t generation = 0;

        /* nmemb * size must not wrap ... */
        if (__builtin_mul_overflow(size, nmemb, &size)) {
                errno = ENOMEM;
                return 0;
        }
        /* ... and neither may adding the header */
        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
                errno = ENOMEM;
                return 0;
        }
        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
        p = h = real_malloc(asize);
        if (p) {
                alloc_insert_rcu(l, h, size, h, generation);
                p = hdr2ptr(h);
                memset(p, 0, size); /* calloc must zero the user region */
        }
        update_stats_rcu_unlock(l);
        if (caa_unlikely(!p)) errno = ENOMEM;
        return p;
}
+
/*
 * realloc(3) replacement: always allocates a fresh block and copies,
 * so the allocation is re-attributed to the current call site.
 * size == 0 behaves like free(ptr) and returns NULL; on allocation
 * failure @ptr is left untouched and errno is set to ENOMEM.
 */
void *realloc(void *ptr, size_t size)
{
        void *p;
        struct src_loc *l;
        struct alloc_hdr *h;
        size_t asize;
        size_t generation = 0;

        if (!size) {
                free(ptr);
                return 0;
        }
        if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) {
                errno = ENOMEM;
                return 0;
        }
        l = update_stats_rcu_lock(&generation, size, RETURN_ADDRESS(0));
        p = h = real_malloc(asize);
        if (p) {
                alloc_insert_rcu(l, h, size, h, generation);
                p = hdr2ptr(h);
        }
        update_stats_rcu_unlock(l);

        if (ptr && p) { /* copy the smaller of the old and new sizes */
                struct alloc_hdr *old = ptr2hdr(ptr);
                memcpy(p, ptr, old->size < size ? old->size : size);
                free(ptr);
        }
        if (caa_unlikely(!p)) errno = ENOMEM;
        return p;
}
+
/* arguments for dump_to_file */
struct dump_arg {
        FILE *fp; /* destination stream */
        size_t min; /* skip sites whose `total' is at or below this */
};
+
/*
 * write one "total allocations location" line to a->fp for every
 * site whose byte total exceeds a->min.  Sites keyed by raw address
 * are symbolized via backtrace_symbols(3).
 */
static void *dump_to_file(struct dump_arg *a)
{
        struct cds_lfht_iter iter;
        struct src_loc *l;
        struct cds_lfht *t;

        ++locating; /* keep update_stats_rcu_lock from tracking us */
        rcu_read_lock();
        t = CMM_LOAD_SHARED(totals);
        if (!t)
                goto out_unlock;
        cds_lfht_for_each_entry(t, &iter, l, hnode) {
                const void *p = l->k;
                char **s = 0;
                if (l->total <= a->min) continue;

                if (loc_is_addr(l)) {
                        s = backtrace_symbols(p, 1);
                        p = s[0];
                }
                fprintf(a->fp, "%16zu %12zu %s\n",
                        l->total, l->allocations, (const char *)p);
                if (s) free(s); /* frees the backtrace_symbols array */
        }
out_unlock:
        rcu_read_unlock();
        --locating;
        return 0;
}
+
/*
 * recover the pointer from a backtrace_symbols(3) line: glibc emits
 * "sym [0xADDR]", other libcs just "0xADDR".  Returns non-zero and
 * stores the address in *p on success.
 * NOTE(review): @len is unused on the non-glibc path — confirm @str
 * is always NUL-terminated there.
 */
static int extract_addr(const char *str, size_t len, void **p)
{
        const char *c;
#if defined(__GLIBC__)
        return ((c = memrchr(str, '[', len)) && sscanf(c, "[%p]", p));
#else /* TODO: test FreeBSD */
        return ((c = strstr(str, "0x")) && sscanf(c, "%p", p));
#endif
}
+
#ifndef O_CLOEXEC
#  define O_CLOEXEC 0
#endif
/*
 * Dump stats at exit when the MWRAP environment variable is set.
 * Recognized comma-separated options:
 *   dump_path:$FILE  append the dump to $FILE
 *   dump_fd:$FD      write to an already-open file descriptor (1, 2, ...)
 *   dump_min:$BYTES  skip sites whose totals are at or below $BYTES
 */
__attribute__ ((destructor))
static void dump_destructor(void)
{
        const char *opt = getenv("MWRAP");
        const char *modes[] = { "a", "a+", "w", "w+", "r+" };
        struct dump_arg a = { .min = 0 };
        size_t i;
        int dump_fd;
        char *dump_path;
        char *s;

        if (!opt)
                return;

        ++locating; /* keep stats tracking away from our own allocations */
        if ((dump_path = strstr(opt, "dump_path:")) &&
                        (dump_path += sizeof("dump_path")) &&
                        *dump_path) {
                char *end = strchr(dump_path, ',');
                if (end) { /* make a NUL-terminated copy of the path */
                        char *tmp = alloca(end - dump_path + 1);
                        end = mempcpy(tmp, dump_path, end - dump_path);
                        *end = 0;
                        dump_path = tmp;
                }
                dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
                                0666);
                if (dump_fd < 0) {
                        fprintf(stderr, "open %s failed: %s\n", dump_path,
                                strerror(errno));
                        goto out;
                }
        }
        else if (!sscanf(opt, "dump_fd:%d", &dump_fd))
                goto out;

        if ((s = strstr(opt, "dump_min:")))
                sscanf(s, "dump_min:%zu", &a.min);

        switch (dump_fd) {
        case 0: goto out; /* fd 0 (stdin) makes no sense for output */
        case 1: a.fp = stdout; break;
        case 2: a.fp = stderr; break;
        default:
                if (dump_fd < 0)
                        goto out;
                a.fp = 0;

                /* try each stdio mode until fdopen accepts one */
                for (i = 0; !a.fp && i < 5; i++)
                        a.fp = fdopen(dump_fd, modes[i]);

                if (!a.fp) {
                        fprintf(stderr, "failed to open fd=%d: %s\n",
                                dump_fd, strerror(errno));
                        goto out;
                }
                /* we'll leak some memory here, but this is a destructor */
        }
        dump_to_file(&a);
out:
        --locating;
}
+
/*
 * library constructor: initialize the allocator, the totals table and
 * URCU helper threads while all signals are blocked, so background
 * threads never steal signals from the host runtime
 */
__attribute__((constructor)) static void mwrap_ctor(void)
{
        sigset_t set, old;
        struct alloc_hdr *h;

        ++locating; /* do not track our own allocations during init */

        /* block signals */
        CHECK(int, 0, sigfillset(&set));
        CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old));
        ensure_initialization();
        CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));

        /*
         * PTHREAD_MUTEX_INITIALIZER on FreeBSD means lazy initialization,
         * which happens at pthread_mutex_lock, and that calls calloc
         */
        if (!STATIC_MTX_INIT_OK)
                reset_mutexes();
        /* initialize mutexes used by urcu-bp */
        CMM_STORE_SHARED(totals, lfht_new());
        if (!CMM_LOAD_SHARED(totals))
                fprintf(stderr, "failed to allocate totals table\n");
        h = real_malloc(sizeof(struct alloc_hdr));
        if (h) { /* force call_rcu to start background thread */
                h->real = h;
                call_rcu(&h->as.dead, free_hdr_rcu);
        } else
                fprintf(stderr, "malloc failed: %s\n", strerror(errno));

        /* start background threads before unblocking signals */
        if (MWRAP_EARLY_THREADS)
                cds_lfht_resize(CMM_LOAD_SHARED(totals), 16384);

        CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL));
        CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent,
                                     atfork_child));
        --locating;
}