diff options
Diffstat (limited to 'ext/mwrap/mwrap_core.h')
-rw-r--r-- | ext/mwrap/mwrap_core.h | 1091 |
1 file changed, 1091 insertions, 0 deletions
diff --git a/ext/mwrap/mwrap_core.h b/ext/mwrap/mwrap_core.h new file mode 100644 index 0000000..c0eea2f --- /dev/null +++ b/ext/mwrap/mwrap_core.h @@ -0,0 +1,1091 @@ +/* + * Copyright (C) mwrap hackers <mwrap-perl@80x24.org> + * License: GPL-3.0+ <https://www.gnu.org/licenses/gpl-3.0.txt> + * Disclaimer: I don't really know my way around XS or Perl internals well + */ +#define _LGPL_SOURCE /* allows URCU to inline some stuff */ +#define _GNU_SOURCE +#include "mymalloc.h" /* includes dlmalloc_c.h */ +#ifndef MWRAP_PERL +# define MWRAP_PERL 0 +#endif + +#ifndef MWRAP_RUBY +# define MWRAP_RUBY 0 +#endif + +/* set a sensible max to avoid stack overflows */ +#ifndef MWRAP_BT_MAX +# define MWRAP_BT_MAX 32 +#endif + +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif +#include <execinfo.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <dlfcn.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <pthread.h> +#include <signal.h> +#include <urcu-bp.h> +#include <urcu/rculfhash.h> +#include <urcu/rculist.h> +#include <limits.h> + +#if MWRAP_PERL +# include "EXTERN.h" +# include "perl.h" +# include "XSUB.h" +# include "embed.h" +# include "ppport.h" +#endif + +#if MWRAP_RUBY +# undef _GNU_SOURCE /* ruby.h redefines it */ +# include <ruby.h> /* defines HAVE_RUBY_RACTOR_H on 3.0+ */ +# include <ruby/thread.h> +# include <ruby/io.h> +#endif + +/* + * XXH3 (truncated to 32-bits) seems to provide a ~2% speedup. + * XXH32 doesn't show improvements over jhash despite rculfhash + * only supporting 32-bit hash values. + */ +#if defined(HAVE_XXHASH) +# define XXH_INLINE_ALL +# include <xxhash.h> +# if !defined(XXH3_64bits) +# warning XXH3_64bits not defined +# endif +#endif + +#if !defined(XXH3_64bits) +# include "jhash.h" +#endif + +#define U24_MAX (1U << 24) + +/* + * Perl doesn't have a GC the same way (C) Ruby does, so no GC count. 
+ * Instead, the relative age of an object is the number of total bytes + * allocated (and we don't care about overflow on 32-bit since + * hardly anybody still uses it). + */ +static size_t total_bytes_inc, total_bytes_dec, nr_file, nr_src_loc; +static uint32_t bt_req_depth; + +#if MWRAP_PERL +extern pthread_key_t __attribute__((weak)) PL_thr_key; +extern const char __attribute__((weak)) PL_memory_wrap[]; /* needed for -O0 */ +# if !defined(PERL_IMPLICIT_CONTEXT) +static size_t *root_locating; /* determines if PL_curcop is our thread */ +# endif +#endif /* MWRAP_PERL */ + +#if MWRAP_RUBY +const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev or later */ + +# ifdef HAVE_RUBY_RACTOR_H /* Ruby 3.0+ */ +extern MWRAP_TSD void * __attribute__((weak)) ruby_current_ec; +# else /* Ruby 2.6-2.7 */ +extern void * __attribute__((weak)) ruby_current_execution_context_ptr; +# define ruby_current_ec ruby_current_execution_context_ptr +# endif /* HAVE_RUBY_RACTOR_H */ + +extern void * __attribute__((weak)) ruby_current_vm_ptr; /* for rb_gc_count */ +extern size_t __attribute__((weak)) rb_gc_count(void); +int __attribute__((weak)) ruby_thread_has_gvl_p(void); + +/* + * rb_source_location_cstr relies on GET_EC(), and it's possible + * to have a native thread but no EC during the early and late + * (teardown) phases of the Ruby process + */ +static int has_ec_p(void) +{ + return ruby_thread_has_gvl_p && ruby_thread_has_gvl_p() && + ruby_current_vm_ptr && ruby_current_ec; +} + +static void set_generation(size_t *gen, size_t size) +{ + if (rb_gc_count) { + uatomic_add_return(&total_bytes_inc, size); + if (has_ec_p()) + *gen = rb_gc_count(); + } else { + *gen = uatomic_add_return(&total_bytes_inc, size); + } +} +# define SET_GENERATION(gen, size) set_generation(gen, size) +#endif /* MWRAP_RUBY */ + +#ifndef SET_GENERATION +# define SET_GENERATION(gen, size) \ + *gen = uatomic_add_return(&total_bytes_inc, size) +#endif /* !SET_GENERATION */ + +/* generic stuff: */ +static 
MWRAP_TSD size_t locating; +static struct cds_lfht *files, *totals; +union padded_mutex { + pthread_mutex_t mtx; + char pad[64]; /* cache alignment for common CPUs */ +}; + +/* a pool of mutexes for all "struct src_loc" */ +#define MUTEX_NR (1 << 6) +#define MUTEX_MASK (MUTEX_NR - 1) +static union padded_mutex mutexes[MUTEX_NR] = { + [0 ... (MUTEX_NR-1)].mtx = PTHREAD_MUTEX_INITIALIZER +}; + +#ifdef static_assert +/* we only use uint32_t for pathname storage for struct alignment */ +static_assert(UINT32_MAX > PATH_MAX, "UINT32_MAX > PATH_MAX"); +#endif + +static struct cds_lfht *lfht_new(size_t size) +{ + return cds_lfht_new(size, 1, 0, CDS_LFHT_AUTO_RESIZE, 0); +} + +static void reset_mutexes(void) +{ + size_t i; + + for (i = 0; i < MUTEX_NR; i++) + CHECK(int, 0, pthread_mutex_init(&mutexes[i].mtx, 0)); +} + +#ifndef HAVE_MEMPCPY +static void *my_mempcpy(void *dest, const void *src, size_t n) +{ + return (char *)memcpy(dest, src, n) + n; +} +#define mempcpy(dst,src,n) my_mempcpy(dst,src,n) +#endif + +/* stolen from glibc: */ +#define RETURN_ADDRESS(nr) \ + __builtin_extract_return_addr(__builtin_return_address(nr)) + + +#define SRC_LOC_BT(bt) union stk_bt bt; do { \ + uint32_t depth = locating ? 1 : CMM_LOAD_SHARED(bt_req_depth); \ + switch (depth) { \ + case 0: \ + case 1: bt.sl.bt_len = 1; bt.sl.bt[0] = RETURN_ADDRESS(0); break; \ + default: /* skip 1st level of BT since thats our function */ \ + mwrap_assert(depth <= MWRAP_BT_MAX); \ + ++locating; \ + long n = (long)backtrace(bt_dst(&bt), depth); \ + --locating; \ + bt.sl.bt_len = n <= 1 ? 
0 : (uint32_t)n - 1; \ + if (n > 1) mwrap_assert(bt.sl.bt[0] == RETURN_ADDRESS(0)); \ + } \ +} while (0) + +/* + * only for interpreted sources (Perl/Ruby/etc), not backtrace_symbols* files + * Allocated via real_malloc / real_free + */ +struct src_file { + struct cds_lfht_node nd; /* <=> files table */ + uint32_t fn_hash; + uint32_t fn_len; /* < PATH_MAX */ + char fn[]; /* NUL-terminated */ +}; + +/* allocated via real_malloc, immortal for safety reasons */ +struct src_loc { + size_t total; + size_t freed_bytes; + size_t allocations; + size_t frees; + size_t age_total; /* (age_total / frees) => mean age at free */ + size_t max_lifespan; + struct cds_lfht_node hnode; /* <=> totals table */ + struct cds_list_head allocs; /* <=> alloc_hdr.node */ + uint32_t loc_hash; + uint8_t bt_len; + /* next 3 fields contiguous for hash_src_loc(): */ + unsigned lineno:24; /* nobody should have >=16.7 LoC in one file */ + struct src_file *f; + void *bt[]; +} __attribute__((packed,aligned(8))); + +/* sizeof() doesn't work on bitfields */ +#define SIZEOF_LINENO (size_t)(24 / 8) + +/* + * Every allocation has this in the header, maintain alignment with malloc + * Do not expose this to Perl code because of use-after-free concerns. 
+ */ +struct alloc_hdr { + struct cds_list_head anode; /* <=> src_loc.allocs */ + union { + struct { + size_t gen; /* global age || rb_gc_count() */ + struct src_loc *loc; + } live; + struct rcu_head dead; + } as; + void *real; /* what to call real_free on (exists for *memalign) */ + size_t size; +}; + +/* on-stack structures */ +union stk_sf { + struct src_file sf; + char buf_[sizeof(struct src_file) + PATH_MAX]; +}; + +union stk_bt { + struct src_loc sl; + /* we subtract one level from MWRAP_BT_MAX since we discard one + * level of backtrace(3) (see below for why) */ + char buf_[sizeof(struct src_loc) + sizeof(void *) * (MWRAP_BT_MAX-1)]; +}; + +/* + * we discard the 1st-level of the backtrace(3) since it's our *alloc + * function (and therefore uninteresting), so we want backtrace(3) to + * write to bt->sl.bt[-1] so that bt->sl.bt[0] is the first interesting + * thing. + */ +#ifdef static_assert +static_assert(offsetof(struct src_loc, f) + sizeof(void *) == + offsetof(struct src_loc, bt), + "bt lineno is is bt[-1]"); +#endif +static void **bt_dst(union stk_bt *bt) +{ + return (void **)&bt->sl.f; +} + +static struct alloc_hdr *ptr2hdr(void *p) +{ + return (struct alloc_hdr *)((uintptr_t)p - sizeof(struct alloc_hdr)); +} + +static void *hdr2ptr(struct alloc_hdr *h) +{ + return (void *)((uintptr_t)h + sizeof(struct alloc_hdr)); +} + +static int loc_is_addr(const struct src_loc *l) +{ + return l->f == NULL; +} + +static size_t bt_bytelen(const struct src_loc *l) +{ + return sizeof(l->bt[0]) * l->bt_len; +} + +static size_t src_loc_hash_len(const struct src_loc *l) +{ + return sizeof(l->f) + SIZEOF_LINENO + bt_bytelen(l); +} + +static void *src_loc_hash_tip(const struct src_loc *l) +{ + return (void *)((uintptr_t)&l->bt_len + sizeof(l->bt_len)); +} + +static int loc_eq(struct cds_lfht_node *node, const void *key) +{ + const struct src_loc *existing; + const struct src_loc *k = key; + + existing = caa_container_of(node, struct src_loc, hnode); + + return (k->bt_len == 
existing->bt_len && + !memcmp(src_loc_hash_tip(k), src_loc_hash_tip(existing), + src_loc_hash_len(k))); +} + +static int fn_eq(struct cds_lfht_node *node, const void *key) +{ + const struct src_file *existing; + const struct src_file *k = key; + + existing = caa_container_of(node, struct src_file, nd); + + return (k->fn_len == existing->fn_len && + !memcmp(k->fn, existing->fn, k->fn_len)); +} + +static struct src_loc *src_loc_get(struct cds_lfht *t, const struct src_loc *k) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *cur; + + mwrap_assert(rcu_read_ongoing()); + cds_lfht_lookup(t, k->loc_hash, loc_eq, k, &iter); + cur = cds_lfht_iter_get_node(&iter); + return cur ? caa_container_of(cur, struct src_loc, hnode) : NULL; +} + +static struct src_loc *totals_add_rcu(const struct src_loc *k) +{ + struct src_loc *l; + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + if (!t) return NULL; + +again: + l = src_loc_get(t, k); + if (l) { + uatomic_add(&l->total, k->total); + uatomic_inc(&l->allocations); + } else { + size_t n = bt_bytelen(k) + sizeof(*k); + struct cds_lfht_node *cur; + + l = real_malloc(n); + if (!l) return l; + memcpy(l, k, n); + l->freed_bytes = 0; + l->age_total = 0; + l->max_lifespan = 0; + l->freed_bytes = 0; + l->frees = 0; + l->allocations = 1; + CDS_INIT_LIST_HEAD(&l->allocs); + cur = cds_lfht_add_unique(t, l->loc_hash, loc_eq, l, &l->hnode); + if (cur == &l->hnode) { + uatomic_inc(&nr_src_loc); + } else { /* lost race */ + rcu_read_unlock(); + real_free(l); + rcu_read_lock(); + goto again; + } + } + return l; +} + +static uint32_t do_hash(const void *p, size_t len) +{ +#if defined(XXH3_64bits) + union { + XXH64_hash_t u64; + uint32_t u32[2]; + } u; + u.u64 = XXH3_64bits(p, len); + return u.u32[1]; +#else + return jhash(p, len, 0xdeadbeef); +#endif +} + +static void hash_src_loc(struct src_loc *l) +{ + l->loc_hash = do_hash(src_loc_hash_tip(l), src_loc_hash_len(l)); +} + +static struct src_file *src_file_get(struct cds_lfht *t, struct src_file 
*k, + const char *fn, size_t fn_len) +{ + struct cds_lfht_iter iter; + struct cds_lfht_node *cur; + + mwrap_assert(t); /* caller should've bailed if missing */ + if (fn_len >= PATH_MAX) + return NULL; + k->fn_len = (uint32_t)fn_len; + memcpy(k->fn, fn, fn_len); + k->fn[fn_len] = 0; + k->fn_hash = do_hash(k->fn, fn_len); + mwrap_assert(rcu_read_ongoing()); + cds_lfht_lookup(t, k->fn_hash, fn_eq, k, &iter); + cur = cds_lfht_iter_get_node(&iter); + + return cur ? caa_container_of(cur, struct src_file, nd) : NULL; +} + +#if MWRAP_PERL +static const COP *mwp_curcop(void) +{ + if (&PL_thr_key) { /* are we even in a Perl process? */ +# ifdef PERL_IMPLICIT_CONTEXT + if (aTHX) return PL_curcop; +# else /* !PERL_IMPLICIT_CONTEXT */ + if (&locating == root_locating) return PL_curcop; +# endif /* PERL_IMPLICIT_CONTEXT */ + } + return NULL; +} + +static const char *mw_perl_src_file_cstr(unsigned *lineno) +{ + const COP *cop = mwp_curcop(); + if (!cop) return NULL; + const char *fn = CopFILE(cop); + if (!fn) return NULL; + *lineno = CopLINE(cop); + return fn; +} +# define SRC_FILE_CSTR(lineno) mw_perl_src_file_cstr(lineno) +#endif /* MWRAP_PERL */ + +#if MWRAP_RUBY +static const char *mw_ruby_src_file_cstr(unsigned *lineno) +{ + if (!has_ec_p()) return NULL; + int line; + const char *fn = rb_source_location_cstr(&line); + *lineno = line < 0 ? 
UINT_MAX : (unsigned)line; + return fn; +} +# define SRC_FILE_CSTR(lineno) mw_ruby_src_file_cstr(lineno) +#endif /* MWRAP_RUBY */ + +#ifndef SRC_FILE_CSTR /* for C-only compilation */ +# define SRC_FILE_CSTR(lineno) (NULL) +#endif /* !SRC_FILE_CSTR */ + +static struct src_loc *assign_line(size_t size, struct src_loc *sl, + const char *fn, unsigned lineno) +{ + struct src_file *f; + union stk_sf sf; + struct cds_lfht_node *cur; + struct cds_lfht *t = CMM_LOAD_SHARED(files); + + mwrap_assert(t); + + size_t len = strlen(fn); + if (len >= PATH_MAX) + len = PATH_MAX - 1; + + if (lineno == UINT_MAX) { /* NOLINE in Perl is UINT_MAX */ + lineno = U24_MAX; + } else if (lineno > U24_MAX) { + fprintf(stderr, + "%s:%u line number exceeds limit (%u), capped\n", + fn, lineno, U24_MAX); + lineno = U24_MAX; + } +again: + f = src_file_get(t, &sf.sf, fn, len); + if (!f) { /* doesn't exist, add a new one */ + f = real_malloc(sizeof(*f) + len + 1); + if (!f) return NULL; + memcpy(f, &sf.sf, sizeof(*f) + len + 1); + cur = cds_lfht_add_unique(t, f->fn_hash, fn_eq, f, &f->nd); + if (cur == &f->nd) { + uatomic_inc(&nr_file); + } else { /* lost race */ + rcu_read_unlock(); + real_free(f); + rcu_read_lock(); + goto again; + } + } + + sl->total = size; + sl->f = f; + sl->lineno = lineno; + if (f && !bt_req_depth) + sl->bt_len = 0; + hash_src_loc(sl); + return totals_add_rcu(sl); +} + +static struct src_loc * +update_stats_rcu_lock(size_t *gen, size_t size, struct src_loc *sl) +{ + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + struct src_loc *ret = NULL; + + if (caa_unlikely(!t)) return 0; /* not initialized */ + if (locating++) goto out; /* do not recurse into another *alloc */ + + SET_GENERATION(gen, size); + + unsigned lineno; + const char *fn = SRC_FILE_CSTR(&lineno); + + rcu_read_lock(); + if (fn) + ret = assign_line(size, sl, fn, lineno); + if (!ret) { /* no associated Perl|Ruby code, just C/C++ */ + sl->total = size; + sl->f = NULL; + sl->lineno = 0; + hash_src_loc(sl); + ret = 
totals_add_rcu(sl); + } +out: + --locating; + return ret; +} + +size_t malloc_usable_size(void *p) +{ + return ptr2hdr(p)->size; +} + +static void free_hdr_rcu(struct rcu_head *dead) +{ + struct alloc_hdr *h = caa_container_of(dead, struct alloc_hdr, as.dead); + real_free(h->real); +} + +static pthread_mutex_t *src_loc_mutex_lock(const struct src_loc *l) +{ + pthread_mutex_t *mtx = &mutexes[l->loc_hash & MUTEX_MASK].mtx; + CHECK(int, 0, pthread_mutex_lock(mtx)); + return mtx; +} + +void free(void *p) +{ + if (p) { + struct alloc_hdr *h = ptr2hdr(p); + struct src_loc *l = h->as.live.loc; + + if (l) { + size_t current_bytes = uatomic_read(&total_bytes_inc); + size_t age = current_bytes - h->as.live.gen; + uatomic_add(&total_bytes_dec, h->size); + uatomic_add(&l->freed_bytes, h->size); + uatomic_set(&h->size, 0); + uatomic_inc(&l->frees); + uatomic_add(&l->age_total, age); + + pthread_mutex_t *mtx = src_loc_mutex_lock(l); + cds_list_del_rcu(&h->anode); + if (age > l->max_lifespan) + l->max_lifespan = age; + CHECK(int, 0, pthread_mutex_unlock(mtx)); + + call_rcu(&h->as.dead, free_hdr_rcu); + } else { + real_free(h->real); + } + } +} + +static void +alloc_insert_rcu(struct src_loc *sl, struct alloc_hdr *h, size_t size, + void *real) +{ + h->size = size; + h->real = real; + size_t gen = 0; + struct src_loc *l = update_stats_rcu_lock(&gen, size, sl); + h->as.live.loc = l; + h->as.live.gen = gen; + if (l) { + pthread_mutex_t *mtx = src_loc_mutex_lock(l); + cds_list_add_rcu(&h->anode, &l->allocs); + CHECK(int, 0, pthread_mutex_unlock(mtx)); + rcu_read_unlock(); + } +} + +static bool ptr_is_aligned(void *ptr, size_t alignment) +{ + return ((uintptr_t) ptr & (alignment - 1)) == 0; +} + +static void *ptr_align(void *ptr, size_t alignment) +{ + return (void *)(((uintptr_t) ptr + (alignment - 1)) & ~(alignment - 1)); +} + +static bool is_power_of_two(size_t n) +{ + return (n & (n - 1)) == 0; +} + +static int +mwrap_memalign(void **pp, size_t alignment, size_t size, struct 
src_loc *sl) +{ + void *real; + size_t asize; + size_t d = alignment / sizeof(void*); + size_t r = alignment % sizeof(void*); + + if (r != 0 || d == 0 || !is_power_of_two(d)) + return EINVAL; + + if (alignment <= MALLOC_ALIGNMENT) { + void *p = malloc(size); + if (!p) return ENOMEM; + *pp = p; + return 0; + } + for (; alignment < sizeof(struct alloc_hdr); alignment *= 2) + ; /* double alignment until >= sizeof(struct alloc_hdr) */ + if (__builtin_add_overflow(size, alignment, &asize) || + __builtin_add_overflow(asize, sizeof(struct alloc_hdr), &asize)) + return ENOMEM; + + real = real_malloc(asize); + if (real) { + void *p = hdr2ptr(real); + if (!ptr_is_aligned(p, alignment)) + p = ptr_align(p, alignment); + struct alloc_hdr *h = ptr2hdr(p); + alloc_insert_rcu(sl, h, size, real); + *pp = p; + } + + return real ? 0 : ENOMEM; +} + +static void *memalign_result(int err, void *p) +{ + if (caa_unlikely(err)) + errno = err; + return p; +} + +void *memalign(size_t alignment, size_t size) +{ + void *p = NULL; + SRC_LOC_BT(bt); + int err = mwrap_memalign(&p, alignment, size, &bt.sl); + return memalign_result(err, p); +} + +int posix_memalign(void **p, size_t alignment, size_t size) +{ + SRC_LOC_BT(bt); + return mwrap_memalign(p, alignment, size, &bt.sl); +} + +/* these aliases aren't needed for glibc, not sure about other libcs... 
*/ +void *aligned_alloc(size_t, size_t) __attribute__((alias("memalign"))); +void cfree(void *) __attribute__((__nothrow__)) + __attribute__((__leaf__)) __attribute__((alias("free"))); + +void *valloc(size_t size) +{ + ensure_initialization(); + SRC_LOC_BT(bt); + void *p = NULL; + int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl); + return memalign_result(err, p); +} + +#if __GNUC__ < 7 +# define add_overflow_p(a,b) __extension__({ \ + __typeof__(a) _c; \ + __builtin_add_overflow(a,b,&_c); \ + }) +#else +# define add_overflow_p(a,b) \ + __builtin_add_overflow_p((a),(b),(__typeof__(a+b))0) +#endif + +static size_t size_align(size_t size, size_t alignment) +{ + return ((size + (alignment - 1)) & ~(alignment - 1)); +} + +void *pvalloc(size_t size) +{ + void *p = NULL; + + ensure_initialization(); + + if (add_overflow_p(size, mparams.page_size)) { + errno = ENOMEM; + return 0; + } + size = size_align(size, mparams.page_size); + SRC_LOC_BT(bt); + int err = mwrap_memalign(&p, mparams.page_size, size, &bt.sl); + return memalign_result(err, p); +} + +void *malloc(size_t size) +{ + size_t asize; + + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) + goto enomem; + + void *p = real_malloc(asize); + if (p) { + SRC_LOC_BT(bt); + struct alloc_hdr *h = p; + alloc_insert_rcu(&bt.sl, h, size, h); + return hdr2ptr(h); + } +enomem: + errno = ENOMEM; + return 0; +} + +void *calloc(size_t nmemb, size_t size) +{ + size_t asize; + + if (__builtin_mul_overflow(size, nmemb, &size)) + goto enomem; + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), &asize)) + goto enomem; + void *p = real_malloc(asize); + if (p) { + struct alloc_hdr *h = p; + SRC_LOC_BT(bt); + alloc_insert_rcu(&bt.sl, h, size, h); + return memset(hdr2ptr(h), 0, size); + } +enomem: + errno = ENOMEM; + return 0; +} + +void *realloc(void *ptr, size_t size) +{ + size_t asize; + + if (!size) { + free(ptr); + return 0; + } + if (__builtin_add_overflow(size, sizeof(struct alloc_hdr), 
&asize)) + goto enomem; + void *p = real_malloc(asize); + if (p) { + struct alloc_hdr *h = p; + SRC_LOC_BT(bt); + alloc_insert_rcu(&bt.sl, h, size, h); + p = hdr2ptr(h); + if (ptr) { + struct alloc_hdr *old = ptr2hdr(ptr); + memcpy(p, ptr, old->size < size ? old->size : size); + free(ptr); + } + return p; + } +enomem: + errno = ENOMEM; + return 0; +} + +struct dump_arg { + FILE *fp; + size_t min; +}; + +char **bt_syms(void * const *addrlist, uint32_t size) +{ + mwrap_assert(size < INT_MAX); +#if defined(__GLIBC__) + char **s = backtrace_symbols(addrlist, size); +#else /* make FreeBSD look like glibc output: */ + char **s = backtrace_symbols_fmt(addrlist, size, "%f(%n%D) [%a]"); +#endif + if (!s) fprintf(stderr, "backtrace_symbols: %m\n"); + return s; +} + +/* supported by modern gcc + clang */ +#define AUTO_FREE __attribute__((__cleanup__(cleanup_free))) +static void cleanup_free(void *any) +{ + void **p = any; + free(*p); +} + +static void *dump_to_file(struct dump_arg *a) +{ + struct cds_lfht_iter iter; + struct src_loc *l; + struct cds_lfht *t; + + ++locating; + rcu_read_lock(); + t = CMM_LOAD_SHARED(totals); + if (!t) + goto out_unlock; + + cds_lfht_for_each_entry(t, &iter, l, hnode) { + if (l->total <= a->min) continue; + + if (loc_is_addr(l)) { + AUTO_FREE char **s = bt_syms(l->bt, 1); + + if (s) + fprintf(a->fp, "%16zu %12zu %s\n", + l->total, l->allocations, s[0]); + } else { + fprintf(a->fp, "%16zu %12zu %s:%u\n", + l->total, l->allocations, l->f->fn, l->lineno); + } + } +out_unlock: + rcu_read_unlock(); + --locating; + return 0; +} + +/* str = "/path/to/foo.so(+0x123) [0xdeadbeefcafe]" (see bt_syms()) */ +static int extract_addr(const char *str, size_t len, void **p) +{ + unsigned long x; + char *e; + const char *end = str + len; + const char *c = memrchr(str, '[', len); + + if (c && (c + 2) < end && c[1] == '0' && c[2] == 'x') { + errno = 0; + x = strtoul(c + 3, &e, 16); + if (!errno && *e == ']') { + *p = (void *)x; + return 1; + } + } + return 0; +} + 
+/* str is $PATHNAME:$LINENO, len is strlen(str) */ +static struct src_loc *src_loc_lookup(const char *str, size_t len) +{ + char *c = memrchr(str, ':', len); + const char *end = str + len; + unsigned lineno; + struct src_loc *l = NULL; + struct cds_lfht *t = CMM_LOAD_SHARED(files); + union stk_sf sf; + + if (!c || c == end || !t) + return NULL; + + size_t fn_len = c - str; + c++; + if (*c == '-') { + lineno = U24_MAX; + } else { + lineno = 0; + for (; c < end; c++) { + if (*c < '0' || *c > '9') + return NULL; + lineno *= 10; + lineno += (*c - '0'); + } + if (lineno > U24_MAX) + return NULL; + } + rcu_read_lock(); + struct src_file *f = src_file_get(t, &sf.sf, str, fn_len); + t = CMM_LOAD_SHARED(totals); + if (f && t) { + struct src_loc k; + + k.f = f; + k.lineno = lineno; + k.bt_len = 0; + hash_src_loc(&k); + l = src_loc_get(t, &k); + } + rcu_read_unlock(); + return l; +} + +#ifndef O_CLOEXEC +# define O_CLOEXEC 0 +#endif +static void h1d_atexit(void); +__attribute__ ((destructor)) static void mwrap_dtor(void) +{ + const char *opt = getenv("MWRAP"); + const char *modes[] = { "a", "a+", "w", "w+", "r+" }; + struct dump_arg a = { .min = 0 }; + size_t i; + int dump_fd; + char *dump_path; + char *s; + + /* n.b. 
unsetenv("MWRAP") may be called, so run this unconditionally */ + h1d_atexit(); + + if (!opt) + return; + + ++locating; + if ((dump_path = strstr(opt, "dump_path:")) && + (dump_path += sizeof("dump_path")) && + *dump_path) { + char *end = strchr(dump_path, ','); + char buf[PATH_MAX]; + if (end) { + mwrap_assert((end - dump_path) < (intptr_t)sizeof(buf)); + end = mempcpy(buf, dump_path, end - dump_path); + *end = 0; + dump_path = buf; + } + dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT, + 0666); + if (dump_fd < 0) { + fprintf(stderr, "open %s failed: %m\n", dump_path); + goto out; + } + } + else if (!sscanf(opt, "dump_fd:%d", &dump_fd)) + goto out; + + if ((s = strstr(opt, "dump_min:"))) + sscanf(s, "dump_min:%zu", &a.min); + + switch (dump_fd) { + case 0: goto out; + case 1: a.fp = stdout; break; + case 2: a.fp = stderr; break; + default: + if (dump_fd < 0) + goto out; + a.fp = 0; + + for (i = 0; !a.fp && i < 5; i++) + a.fp = fdopen(dump_fd, modes[i]); + + if (!a.fp) { + fprintf(stderr, "failed to open fd=%d: %m\n", dump_fd); + goto out; + } + /* we'll leak some memory here, but this is a destructor */ + } + dump_to_file(&a); +out: + --locating; +} + +static void mwrap_reset(void) +{ + struct cds_lfht *t; + struct cds_lfht_iter iter; + struct src_loc *l; + + uatomic_set(&total_bytes_inc, 0); + uatomic_set(&total_bytes_dec, 0); + + rcu_read_lock(); + t = CMM_LOAD_SHARED(totals); + if (t) + cds_lfht_for_each_entry(t, &iter, l, hnode) { + uatomic_set(&l->total, 0); + uatomic_set(&l->allocations, 0); + uatomic_set(&l->frees, 0); + uatomic_set(&l->freed_bytes, 0); + uatomic_set(&l->age_total, 0); + uatomic_set(&l->max_lifespan, 0); + } + rcu_read_unlock(); +} + +static inline struct src_loc *mwrap_get(const char *str, size_t len) +{ + void *p; + + if (!extract_addr(str, len, &p)) + return src_loc_lookup(str, len); + + union stk_bt k; + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + + if (!t) return NULL; + k.sl.f = NULL; + k.sl.lineno = 0; + k.sl.bt[0] 
= p; + k.sl.bt_len = 1; + hash_src_loc(&k.sl); + rcu_read_lock(); + struct src_loc *l = src_loc_get(t, &k.sl); + rcu_read_unlock(); + return l; +} + +static struct src_loc *mwrap_get_bin(const char *buf, size_t len) +{ + static const size_t min_len = sizeof(struct src_file *) + SIZEOF_LINENO; + + if (len >= min_len && ((len - min_len) % sizeof(void *)) == 0) { + struct cds_lfht *t = CMM_LOAD_SHARED(totals); + if (!t) return NULL; + + union stk_bt k; + size_t bt_len = (len - min_len) / sizeof(void *); + + if (bt_len > MWRAP_BT_MAX) + return NULL; + k.sl.bt_len = bt_len; + + memcpy(src_loc_hash_tip(&k.sl), buf, len); + hash_src_loc(&k.sl); + rcu_read_lock(); + struct src_loc *l = src_loc_get(t, &k.sl); + rcu_read_unlock(); + return l; + } + return NULL; +} + +static const char *mwrap_env; +#include "httpd.h" + +__attribute__((constructor)) static void mwrap_ctor(void) +{ + sigset_t set, old; + struct alloc_hdr *h; + mwrap_env = getenv("MWRAP"); + + ++locating; + + /* block signals */ + CHECK(int, 0, sigfillset(&set)); + CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &set, &old)); + ensure_initialization(); + CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor)); + + /* initialize mutexes used by urcu-bp */ + CMM_STORE_SHARED(files, lfht_new(256)); + if (!CMM_LOAD_SHARED(files)) + fprintf(stderr, "failed to allocate files table\n"); + CMM_STORE_SHARED(totals, lfht_new(16384)); + if (!CMM_LOAD_SHARED(totals)) + fprintf(stderr, "failed to allocate totals table\n"); + h = real_malloc(sizeof(struct alloc_hdr)); + if (h) { /* force call_rcu to start background thread */ + h->real = h; + call_rcu(&h->as.dead, free_hdr_rcu); + } else + fprintf(stderr, "malloc: %m\n"); + + h1d_start(); + CHECK(int, 0, pthread_sigmask(SIG_SETMASK, &old, NULL)); + CHECK(int, 0, pthread_atfork(atfork_prepare, atfork_parent, + atfork_child)); + + if (mwrap_env) { + const char *bt = strstr(mwrap_env, "bt:"); + if (bt) { + bt += sizeof("bt"); + errno = 0; + char *end; + unsigned long n = 
strtoul(bt, &end, 10); + if (n && !errno && (*end == ',' || *end == 0)) { + if (n > MWRAP_BT_MAX) + n = MWRAP_BT_MAX; + CMM_STORE_SHARED(bt_req_depth, (uint32_t)n); + } + } + } + --locating; +} |