Diffstat (limited to 'ext/mwrap/mymalloc.h')
-rw-r--r-- | ext/mwrap/mymalloc.h | 299
1 file changed, 299 insertions(+), 0 deletions(-)
diff --git a/ext/mwrap/mymalloc.h b/ext/mwrap/mymalloc.h
new file mode 100644
index 0000000..196ccc0
--- /dev/null
+++ b/ext/mwrap/mymalloc.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: LGPL-2.1+ <https://www.gnu.org/licenses/lgpl-2.1.txt>
+ *
+ * Unlike the rest of the project, I'm happy with this being LGPL-2.1+
+ * since the remote_free_* stuff is meant for glibc, anyways.
+ *
+ * This is a small wrapper on top of dlmalloc (dlmalloc_c.h) which
+ * adds wait-free free(3) multi-threading support to avoid contention
+ * with call_rcu.
+ *
+ * The wait-free free(3) implementation was proposed for inclusion into
+ * glibc in 2018 and may eventually become part of glibc:
+ *   https://inbox.sourceware.org/libc-alpha/20180731084936.g4yw6wnvt677miti@dcvr/
+ *
+ * Arenas are thread-local and returned to a global pool upon thread
+ * destruction.  This works well for processes with stable thread counts,
+ * but wastes memory in processes with unstable thread counts.
+ *
+ * On Linux with O_TMPFILE support, all allocations are backed by
+ * a temporary file (in TMPDIR).  This avoids OOM errors on
+ * memory-constrained systems due to the higher-than-normal memory
+ * usage of mwrap itself.
+ *
+ * memalign-family support is ignored (and reimplemented in mwrap_core.h).
+ * dlmalloc's attempts to improve memory-efficiency are prone to fragmentation
+ * if memaligned allocations are repeatedly freed and reallocated while
+ * normal mallocs are happening.  The complexity and work needed to
+ * avoid it does not seem worthwhile nowadays given:
+ * 1) memalign usage isn't common
+ * 2) 64-bit systems with virtually unlimited VA space are common
+ * see https://sourceware.org/bugzilla/show_bug.cgi?id=14581
+ *
+ * realloc and calloc are also reimplemented naively in mwrap_core.h since
+ * the optimizations in dlmalloc made it harder to deal with the accounting
+ * needs of mwrap.  They may be reinstated in the future.
+ */
+#include "check.h"
+#include "gcc.h"
+#include <urcu/rculist.h>
+#include <urcu/wfcqueue.h>
+#include <urcu-bp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/* this is fine on most x86-64, especially with file-backed mmap(2) */
+#define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
+
+#if !defined(MWRAP_FILE_BACKED) && defined(__linux__) && defined(O_TMPFILE)
+# define MWRAP_FILE_BACKED 1
+#else
+# define MWRAP_FILE_BACKED 0
+#endif
+#if MWRAP_FILE_BACKED
+# include <sys/mman.h>
+static void *my_mmap(size_t size)
+{
+	int flags = MAP_PRIVATE;
+	const char *tmpdir = getenv("TMPDIR");
+	int fd;
+	void *ret;
+
+	if (!tmpdir)
+		tmpdir = "/tmp";
+
+	fd = open(tmpdir, O_TMPFILE|O_RDWR|S_IRUSR|S_IWUSR, 0600);
+	if (fd < 0) {
+		flags |= MAP_ANONYMOUS;
+	} else {
+		int t = ftruncate(fd, size); /* sparse file */
+
+		if (t < 0) {
+			flags |= MAP_ANONYMOUS;
+			close(fd);
+			fd = -1;
+		}
+	}
+	ret = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, fd, 0);
+	if (fd >= 0) {
+		int err = errno;
+		close(fd);
+		errno = err;
+	}
+	return ret;
+}
+#endif /* MWRAP_FILE_BACKED */
+
+/* knobs for dlmalloc */
+#define FOOTERS 1 /* required for remote_free_* stuff */
+#define USE_DL_PREFIX
+#define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+#define DLMALLOC_EXPORT static inline
+/* #define NO_MALLOC_STATS 1 */
+#define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+#if MWRAP_FILE_BACKED
+# define MMAP(size) my_mmap(size)
+# define HAVE_MREMAP 0
+#endif
+#include "dlmalloc_c.h"
+#undef ABORT /* conflicts with Perl */
+#undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
+#undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
+
+static MWRAP_TSD mstate ms_tsd;
+
+/* global_mtx protects arenas_active, arenas_unused, and tlskey init */
+static pthread_mutex_t global_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t tlskey;
+static CDS_LIST_HEAD(arenas_active);
+static CDS_LIST_HEAD(arenas_unused);
+
+/* called on pthread exit */
+ATTR_COLD static void mstate_tsd_dtor(void *p)
+{
+	mstate ms = p;
+
+	/*
+	 * In case another destructor calls free (or any allocation function;
+	 * in that case we leak the mstate forever)
+	 */
+	ms_tsd = 0;
+
+	if (!ms)
+		return;
+
+	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+	cds_list_del(&ms->arena_node); /* remove from arenas_active */
+	cds_list_add(&ms->arena_node, &arenas_unused);
+	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+/* see httpd.h */
+static void h1d_atfork_prepare(void);
+static void h1d_atfork_parent(void);
+static void h1d_start(void);
+
+ATTR_COLD static void atfork_prepare(void)
+{
+	h1d_atfork_prepare();
+	call_rcu_before_fork();
+	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+}
+
+ATTR_COLD static void atfork_parent(void)
+{
+	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+	call_rcu_after_fork_parent();
+	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+	h1d_atfork_parent();
+	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */
+
+ATTR_COLD static void atfork_child(void)
+{
+	CHECK(int, 0, pthread_mutex_init(&global_mtx, 0));
+
+	/*
+	 * We should be the only active thread at this point.
+	 * Theoretically the application could register another atfork_child
+	 * handler which runs before this handler AND spawns a thread which
+	 * calls malloc, not much we can do about that, though.
+	 */
+	cds_list_splice(&arenas_active, &arenas_unused);
+	CDS_INIT_LIST_HEAD(&arenas_active);
+	if (ms_tsd) {
+		cds_list_del(&ms_tsd->arena_node); /* remove from unused */
+		cds_list_add(&ms_tsd->arena_node, &arenas_active);
+	}
+	reset_mutexes();
+	call_rcu_after_fork_child();
+	h1d_start();
+}
+
+#if defined(__GLIBC__)
+# define FIRST_TIME 0
+#else /* pthread_mutex_lock calls malloc on FreeBSD */
+	static int once;
+# define FIRST_TIME (uatomic_cmpxchg(&once, 0, 1))
+#endif
+
+static __attribute__((noinline)) mstate mstate_acquire_harder(void)
+{
+	bool do_lock = FIRST_TIME ? false : true;
+
+	if (do_lock)
+		CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+	if (cds_list_empty(&arenas_unused)) {
+		ms_tsd = create_mspace(0, 0);
+		ms_tsd->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+	} else { /* reuse existing */
+		ms_tsd = cds_list_first_entry(&arenas_unused,
+					struct malloc_state, arena_node);
+		cds_list_del(&ms_tsd->arena_node);
+	}
+
+	cds_list_add(&ms_tsd->arena_node, &arenas_active);
+	if (!tlskey)
+		CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
+
+	if (do_lock)
+		CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+	CHECK(int, 0, pthread_setspecific(tlskey, ms_tsd));
+	return ms_tsd;
+}
+
+/* process remote free requests, returns allocations freed */
+static size_t remote_free_step(mstate ms)
+{
+	size_t nfree = 0;
+	struct cds_wfcq_node *node, *n;
+	struct __cds_wfcq_head tmp_hd;
+	struct cds_wfcq_tail tmp_tl;
+	enum cds_wfcq_ret ret;
+
+	___cds_wfcq_init(&tmp_hd, &tmp_tl);
+	ret = __cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
+					&ms->remote_free_head,
+					&ms->remote_free_tail);
+
+	if (ret == CDS_WFCQ_RET_DEST_EMPTY) {
+		__cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
+			++nfree;
+			mspace_free(ms, node);
+		}
+	}
+	mwrap_assert(ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+
+	return nfree;
+}
+
+static void remote_free_finish(mstate ms)
+{
+	while (remote_free_step(ms)) ;
+}
+
+int malloc_trim(size_t pad)
+{
+	mstate m;
+	int ret = 0;
+
+	CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+
+	/* be lazy for active sibling threads, readers are not synchronized */
+	cds_list_for_each_entry(m, &arenas_active, arena_node)
+		uatomic_set(&m->trim_check, 0);
+
+	/* nobody is using idle arenas, clean immediately */
+	cds_list_for_each_entry(m, &arenas_unused, arena_node) {
+		m->trim_check = 0;
+		remote_free_finish(m);
+		ret |= sys_trim(m, pad);
+	}
+
+	CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+
+	m = ms_tsd;
+	if (m) { /* trim our own arena immediately */
+		remote_free_finish(m);
+		ret |= sys_trim(m, pad);
+	}
+	return ret;
+}
+
+static void remote_free_enqueue(mstate fm, void *mem)
+{
+	struct cds_wfcq_node *node = mem;
+
+	cds_wfcq_node_init(node);
+	cds_wfcq_enqueue(&fm->remote_free_head, &fm->remote_free_tail, node);
+	/* other thread calls remote_free_step */
+}
+
+static void *real_malloc(size_t bytes)
+{
+	mstate ms = ms_tsd;
+
+	if (!caa_unlikely(ms))
+		ms = mstate_acquire_harder();
+
+	remote_free_step(ms);
+	return mspace_malloc(ms, bytes);
+}
+
+static void real_free(void *mem)
+{
+	mstate ms = ms_tsd;
+
+	if (mem) {
+		mchunkptr p = mem2chunk(mem);
+		mstate fm = get_mstate_for(p);
+
+		if (fm == ms)
+			mspace_free(fm, mem);
+		else
+			remote_free_enqueue(fm, mem);
+	}
+	if (ms)
+		remote_free_step(ms);
+}
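The core technique in this change is the remote-free pattern: real_free() on a
pointer owned by another thread's arena never touches that arena directly, it
enqueues the dead block onto the owner's wfcqueue, and the owner drains the
queue on its own next malloc/free via remote_free_step().  Below is a minimal
standalone sketch of that pattern, assuming only that liburcu is installed.
The file name demo.c, the helpers remote_thread() and drain(), and the single
global queue are illustrative stand-ins, not part of this commit (which embeds
one queue per dlmalloc mspace); only the wfcqueue calls mirror the code above.

/*
 * demo.c - sketch of the wait-free remote-free pattern, not mwrap itself.
 * Build (assumption): cc -pthread demo.c -lurcu-common
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu/wfcqueue.h>

/* stand-in for one arena's remote_free_head/remote_free_tail pair */
static struct cds_wfcq_head remote_free_head;
static struct cds_wfcq_tail remote_free_tail;

/* non-owner thread: "free" by enqueueing instead of touching the arena */
static void *remote_thread(void *mem)
{
	struct cds_wfcq_node *node = mem; /* node lives inside the dead block */

	cds_wfcq_node_init(node);
	/* wait-free enqueue; no lock shared with the owner thread */
	cds_wfcq_enqueue(&remote_free_head, &remote_free_tail, node);
	return NULL;
}

/* owner thread: splice the queue empty and free what it finds,
 * like remote_free_step() above */
static size_t drain(void)
{
	struct cds_wfcq_node *node, *n;
	struct __cds_wfcq_head tmp_hd;
	struct cds_wfcq_tail tmp_tl;
	size_t nfree = 0;

	__cds_wfcq_init(&tmp_hd, &tmp_tl);
	if (__cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
				&remote_free_head,
				&remote_free_tail) == CDS_WFCQ_RET_DEST_EMPTY) {
		__cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
			free(node); /* node pointer == block pointer */
			++nfree;
		}
	}
	return nfree;
}

int main(void)
{
	pthread_t thr;
	/* block must be big enough to hold the queue node; real malloc
	 * chunks always are, since any chunk can hold a free-list pointer */
	void *mem = malloc(sizeof(struct cds_wfcq_node));

	cds_wfcq_init(&remote_free_head, &remote_free_tail);
	pthread_create(&thr, NULL, remote_thread, mem);
	pthread_join(thr, NULL);
	printf("drained %zu remote free(s)\n", drain());
	return 0;
}

As in real_free() above, the cds_wfcq_node is stored inside the freed block
itself, so the scheme needs no auxiliary allocation.  Enqueueing is wait-free
and safe from any thread; only the consumer side (the splice) needs mutual
exclusion, which mwrap gets for free because each queue is drained solely by
its owning thread.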