Diffstat (limited to 'ext/mwrap/mymalloc.h')
-rw-r--r--  ext/mwrap/mymalloc.h  299
1 file changed, 299 insertions, 0 deletions
diff --git a/ext/mwrap/mymalloc.h b/ext/mwrap/mymalloc.h
new file mode 100644
index 0000000..196ccc0
--- /dev/null
+++ b/ext/mwrap/mymalloc.h
@@ -0,0 +1,299 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-perl@80x24.org>
+ * License: LGPL-2.1+ <https://www.gnu.org/licenses/lgpl-2.1.txt>
+ *
+ * Unlike the rest of the project, I'm happy with this being LGPL-2.1+
+ * since the remote_free_* stuff is meant for glibc, anyways.
+ *
+ * This is a small wrapper on top of dlmalloc (dlmalloc_c.h) which
+ * adds wait-free free(3) multi-threading support to avoid contention
+ * with call_rcu.
+
+ * The wait-free free(3) implementation was proposed for inclusion into
+   glibc in 2018 and may eventually become part of glibc:
+   https://inbox.sourceware.org/libc-alpha/20180731084936.g4yw6wnvt677miti@dcvr/
+
+ * Arenas are thread-local and returned to a global pool upon thread
+   destruction.  This works well for processes with stable thread counts,
+   but wastes memory in processes with unstable thread counts.
+
+ * On Linux with O_TMPFILE support, all allocations are backed by
+   a temporary file (in TMPDIR).  This avoids OOM errors on
+   memory-constrained systems due to the higher-than-normal memory
+   usage of mwrap itself.
+
+ * memalign-family support is ignored (and reimplemented in mwrap_core.h).
+   dlmalloc's attempts to improve memory efficiency are prone to fragmentation
+   if memaligned allocations are repeatedly freed and reallocated while
+   normal mallocs are happening.  The complexity and work needed to
+   avoid that does not seem worthwhile nowadays given:
+   1) memalign usage isn't common
+   2) 64-bit systems with virtually unlimited VA space are common
+   see https://sourceware.org/bugzilla/show_bug.cgi?id=14581
+
+ * realloc and calloc are also reimplemented naively in mwrap_core.h since
+   the dlmalloc optimizations made it harder to deal with the accounting
+   needs of mwrap.  They may be reinstated in the future.
+ */
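+
+/*
+ * Illustrative sketch of the cross-thread free(3) hand-off described
+ * above (see remote_free_enqueue() and remote_free_step() below):
+ *
+ *   thread A (freeing):              thread B (arena owner):
+ *     fm = get_mstate_for(chunk);      remote_free_step(ms_tsd);
+ *     fm != ms_tsd, so                   splices remote_free_{head,tail},
+ *     remote_free_enqueue(fm, mem);      then mspace_free()s each node
+ */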
+#include "check.h"
+#include "gcc.h"
+#include <urcu/rculist.h>
+#include <urcu/wfcqueue.h>
+#include <urcu-bp.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+/* this is fine on most x86-64, especially with file-backed mmap(2) */
+#define DEFAULT_GRANULARITY (64U * 1024U * 1024U)
+
+#if !defined(MWRAP_FILE_BACKED) && defined(__linux__) && defined(O_TMPFILE)
+#        define MWRAP_FILE_BACKED 1
+#else
+#        define MWRAP_FILE_BACKED 0
+#endif
+#if MWRAP_FILE_BACKED
+#        include <sys/mman.h>
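+
+/*
+ * back dlmalloc's mappings with an unlinked temporary file under
+ * $TMPDIR (default: /tmp) when O_TMPFILE works; fall back to
+ * anonymous memory if open(2) or ftruncate(2) fails
+ */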
+static void *my_mmap(size_t size)
+{
+        int flags = MAP_PRIVATE;
+        const char *tmpdir = getenv("TMPDIR");
+        int fd;
+        void *ret;
+
+        if (!tmpdir)
+                tmpdir = "/tmp";
+
+        fd = open(tmpdir, O_TMPFILE|O_RDWR|S_IRUSR|S_IWUSR, 0600);
+        if (fd < 0) {
+                flags |= MAP_ANONYMOUS;
+        } else {
+                int t = ftruncate(fd, size); /* sparse file */
+
+                if (t < 0) {
+                        flags |= MAP_ANONYMOUS;
+                        close(fd);
+                        fd = -1;
+                }
+        }
+        ret = mmap(NULL, size, PROT_READ|PROT_WRITE, flags, fd, 0);
+        if (fd >= 0) {
+                int err = errno;
+                close(fd);
+                errno = err;
+        }
+        return ret;
+}
+#endif /* MWRAP_FILE_BACKED */
+
+/* knobs for dlmalloc */
+#define FOOTERS 1 /* required for remote_free_* stuff */
+#define USE_DL_PREFIX
+#define ONLY_MSPACES 1 /* aka per-thread "arenas" */
+#define DLMALLOC_EXPORT static inline
+/* #define NO_MALLOC_STATS 1 */
+#define USE_LOCKS 0 /* we do our own global_mtx + ms_tsd */
+#if MWRAP_FILE_BACKED
+#        define MMAP(size) my_mmap(size)
+#        define HAVE_MREMAP 0
+#endif
+#include "dlmalloc_c.h"
+#undef ABORT /* conflicts with Perl */
+#undef NOINLINE /* conflicts with Ruby, defined by dlmalloc_c.h */
+#undef HAVE_MREMAP /* conflicts with Ruby 3.2 */
+
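+/* per-thread arena; NULL until the thread's first allocation */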
+static MWRAP_TSD mstate ms_tsd;
+
+/* global_mtx protects arenas_active, arenas_unused, and tlskey init */
+static pthread_mutex_t global_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_key_t tlskey;
+static CDS_LIST_HEAD(arenas_active);
+static CDS_LIST_HEAD(arenas_unused);
+
+/* called on pthread exit */
+ATTR_COLD static void mstate_tsd_dtor(void *p)
+{
+        mstate ms = p;
+
+        /*
+         * Clear the TSD pointer in case another destructor calls free
+         * (if one calls an allocation function instead, we leak the
+         * mstate forever).
+         */
+        ms_tsd = 0;
+
+        if (!ms)
+                return;
+
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        cds_list_del(&ms->arena_node);        /* remove from arenas_active */
+        cds_list_add(&ms->arena_node, &arenas_unused);
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+/* see httpd.h */
+static void h1d_atfork_prepare(void);
+static void h1d_atfork_parent(void);
+static void h1d_start(void);
+
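+/*
+ * fork handlers: hold global_mtx across fork(2) so the child inherits
+ * consistent arena lists; call_rcu and the h1d_* hooks from httpd.h
+ * need their own prepare/parent/child steps
+ */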
+ATTR_COLD static void atfork_prepare(void)
+{
+        h1d_atfork_prepare();
+        call_rcu_before_fork();
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+}
+
+ATTR_COLD static void atfork_parent(void)
+{
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+        call_rcu_after_fork_parent();
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        h1d_atfork_parent();
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+}
+
+ATTR_COLD static void reset_mutexes(void); /* mwrap_core.h */
+
+ATTR_COLD static void atfork_child(void)
+{
+        CHECK(int, 0, pthread_mutex_init(&global_mtx, 0));
+
+        /*
+         * We should be the only active thread at this point.
+         * Theoretically the application could register another atfork_child
+         * handler which runs before this handler AND spawns a thread which
+         * calls malloc; not much we can do about that, though.
+         */
+        cds_list_splice(&arenas_active, &arenas_unused);
+        CDS_INIT_LIST_HEAD(&arenas_active);
+        if (ms_tsd) {
+                cds_list_del(&ms_tsd->arena_node);        /* remove from unused */
+                cds_list_add(&ms_tsd->arena_node, &arenas_active);
+        }
+        reset_mutexes();
+        call_rcu_after_fork_child();
+        h1d_start();
+}
+
+#if defined(__GLIBC__)
+#        define FIRST_TIME 0
+#else /* pthread_mutex_lock calls malloc on FreeBSD */
+        static int once;
+#        define FIRST_TIME (uatomic_cmpxchg(&once, 0, 1))
+#endif
+
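+/*
+ * slow path for a thread's first allocation: reuse an arena from
+ * arenas_unused (or create a new mspace), move it to arenas_active,
+ * and register the TSD destructor so the arena is returned to the
+ * pool at thread exit
+ */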
+static __attribute__((noinline)) mstate mstate_acquire_harder(void)
+{
+        bool do_lock = FIRST_TIME ? false : true;
+        if (do_lock)
+                CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+        if (cds_list_empty(&arenas_unused)) {
+                ms_tsd = create_mspace(0, 0);
+                ms_tsd->seg.sflags = EXTERN_BIT | USE_MMAP_BIT;
+        } else { /* reuse existing */
+                ms_tsd = cds_list_first_entry(&arenas_unused,
+                                              struct malloc_state, arena_node);
+                cds_list_del(&ms_tsd->arena_node);
+        }
+
+        cds_list_add(&ms_tsd->arena_node, &arenas_active);
+        if (!tlskey)
+                CHECK(int, 0, pthread_key_create(&tlskey, mstate_tsd_dtor));
+
+        if (do_lock)
+                CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+        CHECK(int, 0, pthread_setspecific(tlskey, ms_tsd));
+        return ms_tsd;
+}
+
+/* process remote free requests, returns allocations freed */
+static size_t remote_free_step(mstate ms)
+{
+        size_t nfree = 0;
+        struct cds_wfcq_node *node, *n;
+        struct __cds_wfcq_head tmp_hd;
+        struct cds_wfcq_tail tmp_tl;
+        enum cds_wfcq_ret ret;
+
+        ___cds_wfcq_init(&tmp_hd, &tmp_tl);
+        ret = __cds_wfcq_splice_nonblocking(&tmp_hd, &tmp_tl,
+                                            &ms->remote_free_head,
+                                            &ms->remote_free_tail);
+
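+        /*
+         * tmp_hd was just initialized, so a successful splice reports
+         * CDS_WFCQ_RET_DEST_EMPTY; the spliced nodes are now private
+         * to this thread and safe to pass to mspace_free
+         */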
+        if (ret == CDS_WFCQ_RET_DEST_EMPTY) {
+                __cds_wfcq_for_each_blocking_safe(&tmp_hd, &tmp_tl, node, n) {
+                        ++nfree;
+                        mspace_free(ms, node);
+                }
+        }
+        mwrap_assert(ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+
+        return nfree;
+}
+
+static void remote_free_finish(mstate ms)
+{
+        while (remote_free_step(ms)) ;
+}
+
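+/*
+ * same signature as glibc malloc_trim(3): trim unused arenas (and our
+ * own) immediately, and clear trim_check on active sibling arenas so
+ * they trim themselves lazily
+ */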
+int malloc_trim(size_t pad)
+{
+        mstate m;
+        int ret = 0;
+
+        CHECK(int, 0, pthread_mutex_lock(&global_mtx));
+
+        /* be lazy for active sibling threads; readers are not synchronized */
+        cds_list_for_each_entry(m, &arenas_active, arena_node)
+                uatomic_set(&m->trim_check, 0);
+
+        /* nobody is using idle arenas, clean immediately */
+        cds_list_for_each_entry(m, &arenas_unused, arena_node) {
+                m->trim_check = 0;
+                remote_free_finish(m);
+                ret |= sys_trim(m, pad);
+        }
+
+        CHECK(int, 0, pthread_mutex_unlock(&global_mtx));
+
+        m = ms_tsd;
+        if (m) { /* trim our own arena immediately */
+                remote_free_finish(m);
+                ret |= sys_trim(m, pad);
+        }
+        return ret;
+}
+
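+/*
+ * hand a chunk freed by a foreign thread to the owning arena via its
+ * wait-free queue; the freed memory itself is reused as the queue node
+ */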
+static void remote_free_enqueue(mstate fm, void *mem)
+{
+        struct cds_wfcq_node *node = mem;
+
+        cds_wfcq_node_init(node);
+        cds_wfcq_enqueue(&fm->remote_free_head, &fm->remote_free_tail, node);
+        /* other thread calls remote_free_step */
+}
+
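+/* allocate from the thread-local arena, processing pending remote frees first */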
+static void *real_malloc(size_t bytes)
+{
+        mstate ms = ms_tsd;
+        if (caa_unlikely(!ms))
+                ms = mstate_acquire_harder();
+
+        remote_free_step(ms);
+        return mspace_malloc(ms, bytes);
+}
+
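+/*
+ * free locally if the chunk belongs to our own arena, otherwise hand it
+ * off to the owning thread; also process any frees other threads have
+ * queued for us
+ */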
+static void real_free(void *mem)
+{
+        mstate ms = ms_tsd;
+        if (mem) {
+                mchunkptr p = mem2chunk(mem);
+                mstate fm = get_mstate_for(p);
+                if (fm == ms)
+                        mspace_free(fm, mem);
+                else
+                        remote_free_enqueue(fm, mem);
+        }
+        if (ms)
+                remote_free_step(ms);
+}