about summary refs log tree commit homepage
path: root/ext/mwrap/mwrap.c
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2018-07-02 02:33:57 +0000
committerEric Wong <e@80x24.org>2018-07-02 06:08:07 +0000
commit50c83d99b7875c45fa72967eaf636f74db931665 (patch)
treef938e645e7e6058eca4e5bf7d6122d70520e76a6 /ext/mwrap/mwrap.c
parentc0a29c0f7c829737782c18a0349b8655b3c0388d (diff)
downloadmwrap-50c83d99b7875c45fa72967eaf636f74db931665.tar.gz
Diffstat (limited to 'ext/mwrap/mwrap.c')
-rw-r--r--ext/mwrap/mwrap.c350
1 files changed, 270 insertions, 80 deletions
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
index a302d8f..22906bf 100644
--- a/ext/mwrap/mwrap.c
+++ b/ext/mwrap/mwrap.c
@@ -2,11 +2,11 @@
  * Copyright (C) 2018 mwrap hackers <mwrap-public@80x24.org>
  * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
  */
+#define _LGPL_SOURCE /* allows URCU to inline some stuff */
 #include <ruby/ruby.h>
 #include <ruby/thread.h>
-#include <ruby/util.h>
-#include <ruby/st.h>
 #include <ruby/io.h>
+#include <execinfo.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -16,38 +16,81 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <urcu-bp.h>
+#include <urcu/rculfhash.h>
+#include "jhash.h"
 
+static ID id_uminus;
 const char *rb_source_location_cstr(int *line); /* requires 2.6.0dev */
 static int *(*has_gvl_p)(void);
+#ifdef __FreeBSD__
+void *__malloc(size_t);
+void *__calloc(size_t, size_t);
+void *__realloc(void *, size_t);
+static void *(*real_malloc)(size_t) = __malloc;
+static void *(*real_calloc)(size_t, size_t) = __calloc;
+static void *(*real_realloc)(void *, size_t) = __realloc;
+#  define RETURN_IF_NOT_READY() do {} while (0) /* nothing */
+#else
+static int ready;
 static void *(*real_malloc)(size_t);
 static void *(*real_calloc)(size_t, size_t);
 static void *(*real_realloc)(void *, size_t);
 
 /*
- * rb_source_location_cstr relies on GET_EC(), and it's possible
- * to have a native thread but no EC during the early and late
- * (teardown) phases of the Ruby process
- */
-static void **ec_loc;
-
-/*
  * we need to fake an OOM condition while dlsym is running,
  * as that calls calloc under glibc, but we don't have the
  * symbol for the jemalloc calloc, yet
  */
-#  define RETURN_IF_NOT_READY(x) do { \
-        if (!x) { \
+#  define RETURN_IF_NOT_READY() do { \
+        if (!ready) { \
                 errno = ENOMEM; \
                 return NULL; \
         } \
 } while (0)
 
+#endif /* !FreeBSD */
+
+/*
+ * rb_source_location_cstr relies on GET_EC(), and it's possible
+ * to have a native thread but no EC during the early and late
+ * (teardown) phases of the Ruby process
+ */
+static void **ec_loc;
+
+static struct cds_lfht *totals;
+
+static struct cds_lfht *
+lfht_new(void)
+{
+        return cds_lfht_new(16384, 1, 0, CDS_LFHT_AUTO_RESIZE, 0);
+}
+
 __attribute__((constructor)) static void resolve_malloc(void)
 {
-        real_calloc = dlsym(RTLD_NEXT, "calloc");
+        int err;
+
+#ifndef __FreeBSD__
         real_malloc = dlsym(RTLD_NEXT, "malloc");
+        real_calloc = dlsym(RTLD_NEXT, "calloc");
         real_realloc = dlsym(RTLD_NEXT, "realloc");
-        assert(real_calloc && real_malloc && real_realloc);
+        if (!real_calloc || !real_malloc || !real_realloc) {
+                fprintf(stderr, "missing calloc/malloc/realloc %p %p %p\n",
+                        real_calloc, real_malloc, real_realloc);
+                _exit(1);
+        }
+        ready = 1;
+#endif
+
+        totals = lfht_new();
+        if (!totals)
+                fprintf(stderr, "failed to allocate totals table\n");
+
+        err = pthread_atfork(call_rcu_before_fork,
+                                call_rcu_after_fork_parent,
+                                call_rcu_after_fork_child);
+        if (err)
+                fprintf(stderr, "pthread_atfork failed: %s\n", strerror(err));
 
         has_gvl_p = dlsym(RTLD_DEFAULT, "ruby_thread_has_gvl_p");
 
@@ -59,18 +102,19 @@ __attribute__((constructor)) static void resolve_malloc(void)
 }
 
 #ifndef HAVE_MEMPCPY
-#  define mempcpy(dst,src,n) ((char *)memcpy((dst),(src),(n)) + n)
+static void *
+my_mempcpy(void *dest, const void *src, size_t n)
+{
+        return (char *)memcpy(dest, src, n) + n;
+}
+#define mempcpy(dst,src,n) my_mempcpy(dst,src,n)
 #endif
 
 /* stolen from glibc: */
 #define RETURN_ADDRESS(nr) \
-  __builtin_extract_return_addr(__builtin_return_address(nr))
+  (uintptr_t)(__builtin_extract_return_addr(__builtin_return_address(nr)))
 
 static __thread size_t locating;
-static st_table *stats;        /* rb_source_location => size */
-
-/* bytes allocated outside of GVL */
-static size_t unknown_bytes;
 
 #define INT2STR_MAX (sizeof(int) == 4 ? 10 : 19)
 static char *int2str(int num, char *dst, size_t * size)
@@ -99,58 +143,110 @@ static char *int2str(int num, char *dst, size_t * size)
         return NULL;
 }
 
-static int
-update_stat(st_data_t *k, st_data_t *v, st_data_t arg, int existing)
+static int has_ec_p(void)
 {
-        size_t *total = (size_t *) v;
-        size_t size = arg;
+        return (ec_loc && *ec_loc);
+}
 
-        if (existing) {
-                *total += size;
-        } else {
-                char *key = *(char **)k;
-                *k = (st_data_t)ruby_strdup(key);
-                *total = size;
-        }
-        return ST_CONTINUE;
+struct src_loc {
+        struct rcu_head rcu_head;
+        size_t calls;
+        size_t total;
+        struct cds_lfht_node hnode;
+        uint32_t hval;
+        uint32_t kcapa;
+        char k[];
+};
+
+static inline int loc_eq(struct cds_lfht_node *node, const void *key)
+{
+        const struct src_loc *existing;
+        const struct src_loc *k = key;
+
+        existing = caa_container_of(node, struct src_loc, hnode);
+
+        return (k->hval == existing->hval &&
+                k->kcapa == existing->kcapa &&
+                memcmp(k->k, existing->k, k->kcapa == UINT32_MAX ?
+                       sizeof(uintptr_t) : k->kcapa) == 0);
 }
 
-static int has_ec_p(void)
+static void totals_add(struct src_loc *k)
 {
-        return (ec_loc && *ec_loc);
+        struct cds_lfht_iter iter;
+        struct cds_lfht_node *cur;
+        struct src_loc *l;
+        struct cds_lfht *t;
+
+
+again:
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        if (!t) goto out_unlock;
+        cds_lfht_lookup(t, k->hval, loc_eq, k, &iter);
+        cur = cds_lfht_iter_get_node(&iter);
+        if (cur) {
+                l = caa_container_of(cur, struct src_loc, hnode);
+                uatomic_add(&l->total, k->total);
+                uatomic_add(&l->calls, 1);
+        } else {
+                size_t n = k->kcapa == UINT32_MAX ? sizeof(uintptr_t) : k->kcapa;
+                l = malloc(sizeof(*l) + n);
+                if (!l) goto out_unlock;
+
+                memcpy(l, k, sizeof(*l) + n);
+                l->calls = 1;
+                cur = cds_lfht_add_unique(t, k->hval, loc_eq, l, &l->hnode);
+                if (cur != &l->hnode) { /* lost race */
+                        rcu_read_unlock();
+                        free(l);
+                        goto again;
+                }
+        }
+out_unlock:
+        rcu_read_unlock();
 }
 
-static void update_stats(size_t size, const void *caller)
+static void update_stats(size_t size, uintptr_t caller)
 {
+        struct src_loc *k;
+        static const size_t xlen = sizeof(caller);
+        char *dst;
+
         if (locating++) goto out; /* do not recurse into another *alloc */
 
         if (has_gvl_p && has_gvl_p() && has_ec_p()) {
                 int line;
-                size_t len;
-                char *key, *dst;
                 const char *ptr = rb_source_location_cstr(&line);
+                size_t len;
                 size_t int_size = INT2STR_MAX;
 
-                if (!stats) stats = st_init_strtable_with_size(16384);
                 if (!ptr) goto unknown;
 
                 /* avoid vsnprintf or anything which could call malloc here: */
                 len = strlen(ptr);
-                key = alloca(len + 1 + int_size + 1);
-                dst = mempcpy(key, ptr, len);
+                k = alloca(sizeof(*k) + len + 1 + int_size + 1);
+                k->total = size;
+                dst = mempcpy(k->k, ptr, len);
                 *dst++ = ':';
                 dst = int2str(line, dst, &int_size);
                 if (dst) {
                         *dst = 0;        /* terminate string */
-                        st_update(stats, (st_data_t)key,
-                                   update_stat, (st_data_t)size);
+                        k->kcapa = (uint32_t)(dst - k->k + 1);
+                        k->hval = jhash(k->k, k->kcapa, 0xdeadbeef);
+                        totals_add(k);
                 } else {
                         rb_bug("bad math making key from location %s:%d\n",
                                 ptr, line);
                 }
-        } else { /* TODO: do something with caller */
+        } else {
 unknown:
-                __sync_add_and_fetch(&unknown_bytes, size);
+                k = alloca(sizeof(*k) + xlen);
+                k->total = size;
+                memcpy(k->k, &caller, xlen);
+                k->kcapa = UINT32_MAX;
+                k->hval = jhash(k->k, xlen, 0xdeadbeef);
+                totals_add(k);
         }
 out:
         --locating;
@@ -162,14 +258,14 @@ out:
  */
 void *malloc(size_t size)
 {
-        RETURN_IF_NOT_READY(real_malloc);
+        RETURN_IF_NOT_READY();
         update_stats(size, RETURN_ADDRESS(0));
         return real_malloc(size);
 }
 
 void *calloc(size_t nmemb, size_t size)
 {
-        RETURN_IF_NOT_READY(real_calloc);
+        RETURN_IF_NOT_READY();
         /* ruby_xcalloc already does overflow checking */
         update_stats(nmemb * size, RETURN_ADDRESS(0));
         return real_calloc(nmemb, size);
@@ -177,7 +273,7 @@ void *calloc(size_t nmemb, size_t size)
 
 void *realloc(void *ptr, size_t size)
 {
-        RETURN_IF_NOT_READY(real_realloc);
+        RETURN_IF_NOT_READY();
         update_stats(size, RETURN_ADDRESS(0));
         return real_realloc(ptr, size);
 }
@@ -187,32 +283,30 @@ struct dump_arg {
         size_t min;
 };
 
-static int dump_i(const char *key, size_t val, struct dump_arg *a)
+static void dump_to_file(struct dump_arg *a)
 {
-        if (val > a->min) {
-                fprintf(a->fp, "%20" PRIuSIZE " %s\n", val, key);
-        }
-
-        return ST_CONTINUE;
-}
-
-static VALUE dump_to_file(VALUE x)
-{
-        struct dump_arg *a = (struct dump_arg *)x;
-
-        if (stats) st_foreach(stats, dump_i, (st_data_t) a);
-        if (unknown_bytes > a->min) {
-                fprintf(a->fp, "%20" PRIuSIZE " (unknown[%d])\n",
-                        unknown_bytes, getpid());
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+        struct cds_lfht *t;
+
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        if (t) {
+                cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                        const void *p = l->k;
+                        char **s = 0;
+                        if (l->total <= a->min) continue;
+
+                        if (l->kcapa == UINT32_MAX) {
+                                s = backtrace_symbols(p, 1);
+                                p = s[0];
+                        }
+                        fprintf(a->fp, "%16zu %12zu %s\n",
+                                l->total, l->calls, (const char *)p);
+                        if (s) free(s);
+                }
         }
-
-        return Qnil;
-}
-
-static VALUE dump_ensure(VALUE ignored)
-{
-        --locating;
-        return Qfalse;
+        rcu_read_unlock();
 }
 
 static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
@@ -224,6 +318,7 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
         rb_scan_args(argc, argv, "02", &io, &min);
 
         if (NIL_P(io))
+                /* library may be linked w/o Ruby */
                 io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
 
         a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
@@ -232,30 +327,124 @@ static VALUE mwrap_dump(int argc, VALUE * argv, VALUE mod)
         a.fp = rb_io_stdio_file(fptr);
 
         ++locating;
-        return rb_ensure(dump_to_file, (VALUE) & a, dump_ensure, Qfalse);
+        dump_to_file(&a);
+        --locating;
+        return Qnil;
 }
 
-static int clear_i(char *key, size_t val, void *ignored)
+static void
+free_src_loc(struct rcu_head *head)
 {
-        xfree(key);
-        return ST_DELETE;
+        struct src_loc *l = caa_container_of(head, struct src_loc, rcu_head);
+        free(l);
 }
 
 static VALUE mwrap_clear(VALUE mod)
 {
-        unknown_bytes = 0;
-        st_foreach(stats, clear_i, 0);
+        struct cds_lfht *new, *old;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        new = lfht_new();
+        rcu_read_lock();
+        old = rcu_dereference(totals);
+        rcu_assign_pointer(totals, new);
+        cds_lfht_for_each_entry(old, &iter, l, hnode) {
+                cds_lfht_del(old, &l->hnode);
+                call_rcu(&l->rcu_head, free_src_loc);
+        }
+        rcu_read_unlock();
+
+        synchronize_rcu(); /* ensure totals points to new */
+
+        cds_lfht_destroy(old, NULL);
+
         return Qnil;
 }
 
+static VALUE mwrap_reset(VALUE mod)
+{
+        struct cds_lfht *t;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        rcu_read_lock();
+        t = rcu_dereference(totals);
+        cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                uatomic_set(&l->total, 0);
+                uatomic_set(&l->calls, 0);
+        }
+        rcu_read_unlock();
+
+        return Qnil;
+}
+
+static VALUE dump_ensure(VALUE ignored)
+{
+        rcu_read_unlock();
+        --locating;
+        return Qfalse;
+}
+
+static VALUE dump_each_rcu(VALUE x)
+{
+        struct dump_arg *a = (struct dump_arg *)x;
+        struct cds_lfht *t;
+        struct cds_lfht_iter iter;
+        struct src_loc *l;
+
+        t = rcu_dereference(totals);
+        if (t) {
+                cds_lfht_for_each_entry(t, &iter, l, hnode) {
+                        VALUE v[3];
+                        if (l->total <= a->min) continue;
+
+                        if (l->kcapa == UINT32_MAX) {
+                                char **s = backtrace_symbols((void *)l->k, 1);
+                                v[1] = rb_str_new_cstr(s[0]);
+                                free(s);
+                        }
+                        else {
+                                v[1] = rb_str_new(l->k, l->kcapa - 1);
+                        }
+                        v[0] = rb_funcall(v[1], id_uminus, 0);
+
+                        if (!OBJ_FROZEN_RAW(v[1]))
+                                rb_str_resize(v[1], 0);
+
+                        v[1] = SIZET2NUM(l->total);
+                        v[2] = SIZET2NUM(l->calls);
+
+                        rb_yield_values2(3, v);
+                        assert(rcu_read_ongoing());
+                }
+        }
+        return Qnil;
+}
+
+static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
+{
+        VALUE min;
+        struct dump_arg a;
+
+        rb_scan_args(argc, argv, "01", &min);
+        a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+
+        ++locating;
+        rcu_read_lock();
+
+        return rb_ensure(dump_each_rcu, (VALUE)&a, dump_ensure, 0);
+}
+
 void Init_mwrap(void)
 {
         VALUE mod = rb_define_module("Mwrap");
-
-        if (!stats) stats = st_init_strtable_with_size(16384);
+        id_uminus = rb_intern("-@");
 
         rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
         rb_define_singleton_method(mod, "clear", mwrap_clear, 0);
+        rb_define_singleton_method(mod, "reset", mwrap_reset, 0);
+        rb_define_singleton_method(mod, "each", mwrap_each, -1);
 }
 
 /* rb_cloexec_open isn't usable by non-Ruby processes */
@@ -283,7 +472,8 @@ static void mwrap_dump_destructor(void)
                 char *end = strchr(dump_path, ',');
                 if (end) {
                         char *tmp = alloca(end - dump_path + 1);
-                        *((char *)mempcpy(tmp, dump_path, end - dump_path)) = 0;
+                        end = mempcpy(tmp, dump_path, end - dump_path);
+                        *end = 0;
                         dump_path = tmp;
                 }
                 dump_fd = open(dump_path, O_CLOEXEC|O_WRONLY|O_APPEND|O_CREAT,
@@ -319,7 +509,7 @@ static void mwrap_dump_destructor(void)
                 }
                 /* we'll leak some memory here, but this is a destructor */
         }
-        dump_to_file((VALUE)&a);
+        dump_to_file(&a);
 out:
     --locating;
 }