about summary refs log tree commit homepage
path: root/ext/mwrap/mwrap.c
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2023-01-08 05:03:26 +0000
committerEric Wong <e@80x24.org>2023-01-08 05:05:01 +0000
commitb5ab9be6686aa778a4cfd7622c598736b9c42321 (patch)
treea967297606a1f91f8359e287d5a85ad018e185e3 /ext/mwrap/mwrap.c
parent4356beb8237a92b3902b17f55cfe93d347b593d5 (diff)
parent2c25edb01139365f4754985c1e3494765dd1e5a7 (diff)
downloadmwrap-b5ab9be6686aa778a4cfd7622c598736b9c42321.tar.gz
This contains many changes from https://80x24.org/mwrap-perl.git
commit

* Built-in RCU-friendly version of dlmalloc, no more fragile dlsym(3m)
  resolution of malloc-family functions in the constructor

* Allocations are now backed by O_TMPFILE on $TMPDIR on modern Linux.
  This was added because mwrap increases memory usage greatly, and I
  needed to use it on a system where I needed more VM space but lacked
  the ability to add swap.

* Configurable C backtrace level via MWRAP=bt:$DEPTH where $DEPTH
  is a non-negative integer.  Be careful about increasing it, even
  a depth of 3-4 can be orders-of-magnitude more expensive in
  time and space.  This can be changed dynamically at runtime via
  local HTTP (see below).

* Embedded per-process local-socket-only HTTP server obsoletes
  MwrapRack when combined with mwrap-rproxy from the Perl dist
  (set `MWRAP=socket_dir:/dir/of/sockets')
  See https://80x24.org/mwrap-perl/20221210015518.272576-4-e@80x24.org/
  for more info.

  It now supports downloading CSV (suitable for importing into sqlite 3.32.0+)

* License switched to GPL-3+ to be compatible with GNU binutils
  since we may take code from addr2line in the future.

* libxxhash supported if XXH3_64bits is available.
Diffstat (limited to 'ext/mwrap/mwrap.c')
-rw-r--r--ext/mwrap/mwrap.c396
1 files changed, 396 insertions, 0 deletions
diff --git a/ext/mwrap/mwrap.c b/ext/mwrap/mwrap.c
new file mode 100644
index 0000000..d88fee6
--- /dev/null
+++ b/ext/mwrap/mwrap.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C) mwrap hackers <mwrap-public@80x24.org>
+ * License: GPL-2.0+ <https://www.gnu.org/licenses/gpl-2.0.txt>
+ */
+#define MWRAP_RUBY 1
+#include "mwrap_core.h"
+
+/* Method ID for "-@"; interned in Init_mwrap, used by location_string */
+static ID id_uminus;
+/*
+ * Ruby symbols declared weak: mwrap_dump notes that this library may be
+ * linked w/o Ruby, so presumably these must not be hard link-time
+ * requirements -- TODO confirm against the build of mwrap_core.h.
+ */
+extern VALUE __attribute__((weak)) rb_cObject;
+extern VALUE __attribute__((weak)) rb_eTypeError;
+extern VALUE __attribute__((weak)) rb_yield(VALUE);
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.dump([[io] [, min]]) -> nil
+ *
+ * Dumps the current totals to +io+ which must be an IO object
+ * (StringIO and similar are not supported).  Total sizes smaller
+ * than or equal to +min+ are skipped.  When +io+ is omitted or nil,
+ * rb_stderr is used; when +min+ is omitted or nil, it is 0.
+ *
+ * The output is space-delimited by 3 columns:
+ *
+ * total_size      call_count      location
+ */
+static VALUE mwrap_dump(int argc, VALUE *argv, VALUE mod)
+{
+        VALUE io, min;
+        struct dump_arg a;
+        rb_io_t *fptr;
+
+        /* "02": no required arguments, up to two optional ones */
+        rb_scan_args(argc, argv, "02", &io, &min);
+
+        if (NIL_P(io))
+                /* library may be linked w/o Ruby */
+                io = *((VALUE *)dlsym(RTLD_DEFAULT, "rb_stderr"));
+
+        a.min = NIL_P(min) ? 0 : NUM2SIZET(min);
+        /* resolve +io+ to the stdio FILE of its write side */
+        io = rb_io_get_io(io);
+        io = rb_io_get_write_io(io);
+        GetOpenFile(io, fptr);
+        a.fp = rb_io_stdio_file(fptr);
+
+        /* dump_to_file runs without the GVL; no unblock function is given */
+        rb_thread_call_without_gvl((void *(*)(void *))dump_to_file, &a, 0, 0);
+        RB_GC_GUARD(io); /* keep io referenced until the dump has finished */
+        return Qnil;
+}
+
+/*
+ * Callback handed to rb_thread_call_without_gvl by reset_m: delegates to
+ * mwrap_reset and always returns NULL.  The argument is unused.
+ * The whole operation is not remotely atomic...
+ */
+static void *totals_reset(void *unused)
+{
+        (void)unused;
+        mwrap_reset();
+
+        return NULL;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.reset -> nil
+ *
+ * Resets the total tables by zeroing all counters, which resets all
+ * statistics.  This is not an atomic operation, as other threads
+ * (outside of the GVL) may increment counters while it runs.
+ */
+static VALUE reset_m(VALUE mod)
+{
+        /* no argument and no unblock function; the result is not needed */
+        (void)rb_thread_call_without_gvl(totals_reset, NULL, NULL, NULL);
+
+        return Qnil;
+}
+
+/*
+ * rb_ensure cleanup shared by mwrap_each and src_loc_each: leaves the RCU
+ * read-side section and undoes the caller's ++locating, whether or not
+ * the yielded block raised.  The argument is unused.
+ */
+static VALUE rcu_unlock_ensure(VALUE ignored)
+{
+        (void)ignored;
+        rcu_read_unlock();
+        locating--;
+
+        return Qfalse;
+}
+
+/*
+ * Builds the printable name of +l+ as a Ruby String.
+ *
+ * When l->f is set, the result starts with "<fn>:<lineno>", where the
+ * line number is written as "-" if lineno equals U24_MAX.  When
+ * l->bt_len is non-zero and bt_syms returns non-NULL, the backtrace
+ * entries follow, one per line.  The String is passed through the "-@"
+ * method (id_uminus) and that result is returned.
+ */
+static VALUE location_string(const struct src_loc *l)
+{
+        VALUE tmp = rb_str_new(NULL, 0);
+
+        if (l->f) {
+                rb_str_cat(tmp, l->f->fn, l->f->fn_len);
+                if (l->lineno == U24_MAX)
+                        rb_str_cat_cstr(tmp, ":-");
+                else
+                        rb_str_catf(tmp, ":%u", l->lineno);
+        }
+        if (l->bt_len) {
+                /* NOTE(review): AUTO_FREE presumably frees s at scope exit -- confirm */
+                AUTO_FREE char **s = bt_syms(l->bt, l->bt_len);
+
+                if (s) {
+                        /* newline only needed after a file:line prefix */
+                        if (l->f)
+                                rb_str_cat_cstr(tmp, "\n");
+                        rb_str_cat_cstr(tmp, s[0]);
+                        for (uint32_t i = 1; i < l->bt_len; ++i)
+                                rb_str_catf(tmp, "\n%s", s[i]);
+                }
+        }
+
+        /* deduplicate and try to free up some memory */
+        VALUE ret = rb_funcall(tmp, id_uminus, 0);
+        /* tmp is left alone if it ended up frozen (presumably it is ret) */
+        if (!OBJ_FROZEN_RAW(tmp))
+                rb_str_resize(tmp, 0);
+
+        return ret;
+}
+
+/*
+ * rb_ensure body for mwrap_each; +x+ is a struct dump_arg pointer cast to
+ * VALUE.  Walks the shared totals table and, for every entry whose total
+ * is greater than the requested minimum, yields six values: the location
+ * String, total, allocations, frees, age_total and max_lifespan.
+ * mwrap_each takes the RCU read lock before invoking this.
+ */
+static VALUE dump_each_rcu(VALUE x)
+{
+        const struct dump_arg *arg = (struct dump_arg *)x;
+        struct cds_lfht *tbl = CMM_LOAD_SHARED(totals);
+        struct cds_lfht_iter it;
+        struct src_loc *loc;
+
+        cds_lfht_for_each_entry(tbl, &it, loc, hnode) {
+                if (loc->total > arg->min) {
+                        VALUE row[6];
+
+                        row[0] = location_string(loc);
+                        row[1] = SIZET2NUM(loc->total);
+                        row[2] = SIZET2NUM(loc->allocations);
+                        row[3] = SIZET2NUM(loc->frees);
+                        row[4] = SIZET2NUM(loc->age_total);
+                        row[5] = SIZET2NUM(loc->max_lifespan);
+
+                        rb_yield_values2(6, row);
+                        /* the block must return with the read lock still held */
+                        assert(rcu_read_ongoing());
+                }
+        }
+
+        return Qnil;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.each([min]) do |location,total,allocations,frees,age_total,max_lifespan|
+ *          ...
+ *        end
+ *
+ * Yields each entry of the table to a caller-supplied block.
+ * +min+ may be specified to filter out entries whose +total+ bytes
+ * are equal to or smaller than the supplied minimum.
+ */
+static VALUE mwrap_each(int argc, VALUE * argv, VALUE mod)
+{
+        struct dump_arg arg;
+        VALUE min;
+
+        rb_scan_args(argc, argv, "01", &min);
+        if (NIL_P(min))
+                arg.min = 0;
+        else
+                arg.min = NUM2SIZET(min);
+
+        /* both steps are undone by rcu_unlock_ensure, even on exceptions */
+        locating++;
+        rcu_read_lock();
+
+        return rb_ensure(dump_each_rcu, (VALUE)&arg, rcu_unlock_ensure, 0);
+}
+
+/* Size callback of src_loc_type: always the size of one struct src_loc */
+static size_t src_loc_memsize(const void *p)
+{
+        (void)p;
+
+        return sizeof(struct src_loc);
+}
+
+/*
+ * Typed-data descriptor used by mwrap_aref and src_loc_of for
+ * Mwrap::SourceLocation objects.  The mark and free slots are both 0,
+ * so Ruby neither marks through nor frees the wrapped struct src_loc;
+ * only the size callback is provided.
+ */
+static const rb_data_type_t src_loc_type = {
+        "source_location",
+        /* no marking, no freeing */
+        { 0, 0, src_loc_memsize, /* reserved */ },
+        /* parent, data, [ flags ] */
+};
+
+/* The Mwrap::SourceLocation class; assigned in Init_mwrap */
+static VALUE cSrcLoc;
+
+/*
+ * call-seq:
+ *        Mwrap[location] -> Mwrap::SourceLocation
+ *
+ * Looks up the Mwrap::SourceLocation for the +location+ String and
+ * returns it, or nil when there is none.  +location+ is either a Ruby
+ * source location in path:line form (e.g. "/path/to/foo.rb:5") or the
+ * square-bracketed hexadecimal memory address emitted by Mwrap.dump
+ * (e.g. "[0xdeadbeef]")
+ */
+static VALUE mwrap_aref(VALUE mod, VALUE loc)
+{
+        const char *name = StringValueCStr(loc);
+        long name_len = RSTRING_LEN(loc);
+        struct src_loc *found;
+
+        assert(name_len >= 0);
+        found = mwrap_get(name, (size_t)name_len);
+        if (!found)
+                return Qnil;
+
+        return TypedData_Wrap_Struct(cSrcLoc, &src_loc_type, found);
+}
+
+/*
+ * rb_ensure body for src_loc_each; +p+ is a struct src_loc pointer cast
+ * to VALUE.  Walks the location's allocs list and yields the size and
+ * generation of every entry whose size is non-zero.
+ */
+static VALUE src_loc_each_i(VALUE p)
+{
+        struct src_loc *loc = (struct src_loc *)p;
+        struct alloc_hdr *hdr;
+
+        cds_list_for_each_entry_rcu(hdr, &loc->allocs, anode) {
+                VALUE pair[2];
+                size_t gen = uatomic_read(&hdr->as.live.gen);
+                size_t size = uatomic_read(&hdr->size);
+
+                if (!size)
+                        continue;
+
+                pair[0] = SIZET2NUM(size);
+                pair[1] = SIZET2NUM(gen);
+                rb_yield_values2(2, pair);
+        }
+
+        return Qfalse;
+}
+
+/*
+ * Extracts the struct src_loc wrapped by a Mwrap::SourceLocation object,
+ * type-checked against src_loc_type; asserts the pointer is non-NULL.
+ */
+static struct src_loc *src_loc_of(VALUE self)
+{
+        struct src_loc *unwrapped;
+
+        TypedData_Get_Struct(self, struct src_loc, &src_loc_type, unwrapped);
+        assert(unwrapped);
+
+        return unwrapped;
+}
+
+/*
+ * call-seq:
+ *        loc = Mwrap[location]
+ *        loc.each { |size,generation| ... }
+ *
+ * Iterates through live allocations for a given Mwrap::SourceLocation,
+ * yielding the +size+ (in bytes) and +generation+ of each allocation.
+ * The +generation+ is the value of the GC.count method at the time
+ * the allocation was made.  Returns +self+.
+ *
+ * This functionality is only available in mwrap 2.0.0+
+ */
+static VALUE src_loc_each(VALUE self)
+{
+        struct src_loc *l = src_loc_of(self);
+
+        /*
+         * locating may legitimately be non-zero on entry: Mwrap.quiet and
+         * Mwrap.each both yield to Ruby code with it incremented, and that
+         * code may call this method.  Asserting that it is zero would abort
+         * such callers, so just nest; rcu_unlock_ensure undoes exactly the
+         * increment and read lock taken here.
+         */
+        ++locating;
+        rcu_read_lock();
+        rb_ensure(src_loc_each_i, (VALUE)l, rcu_unlock_ensure, 0);
+        return self;
+}
+
+/*
+ * The mean lifespan (in GC generations) of allocations made from this
+ * location.  This does not account for live allocations.  When nothing
+ * from this location has been freed yet, HUGE_VAL is returned.
+ */
+static VALUE src_loc_mean_lifespan(VALUE self)
+{
+        struct src_loc *loc = src_loc_of(self);
+        size_t n_freed = uatomic_read(&loc->frees);
+        size_t age_sum = uatomic_read(&loc->age_total);
+        double mean = HUGE_VAL;
+
+        if (n_freed)
+                mean = (double)age_sum / (double)n_freed;
+
+        return DBL2NUM(mean);
+}
+
+/* The number of frees made from this location */
+static VALUE src_loc_frees(VALUE self)
+{
+        struct src_loc *loc = src_loc_of(self);
+
+        return SIZET2NUM(uatomic_read(&loc->frees));
+}
+
+/* The number of allocations made from this location */
+static VALUE src_loc_allocations(VALUE self)
+{
+        struct src_loc *loc = src_loc_of(self);
+
+        return SIZET2NUM(uatomic_read(&loc->allocations));
+}
+
+/* The total number of bytes allocated from this location */
+static VALUE src_loc_total(VALUE self)
+{
+        struct src_loc *loc = src_loc_of(self);
+
+        return SIZET2NUM(uatomic_read(&loc->total));
+}
+
+/*
+ * The maximum age (in GC generations) an allocation from this location
+ * reached before it was freed.  This does not account for live allocations.
+ */
+static VALUE src_loc_max_lifespan(VALUE self)
+{
+        struct src_loc *loc = src_loc_of(self);
+
+        return SIZET2NUM(uatomic_read(&loc->max_lifespan));
+}
+
+/* rb_ensure cleanup defined later in this file; undoes one ++locating */
+static VALUE reset_locating(VALUE);
+
+/* rb_ensure body for src_loc_name; +p+ is a struct src_loc pointer */
+static VALUE src_loc_name_i(VALUE p)
+{
+        return location_string((const struct src_loc *)p);
+}
+
+/*
+ * Returns a frozen String location of the given SourceLocation object.
+ */
+static VALUE src_loc_name(VALUE self)
+{
+        struct src_loc *l = src_loc_of(self);
+
+        /*
+         * location_string creates Ruby Strings and calls a Ruby method, any
+         * of which may raise.  Go through rb_ensure (as Mwrap.quiet does) so
+         * that locating is decremented again on every exit path.
+         */
+        ++locating;
+        return rb_ensure(src_loc_name_i, (VALUE)l, reset_locating, 0);
+}
+
+/* rb_ensure cleanup for mwrap_quiet: undoes its ++locating; argument unused */
+static VALUE reset_locating(VALUE ign)
+{
+        (void)ign;
+        locating--;
+
+        return Qfalse;
+}
+
+/*
+ * call-seq:
+ *
+ *        Mwrap.quiet do |depth|
+ *          # expensive sort/calculate/emitting results of Mwrap.each
+ *          # which would otherwise affect statistics of the rest of the app
+ *        end
+ *
+ * Stops allocation tracking inside the block.  This is useful for
+ * monitoring code which calls other Mwrap (or ObjectSpace/GC)
+ * functions which unavoidably allocate memory.  +depth+ is the value
+ * of the internal counter after entering the block; the counter is
+ * decremented again when the block exits, even by an exception.
+ *
+ * This feature was added in mwrap 2.0.0+
+ */
+static VALUE mwrap_quiet(VALUE mod)
+{
+        const size_t depth = ++locating;
+        VALUE yielded = SIZET2NUM(depth);
+
+        return rb_ensure(rb_yield, yielded, reset_locating, 0);
+}
+
+/*
+ * Returns the total number of bytes allocated, as tracked by mwrap
+ */
+static VALUE total_inc(VALUE mod)
+{
+        const VALUE bytes = SIZET2NUM(total_bytes_inc);
+
+        return bytes;
+}
+
+/*
+ * Returns the total number of bytes freed, as tracked by mwrap
+ */
+static VALUE total_dec(VALUE mod)
+{
+        const VALUE bytes = SIZET2NUM(total_bytes_dec);
+
+        return bytes;
+}
+
+/*
+ * Document-module: Mwrap
+ *
+ *   require 'mwrap'
+ *
+ * Mwrap has a dual function as both a Ruby C extension and LD_PRELOAD
+ * wrapper.  As a Ruby C extension, it exposes a limited Ruby API.
+ * To be effective at gathering status, mwrap must be loaded as a
+ * LD_PRELOAD (using the mwrap(1) executable makes it easy)
+ *
+ * ENVIRONMENT
+ *
+ * The "MWRAP" environment variable contains a comma-delimited list
+ * of key:value options for automatically dumping at program exit.
+ *
+ * * dump_fd: a writable FD to dump to
+ * * dump_path: a path to dump to, the file is opened in O_APPEND mode
+ * * dump_min: the minimum allocation size (total) to dump
+ *
+ * If both `dump_fd' and `dump_path' are specified, dump_path takes
+ * precedence.
+ */
+void Init_mwrap(void)
+{
+        VALUE mod;
+
+        /*
+         * Same counter Mwrap.quiet raises to stop allocation tracking;
+         * presumably this keeps the definitions below out of the
+         * statistics -- TODO confirm.  Undone at the end of this function.
+         */
+        ++locating;
+        mod = rb_define_module("Mwrap");
+        /* the "-@" method is what location_string calls to deduplicate */
+        id_uminus = rb_intern("-@");
+
+        /*
+         * Represents a location in source code or library
+         * address which calls a memory allocation.  It is
+         * updated automatically as allocations are made, so
+         * there is no need to reload or reread it from Mwrap#[].
+         * This class is only available since mwrap 2.0.0+.
+         */
+        cSrcLoc = rb_define_class_under(mod, "SourceLocation", rb_cObject);
+        /* no allocator: in this file, instances are only made by mwrap_aref */
+        rb_undef_alloc_func(cSrcLoc);
+        rb_define_singleton_method(mod, "dump", mwrap_dump, -1);
+        /* "reset" and "clear" are two names for the same function */
+        rb_define_singleton_method(mod, "reset", reset_m, 0);
+        rb_define_singleton_method(mod, "clear", reset_m, 0);
+        rb_define_singleton_method(mod, "each", mwrap_each, -1);
+        rb_define_singleton_method(mod, "[]", mwrap_aref, 1);
+        rb_define_singleton_method(mod, "quiet", mwrap_quiet, 0);
+        rb_define_singleton_method(mod, "total_bytes_allocated", total_inc, 0);
+        rb_define_singleton_method(mod, "total_bytes_freed", total_dec, 0);
+
+
+        /* instance methods of Mwrap::SourceLocation */
+        rb_define_method(cSrcLoc, "each", src_loc_each, 0);
+        rb_define_method(cSrcLoc, "frees", src_loc_frees, 0);
+        rb_define_method(cSrcLoc, "allocations", src_loc_allocations, 0);
+        rb_define_method(cSrcLoc, "total", src_loc_total, 0);
+        rb_define_method(cSrcLoc, "mean_lifespan", src_loc_mean_lifespan, 0);
+        rb_define_method(cSrcLoc, "max_lifespan", src_loc_max_lifespan, 0);
+        rb_define_method(cSrcLoc, "name", src_loc_name, 0);
+
+        --locating;
+}